From bde99f0eefbf01ae7a9f0450c8c3a026dbbe5566 Mon Sep 17 00:00:00 2001 From: pwygab <88221256+merelymyself@users.noreply.github.com> Date: Mon, 26 Sep 2022 12:33:27 +0800 Subject: [PATCH 1/2] Add suffix array (#383) --- README.md | 1 + src/string/mod.rs | 2 + src/string/suffix_array.rs | 95 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 src/string/suffix_array.rs diff --git a/README.md b/README.md index 83b9e5ff8c5..1f668657f40 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ These are for demonstration purposes only. - [x] [Run Length Encoding](.src/string/run_length_encoding.rs) - [x] [Hamming Distance](./src/string/hamming_distance.rs) - [x] [Suffix Tree](./src/string/suffix_tree.rs) +- [x] [Suffix Array](./src/string/suffix_array.rs) ## [General](./src/general) diff --git a/src/string/mod.rs b/src/string/mod.rs index 931999822ca..b8e6feefc36 100644 --- a/src/string/mod.rs +++ b/src/string/mod.rs @@ -6,6 +6,7 @@ mod manacher; mod rabin_karp; mod reverse; mod run_length_encoding; +mod suffix_array; mod suffix_tree; mod z_algorithm; @@ -19,6 +20,7 @@ pub use self::manacher::manacher; pub use self::rabin_karp::rabin_karp; pub use self::reverse::reverse; pub use self::run_length_encoding::{run_length_decoding, run_length_encoding}; +pub use self::suffix_array::generate_suffix_array; pub use self::suffix_tree::{Node, SuffixTree}; pub use self::z_algorithm::match_pattern; pub use self::z_algorithm::z_array; diff --git a/src/string/suffix_array.rs b/src/string/suffix_array.rs new file mode 100644 index 00000000000..a89575fc8e8 --- /dev/null +++ b/src/string/suffix_array.rs @@ -0,0 +1,95 @@ +// In computer science, a suffix array is a sorted array of all suffixes of a string. +// It is a data structure used in, among others, full-text indices, data-compression algorithms, +// and the field of bibliometrics. 
// Source: https://en.wikipedia.org/wiki/Suffix_array

use std::cmp::Ordering;

/// One suffix of the input text together with the sort key used by the
/// prefix-doubling construction in [`generate_suffix_array`].
#[derive(Clone, Debug)]
struct Suffix {
    /// Byte offset in the text at which this suffix starts.
    index: usize,
    /// `(rank of the first half, rank of the second half)` of the prefix
    /// currently being compared. The second half is `None` when it starts
    /// past the end of the text; `None` orders before every `Some`, so a
    /// shorter suffix sorts before a longer one that it is a prefix of.
    rank: (usize, Option<usize>),
}

impl Suffix {
    /// Orders two suffixes by their rank pair.
    ///
    /// Equal pairs compare as `Ordering::Equal`, so this is a total order,
    /// which `sort_by` requires of its comparator.
    fn cmp(&self, b: &Self) -> Ordering {
        self.rank.cmp(&b.rank)
    }
}

/// Builds the suffix array of `txt`: the start offsets of all of its suffixes,
/// listed in lexicographic order of the suffixes.
///
/// Suffixes are compared byte-wise and the returned offsets are byte offsets
/// into `txt`, so any UTF-8 input is accepted (for ASCII text, bytes and
/// characters coincide). An empty string yields an empty vector.
///
/// The construction is prefix doubling: suffixes are first sorted by their
/// first two bytes, then repeatedly re-sorted by twice as many bytes, reusing
/// the ranks from the previous round as sort keys. With a comparison sort per
/// round this is `O(n log^2 n)`.
pub fn generate_suffix_array(txt: &str) -> Vec<usize> {
    let bytes = txt.as_bytes();
    let n = bytes.len();

    // Round 0: the key of each suffix is its first two bytes.
    let mut suffixes: Vec<Suffix> = (0..n)
        .map(|i| Suffix {
            index: i,
            rank: (
                usize::from(bytes[i]),
                bytes.get(i + 1).map(|&b| usize::from(b)),
            ),
        })
        .collect();
    suffixes.sort_by(|a, b| a.cmp(b));

    // ind[start offset] = current position of that suffix in `suffixes`.
    let mut ind = vec![0; n];
    // Invariant: at the top of the loop the suffixes are sorted by their first
    // `k / 2` bytes; one pass extends that to the first `k` bytes.
    let mut k = 4;
    while k < 2 * n {
        // Compress the (first, second) pairs into dense ranks: neighbours in
        // sorted order that carry the same pair share a rank.
        let mut rank = 0;
        let mut prev: Option<(usize, Option<usize>)> = None;
        for (pos, suf) in suffixes.iter_mut().enumerate() {
            if matches!(prev, Some(p) if p != suf.rank) {
                rank += 1;
            }
            prev = Some(suf.rank);
            suf.rank.0 = rank;
            ind[suf.index] = pos;
        }

        // The second key of a suffix is the fresh rank of the suffix that
        // starts `k / 2` bytes further on, if there is one.
        let half = k / 2;
        for i in 0..n {
            let next_index = suffixes[i].index + half;
            let second = (next_index < n).then(|| suffixes[ind[next_index]].rank.0);
            suffixes[i].rank.1 = second;
        }

        suffixes.sort_by(|a, b| a.cmp(b));
        k *= 2;
    }

    suffixes.into_iter().map(|suf| suf.index).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_suffix_array() {
        let a = generate_suffix_array("banana");
        assert_eq!(a, vec![5, 3, 1, 0, 4, 2]);
    }
}

// Start of the mail header of the next patch in this series (it continues on
// the following line of the patch file):
// From 5290810db989b6c457bd66af8735d9124e712f60 Mon Sep 17 00:00:00 2001 From: kou
<109428396+kou-sia@users.noreply.github.com> Date: Mon, 26 Sep 2022 13:44:46 +0900 Subject: [PATCH 2/2] Add sleep_sort (#384) --- README.md | 1 + src/sorting/README.md | 13 +++++- src/sorting/mod.rs | 2 + src/sorting/sleep_sort.rs | 88 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 src/sorting/sleep_sort.rs diff --git a/README.md b/README.md index 1f668657f40..a848d8b7bcc 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ These are for demonstration purposes only. - [x] [Comb](./src/sorting/comb_sort.rs) - [x] [Bucket](./src/sorting/bucket_sort.rs) - [x] [Timsort](./src/sorting/tim_sort.rs) +- [x] [Sleep](./src/sorting/sleep_sort.rs) ## [Graphs](./src/graph) diff --git a/src/sorting/README.md b/src/sorting/README.md index ed7feb42cac..e90a21ebab0 100644 --- a/src/sorting/README.md +++ b/src/sorting/README.md @@ -180,8 +180,14 @@ __Properties__ From [Wikipedia][tim-wiki]: Timsort is a hybrid stable sorting algorithm, derived from merge sort and insertion sort, designed to perform well on many kinds of real-world data. It was implemented by Tim Peters in 2002 for use in the Python programming language. The algorithm finds subsequences of the data that are already ordered (runs) and uses them to sort the remainder more efficiently. This is done by merging runs until certain criteria are fulfilled. Timsort has been Python's standard sorting algorithm since version 2.3. It is also used to sort arrays of non-primitive type in Java SE 7, on the Android platform, in GNU Octave, on V8, Swift, and Rust. 
__Properties__ -* Worst-case performance O(n log n) -* Best-case performance O(n) +* Worst-case performance O(max element size(ms)) +* Best-case performance O(max element size(ms)) + +### [Sleep](./sleep_sort.rs) +![alt text][sleep-image] + +From [Wikipedia][sleep-sort-wiki]: This is an idea that was originally posted on the message board 4chan, replacing the bucket in bucket sort with time instead of memory space. +Sorting takes roughly "largest element x unit sleep time" to complete, so it is only useful as a demonstration. [bogo-wiki]: https://en.wikipedia.org/wiki/Bogosort [bogo-image]: https://upload.wikimedia.org/wikipedia/commons/7/7b/Bogo_sort_animation.gif @@ -235,3 +241,6 @@ __Properties__ [comb-sort]: https://upload.wikimedia.org/wikipedia/commons/4/46/Comb_sort_demo.gif [comb-sort-wiki]: https://en.wikipedia.org/wiki/Comb_sort + +[sleep-sort]: +[sleep-sort-wiki]: https://ja.m.wikipedia.org/wiki/バケットソート#.E3.82.B9.E3.83.AA.E3.83.BC.E3.83.97.E3.82.BD.E3.83.BC.E3.83.88 diff --git a/src/sorting/mod.rs b/src/sorting/mod.rs index aa33979b819..aba3368e0f9 100644 --- a/src/sorting/mod.rs +++ b/src/sorting/mod.rs @@ -17,6 +17,7 @@ mod quick_sort; mod radix_sort; mod selection_sort; mod shell_sort; +mod sleep_sort; mod stooge_sort; mod tim_sort; @@ -41,6 +42,7 @@ pub use self::quick_sort::{partition, quick_sort}; pub use self::radix_sort::radix_sort; pub use self::selection_sort::selection_sort; pub use self::shell_sort::shell_sort; +pub use self::sleep_sort::sleep_sort; pub use self::stooge_sort::stooge_sort; pub use self::tim_sort::tim_sort; diff --git a/src/sorting/sleep_sort.rs b/src/sorting/sleep_sort.rs new file mode 100644 index 00000000000..ca019d860d8 --- /dev/null +++ b/src/sorting/sleep_sort.rs @@ -0,0 +1,88 @@ +use std::sync::mpsc; +use std::thread; +use std::time::Duration; + +pub fn sleep_sort(vec: &[usize]) -> Vec { + let len = vec.len(); + let (tx, rx) = mpsc::channel(); + + for &x in vec.iter() { + let tx: 
mpsc::Sender = tx.clone(); + thread::spawn(move || { + thread::sleep(Duration::from_millis((10 * x) as u64)); + tx.send(x).expect("panic"); + }); + } + let mut sorted_list: Vec = Vec::new(); + + for _ in 0..len { + sorted_list.push(rx.recv().unwrap()) + } + + sorted_list +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty() { + let mut arr: Vec = Vec::new(); + let res = sleep_sort(&mut arr); + assert_eq!(res, &[]); + } + + #[test] + fn single_element() { + let mut arr = vec![1]; + let res = sleep_sort(&mut arr); + assert_eq!(res, &[1]); + } + + #[test] + fn sorted_array() { + let mut arr = vec![1, 2, 3, 4]; + let res = sleep_sort(&mut arr); + assert_eq!(res, &[1, 2, 3, 4]); + } + + #[test] + fn unsorted_array() { + let mut arr = vec![3, 4, 2, 1]; + let res = sleep_sort(&mut arr); + assert_eq!(res, &[1, 2, 3, 4]); + } + + #[test] + fn odd_number_of_elements() { + let mut arr = vec![3, 4, 2, 1, 7]; + let res = sleep_sort(&mut arr); + assert_eq!(res, &[1, 2, 3, 4, 7]); + } + + #[test] + fn repeated_elements() { + let mut arr = vec![542, 542, 542, 542]; + let res = sleep_sort(&mut arr); + assert_eq!(res, &[542, 542, 542, 542]); + } + + #[test] + fn random_elements() { + let mut arr = vec![ + 52, 958, 385, 130, 687, 86, 480, 329, 269, 648, 112, 286, 222, 844, 463, 982, 571, 104, + 491, 223, 791, 90, 43, 884, 518, 680, 347, 822, 505, 778, 62, 743, 775, 8, 357, 532, + 53, 680, 32, 271, 267, 306, 20, 915, 374, 477, 272, 638, 18, 299, + ]; + let res = sleep_sort(&mut arr); + assert_eq!( + res, + &[ + 8, 18, 20, 32, 43, 52, 53, 62, 86, 90, 104, 112, 130, 222, 223, 267, 269, 271, 272, + 286, 299, 306, 329, 347, 357, 374, 385, 463, 477, 480, 491, 505, 518, 532, 571, + 638, 648, 680, 680, 687, 743, 775, 778, 791, 822, 844, 884, 915, 958, 982 + ] + ); + } +}