Skip to content

Commit bde99f0

Browse files
authored
Add suffix array (TheAlgorithms#383)
1 parent 9f194ca commit bde99f0

File tree

3 files changed

+98
-0
lines changed

3 files changed

+98
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ These are for demonstration purposes only.
120120
- [x] [Run Length Encoding](./src/string/run_length_encoding.rs)
121121
- [x] [Hamming Distance](./src/string/hamming_distance.rs)
122122
- [x] [Suffix Tree](./src/string/suffix_tree.rs)
123+
- [x] [Suffix Array](./src/string/suffix_array.rs)
123124

124125
## [General](./src/general)
125126

src/string/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod manacher;
66
mod rabin_karp;
77
mod reverse;
88
mod run_length_encoding;
9+
mod suffix_array;
910
mod suffix_tree;
1011
mod z_algorithm;
1112

@@ -19,6 +20,7 @@ pub use self::manacher::manacher;
1920
pub use self::rabin_karp::rabin_karp;
2021
pub use self::reverse::reverse;
2122
pub use self::run_length_encoding::{run_length_decoding, run_length_encoding};
23+
pub use self::suffix_array::generate_suffix_array;
2224
pub use self::suffix_tree::{Node, SuffixTree};
2325
pub use self::z_algorithm::match_pattern;
2426
pub use self::z_algorithm::z_array;

src/string/suffix_array.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// In computer science, a suffix array is a sorted array of all suffixes of a string.
2+
// It is a data structure used in, among others, full-text indices, data-compression algorithms,
3+
// and the field of bibliometrics. Source: https://en.wikipedia.org/wiki/Suffix_array
4+
5+
use std::cmp::Ordering;
6+
7+
/// A suffix of the input text together with the sort key used while the suffix array is built.
#[derive(Clone)]
struct Suffix {
    /// Position, counted in `char`s, at which this suffix starts in the input.
    index: usize,
    /// Sort key for the current doubling round.
    ///
    /// The first component ranks the first half of the prefix being compared; the second ranks
    /// the half that follows it. `None` means the suffix is too short to have a second half;
    /// because `None < Some(_)`, a shorter suffix sorts before any longer one sharing its prefix.
    rank: (usize, Option<usize>),
}

impl Suffix {
    /// Compares two suffixes by their rank pair: first component, then second.
    ///
    /// Equal pairs yield `Ordering::Equal`, so this is a total order as `sort_by` requires.
    fn cmp(&self, b: &Self) -> Ordering {
        self.rank.cmp(&b.rank)
    }
}

/// Builds the suffix array of `txt` by prefix doubling.
///
/// The result lists the starting positions of all suffixes of `txt` in ascending lexicographic
/// order, where characters are compared by Unicode scalar value. Positions are counted in
/// `char`s, which equals the byte offset for ASCII input. An empty input yields an empty vector.
///
/// Each round sorts all suffixes and the compared prefix length doubles per round, giving
/// O(n log² n) comparisons overall for `n` characters.
pub fn generate_suffix_array(txt: &str) -> Vec<usize> {
    // Decode once so every character lookup below is O(1) and `n` counts chars, not bytes.
    let chars: Vec<char> = txt.chars().collect();
    let n = chars.len();

    // Initial key: the suffix's first character and, when present, its second character.
    let mut suffixes: Vec<Suffix> = chars
        .iter()
        .enumerate()
        .map(|(index, &c)| Suffix {
            index,
            rank: (c as usize, chars.get(index + 1).map(|&next| next as usize)),
        })
        .collect();
    suffixes.sort_by(|a, b| a.cmp(b));

    // ind[start] = current position in `suffixes` of the suffix that starts at `start`.
    let mut ind = vec![0; n];
    // At the start of each round the suffixes are sorted by their first k / 2 characters;
    // the round extends that to the first k characters.
    let mut k = 4;
    while k < 2 * n {
        // Compress the rank pairs into dense ranks: neighbours with identical pairs share a rank.
        let mut rank = 0;
        let mut prev_key = suffixes[0].rank;
        suffixes[0].rank.0 = rank;
        ind[suffixes[0].index] = 0;
        for i in 1..n {
            // Read the pair before overwriting its first component.
            let key = suffixes[i].rank;
            if key != prev_key {
                rank += 1;
            }
            prev_key = key;
            suffixes[i].rank.0 = rank;
            ind[suffixes[i].index] = i;
        }

        // The second component becomes the dense rank of the suffix starting k / 2 characters
        // further on, or `None` when that start lies past the end of the text.
        let half = k / 2;
        for i in 0..n {
            let next_index = suffixes[i].index + half;
            let second = ind.get(next_index).map(|&pos| suffixes[pos].rank.0);
            suffixes[i].rank.1 = second;
        }

        suffixes.sort_by(|a, b| a.cmp(b));
        k *= 2;
    }

    suffixes.into_iter().map(|suf| suf.index).collect()
}
85+
86+
#[cfg(test)]
mod tests {
    use super::*;

    /// The suffixes of "banana" in sorted order are
    /// "a", "ana", "anana", "banana", "na", "nana", starting at 5, 3, 1, 0, 4, 2.
    #[test]
    fn test_suffix_array() {
        let expected: Vec<usize> = vec![5, 3, 1, 0, 4, 2];
        let actual = generate_suffix_array("banana");
        assert_eq!(actual, expected);
    }
}

0 commit comments

Comments
 (0)