Skip to content

Commit 7662e11

Browse files
authored
Add Van Emde Boas (VEB) tree (TheAlgorithms#506)
1 parent 47962ca commit 7662e11

File tree

2 files changed

+344
-0
lines changed

2 files changed

+344
-0
lines changed

src/data_structures/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ mod stack_using_singly_linked_list;
1515
mod treap;
1616
mod trie;
1717
mod union_find;
18+
mod veb_tree;
1819

1920
pub use self::avl_tree::AVLTree;
2021
pub use self::b_tree::BTree;
@@ -33,3 +34,4 @@ pub use self::stack_using_singly_linked_list::Stack;
3334
pub use self::treap::Treap;
3435
pub use self::trie::Trie;
3536
pub use self::union_find::UnionFind;
37+
pub use self::veb_tree::VebTree;

src/data_structures/veb_tree.rs

Lines changed: 342 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
/// A Van Emde Boas (VEB) tree: a set of integers drawn from the universe `[0, U)`, where `U`
/// is a power of two.
///
/// Insert, search, predecessor and successor each descend into a sub-tree whose universe is
/// about the square root of the current one, which is what yields the O(log(log(U))) running
/// time. The structure is allocated eagerly and takes space proportional to the universe size,
/// not to the number of stored elements.
///
/// Invariant: the minimum of a non-empty tree is stored only in `min`, never in a cluster. The
/// maximum is stored in `max` and, unless it equals the minimum, also in its cluster.
#[derive(Debug)]
pub struct VebTree {
    /// Universe size `U` of this node. Always a power of two.
    size: u32,
    /// Universe size of every cluster: the "upper square root" of `size`, `2^ceil(log2(U) / 2)`.
    /// Cached here to avoid recomputation.
    child_size: u32,
    /// Smallest stored value. `u32::MAX` while the tree is empty.
    min: u32,
    /// Largest stored value. `u32::MIN` while the tree is empty.
    max: u32,
    /// Tracks which clusters are non-empty. `None` for base-case trees (`size <= 2`).
    summary: Option<Box<VebTree>>,
    /// `size / child_size` sub-trees, each covering `child_size` consecutive values.
    /// Empty for base-case trees (`size <= 2`).
    cluster: Vec<VebTree>,
}

impl VebTree {
    /// Creates a new, empty VEB tree able to hold the values `0..size`, where `size` is rounded
    /// up to the nearest power of two (a requested size of 0 is treated as 1).
    ///
    /// # Panics
    ///
    /// Panics if `size` exceeds `2^31`, because the rounded universe size would not fit in `u32`.
    pub fn new(size: u32) -> VebTree {
        let rounded_size = size
            .checked_next_power_of_two()
            .expect("VebTree universe size must not exceed 2^31");

        // `rounded_size == 2^bits`. Split the bits into an upper half (cluster index) and a
        // lower half (offset inside the cluster). Both derived sizes are powers of two, and
        // `cluster_count * child_size == rounded_size`, so every value of the universe maps to
        // a valid cluster and a valid offset.
        let bits = rounded_size.trailing_zeros();
        let child_size = 1u32 << (bits - bits / 2);
        let cluster_count = rounded_size / child_size;

        // Universes of size 1 or 2 are fully described by `min` and `max`.
        let (summary, cluster) = if rounded_size > 2 {
            let summary = Box::new(VebTree::new(cluster_count));
            let cluster: Vec<VebTree> = (0..cluster_count)
                .map(|_| VebTree::new(child_size))
                .collect();
            (Some(summary), cluster)
        } else {
            (None, Vec::new())
        };

        VebTree {
            size: rounded_size,
            child_size,
            min: u32::MAX,
            max: u32::MIN,
            summary,
            cluster,
        }
    }

    /// Index of the cluster that `value` belongs to.
    fn high(&self, value: u32) -> u32 {
        value / self.child_size
    }

    /// Offset of `value` inside its cluster.
    fn low(&self, value: u32) -> u32 {
        value % self.child_size
    }

    /// Inverse of `high`/`low`: rebuilds a value from its cluster index and offset.
    fn index(&self, cluster: u32, offset: u32) -> u32 {
        cluster * self.child_size + offset
    }

    /// Returns the smallest stored value. For an empty tree this is the sentinel `u32::MAX`;
    /// check `empty()` first.
    pub fn min(&self) -> u32 {
        self.min
    }

    /// Returns the largest stored value. For an empty tree this is the sentinel `u32::MIN`;
    /// check `empty()` first.
    pub fn max(&self) -> u32 {
        self.max
    }

    /// Returns an iterator over the stored values in ascending order.
    pub fn iter(&self) -> VebTreeIter<'_> {
        VebTreeIter::new(self)
    }

    /// Returns true if the tree holds no values. A VEB tree is empty exactly when its min is
    /// greater than its max, which is how `new` initialises it.
    pub fn empty(&self) -> bool {
        self.min > self.max
    }

    /// Returns true if `value` is in the tree, false otherwise. Values outside the universe
    /// are simply reported as absent.
    pub fn search(&self, value: u32) -> bool {
        if self.empty() {
            return false;
        } else if value == self.min || value == self.max {
            return true;
        } else if value < self.min || value > self.max {
            return false;
        }
        // Here min < value < max, which is impossible in a universe of size <= 2, so the
        // cluster vector is guaranteed to be populated.
        self.cluster[self.high(value) as usize].search(self.low(value))
    }

    /// Stores `value` as the only element of a tree that is currently empty.
    fn insert_empty(&mut self, value: u32) {
        assert!(self.empty(), "tree should be empty");
        self.min = value;
        self.max = value;
    }

    /// Inserts `value` into the tree. Inserting a value that is already present is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if `value` is not smaller than the (rounded) universe size.
    pub fn insert(&mut self, mut value: u32) {
        assert!(value < self.size, "value is outside the tree's universe");

        if self.empty() {
            self.insert_empty(value);
            return;
        }

        if value == self.min || value == self.max {
            // Already present. Returning early keeps the minimum out of the clusters.
            return;
        }

        if value < self.min {
            // The new value becomes the minimum; the old minimum is the value that now has to
            // be pushed down into a cluster.
            (value, self.min) = (self.min, value);
        }

        if self.size > 2 {
            // Non base case. Trees of size <= 2 are fully handled by the min/max updates.
            let high = self.high(value);
            let low = self.low(value);
            let cluster = &mut self.cluster[high as usize];
            if cluster.empty() {
                // First value in this cluster: a constant-time insert, plus one recursive
                // insert recording in the summary that the cluster is now occupied.
                cluster.insert_empty(low);
                if let Some(summary) = self.summary.as_mut() {
                    summary.insert(high);
                }
            } else {
                // The summary already knows about this cluster; recurse into it only.
                cluster.insert(low);
            }
        }

        if value > self.max {
            self.max = value;
        }
    }

    /// Returns the smallest stored value strictly greater than `pred` (its successor), or
    /// `None` if there is none. `pred` does not have to be stored in the tree, nor even lie
    /// inside the universe.
    pub fn succ(&self, pred: u32) -> Option<u32> {
        if self.empty() {
            return None;
        }

        if pred < self.min {
            // Everything stored is greater than pred, so the successor is the minimum.
            return Some(self.min);
        }

        if pred >= self.max {
            // Nothing stored is greater than pred. This also covers out-of-universe queries.
            return None;
        }

        // From here on min <= pred < max.
        if self.size <= 2 {
            // Base case: the only possibility is min == 0, pred == 0, max == 1.
            return Some(self.max);
        }

        let low = self.low(pred);
        let high = self.high(pred);
        let own_cluster = &self.cluster[high as usize];

        if !own_cluster.empty() && low < own_cluster.max {
            // The successor is within the same cluster as pred.
            return own_cluster.succ(low).map(|offset| self.index(high, offset));
        }

        // Otherwise the successor is the minimum of the next non-empty cluster, which the
        // summary finds without scanning the clusters one by one.
        let next = self.summary.as_ref()?.succ(high)?;
        Some(self.index(next, self.cluster[next as usize].min))
    }

    /// Returns the largest stored value strictly smaller than `succ` (its predecessor), or
    /// `None` if there is none. `succ` does not have to be stored in the tree, nor even lie
    /// inside the universe. This is the mirror image of `succ()`; the one asymmetry is noted
    /// below.
    pub fn pred(&self, succ: u32) -> Option<u32> {
        if self.empty() {
            return None;
        }

        if succ > self.max {
            // Everything stored is smaller than succ, so the predecessor is the maximum.
            return Some(self.max);
        }

        if succ <= self.min {
            // Nothing stored is smaller than succ.
            return None;
        }

        // From here on min < succ <= max.
        if self.size <= 2 {
            // Base case: the only possibility is min == 0, succ == 1.
            return Some(self.min);
        }

        let low = self.low(succ);
        let high = self.high(succ);
        let own_cluster = &self.cluster[high as usize];

        if !own_cluster.empty() && low > own_cluster.min {
            // The predecessor is within the same cluster as succ.
            return own_cluster.pred(low).map(|offset| self.index(high, offset));
        }

        // Otherwise the predecessor is the maximum of the previous non-empty cluster.
        let prev_cluster = self.summary.as_ref()?.pred(high);
        Some(match prev_cluster {
            Some(prev) => self.index(prev, self.cluster[prev as usize].max),
            // Asymmetry with succ(): this tree's minimum is not stored in any cluster. If no
            // earlier cluster holds a value, the predecessor is that minimum, which is known
            // to be smaller than succ at this point.
            None => self.min,
        })
    }
}

/// Ascending iterator over the values stored in a [`VebTree`], created by [`VebTree::iter`].
#[derive(Debug)]
pub struct VebTreeIter<'a> {
    /// The tree being traversed.
    tree: &'a VebTree,
    /// The value the next call to `next()` will yield; `None` once the iterator is exhausted.
    curr: Option<u32>,
}

impl<'a> VebTreeIter<'a> {
    /// Creates an iterator positioned at the minimum of `tree` (or already exhausted if the
    /// tree is empty).
    pub fn new(tree: &'a VebTree) -> VebTreeIter<'a> {
        let curr = if tree.empty() { None } else { Some(tree.min) };
        VebTreeIter { tree, curr }
    }
}

impl<'a> Iterator for VebTreeIter<'a> {
    type Item = u32;

    /// Yields the current value and advances to its successor in the tree.
    fn next(&mut self) -> Option<u32> {
        let curr = self.curr?;
        self.curr = self.tree.succ(curr);
        Some(curr)
    }
}
234+
235+
#[cfg(test)]
mod test {
    use super::VebTree;
    use rand::{rngs::StdRng, Rng, SeedableRng};

    /// Builds a tree of the given `size`, inserts `elements` (duplicates allowed), and checks
    /// search, iteration, successor and predecessor against the sorted, de-duplicated input.
    /// Every value in `exclude` must be reported as absent.
    fn test_veb_tree(size: u32, mut elements: Vec<u32>, exclude: Vec<u32>) {
        // Insert elements
        let mut tree = VebTree::new(size);
        for element in elements.iter() {
            tree.insert(*element);
        }

        // Test search
        for element in elements.iter() {
            assert!(tree.search(*element));
        }
        for element in exclude {
            assert!(!tree.search(element));
        }

        // Test iterator, successor, and predecessor
        elements.sort();
        elements.dedup();
        // Compare the whole sequence so an iterator that stops early is caught too.
        let iterated: Vec<u32> = tree.iter().collect();
        assert_eq!(iterated, elements);
        for i in 1..elements.len() {
            assert_eq!(tree.succ(elements[i - 1]), Some(elements[i]));
            assert_eq!(tree.pred(elements[i]), Some(elements[i - 1]));
        }
    }

    #[test]
    fn test_empty() {
        test_veb_tree(16, Vec::new(), (0..16).collect());
    }

    #[test]
    fn test_single() {
        test_veb_tree(16, Vec::from([5]), (0..16).filter(|x| *x != 5).collect());
    }

    #[test]
    fn test_two() {
        test_veb_tree(
            16,
            Vec::from([4, 9]),
            (0..16).filter(|x| *x != 4 && *x != 9).collect(),
        );
    }

    #[test]
    fn test_repeat_insert() {
        let mut tree = VebTree::new(16);
        for _ in 0..5 {
            tree.insert(10);
        }
        assert!(tree.search(10));
        let elements: Vec<u32> = (0..16).filter(|x| *x != 10).collect();
        for element in elements {
            assert!(!tree.search(element));
        }
    }

    #[test]
    fn test_linear() {
        test_veb_tree(16, (0..10).collect(), (10..16).collect());
    }

    /// Fills a tree of the requested `size` with every value in `0..size`.
    fn test_full(size: u32) {
        test_veb_tree(size, (0..size).collect(), Vec::new());
    }

    #[test]
    fn test_full_small() {
        test_full(8);
        test_full(10);
        test_full(16);
        test_full(20);
        test_full(32);
    }

    #[test]
    fn test_full_256() {
        test_full(256);
    }

    #[test]
    fn test_10_256() {
        let mut rng = StdRng::seed_from_u64(0);
        let elements: Vec<u32> = (0..10).map(|_| rng.gen_range(0..255)).collect();
        test_veb_tree(256, elements, Vec::new());
    }

    #[test]
    fn test_100_256() {
        let mut rng = StdRng::seed_from_u64(0);
        let elements: Vec<u32> = (0..100).map(|_| rng.gen_range(0..255)).collect();
        test_veb_tree(256, elements, Vec::new());
    }

    #[test]
    fn test_100_300() {
        let mut rng = StdRng::seed_from_u64(0);
        let elements: Vec<u32> = (0..100).map(|_| rng.gen_range(0..255)).collect();
        test_veb_tree(300, elements, Vec::new());
    }
}

0 commit comments

Comments
 (0)