|
/// A van Emde Boas tree (vEB tree): a set of integers drawn from the range `[0, U)`, where `U`
/// is a power of two.
///
/// It supports insert, search, predecessor and successor. The classic structure answers each
/// of them in `O(log(log(U)))` time and takes `O(U)` space. The whole tree is allocated
/// eagerly by [`VebTree::new`].
///
/// The minimum of every (sub)tree lives only in its `min` field; it is never pushed down into a
/// cluster. The queries below rely on that invariant.
#[derive(Debug)]
pub struct VebTree {
    /// Universe size `U` of this (sub)tree; always a power of two.
    size: u32,
    /// Ceiling of the square root of `size`. Cached here to avoid recomputation. It is both the
    /// requested universe size of every cluster and of the summary.
    child_size: u32,
    /// Smallest stored value; `u32::MAX` while the tree is empty.
    min: u32,
    /// Largest stored value; `u32::MIN` while the tree is empty.
    max: u32,
    /// Records which clusters are non-empty. `None` for base-case trees (`size <= 2`).
    summary: Option<Box<VebTree>>,
    /// Cluster `i` stores the low parts of all values whose high part is `i`. Empty for
    /// base-case trees.
    cluster: Vec<VebTree>,
}

impl VebTree {
    /// Creates a new, empty VEB tree able to hold the values `0..U`, where `U` is `size` rounded
    /// up to the nearest power of two.
    ///
    /// # Panics
    ///
    /// Panics if `size` is larger than `2^31`, because the rounded size would not fit in `u32`.
    pub fn new(size: u32) -> VebTree {
        let rounded_size = size
            .checked_next_power_of_two()
            .expect("VebTree size must not exceed 2^31");
        // Derive the split from the *rounded* size: every value below `rounded_size` is
        // accepted by `insert`, so its high part must fit into the summary.
        let child_size = f64::from(rounded_size).sqrt().ceil() as u32;

        let mut summary = None;
        let mut cluster: Vec<VebTree> = Vec::new();
        if rounded_size > 2 {
            // Only ceil(rounded_size / child_size) clusters are ever addressed by `high()`.
            // That count never exceeds `child_size`, so a summary of that size covers them.
            let cluster_count = (rounded_size - 1) / child_size + 1;
            cluster = (0..cluster_count)
                .map(|_| VebTree::new(child_size))
                .collect();
            summary = Some(Box::new(VebTree::new(child_size)));
        }

        VebTree {
            size: rounded_size,
            child_size,
            min: u32::MAX,
            max: u32::MIN,
            summary,
            cluster,
        }
    }

    /// Index of the cluster that `value` belongs to.
    fn high(&self, value: u32) -> u32 {
        value / self.child_size
    }

    /// Position of `value` inside its cluster.
    fn low(&self, value: u32) -> u32 {
        value % self.child_size
    }

    /// Inverse of `high`/`low`: rebuilds a value from a cluster index and an offset.
    fn index(&self, cluster: u32, offset: u32) -> u32 {
        cluster * self.child_size + offset
    }

    /// Returns the smallest stored value, or the sentinel `u32::MAX` if the tree is empty.
    /// Check [`VebTree::empty`] first to tell the two apart.
    pub fn min(&self) -> u32 {
        self.min
    }

    /// Returns the largest stored value, or the sentinel `u32::MIN` if the tree is empty.
    /// Check [`VebTree::empty`] first to tell the two apart.
    pub fn max(&self) -> u32 {
        self.max
    }

    /// Returns an iterator over the stored values in ascending order.
    pub fn iter(&self) -> VebTreeIter<'_> {
        VebTreeIter::new(self)
    }

    /// Returns `true` if the tree holds no values. A VEB tree is empty exactly when its min is
    /// greater than its max.
    pub fn empty(&self) -> bool {
        self.min > self.max
    }

    /// Returns `true` if `value` is in the tree, `false` otherwise.
    pub fn search(&self, value: u32) -> bool {
        if self.empty() || value < self.min || value > self.max {
            return false;
        }
        if value == self.min || value == self.max {
            return true;
        }
        // Strictly between min and max: only possible in a non-base tree, where the value (if
        // present) must live in its cluster.
        self.cluster[self.high(value) as usize].search(self.low(value))
    }

    /// Stores `value` as the only element of a currently empty tree.
    fn insert_empty(&mut self, value: u32) {
        assert!(self.empty(), "tree should be empty");
        self.min = value;
        self.max = value;
    }

    /// Inserts `value` into the tree. Inserting a value that is already present is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if `value` is not smaller than the (rounded) size of the tree.
    pub fn insert(&mut self, mut value: u32) {
        assert!(
            value < self.size,
            "value {} is out of range for a tree of size {}",
            value,
            self.size
        );

        if self.empty() {
            self.insert_empty(value);
            return;
        }

        if value == self.min || value == self.max {
            // Already stored. Pushing it into a cluster again would duplicate it and break the
            // "min is never stored in a cluster" invariant.
            return;
        }

        if value < self.min {
            // The new value becomes the min; the old min is the one pushed into a cluster.
            std::mem::swap(&mut value, &mut self.min);
        }

        if self.size > 2 {
            // Non base case. The min/max bookkeeping alone handles trees of size <= 2.
            let high = self.high(value);
            let low = self.low(value);
            if self.cluster[high as usize].empty() {
                // First element of this cluster: set it in O(1) and spend the recursive call on
                // recording the cluster in the summary.
                self.cluster[high as usize].insert_empty(low);
                if let Some(summary) = self.summary.as_mut() {
                    summary.insert(high);
                }
            } else {
                // The summary already knows about this cluster; recurse into the cluster only.
                self.cluster[high as usize].insert(low);
            }
        }

        if value > self.max {
            self.max = value;
        }
    }

    /// Returns the smallest stored value strictly greater than `pred` (its successor), or
    /// `None` if there is none. `pred` itself does not have to be stored in the tree.
    pub fn succ(&self, pred: u32) -> Option<u32> {
        if self.empty() || pred >= self.max {
            // Nothing is stored above the max. This also covers out-of-range queries.
            return None;
        }
        if pred < self.min {
            return Some(self.min);
        }

        // From here on: min <= pred < max.
        if self.size <= 2 {
            // Base case: the tree must be {0, 1} and pred must be 0.
            return Some(self.max);
        }

        let high = self.high(pred);
        let low = self.low(pred);
        let own_cluster = &self.cluster[high as usize];

        if !own_cluster.empty() && low < own_cluster.max {
            // The successor is within the same cluster as the predecessor.
            return own_cluster
                .succ(low)
                .map(|offset| self.index(high, offset));
        }

        // Otherwise the successor is the min of the next non-empty cluster, which the summary
        // finds for us. No such cluster means no successor.
        let next_cluster = self.summary.as_ref()?.succ(high)?;
        Some(self.index(next_cluster, self.cluster[next_cluster as usize].min))
    }

    /// Returns the largest stored value strictly smaller than `succ` (its predecessor), or
    /// `None` if there is none. `succ` itself does not have to be stored in the tree.
    ///
    /// This mirrors [`VebTree::succ`], except that the tree's min is not stored in any cluster
    /// and therefore needs its own fallback.
    pub fn pred(&self, succ: u32) -> Option<u32> {
        if self.empty() || succ <= self.min {
            return None;
        }
        if succ > self.max {
            return Some(self.max);
        }

        // From here on: min < succ <= max.
        if self.size <= 2 {
            // Base case: the tree must be {0, 1} and succ must be 1.
            return Some(self.min);
        }

        let high = self.high(succ);
        let low = self.low(succ);
        let own_cluster = &self.cluster[high as usize];

        if !own_cluster.empty() && low > own_cluster.min {
            // The predecessor is within the same cluster as the successor.
            return own_cluster
                .pred(low)
                .map(|offset| self.index(high, offset));
        }

        // Find the previous non-empty cluster; the predecessor is that cluster's max.
        match self.summary.as_ref()?.pred(high) {
            Some(prev_cluster) => {
                Some(self.index(prev_cluster, self.cluster[prev_cluster as usize].max))
            }
            // No earlier cluster holds anything. The tree's own min lives outside the clusters,
            // and we already know min < succ, so it is the predecessor.
            None => Some(self.min),
        }
    }
}

/// Ascending iterator over the values of a [`VebTree`], created by [`VebTree::iter`].
#[derive(Debug)]
pub struct VebTreeIter<'a> {
    /// Tree being walked.
    tree: &'a VebTree,
    /// Value returned by the next call to `next`, or `None` once iteration is finished.
    curr: Option<u32>,
}

impl<'a> VebTreeIter<'a> {
    /// Creates an iterator positioned at the minimum of `tree` (or already finished if the tree
    /// is empty).
    pub fn new(tree: &'a VebTree) -> VebTreeIter<'a> {
        let curr = if tree.empty() { None } else { Some(tree.min) };
        VebTreeIter { tree, curr }
    }
}

impl<'a> Iterator for VebTreeIter<'a> {
    type Item = u32;

    /// Yields the current value and advances to its successor in the tree.
    fn next(&mut self) -> Option<u32> {
        let curr = self.curr?;
        self.curr = self.tree.succ(curr);
        Some(curr)
    }
}
| 234 | + |
#[cfg(test)]
mod test {
    use super::VebTree;
    use rand::{rngs::StdRng, Rng, SeedableRng};

    /// Builds a tree of the given `size`, inserts `elements` (duplicates allowed) and checks:
    /// every inserted element is found, every value in `exclude` is not found, the iterator
    /// yields exactly the sorted distinct elements, and `succ`/`pred` agree with that order,
    /// including at both ends.
    fn test_veb_tree(size: u32, mut elements: Vec<u32>, exclude: Vec<u32>) {
        // Insert elements
        let mut tree = VebTree::new(size);
        for &element in &elements {
            tree.insert(element);
        }

        // Test search
        for &element in &elements {
            assert!(tree.search(element), "{} should be found", element);
        }
        for element in exclude {
            assert!(!tree.search(element), "{} should not be found", element);
        }

        // Test iterator, successor, and predecessor
        elements.sort_unstable();
        elements.dedup();
        // Compare the whole sequence so an iterator that stops early is detected too.
        assert_eq!(tree.iter().collect::<Vec<u32>>(), elements);
        for pair in elements.windows(2) {
            assert_eq!(tree.succ(pair[0]), Some(pair[1]));
            assert_eq!(tree.pred(pair[1]), Some(pair[0]));
        }
        // Nothing lies before the smallest or after the largest element.
        if let (Some(&first), Some(&last)) = (elements.first(), elements.last()) {
            assert_eq!(tree.pred(first), None);
            assert_eq!(tree.succ(last), None);
        }
    }

    #[test]
    fn test_empty() {
        test_veb_tree(16, Vec::new(), (0..16).collect());
    }

    #[test]
    fn test_single() {
        test_veb_tree(16, Vec::from([5]), (0..16).filter(|x| *x != 5).collect());
    }

    #[test]
    fn test_two() {
        test_veb_tree(
            16,
            Vec::from([4, 9]),
            (0..16).filter(|x| *x != 4 && *x != 9).collect(),
        );
    }

    #[test]
    fn test_repeat_insert() {
        let mut tree = VebTree::new(16);
        for _ in 0..5 {
            tree.insert(10);
        }
        assert!(tree.search(10));
        for element in (0..16).filter(|x| *x != 10) {
            assert!(!tree.search(element));
        }
    }

    #[test]
    fn test_linear() {
        test_veb_tree(16, (0..10).collect(), (10..16).collect());
    }

    /// Fills a tree of the given `size` with every value in `0..size`.
    fn test_full(size: u32) {
        test_veb_tree(size, (0..size).collect(), Vec::new());
    }

    #[test]
    fn test_full_small() {
        test_full(8);
        test_full(10);
        test_full(16);
        test_full(20);
        test_full(32);
    }

    #[test]
    fn test_full_256() {
        test_full(256);
    }

    #[test]
    fn test_10_256() {
        let mut rng = StdRng::seed_from_u64(0);
        let elements: Vec<u32> = (0..10).map(|_| rng.gen_range(0..255)).collect();
        test_veb_tree(256, elements, Vec::new());
    }

    #[test]
    fn test_100_256() {
        let mut rng = StdRng::seed_from_u64(0);
        let elements: Vec<u32> = (0..100).map(|_| rng.gen_range(0..255)).collect();
        test_veb_tree(256, elements, Vec::new());
    }

    #[test]
    fn test_100_300() {
        let mut rng = StdRng::seed_from_u64(0);
        let elements: Vec<u32> = (0..100).map(|_| rng.gen_range(0..255)).collect();
        test_veb_tree(300, elements, Vec::new());
    }
}
0 commit comments