Skip to content

Commit aa3194f

Browse files
authored
fix(huffman): Handle edge cases and improve error handling (TheAlgorithms#912)
* fix(huffman): Handle edge cases and improve error handling - Change HuffmanDictionary::new() to return Option<Self> for safer API - Add proper handling for empty alphabet (returns None) - Add special case handling for single-symbol alphabets - Replace unwrap() calls with ? operator in decode() for better error handling - Add #[inline] hint for the frequently called get_bit() - Add comprehensive tests for edge cases - Improve documentation with usage examples BREAKING CHANGE: HuffmanDictionary::new() now returns Option<Self> * Test: Increase coverage for huffman_encoding.rs decode method Adds two new test cases to ensure 100% patch coverage for HuffmanEncoding::decode: 1. test_decode_empty_encoding_struct: Covers the edge case where num_bits == 0. 2. minimal_decode_end_check: Ensures the final 'if self.num_bits > 0' check in the multi-symbol decode path is fully covered. Corrects 'char-lit-as-u8' and 'unnecessary-cast' lints in the newly added coverage tests to satisfy GitHub Actions.
1 parent f2a23e9 commit aa3194f

File tree

1 file changed

+163
-14
lines changed

1 file changed

+163
-14
lines changed

src/general/huffman_encoding.rs

Lines changed: 163 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,50 @@ pub struct HuffmanDictionary<T> {
7777
}
7878

7979
impl<T: Clone + Copy + Ord> HuffmanDictionary<T> {
80-
/// The list of alphabet symbols and their respective frequency should
81-
/// be given as input
82-
pub fn new(alphabet: &[(T, u64)]) -> Self {
80+
/// Creates a new Huffman dictionary from alphabet symbols and their frequencies.
81+
///
82+
/// Returns `None` if the alphabet is empty.
83+
///
84+
/// # Arguments
85+
/// * `alphabet` - A slice of tuples containing symbols and their frequencies
86+
///
87+
/// # Example
88+
/// ```
89+
/// # use the_algorithms_rust::general::HuffmanDictionary;
90+
/// let freq = vec![('a', 5), ('b', 2), ('c', 1)];
91+
/// let dict = HuffmanDictionary::new(&freq).unwrap();
92+
/// ```
93+
pub fn new(alphabet: &[(T, u64)]) -> Option<Self> {
94+
if alphabet.is_empty() {
95+
return None;
96+
}
97+
8398
let mut alph: BTreeMap<T, HuffmanValue> = BTreeMap::new();
99+
100+
// Special case: single symbol
101+
if alphabet.len() == 1 {
102+
let (symbol, _freq) = alphabet[0];
103+
alph.insert(
104+
symbol,
105+
HuffmanValue {
106+
value: 0,
107+
bits: 1, // Must use at least 1 bit per symbol
108+
},
109+
);
110+
111+
let root = HuffmanNode {
112+
left: None,
113+
right: None,
114+
symbol: Some(symbol),
115+
frequency: alphabet[0].1,
116+
};
117+
118+
return Some(HuffmanDictionary {
119+
alphabet: alph,
120+
root,
121+
});
122+
}
123+
84124
let mut queue: BinaryHeap<HuffmanNode<T>> = BinaryHeap::new();
85125
for (symbol, freq) in alphabet.iter() {
86126
queue.push(HuffmanNode {
@@ -101,11 +141,14 @@ impl<T: Clone + Copy + Ord> HuffmanDictionary<T> {
101141
frequency: sm_freq,
102142
});
103143
}
104-
let root = queue.pop().unwrap();
105-
HuffmanNode::get_alphabet(0, 0, &root, &mut alph);
106-
HuffmanDictionary {
107-
alphabet: alph,
108-
root,
144+
if let Some(root) = queue.pop() {
145+
HuffmanNode::get_alphabet(0, 0, &root, &mut alph);
146+
Some(HuffmanDictionary {
147+
alphabet: alph,
148+
root,
149+
})
150+
} else {
151+
None
109152
}
110153
}
111154
pub fn encode(&self, data: &[T]) -> HuffmanEncoding {
@@ -143,27 +186,48 @@ impl HuffmanEncoding {
143186
}
144187
self.num_bits += data.bits as u64;
145188
}
189+
190+
#[inline]
146191
fn get_bit(&self, pos: u64) -> bool {
147192
(self.data[(pos >> 6) as usize] & (1 << (pos & 63))) != 0
148193
}
194+
149195
/// In case the encoding is invalid, `None` is returned
150196
pub fn decode<T: Clone + Copy + Ord>(&self, dict: &HuffmanDictionary<T>) -> Option<Vec<T>> {
197+
// Handle empty encoding
198+
if self.num_bits == 0 {
199+
return Some(vec![]);
200+
}
201+
202+
// Special case: single symbol in dictionary
203+
if dict.alphabet.len() == 1 {
204+
// Each encoded bit stands for one occurrence of the only symbol
205+
let symbol = dict.alphabet.keys().next()?;
206+
let result = vec![*symbol; self.num_bits as usize];
207+
return Some(result);
208+
}
209+
210+
// Normal case: multiple symbols
151211
let mut state = &dict.root;
152212
let mut result: Vec<T> = vec![];
213+
153214
for i in 0..self.num_bits {
154-
if state.symbol.is_some() {
155-
result.push(state.symbol.unwrap());
215+
if let Some(symbol) = state.symbol {
216+
result.push(symbol);
156217
state = &dict.root;
157218
}
158219
state = if self.get_bit(i) {
159-
state.right.as_ref().unwrap()
220+
state.right.as_ref()?
160221
} else {
161-
state.left.as_ref().unwrap()
222+
state.left.as_ref()?
162223
}
163224
}
225+
226+
// Check if we ended on a symbol
164227
if self.num_bits > 0 {
165228
result.push(state.symbol?);
166229
}
230+
167231
Some(result)
168232
}
169233
}
@@ -181,12 +245,97 @@ mod tests {
181245
.for_each(|(b, &cnt)| result.push((b as u8, cnt)));
182246
result
183247
}
248+
249+
#[test]
250+
fn empty_text() {
251+
let text = "";
252+
let bytes = text.as_bytes();
253+
let freq = get_frequency(bytes);
254+
let dict = HuffmanDictionary::new(&freq);
255+
assert!(dict.is_none());
256+
}
257+
258+
#[test]
259+
fn one_symbol_text() {
260+
let text = "aaaa";
261+
let bytes = text.as_bytes();
262+
let freq = get_frequency(bytes);
263+
let dict = HuffmanDictionary::new(&freq).unwrap();
264+
let encoded = dict.encode(bytes);
265+
assert_eq!(encoded.num_bits, 4);
266+
let decoded = encoded.decode(&dict).unwrap();
267+
assert_eq!(decoded, bytes);
268+
}
269+
270+
#[test]
271+
fn test_decode_empty_encoding_struct() {
272+
// Create a minimal but VALID HuffmanDictionary.
273+
// This is required because decode() expects a dictionary, even though
274+
// the content of the dictionary doesn't matter when num_bits == 0.
275+
let freq = vec![(b'a', 1)];
276+
let dict = HuffmanDictionary::new(&freq).unwrap();
277+
278+
// Manually create the target state: an encoding with 0 bits.
279+
let empty_encoding = HuffmanEncoding {
280+
data: vec![],
281+
num_bits: 0,
282+
};
283+
284+
let result = empty_encoding.decode(&dict);
285+
286+
assert_eq!(result, Some(vec![]));
287+
}
288+
289+
#[test]
290+
fn minimal_decode_end_check() {
291+
let freq = vec![(b'a', 1), (b'b', 1)];
292+
let bytes = b"ab";
293+
294+
let dict = HuffmanDictionary::new(&freq).unwrap();
295+
let encoded = dict.encode(bytes);
296+
297+
// This decode will go through the main loop and hit the final 'if self.num_bits > 0' check.
298+
let decoded = encoded.decode(&dict).unwrap();
299+
300+
assert_eq!(decoded, bytes);
301+
}
302+
303+
#[test]
304+
fn test_decode_corrupted_stream_dead_end() {
305+
// Create a dictionary with three symbols to ensure a deeper tree.
306+
// This makes hitting a dead-end (None pointer) easier.
307+
let freq = vec![(b'a', 1), (b'b', 1), (b'c', 1)];
308+
let bytes = b"ab";
309+
let dict = HuffmanDictionary::new(&freq).unwrap();
310+
311+
let encoded = dict.encode(bytes);
312+
313+
// Manually corrupt the stream to stop mid-symbol.
314+
// We will truncate num_bits by a small amount (e.g., 1 bit).
315+
// This forces the loop to stop on an *intermediate* node.
316+
let corrupted_encoding = HuffmanEncoding {
317+
data: encoded.data,
318+
// Shorten the bit count by one. The total length of the 'ab' stream
319+
// is 3 or 4 bits (code lengths 1, 2, 2). This makes the loop end one bit early,
320+
// leaving the state on an internal node.
321+
num_bits: encoded
322+
.num_bits
323+
.checked_sub(1)
324+
.expect("Encoding should be > 0 bits"),
325+
};
326+
327+
// Assert that the decode fails gracefully.
328+
// The loop finishes, the final 'if self.num_bits > 0' executes,
329+
// and result.push(state.symbol?) fails because state.symbol is None.
330+
assert_eq!(corrupted_encoding.decode(&dict), None);
331+
}
332+
184333
#[test]
185334
fn small_text() {
186335
let text = "Hello world";
187336
let bytes = text.as_bytes();
188337
let freq = get_frequency(bytes);
189-
let dict = HuffmanDictionary::new(&freq);
338+
let dict = HuffmanDictionary::new(&freq).unwrap();
190339
let encoded = dict.encode(bytes);
191340
assert_eq!(encoded.num_bits, 32);
192341
let decoded = encoded.decode(&dict).unwrap();
@@ -208,7 +357,7 @@ mod tests {
208357
);
209358
let bytes = text.as_bytes();
210359
let freq = get_frequency(bytes);
211-
let dict = HuffmanDictionary::new(&freq);
360+
let dict = HuffmanDictionary::new(&freq).unwrap();
212361
let encoded = dict.encode(bytes);
213362
assert_eq!(encoded.num_bits, 2372);
214363
let decoded = encoded.decode(&dict).unwrap();

0 commit comments

Comments
 (0)