@@ -77,10 +77,50 @@ pub struct HuffmanDictionary<T> {
7777} 
7878
7979impl < T :  Clone  + Copy  + Ord >  HuffmanDictionary < T >  { 
80-     /// The list of alphabet symbols and their respective frequency should 
81- /// be given as input 
82- pub  fn  new ( alphabet :  & [ ( T ,  u64 ) ] )  -> Self  { 
80+     /// Creates a new Huffman dictionary from alphabet symbols and their frequencies. 
81+ /// 
82+ /// Returns `None` if the alphabet is empty. 
83+ /// 
84+ /// # Arguments 
85+ /// * `alphabet` - A slice of tuples containing symbols and their frequencies 
86+ /// 
87+ /// # Example 
88+ /// ``` 
89+ /// # use the_algorithms_rust::general::HuffmanDictionary; 
90+ /// let freq = vec![('a', 5), ('b', 2), ('c', 1)]; 
91+ /// let dict = HuffmanDictionary::new(&freq).unwrap(); 
92+ /// ```
93+ pub  fn  new ( alphabet :  & [ ( T ,  u64 ) ] )  -> Option < Self >  { 
94+         if  alphabet. is_empty ( )  { 
95+             return  None ; 
96+         } 
97+ 
8398        let  mut  alph:  BTreeMap < T ,  HuffmanValue >  = BTreeMap :: new ( ) ; 
99+ 
100+         // Special case: single symbol 
101+         if  alphabet. len ( )  == 1  { 
102+             let  ( symbol,  _freq)  = alphabet[ 0 ] ; 
103+             alph. insert ( 
104+                 symbol, 
105+                 HuffmanValue  { 
106+                     value :  0 , 
107+                     bits :  1 ,  // Must use at least 1 bit per symbol 
108+                 } , 
109+             ) ; 
110+ 
111+             let  root = HuffmanNode  { 
112+                 left :  None , 
113+                 right :  None , 
114+                 symbol :  Some ( symbol) , 
115+                 frequency :  alphabet[ 0 ] . 1 , 
116+             } ; 
117+ 
118+             return  Some ( HuffmanDictionary  { 
119+                 alphabet :  alph, 
120+                 root, 
121+             } ) ; 
122+         } 
123+ 
84124        let  mut  queue:  BinaryHeap < HuffmanNode < T > >  = BinaryHeap :: new ( ) ; 
85125        for  ( symbol,  freq)  in  alphabet. iter ( )  { 
86126            queue. push ( HuffmanNode  { 
@@ -101,11 +141,14 @@ impl<T: Clone + Copy + Ord> HuffmanDictionary<T> {
101141                frequency :  sm_freq, 
102142            } ) ; 
103143        } 
104-         let  root = queue. pop ( ) . unwrap ( ) ; 
105-         HuffmanNode :: get_alphabet ( 0 ,  0 ,  & root,  & mut  alph) ; 
106-         HuffmanDictionary  { 
107-             alphabet :  alph, 
108-             root, 
144+         if  let  Some ( root)  = queue. pop ( )  { 
145+             HuffmanNode :: get_alphabet ( 0 ,  0 ,  & root,  & mut  alph) ; 
146+             Some ( HuffmanDictionary  { 
147+                 alphabet :  alph, 
148+                 root, 
149+             } ) 
150+         }  else  { 
151+             None 
109152        } 
110153    } 
111154    pub  fn  encode ( & self ,  data :  & [ T ] )  -> HuffmanEncoding  { 
@@ -143,27 +186,48 @@ impl HuffmanEncoding {
143186        } 
144187        self . num_bits  += data. bits  as  u64 ; 
145188    } 
189+ 
190+     #[ inline]  
146191    fn  get_bit ( & self ,  pos :  u64 )  -> bool  { 
147192        ( self . data [ ( pos >> 6 )  as  usize ]  &  ( 1  << ( pos &  63 ) ) )  != 0 
148193    } 
194+ 
149195    /// In case the encoding is invalid, `None` is returned 
150196pub  fn  decode < T :  Clone  + Copy  + Ord > ( & self ,  dict :  & HuffmanDictionary < T > )  -> Option < Vec < T > >  { 
197+         // Handle empty encoding 
198+         if  self . num_bits  == 0  { 
199+             return  Some ( vec ! [ ] ) ; 
200+         } 
201+ 
202+         // Special case: single symbol in dictionary 
203+         if  dict. alphabet . len ( )  == 1  { 
204+             //all bits represent the same symbol 
205+             let  symbol = dict. alphabet . keys ( ) . next ( ) ?; 
206+             let  result = vec ! [ * symbol;  self . num_bits as  usize ] ; 
207+             return  Some ( result) ; 
208+         } 
209+ 
210+         // Normal case: multiple symbols 
151211        let  mut  state = & dict. root ; 
152212        let  mut  result:  Vec < T >  = vec ! [ ] ; 
213+ 
153214        for  i in  0 ..self . num_bits  { 
154-             if  state. symbol . is_some ( )  { 
155-                 result. push ( state . symbol . unwrap ( ) ) ; 
215+             if  let   Some ( symbol )  =  state. symbol  { 
216+                 result. push ( symbol) ; 
156217                state = & dict. root ; 
157218            } 
158219            state = if  self . get_bit ( i)  { 
159-                 state. right . as_ref ( ) . unwrap ( ) 
220+                 state. right . as_ref ( ) ? 
160221            }  else  { 
161-                 state. left . as_ref ( ) . unwrap ( ) 
222+                 state. left . as_ref ( ) ? 
162223            } 
163224        } 
225+ 
226+         // Check if we ended on a symbol 
164227        if  self . num_bits  > 0  { 
165228            result. push ( state. symbol ?) ; 
166229        } 
230+ 
167231        Some ( result) 
168232    } 
169233} 
@@ -181,12 +245,97 @@ mod tests {
181245            . for_each ( |( b,  & cnt) | result. push ( ( b as  u8 ,  cnt) ) ) ; 
182246        result
183247    } 
248+ 
// An empty input yields an empty frequency table, for which no dictionary
// can be built.
#[test]
fn empty_text() {
    let input = "".as_bytes();
    let frequencies = get_frequency(input);
    assert!(HuffmanDictionary::new(&frequencies).is_none());
}
257+ 
// A text made of a single distinct symbol must round-trip, spending exactly
// one bit per occurrence.
#[test]
fn one_symbol_text() {
    let input = "aaaa".as_bytes();
    let frequencies = get_frequency(input);
    let dictionary = HuffmanDictionary::new(&frequencies).unwrap();

    let encoding = dictionary.encode(input);
    assert_eq!(encoding.num_bits, 4);

    let round_trip = encoding.decode(&dictionary).unwrap();
    assert_eq!(round_trip, input);
}
269+ 
// Decoding a hand-built encoding that holds zero bits must produce an empty
// vector.
#[test]
fn test_decode_empty_encoding_struct() {
    // The target state: an encoding with no storage and no bits.
    let no_bits = HuffmanEncoding {
        data: vec![],
        num_bits: 0,
    };

    // `decode` needs some dictionary to be called at all; a one-symbol
    // dictionary is the smallest valid one.
    let frequencies = vec![(b'a', 1)];
    let dictionary = HuffmanDictionary::new(&frequencies).unwrap();

    let decoded = no_bits.decode(&dictionary);

    assert_eq!(decoded, Some(vec![]));
}
288+ 
// Smallest multi-symbol round trip: two symbols, each used once. Decoding
// goes through the bit-by-bit loop and the end-of-stream symbol check.
#[test]
fn minimal_decode_end_check() {
    let message = b"ab";
    let frequencies = vec![(b'a', 1), (b'b', 1)];

    let dictionary = HuffmanDictionary::new(&frequencies).unwrap();
    let encoding = dictionary.encode(message);

    let round_trip = encoding.decode(&dictionary).unwrap();

    assert_eq!(round_trip, message);
}
302+ 
// A stream that has lost its final bit must be rejected with `None` rather
// than decoded or panicked on.
#[test]
fn test_decode_corrupted_stream_dead_end() {
    // Three symbols give a tree with more than one level, so a code word
    // can be cut in the middle.
    let frequencies = vec![(b'a', 1), (b'b', 1), (b'c', 1)];
    let message = b"ab";
    let dictionary = HuffmanDictionary::new(&frequencies).unwrap();

    let encoding = dictionary.encode(message);

    // Drop the last bit of the stream while keeping the stored words.
    // NOTE(review): this is expected to leave the decoder on a node without
    // a symbol; that depends on which code lengths the dictionary assigns.
    let shortened_bits = encoding
        .num_bits
        .checked_sub(1)
        .expect("Encoding should be > 0 bits");
    let truncated = HuffmanEncoding {
        data: encoding.data,
        num_bits: shortened_bits,
    };

    // The decoder runs out of bits before reaching a symbol and must
    // report the failure as `None`.
    assert_eq!(truncated.decode(&dictionary), None);
}
332+ 
184333    #[ test]  
185334    fn  small_text ( )  { 
186335        let  text = "Hello world" ; 
187336        let  bytes = text. as_bytes ( ) ; 
188337        let  freq = get_frequency ( bytes) ; 
189-         let  dict = HuffmanDictionary :: new ( & freq) ; 
338+         let  dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ; 
190339        let  encoded = dict. encode ( bytes) ; 
191340        assert_eq ! ( encoded. num_bits,  32 ) ; 
192341        let  decoded = encoded. decode ( & dict) . unwrap ( ) ; 
@@ -208,7 +357,7 @@ mod tests {
208357        ) ; 
209358        let  bytes = text. as_bytes ( ) ; 
210359        let  freq = get_frequency ( bytes) ; 
211-         let  dict = HuffmanDictionary :: new ( & freq) ; 
360+         let  dict = HuffmanDictionary :: new ( & freq) . unwrap ( ) ; 
212361        let  encoded = dict. encode ( bytes) ; 
213362        assert_eq ! ( encoded. num_bits,  2372 ) ; 
214363        let  decoded = encoded. decode ( & dict) . unwrap ( ) ; 
0 commit comments