11use io:: statistics:: Instances ;
22
3+ use std:: borrow:: Cow ;
34use std:: cmp:: Ordering ;
45use std:: collections:: { BinaryHeap , HashMap } ;
56use std:: hash:: Hash ;
67
8+ /// Reading from bitstreams and decoding their contents using Huffman tables.
9+ pub mod read;
10+
711/// A newtype for `u8` used to count the length of a key in bits.
812#[ derive(
913 Debug ,
@@ -25,6 +29,11 @@ use std::hash::Hash;
2529 Eq ,
2630) ]
2731pub struct BitLen ( u8 ) ;
32+ impl BitLen {
33+ pub fn as_u8 ( & self ) -> u8 {
34+ self . 0
35+ }
36+ }
2837
2938/// Convenience implementation of operator `<<` in
3039/// `bits << bit_len`
@@ -34,6 +43,12 @@ impl std::ops::Shl<BitLen> for u32 {
3443 self << Into :: < u8 > :: into ( rhs)
3544 }
3645}
46+ impl std:: ops:: Shl < BitLen > for usize {
47+ type Output = usize ;
48+ fn shl ( self , rhs : BitLen ) -> usize {
49+ self << Into :: < u8 > :: into ( rhs)
50+ }
51+ }
3752
3853/// Convenience implementation of operator `>>` in
3954/// `bits >> bit_len`
@@ -43,56 +58,125 @@ impl std::ops::Shr<BitLen> for u32 {
4358 self >> Into :: < u8 > :: into ( rhs)
4459 }
4560}
61+ impl std:: ops:: Shr < BitLen > for usize {
62+ type Output = usize ;
63+ fn shr ( self , rhs : BitLen ) -> usize {
64+ self >> Into :: < u8 > :: into ( rhs)
65+ }
66+ }
4667
4768/// The largest acceptable length for a key.
4869///
4970/// Hardcoded in the format.
5071const MAX_CODE_BIT_LENGTH : u8 = 20 ;
5172
52- // privacy barrier
53- mod key {
54- use context:: huffman:: BitLen ;
55-
56- /// A Huffman key
57- #[ derive( Debug ) ]
58- pub struct Key {
59- /// The bits in the key.
60- ///
61- /// Note that we only use the `bit_len` lowest-weight bits.
62- /// Any other bit MUST BE 0.
73+ /// A sequence of bits, read from a bit stream.
74+ ///
75+ /// Typically used for lookup of entries in Huffman tables.
76+ #[ derive( Clone , Debug , PartialEq , Eq ) ]
77+ pub struct BitSequence {
6378 bits : u32 ,
64-
65- /// The number of bits of `bits` to use.
6679 bit_len : BitLen ,
6780}
68- impl Key {
69- /// Create a new Key.
81+ impl BitSequence {
7082 pub fn new ( bits : u32 , bit_len : BitLen ) -> Self {
71- debug_assert ! ( { let bit_len : u8 = bit_len. into( ) ; bit_len <= 32 } ) ;
72- debug_assert ! ( { let bit_len : u8 = bit_len. into( ) ; if bit_len < 32 { bits >> bit_len == 0 } else { true } } ) ;
73- Key {
74- bits,
75- bit_len,
83+ Self { bits, bit_len }
84+ }
85+ pub fn bits ( & self ) -> u32 {
86+ self . bits
87+ }
88+ /// The number of bits of `bits` to use.
89+ pub fn bit_len ( & self ) -> BitLen {
90+ self . bit_len
91+ }
92+ /// Split the bits into a prefix of `bit_len` bits and a suffix of `self.bit_len - bit_len`
93+ /// bits.
94+ ///
95+ /// # Failure
96+ ///
97+ /// This function panics if `bit_len > self.bit_len`.
98+ pub fn split ( & self , bit_len : BitLen ) -> ( u32 , u32 ) {
99+ let shift = self . bit_len - bit_len;
100+ match shift. into ( ) {
101+ 0u8 => ( self . bits , 0 ) , // Special case: cannot >> 32
102+ 32u8 => ( 0 , self . bits ) , // Special case: cannot >> 32
103+ shift => (
104+ self . bits >> shift,
105+ self . bits & ( std:: u32:: MAX >> 32 - shift) ,
106+ ) ,
76107 }
77108 }
109+ pub fn pad_lowest_to ( & self , total_bit_len : BitLen ) -> Cow < BitSequence > {
110+ assert ! ( total_bit_len. 0 <= 32u8 ) ;
111+ if total_bit_len <= self . bit_len {
112+ return Cow :: Borrowed ( self ) ;
113+ }
114+ let shift = total_bit_len - self . bit_len ;
115+ if shift. 0 == 32u8 {
116+ return Cow :: Owned ( BitSequence :: new ( 0 , BitLen ( 32 ) ) ) ;
117+ }
118+ Cow :: Owned ( BitSequence :: new ( self . bits << shift, total_bit_len) )
119+ }
120+ }
78121
79- /// The bits in the key.
#[test]
fn test_bit_sequence_split() {
    // A full-width (32 bits) sequence whose upper half is all ones.
    let upper_half = 0b11111111_11111111_00000000_00000000;
    let wide = BitSequence::new(upper_half, BitLen(32));
    // Empty prefix: everything lands in the suffix.
    assert_eq!(wide.split(BitLen(0)), (0, upper_half));
    // Full prefix: nothing is left for the suffix.
    assert_eq!(wide.split(BitLen(32)), (upper_half, 0));
    // Even split: the ones go to the prefix, the zeroes to the suffix.
    assert_eq!(wide.split(BitLen(16)), (0b11111111_11111111, 0));

    // A 16-bit sequence whose lower byte is all ones.
    let low_byte = 0b00000000_00000000_00000000_11111111;
    let narrow = BitSequence::new(low_byte, BitLen(16));
    assert_eq!(narrow.split(BitLen(0)), (0, low_byte));
    assert_eq!(narrow.split(BitLen(16)), (low_byte, 0));
    // Even split: the prefix is the zero byte, the suffix the byte of ones.
    assert_eq!(narrow.split(BitLen(8)), (0, 0b11111111));
}
136+
137+ /// A Huffman key
138+ #[ derive( Clone , Debug , PartialEq , Eq ) ]
139+ pub struct Key ( BitSequence ) ;
140+
141+ impl Key {
142+ /// Create a new Key.
80143 ///
81144 /// Note that we only use the `bit_len` lowest-weight bits.
82- /// Any other bit is guaranteed to be 0.
145+ /// Any other bit MUST BE 0.
146+ pub fn new ( bits : u32 , bit_len : BitLen ) -> Self {
147+ debug_assert ! ( {
148+ let bit_len: u8 = bit_len. into( ) ;
149+ bit_len <= 32
150+ } ) ;
151+ debug_assert ! ( {
152+ let bit_len: u8 = bit_len. into( ) ;
153+ if bit_len < 32 {
154+ bits >> bit_len == 0
155+ } else {
156+ true
157+ }
158+ } ) ;
159+ Key ( BitSequence { bits, bit_len } )
160+ }
161+
162+ /// The bits in this Key.
163+ ///
164+ /// # Invariant
165+ ///
166+ /// Only the `self.bit_len()` lowest-weight bits may be non-0.
83167 pub fn bits ( & self ) -> u32 {
84- self . bits
168+ self . 0 . bits
85169 }
86170
87171 /// The number of bits of `bits` to use.
88172 pub fn bit_len ( & self ) -> BitLen {
89- self . bit_len
173+ self . 0 . bit_len
90174 }
91- }
92-
93- } // mod key
94175
95- use self :: key:: Key ;
176+ pub fn as_bit_sequence ( & self ) -> & BitSequence {
177+ & self . 0
178+ }
179+ }
96180
97181/// A node in the Huffman tree.
98182struct Node < T > {
@@ -136,17 +220,34 @@ impl<T> PartialEq for Node<T> {
136220impl < T > Eq for Node < T > { }
137221
138222/// Keys associated to a sequence of values.
139- #[ derive( Debug ) ]
140- pub struct Keys < T >
141- where
142- T : Ord + Clone ,
143- {
223+ #[ derive( Clone , Debug ) ]
224+ pub struct Keys < T > {
225+ /// The longest bit length that actually appears in `keys`.
226+ highest_bit_len : BitLen ,
227+
144228 /// The sequence of keys.
145229 ///
146230 /// Order is meaningful.
147231 keys : Vec < ( T , Key ) > ,
148232}
149233
234+ impl < T > Keys < T > {
235+ pub fn len ( & self ) -> usize {
236+ self . keys . len ( )
237+ }
238+ pub fn highest_bit_len ( & self ) -> BitLen {
239+ self . highest_bit_len
240+ }
241+ }
242+
243+ impl < T > IntoIterator for Keys < T > {
244+ type Item = ( T , Key ) ;
245+ type IntoIter = std:: vec:: IntoIter < ( T , Key ) > ;
246+ fn into_iter ( self ) -> Self :: IntoIter {
247+ self . keys . into_iter ( )
248+ }
249+ }
250+
150251impl < T > Keys < T >
151252where
152253 T : Ord + Clone ,
@@ -155,12 +256,12 @@ where
155256 ///
156257 /// Optionally, `max_bit_len` may specify a largest acceptable bit length.
157258 /// If `Keys` may not be computed without exceeding this bit length,
158- /// fail with `Err(problemantic_bit_length )`.
259+ /// fail with `Err(problematic_bit_len)`.
159260 ///
160261 /// The current implementation only attempts to produce the best compression
161- /// level. This may cause us to exceed `max_bit_length ` even though an
262+ /// level. This may cause us to exceed `max_bit_len ` even though an
162263 /// alternative table, with a lower compression level, would let us
163- /// proceed without exceeding `max_bit_length `.
264+ /// proceed without exceeding `max_bit_len `.
164265 ///
165266 /// # Performance
166267 ///
@@ -185,9 +286,9 @@ where
185286 /// with a number of instances already attached.
186287 ///
187288 /// The current implementation only attempts to produce the best compression
188- /// level. This may cause us to exceed `max_bit_length ` even though an
289+ /// level. This may cause us to exceed `max_bit_len ` even though an
189290 /// alternative table, with a lower compression level, would let us
190- /// proceed without exceeding `max_bit_length `.
291+ /// proceed without exceeding `max_bit_len `.
191292 ///
192293 /// # Requirement
193294 ///
@@ -197,9 +298,9 @@ where
197298 S : IntoIterator < Item = ( T , Instances ) > ,
198299 {
199300 let mut bit_lengths = Self :: compute_bit_lengths ( source, max_bit_len) ?;
301+ let mut highest_bit_len = BitLen ( 0 ) ;
200302
201303 // Canonicalize order: (BitLen, T)
202- // As values of `T` are
203304 bit_lengths. sort_unstable_by_key ( |& ( ref value, ref bit_len) | ( * bit_len, value. clone ( ) ) ) ;
204305
205306 // The bits associated to the next value.
@@ -214,12 +315,18 @@ where
214315 ) ;
215316 keys. push ( ( symbol. clone ( ) , Key :: new ( bits, bit_len) ) ) ;
216317 bits = ( bits + 1 ) << ( next_bit_len - bit_len) ;
318+ if bit_len > highest_bit_len {
319+ highest_bit_len = bit_len;
320+ }
217321 }
218322 // Handle the last element.
219323 let ( ref symbol, bit_len) = bit_lengths[ bit_lengths. len ( ) - 1 ] ;
220324 keys. push ( ( symbol. clone ( ) , Key :: new ( bits, bit_len) ) ) ;
221325
222- return Ok ( Self { keys } ) ;
326+ return Ok ( Self {
327+ highest_bit_len,
328+ keys,
329+ } ) ;
223330 }
224331
225332 /// Convert a sequence of values labelled by their number of instances
0 commit comments