diff --git a/Cargo.toml b/Cargo.toml index ba27b2e..06504bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,8 @@ homepage = "https://github.com/image-rs/fdeflate" categories = ["compression"] [dependencies] +fnv = "1.0.7" +innumerable = "0.1.0" simd-adler32 = "0.3.4" [dev-dependencies] diff --git a/src/compress/bt_matchfinder.rs b/src/compress/bt_matchfinder.rs new file mode 100644 index 0000000..381987e --- /dev/null +++ b/src/compress/bt_matchfinder.rs @@ -0,0 +1,203 @@ +use super::{compute_hash, compute_hash3, WINDOW_SIZE}; + +const CACHE3_SIZE: usize = 1 << 15; +const CACHE_SIZE: usize = 1 << 16; + +/// Find the length of the match between the current position and the previous position, searching +/// both forwards and backwards from the starting position. +fn match_length(data: &[u8], ip: usize, prev_index: usize) -> u16 { + assert!( + prev_index < ip, + "Match past current position: {prev_index} {ip}" + ); + + let mut length = 0; + while length < 258 && ip + length < data.len() && data[ip + length] == data[prev_index + length] + { + length += 1; + } + length as u16 +} + +fn left_child(index: usize) -> usize { + 2 * (index as usize % WINDOW_SIZE) +} + +fn right_child(index: usize) -> usize { + 2 * (index as usize % WINDOW_SIZE) + 1 +} + +/// Match finder that uses a binary tree to find matches. +/// +/// Based on bt_matchfinder.h from libdeflate. +pub(crate) struct BTreeMatchFinder { + hash3_table: Option>, + hash_table: Box<[u32; CACHE_SIZE]>, + child_links: Box<[u32; WINDOW_SIZE * 2]>, + search_depth: u16, + early_return_length: usize, +} +impl BTreeMatchFinder { + pub(crate) fn new(min_match: u8) -> Self { + assert!((3..=4).contains(&min_match)); + + Self { + hash3_table: (min_match == 3) + .then(|| vec![0; CACHE3_SIZE].into_boxed_slice().try_into().unwrap()), + hash_table: vec![0; CACHE_SIZE].into_boxed_slice().try_into().unwrap(), + child_links: vec![0; WINDOW_SIZE * 2] + .into_boxed_slice() + .try_into() + .unwrap(), + search_depth: 2000, + early_return_length: 256, + } + } + + fn update( + &mut self, + data: &[u8], + ip: usize, + value: u64, + min_match: u16, + record_matches: bool, + ) -> (u16, u16, usize) { + let min_offset = ip.saturating_sub(WINDOW_SIZE).max(1); + + let mut best_offset = 0; + let mut best_length = min_match - 1; + + // Handle 3-byte matches + if let Some(hash3_table) = &mut self.hash3_table { + let hash3 = compute_hash3(value as u32); + if best_length < min_match && min_match <= 3 { + let hash3_offset = hash3_table[(hash3 as usize) % CACHE3_SIZE] as usize; + if hash3_offset >= ip.saturating_sub(8192).max(1) { + let length = match_length(data, ip, hash3_offset); + if length >= 3 { + best_length = length; + best_offset = hash3_offset as u32; + } + } + } + hash3_table[(hash3 as usize) % CACHE3_SIZE] = ip as u32; + } + + // Lookup current value + let hash = compute_hash(value & 0xffff_ffff); + let hash_index = (hash as usize) % CACHE_SIZE; + let mut offset = self.hash_table[hash_index] as usize; + self.hash_table[hash_index] = ip as u32; + + let mut pending_left = left_child(ip); + let mut pending_right = right_child(ip); + + if offset < min_offset { + self.child_links[pending_left] = 0; + self.child_links[pending_right] = 0; + return (0, 0, ip); + } + + let mut best_left_length = 0; + let mut best_right_length = 0; + let mut length = 0; + + // Visit previous matches + // eprintln!("---"); + let mut depth_remaining = self.search_depth; + loop { + if data[ip + length] == data[offset + length] { + while length < 258 + && ip + length < data.len() + && 
data[ip + length] == data[offset + length] + { + length += 1; + } + + // for i in 0..length.min(self.early_return_length) { + // assert_eq!( + // data[ip + i], + // data[offset + i], + // "{i} {length} ip={ip} data_len={}", + // data.len() + // ); + // } + + if record_matches && length > best_length as usize { + best_length = length as u16; + best_offset = offset as u32; + } + + if length >= self.early_return_length || ip + length == data.len() { + self.child_links[pending_left] = self.child_links[left_child(offset)]; + self.child_links[pending_right] = self.child_links[right_child(offset)]; + break; + } + } + + assert!(ip + length < data.len()); + + if data[offset + length] < data[ip + length] { + self.child_links[pending_left] = offset as u32; + pending_left = right_child(offset); + offset = self.child_links[pending_left] as usize; + + best_left_length = length; + if best_right_length < length { + length = best_right_length; + } + // length = length.min(best_right_length); + // eprintln!( + // "left {best_right_length},{best_left_length} dist={}", + // ip - offset + // ); + } else { + assert!( + data[offset + length] > data[ip + length], + "{length} {depth_remaining} {offset} {min_offset}" + ); + + self.child_links[pending_right] = offset as u32; + pending_right = left_child(offset); + offset = self.child_links[pending_right] as usize; + + best_right_length = length; + if best_left_length < length { + length = best_left_length; + } + // length = length.min(best_left_length); + // eprintln!( + // "right {best_right_length},{best_left_length} dist={}", + // ip - offset + // ); + } + + depth_remaining -= 1; + if offset <= min_offset || depth_remaining == 0 { + self.child_links[pending_left] = 0; + self.child_links[pending_right] = 0; + break; + } + } + + if best_length >= min_match { + return (best_length as u16, (ip - best_offset as usize) as u16, ip); + } + + (0, 0, ip) + } + + pub(crate) fn get_and_insert( + &mut self, + data: &[u8], + ip: usize, + value: u64, + min_match: u16, + ) -> (u16, u16, usize) { + self.update(data, ip, value, min_match, true) + } + + pub(crate) fn insert(&mut self, data: &[u8], value: u64, ip: usize) { + self.update(data, ip, value, 3, false); + } +} diff --git a/src/compress/fast.rs b/src/compress/fast.rs new file mode 100644 index 0000000..c1d0992 --- /dev/null +++ b/src/compress/fast.rs @@ -0,0 +1,120 @@ +use std::io::{self, Write}; + +use super::{BitWriter, HashTableMatchFinder, Symbol}; + +pub(super) struct FastCompressor { + match_finder: HashTableMatchFinder, + skip_ahead_shift: u8, +} + +impl FastCompressor { + pub fn new(skip_ahead_shift: u8) -> Self { + Self { + match_finder: HashTableMatchFinder::new(), + skip_ahead_shift, + } + } + + pub fn compress(&mut self, writer: &mut BitWriter, data: &[u8]) -> io::Result<()> { + let mut ip = 0; + + while ip < data.len() { + let mut symbols = Vec::new(); + + let mut last_match = ip; + 'outer: while symbols.len() < 16384 && ip + 8 <= data.len() { + let current = u64::from_le_bytes(data[ip..][..8].try_into().unwrap()); + + if current & 0xFF_FFFF_FFFF == 0 { + while ip > last_match && data[ip - 1] == 0 { + ip -= 1; + } + + if ip == 0 || data[ip - 1] != 0 { + ip += 1; + } + + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: ip as u32, + }); + + let mut run_length = 0; + while ip < data.len() && data[ip] == 0 && run_length < 258 { + run_length += 1; + ip += 1; + } + + symbols.push(Symbol::Backref { + length: run_length as u16, + distance: 1, + dist_sym: 0, + }); + + last_match = ip; + + continue; 
+ } + + let (length, distance, match_start) = self + .match_finder + .get_and_insert(&data, last_match, ip, current, 4); + + if length >= 3 { + assert!(last_match <= match_start); + + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: match_start as u32, + }); + + symbols.push(Symbol::Backref { + length: length as u16, + distance, + dist_sym: super::distance_to_dist_sym(distance), + }); + + let match_end = match_start + length as usize; + let insert_end = (match_end - 2).min(data.len() - 8); + let insert_start = (ip + 1).max(insert_end.saturating_sub(16)); + for j in insert_start..insert_end { + let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); + self.match_finder.insert(v, j); + } + + ip = match_end; + last_match = ip; + + continue 'outer; + } + + // If we haven't found a match in a while, start skipping ahead by emitting multiple + // literals at once. But check that we don't skip over a big run of zeroes. + let advance = 1 + ((ip - last_match) >> self.skip_ahead_shift); + if advance >= 8 { + let end_index = (ip + advance).min(data.len()); + if let Some(advance) = data[ip + 1..end_index] + .chunks_exact(8) + .position(|w| w == [0; 8]) + { + ip += advance + 1; + continue 'outer; + } + } + + ip += advance; + } + if data.len() < ip + 8 { + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: data.len() as u32, + }); + ip = data.len(); + } + + super::write_block(writer, data, &symbols, ip == data.len())?; + } + + Ok(()) + } +} diff --git a/src/compress/hc_matchfinder.rs b/src/compress/hc_matchfinder.rs new file mode 100644 index 0000000..71655fa --- /dev/null +++ b/src/compress/hc_matchfinder.rs @@ -0,0 +1,139 @@ +use crate::compress::compute_hash32; + +use super::WINDOW_SIZE; + +const CACHE_SIZE: usize = 65536;// 1 << 18; + +/// Find the length of the match between the current position and the previous position, searching +/// both forwards and backwards from the starting position. +fn match_length( + data: &[u8], + anchor: usize, + mut ip: usize, + mut prev_index: usize, + value: u32, +) -> (u16, usize) { + assert!( + prev_index < ip, + "Match past current position: {prev_index} {ip}" + ); + + if value != u32::from_ne_bytes(data[prev_index..][..4].try_into().unwrap()) { + return (0, ip); + } + + let mut length = 4; + while length < 258 && ip > anchor && prev_index > 0 && data[ip - 1] == data[prev_index - 1] { + length += 1; + ip -= 1; + prev_index -= 1; + } + while length < 258 && ip + length < data.len() && data[ip + length] == data[prev_index + length] + { + length += 1; + } + (length as u16, ip) +} + +pub(crate) struct HashChainMatchFinder { + hash_table: Box<[u32; CACHE_SIZE]>, + links: Box<[u32; WINDOW_SIZE]>, + + search_depth: u16, + + // /// If we already have a match of this length, limit lazy search to a smaller search depth. + // good_length: u16, + /// Stop searching for matches if the length is at least this long. + nice_length: u16, + // /// Mask of low-bytes to consider for hashing. 
+ // hash_mask: u64, +} +impl HashChainMatchFinder { + pub(crate) fn new(search_depth: u16, nice_length: u16, min_match: u8) -> Self { + assert!((3..=8).contains(&min_match)); + + Self { + hash_table: vec![0; CACHE_SIZE].into_boxed_slice().try_into().unwrap(), + links: vec![0; WINDOW_SIZE].into_boxed_slice().try_into().unwrap(), + search_depth, + // good_length: 8, + nice_length, + // hash_mask: if min_match == 8 { + // u64::MAX + // } else { + // (1 << (min_match.max(4) * 8)) - 1 + // }, + } + } + + pub(crate) fn get_and_insert( + &mut self, + data: &[u8], + anchor: usize, + ip: usize, + value: u32, + min_match: u16, + ) -> (u16, u16, usize) { + let min_offset = ip.saturating_sub(32768).max(1); + + let mut best_offset = 0; + let mut best_length = min_match - 1; + let mut best_ip = 0; + + let mut n = self.search_depth; + // if min_match >= self.good_length { + // n >>= 2; + // } + + let hash = compute_hash32(value); + let hash_index = (hash as usize) % CACHE_SIZE; + let mut offset = self.hash_table[hash_index] as usize; + + // Insert current value + self.hash_table[hash_index] = ip as u32; + self.links[ip % WINDOW_SIZE] = offset as u32; + + // Visit previous matches + loop { + if offset < min_offset { + break; + } + + let (length, start) = match_length(data, anchor, ip, offset, value); + if length > best_length { + best_length = length; + best_offset = offset as u32; + best_ip = start; + // } else if best_length > min_match { + // break; + } + if length >= self.nice_length || ip + length as usize == data.len() { + break; + } + + n -= 1; + if n == 0 { + break; + } + + offset = self.links[offset % WINDOW_SIZE] as usize; + } + + if best_length >= min_match { + return ( + best_length as u16, + (ip - best_offset as usize) as u16, + best_ip, + ); + } + + (0, 0, ip) + } + + pub(crate) fn insert(&mut self, value: u64, offset: usize) { + let hash = compute_hash32(value as u32); + let prev_offset = self.hash_table[(hash as usize) % CACHE_SIZE]; + self.hash_table[(hash as usize) % CACHE_SIZE] = offset as u32; + self.links[offset as usize % WINDOW_SIZE] = prev_offset; + } +} diff --git a/src/compress/ht_matchfinder.rs b/src/compress/ht_matchfinder.rs new file mode 100644 index 0000000..2302c82 --- /dev/null +++ b/src/compress/ht_matchfinder.rs @@ -0,0 +1,77 @@ +use super::compute_hash; + +const CACHE_SIZE: usize = 1 << 16; + +/// Find the length of the match between the current position and the previous position, searching +/// both forwards and backwards from the starting position. 
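+///
+/// Because the first eight bytes are compared through `value`, a non-zero length is only
+/// returned for matches of at least eight bytes. The match is then extended backwards (never
+/// past `anchor`) and forwards, capped at DEFLATE's maximum match length of 258 bytes. The
+/// returned tuple holds the match length and the (possibly earlier) position where it starts.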
+fn match_length( + value: u64, + data: &[u8], + anchor: usize, + mut ip: usize, + mut prev_index: usize, +) -> (u16, usize) { + assert!( + prev_index < ip, + "Match past current position: {prev_index} {ip}" + ); + + if value != u64::from_ne_bytes(data[prev_index..][..8].try_into().unwrap()) { + return (0, ip); + } + + let mut length = 8; + while length < 258 && ip > anchor && prev_index > 0 && data[ip - 1] == data[prev_index - 1] { + length += 1; + ip -= 1; + prev_index -= 1; + } + while length < 258 && ip + length < data.len() && data[ip + length] == data[prev_index + length] + { + length += 1; + } + (length as u16, ip) +} + +pub(crate) struct HashTableMatchFinder { + hash_table: Box<[u32; CACHE_SIZE]>, +} +impl HashTableMatchFinder { + pub(crate) fn new() -> Self { + Self { + hash_table: vec![0; CACHE_SIZE].into_boxed_slice().try_into().unwrap(), + } + } + + pub(crate) fn get_and_insert( + &mut self, + data: &[u8], + anchor: usize, + ip: usize, + value: u64, + min_match: u16, + ) -> (u16, u16, usize) { + let min_offset = ip.saturating_sub(32768).max(1); + + let hash = compute_hash(value); + let hash_index = (hash as usize) % CACHE_SIZE; + let offset = self.hash_table[hash_index] as usize; + + // Insert current value + self.hash_table[hash_index] = ip as u32; + + if offset >= min_offset { + let (length, start) = match_length(value, data, anchor, ip, offset); + if length > min_match { + return (length as u16, (ip - offset as usize) as u16, start); + } + } + + (0, 0, ip) + } + + pub(crate) fn insert(&mut self, value: u64, offset: usize) { + let hash = compute_hash(value); + self.hash_table[(hash as usize) % CACHE_SIZE] = offset as u32; + } +} diff --git a/src/compress/medium.rs b/src/compress/medium.rs new file mode 100644 index 0000000..3731cb5 --- /dev/null +++ b/src/compress/medium.rs @@ -0,0 +1,194 @@ +use std::io::{self, Write}; + +use super::{BitWriter, HashChainMatchFinder, Symbol}; + +pub(super) struct MediumCompressor { + match_finder: HashChainMatchFinder, + skip_ahead_shift: u8, +} + +impl MediumCompressor { + pub fn new(search_depth: u16, nice_length: u16, skip_ahead_shift: u8) -> Self { + Self { + match_finder: HashChainMatchFinder::new(search_depth, nice_length, 4), + skip_ahead_shift, + } + } + + pub fn compress(&mut self, writer: &mut BitWriter, data: &[u8]) -> io::Result<()> { + let mut ip = 0; // Points at the next byte to hash/lookup for. + let mut last_match = 0; //ip; + + while ip < data.len() { + let mut length = 0u16; + let mut distance = 0; + let mut match_start = 0; + + let mut symbols = Vec::new(); + while symbols.len() < 16384 && ip + 8 <= data.len() { + if length == 0 { + let current = u64::from_le_bytes(data[ip..][..8].try_into().unwrap()); + // if current & 0xFF_FFFF_FFFF == 0 { + // length = 4; + // match_start = ip + 1; + // distance = 1; + + // let min_start = 1.max(last_match).max(match_start.saturating_sub(258 - 4)); + + // while match_start > min_start && data[match_start - 2] == 0 { + // match_start -= 1; + // length += 1; + // } + // while length < 258 + // && match_start + (length as usize) < data.len() + // && data[match_start + length as usize] == 0 + // { + // length += 1; + // } + + // // Skip inserting all the totally zero values into the hash table. 
+ // ip = match_start + length as usize - 3; + // } else { + (length, distance, match_start) = self.match_finder.get_and_insert( + &data, + last_match, + ip, + current as u32, + 3, + ); + ip += 1; + // } + } + + if length < 3 { + // If we haven't found a match in a while, start skipping ahead by emitting + // multiple literals at once. + ip += (ip - last_match) >> self.skip_ahead_shift; + continue; + } + + assert!(last_match <= ip); + assert!(last_match <= match_start,); + let (mut next_length, mut next_distance, mut next_match_start) = (0, 0, 0); + + let match_end = match_start + length as usize; + if match_end >= ip { + // // Insert match finder entries for the current match. + // let insert_end = (match_end - 3).min(data.len() - 8); + // let insert_start = ip.max(insert_end.saturating_sub(16)); + // for j in (insert_start..insert_end).step_by(4) { + // let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); + // self.match_finder.insert(v, j); + // self.match_finder.insert(v >> 8, j + 1); + // self.match_finder.insert(v >> 16, j + 2); + // self.match_finder.insert(v >> 24, j + 3); + // } + for j in ip..match_end.min(data.len() - 8) { + let v = u32::from_le_bytes(data[j..][..4].try_into().unwrap()); + self.match_finder.insert(v as u64, j); + } + + ip = match_end; + + // innumerable::event!("current-delta", ip as i32 - match_start as i32); + + // Do a lookup at the position following the match. We'll need this even if we + // accept the match, so it doesn't cost anything. + if ip + 8 <= data.len() { + let next = u64::from_le_bytes(data[ip..][..8].try_into().unwrap()); + // if next & 0xFF_FFFF_FFFF == 0 { + // next_length = 4; + // next_match_start = ip + 1; + // next_distance = 1; + + // let min_start = + // 1.max(last_match).max(next_match_start.saturating_sub(258 - 4)); + + // while next_match_start > min_start && data[next_match_start - 2] == 0 { + // next_match_start -= 1; + // next_length += 1; + // } + // while next_length < 258 + // && next_match_start + (next_length as usize) < data.len() + // && data[next_match_start + next_length as usize] == 0 + // { + // next_length += 1; + // } + + // // Skip inserting all the totally zero values into the hash table. + // ip = next_match_start + next_length as usize - 3; + // } else { + (next_length, next_distance, next_match_start) = self + .match_finder + .get_and_insert(&data, last_match, ip, next as u32, 3); + + // innumerable::event!("x-delta", next_match_start as i32 - ip as i32); + + ip += 1; + // } + } + } + + // if next_length >= 3 { + // // innumerable::event!("next-length", next_length); + // innumerable::event!("next-delta", next_match_start as i32 - match_start as i32); + // } + + // Insert the current match, unless the next match starts too close to the current + // one. Because we expand matches backwards, the next match might almost completely + // overlap. If so, it'll probably be cheaper to emit an extra literal rather than an + // extra backref. 
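+            // For example, if the current match starts at position 100 and the lookahead match,
+            // after backwards expansion, also starts at or before position 101, the current match
+            // is dropped: covering the gap costs at most one extra literal, which is cheaper than
+            // emitting a whole extra backref.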
+ if next_length < 3 || next_match_start > match_start + 1 { + // if next_length < 3 && next_match_start > match_start + 1 { + // innumerable::event!("match", 0); + // } else if next_length < 3 { + // innumerable::event!("match", 1); + // } else { + // innumerable::event!("match", 2); + // } + + assert!(last_match <= match_start); + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: match_start as u32, + }); + symbols.push(Symbol::Backref { + length: length as u16, + distance, + dist_sym: super::distance_to_dist_sym(distance), + }); + last_match = match_start + length as usize; + + // If the next match starts before the end of the current match, we need to + // adjust the next match length and start position. + if next_length > 0 && next_match_start < last_match { + assert!(next_length >= 3); + next_length -= (last_match - next_match_start) as u16; + next_match_start = last_match; + if next_length < 4 { + next_length = 0; + } + } + // innumerable::event!("fizzle", 0); + // } else if next_length >= 3 { + // innumerable::event!("fizzle", 1); + } + + // Advance to the next match (which might have a length of zero) + length = next_length; + match_start = next_match_start; + distance = next_distance; + } + if data.len() < ip + 8 { + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: data.len() as u32, + }); + ip = data.len(); + } + super::write_block(writer, data, &symbols, ip == data.len())?; + } + + Ok(()) + } +} diff --git a/src/compress/mod.rs b/src/compress/mod.rs new file mode 100644 index 0000000..f2ae0ec --- /dev/null +++ b/src/compress/mod.rs @@ -0,0 +1,648 @@ +use std::{ + collections::BinaryHeap, + io::{self, Seek, SeekFrom, Write}, +}; + +use simd_adler32::Adler32; + +use crate::tables::{ + BITMASKS, CLCL_ORDER, DIST_SYM_TO_DIST_BASE, DIST_SYM_TO_DIST_EXTRA, LENGTH_TO_LEN_EXTRA, + LENGTH_TO_SYMBOL, +}; + +use fast::FastCompressor; +use hc_matchfinder::HashChainMatchFinder; +use ht_matchfinder::HashTableMatchFinder; +use medium::MediumCompressor; +use slow::SlowCompressor; + +mod bt_matchfinder; +mod hc_matchfinder; +mod ht_matchfinder; + +mod fast; +mod medium; +mod slow; +pub mod ultrafast; + +fn build_huffman_tree( + frequencies: &[u32], + lengths: &mut [u8], + codes: &mut [u16], + length_limit: u8, +) -> bool { + assert_eq!(frequencies.len(), lengths.len()); + assert_eq!(frequencies.len(), codes.len()); + + if frequencies.iter().filter(|&&f| f > 0).count() <= 1 { + lengths.fill(0); + codes.fill(0); + if let Some(i) = frequencies.iter().position(|&f| f > 0) { + lengths[i] = 1; + } + return false; + } + + #[derive(Eq, PartialEq, Copy, Clone, Debug)] + struct Item(u32, u16); + impl Ord for Item { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + other.0.cmp(&self.0) + } + } + impl PartialOrd for Item { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + // Build a huffman tree + let mut internal_nodes = Vec::new(); + let mut nodes = BinaryHeap::from_iter( + frequencies + .iter() + .enumerate() + .filter(|(_, &frequency)| frequency > 0) + .map(|(i, &frequency)| Item(frequency, i as u16)), + ); + while nodes.len() > 1 { + let Item(frequency1, index1) = nodes.pop().unwrap(); + let mut root = nodes.peek_mut().unwrap(); + internal_nodes.push((index1, root.1)); + *root = Item( + frequency1 + root.0, + internal_nodes.len() as u16 + frequencies.len() as u16 - 1, + ); + } + + // Walk the tree to assign code lengths + lengths.fill(0); + let mut stack = Vec::new(); + stack.push((nodes.pop().unwrap().1, 0)); + while let 
Some((node, depth)) = stack.pop() { + let node = node as usize; + if node < frequencies.len() { + lengths[node] = depth as u8; + } else { + let (left, right) = internal_nodes[node - frequencies.len()]; + stack.push((left, depth + 1)); + stack.push((right, depth + 1)); + } + } + + // Limit the codes to length length_limit + let mut max_length = 0; + for &length in lengths.iter() { + max_length = max_length.max(length); + } + if max_length > length_limit { + let mut counts = [0u32; 16]; + for &length in lengths.iter() { + counts[length.min(length_limit) as usize] += 1; + } + + let mut total = 0; + for (i, count) in counts + .iter() + .enumerate() + .skip(1) + .take(length_limit as usize) + { + total += count << (length_limit as usize - i); + } + + while total > 1u32 << length_limit { + let mut i = length_limit as usize - 1; + while counts[i] == 0 { + i -= 1; + } + counts[i] -= 1; + counts[length_limit as usize] -= 1; + counts[i + 1] += 2; + total -= 1; + } + + // assign new lengths + let mut len = length_limit; + let mut indexes = frequencies.iter().copied().enumerate().collect::>(); + indexes.sort_unstable_by_key(|&(_, frequency)| frequency); + for &(i, frequency) in indexes.iter() { + if frequency > 0 { + while counts[len as usize] == 0 { + len -= 1; + } + lengths[i] = len; + counts[len as usize] -= 1; + } + } + } + + // Assign codes + codes.fill(0); + let mut code = 0u32; + for len in 1..=length_limit { + for (i, &length) in lengths.iter().enumerate() { + if length == len { + codes[i] = (code as u16).reverse_bits() >> (16 - len); + code += 1; + } + } + code <<= 1; + } + assert_eq!(code, 2 << length_limit); + + true +} + +fn distance_to_dist_sym(distance: u16) -> u8 { + const LOOKUP: [u8; 16] = [0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7]; + if distance <= 16 { + return LOOKUP[distance as usize - 1]; + } + + let mut dist_sym = 29; + while dist_sym > 0 && distance < DIST_SYM_TO_DIST_BASE[dist_sym as usize] { + dist_sym -= 1; + } + dist_sym +} + +fn compute_hash3(v: u32) -> u32 { + (0x330698ecu64.wrapping_mul(((v & 0xff_ffff) ^ 0x2722_0a95) as u64) >> 16) as u32 +} +fn compute_hash(v: u64) -> u32 { + let mut hasher = fnv::FnvHasher::default(); + std::hash::Hasher::write_u64(&mut hasher, v); + std::hash::Hasher::finish(&hasher) as u32 + + // (11400714785074694791u64.wrapping_mul(v) >> 40) as u32 +} + +fn compute_hash32(v: u32) -> u32 { + let mut hasher = fnv::FnvHasher::default(); + std::hash::Hasher::write_u32(&mut hasher, v); + std::hash::Hasher::finish(&hasher) as u32 + + // (11400714785074694791u64.wrapping_mul(v) >> 40) as u32 +} + + +enum Symbol { + LiteralRun { + start: u32, + end: u32, + }, + Backref { + length: u16, + distance: u16, + dist_sym: u8, + }, +} + +fn write_block( + writer: &mut BitWriter, + data: &[u8], + symbols: &[Symbol], + eof: bool, +) -> io::Result<()> { + let mut frequencies = [0u32; 286]; + let mut dist_frequencies = [0u32; 30]; + frequencies[256] = 1; + for symbol in symbols { + match symbol { + Symbol::LiteralRun { start, end } => { + for lit in &data[*start as usize..*end as usize] { + frequencies[*lit as usize] += 1; + } + } + Symbol::Backref { + length, dist_sym, .. 
+ } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + frequencies[sym] += 1; + dist_frequencies[*dist_sym as usize] += 1; + } + } + } + + let mut lengths = [0u8; 286]; + let mut codes = [0u16; 286]; + build_huffman_tree(&frequencies, &mut lengths, &mut codes, 15); + + let mut dist_lengths = [0u8; 30]; + let mut dist_codes = [0u16; 30]; + build_huffman_tree(&dist_frequencies, &mut dist_lengths, &mut dist_codes, 15); + + let num_litlen_codes = 286; + // while num_litlen_codes > 257 && lengths[num_litlen_codes - 1] == 0 { + // num_litlen_codes -= 1; + // } + + let num_dist_codes = 30; + // while num_dist_codes > 1 && dist_lengths[num_dist_codes - 1] == 0 { + // num_dist_codes -= 1; + // } + + let mut code_length_frequencies = [0u32; 19]; + for &length in &lengths[..num_litlen_codes] { + code_length_frequencies[length as usize] += 1; + } + for &length in &dist_lengths[..num_dist_codes] { + code_length_frequencies[length as usize] += 1; + } + let mut code_length_lengths = [0u8; 19]; + let mut code_length_codes = [0u16; 19]; + build_huffman_tree( + &code_length_frequencies, + &mut code_length_lengths, + &mut code_length_codes, + 7, + ); + + if eof { + writer.write_bits(101, 3)?; // final block + } else { + writer.write_bits(100, 3)?; // non-final block + } + + writer.write_bits(num_litlen_codes as u64 - 257, 5)?; // hlit + writer.write_bits(num_dist_codes as u64 - 1, 5)?; // hdist + writer.write_bits(15, 4)?; // hclen + + for j in 0..19 { + writer.write_bits(code_length_lengths[CLCL_ORDER[j]] as u64, 3)?; + } + + for &length in lengths[..num_litlen_codes] + .iter() + .chain(&dist_lengths[..num_dist_codes]) + { + writer.write_bits( + code_length_codes[length as usize] as u64, + code_length_lengths[length as usize], + )?; + } + + for symbol in symbols { + match symbol { + Symbol::LiteralRun { start, end } => { + let mut groups = data[*start as usize..*end as usize].chunks_exact(4); + for group in &mut groups { + let code0 = codes[group[0] as usize] as u64; + let code1 = codes[group[1] as usize] as u64; + let code2 = codes[group[2] as usize] as u64; + let code3 = codes[group[3] as usize] as u64; + + let len0 = lengths[group[0] as usize]; + let len1 = lengths[group[1] as usize]; + let len2 = lengths[group[2] as usize]; + let len3 = lengths[group[3] as usize]; + + writer.write_bits( + code0 + | (code1 << len0) + | (code2 << (len0 + len1)) + | (code3 << (len0 + len1 + len2)), + len0 + len1 + len2 + len3, + )?; + } + + for &lit in groups.remainder() { + writer.write_bits(codes[lit as usize] as u64, lengths[lit as usize] as u8)?; + } + } + Symbol::Backref { + length, + distance, + dist_sym, + } => { + let sym = LENGTH_TO_SYMBOL[*length as usize - 3] as usize; + writer.write_bits(codes[sym] as u64, lengths[sym] as u8)?; + let len_extra = LENGTH_TO_LEN_EXTRA[*length as usize - 3]; + let extra = (((*length as u32) - 3) & BITMASKS[len_extra as usize]) as u64; + writer.write_bits(extra, len_extra)?; + + writer.write_bits( + dist_codes[*dist_sym as usize] as u64, + dist_lengths[*dist_sym as usize], + )?; + let dist_extra = DIST_SYM_TO_DIST_EXTRA[*dist_sym as usize]; + let extra = *distance - DIST_SYM_TO_DIST_BASE[*dist_sym as usize]; + + writer.write_bits(extra as u64, dist_extra)?; + } + } + } + writer.write_bits(codes[256] as u64, lengths[256])?; + Ok(()) +} + +enum CompressorInner { + Stored, + Fast(FastCompressor), + Medium(MediumCompressor), + Slow(SlowCompressor), +} +impl CompressorInner { + fn compress_data( + &mut self, + writer: &mut BitWriter, + data: &[u8], + eof: bool, + ) -> 
io::Result<()> { + match self { + Self::Stored => Self::compress_stored(writer, data, eof), + Self::Fast(inner) => inner.compress(writer, data), + Self::Medium(inner) => inner.compress(writer, data), + Self::Slow(inner) => inner.compress(writer, data), + } + } + + fn compress_stored( + writer: &mut BitWriter, + data: &[u8], + eof: bool, + ) -> io::Result<()> { + if data.is_empty() { + if eof { + // TODO: write empty final block + } + return Ok(()); + } + + let chunks = data.chunks(65535); + let last_chunk_index = chunks.len() - 1; + for (i, chunk) in chunks.into_iter().enumerate() { + if i == last_chunk_index { + writer.write_bits(1, 3)?; // final block + } else { + writer.write_bits(0, 3)?; // non-final block + } + writer.flush()?; + writer + .writer + .write_all(&(chunk.len() as u16).to_le_bytes())?; + writer + .writer + .write_all(&(!(chunk.len() as u16)).to_le_bytes())?; + writer.writer.write_all(chunk)?; + } + return Ok(()); + } +} + +const WINDOW_SIZE: usize = 32768; + +struct BitWriter { + buffer: u64, + nbits: u8, + writer: W, +} +impl BitWriter { + fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> { + debug_assert!(nbits <= 64); + + self.buffer |= bits << self.nbits; + self.nbits += nbits; + + if self.nbits >= 64 { + self.writer.write_all(&self.buffer.to_le_bytes())?; + self.nbits -= 64; + self.buffer = bits.checked_shr((nbits - self.nbits) as u32).unwrap_or(0); + } + debug_assert!(self.nbits < 64); + Ok(()) + } + + fn flush(&mut self) -> io::Result<()> { + if self.nbits % 8 != 0 { + self.write_bits(0, 8 - self.nbits % 8)?; + } + if self.nbits > 0 { + self.writer + .write_all(&self.buffer.to_le_bytes()[..self.nbits as usize / 8]) + .unwrap(); + self.buffer = 0; + self.nbits = 0; + } + Ok(()) + } +} + +/// Compressor that produces fdeflate compressed streams. +pub struct Compressor { + checksum: Adler32, + pending: Vec, + bit_writer: BitWriter, + inner: CompressorInner, +} +impl Compressor { + /// Create a new Compressor. + pub fn new(writer: W) -> io::Result { + Self::with_level(writer, 1) + } + + /// Create a new Compressor with the specified compression level. + pub fn with_level(mut writer: W, level: u8) -> io::Result { + writer.write_all(&[0x78, 0x01])?; // zlib header + + let inner = match level { + 0 => CompressorInner::Stored, + 1 => CompressorInner::Fast(FastCompressor::new(4)), + 2 => CompressorInner::Fast(FastCompressor::new(9)), + 3 => CompressorInner::Medium(MediumCompressor::new(6, 16, 6)), + 4 => CompressorInner::Medium(MediumCompressor::new(24, 32, 9)), + 5 => CompressorInner::Medium(MediumCompressor::new(32, 32, 9)), + 6 => CompressorInner::Medium(MediumCompressor::new(128, 128, 12)), + 7.. => CompressorInner::Slow(SlowCompressor::new()), + }; + + Ok(Self { + checksum: Adler32::new(), + bit_writer: BitWriter { + buffer: 0, + nbits: 0, + writer, + }, + pending: Vec::new(), + inner, + }) + } + + /// Write data to the compressor. + pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> { + self.checksum.write(data); + self.pending.extend_from_slice(data); + Ok(()) + } + + /// Write the remainder of the stream and return the inner writer. 
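+    ///
+    /// Data passed to `write_data` is only buffered; this call compresses the buffered bytes,
+    /// pads and flushes the final partial byte, and appends the big-endian Adler-32 checksum
+    /// that closes the zlib stream.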
+ pub fn finish(mut self) -> io::Result { + self.inner + .compress_data(&mut self.bit_writer, &self.pending, true)?; + + // Write end of block + self.bit_writer.flush()?; + + // Write Adler32 checksum + let checksum: u32 = self.checksum.finish(); + self.bit_writer + .writer + .write_all(checksum.to_be_bytes().as_ref()) + .unwrap(); + Ok(self.bit_writer.writer) + } +} + +/// Compressor that only writes the stored blocks. +/// +/// This is useful for writing files that are not compressed, but still need to be wrapped in a +/// zlib stream. +pub struct StoredOnlyCompressor { + writer: W, + checksum: Adler32, + block_bytes: u16, +} +impl StoredOnlyCompressor { + /// Creates a new `StoredOnlyCompressor` that writes to the given writer. + pub fn new(mut writer: W) -> io::Result { + writer.write_all(&[0x78, 0x01])?; // zlib header + writer.write_all(&[0; 5])?; // placeholder stored block header + + Ok(Self { + writer, + checksum: Adler32::new(), + block_bytes: 0, + }) + } + + fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> { + self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?; + self.writer.write_all(&[ + last as u8, + (size & 0xFF) as u8, + ((size >> 8) & 0xFF) as u8, + (!size & 0xFF) as u8, + ((!size >> 8) & 0xFF) as u8, + ])?; + self.writer.seek(SeekFrom::Current(size as i64))?; + + Ok(()) + } + + /// Writes the given data to the underlying writer. + pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> { + self.checksum.write(data); + while !data.is_empty() { + if self.block_bytes == u16::MAX { + self.set_block_header(u16::MAX, false)?; + self.writer.write_all(&[0; 5])?; // placeholder stored block header + self.block_bytes = 0; + } + + let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize); + self.writer.write_all(&data[..prefix_bytes])?; + self.block_bytes += prefix_bytes as u16; + data = &data[prefix_bytes..]; + } + + Ok(()) + } + + /// Finish writing the final block and return the underlying writer. + pub fn finish(mut self) -> io::Result { + self.set_block_header(self.block_bytes, true)?; + + // Write Adler32 checksum + let checksum: u32 = self.checksum.finish(); + self.writer + .write_all(checksum.to_be_bytes().as_ref()) + .unwrap(); + + Ok(self.writer) + } +} +impl StoredOnlyCompressor { + /// Return the number of bytes that will be written to the output stream + /// for the given input size. Because this compressor only writes stored blocks, + /// the output size is always slightly *larger* than the input size. + pub fn compressed_size(raw_size: usize) -> usize { + (raw_size.saturating_sub(1) / u16::MAX as usize) * (u16::MAX as usize + 5) + + (raw_size % u16::MAX as usize + 5) + + 6 + } +} + +/// Compresses the given data. +pub fn compress_to_vec(input: &[u8]) -> Vec { + compress_to_vec_with_level(input, 1) +} + +/// Compresses the given data with the specified compression level. 
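+///
+/// Level 0 emits stored (uncompressed) blocks, levels 1-2 use the hash-table match finder,
+/// levels 3-6 use the hash-chain match finder, and levels 7 and above use the binary-tree
+/// match finder. If the compressed stream would be larger than simply storing the input, the
+/// output falls back to stored blocks.
+///
+/// A minimal round-trip sketch:
+///
+/// ```
+/// let compressed = fdeflate::compress_to_vec_with_level(b"Hello world!", 6);
+/// let decompressed = fdeflate::decompress_to_vec(&compressed).unwrap();
+/// assert_eq!(decompressed, b"Hello world!");
+/// ```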
+pub fn compress_to_vec_with_level(input: &[u8], level: u8) -> Vec { + let mut compressor = + Compressor::with_level(Vec::with_capacity(input.len() / 4), level).unwrap(); + compressor.write_data(input).unwrap(); + let mut compressed = compressor.finish().unwrap(); + + if compressed.len() > StoredOnlyCompressor::>::compressed_size(input.len()) { + compressed.clear(); + let mut compressor = StoredOnlyCompressor::new(io::Cursor::new(compressed)).unwrap(); + compressor.write_data(input).unwrap(); + compressor.finish().unwrap().into_inner() + } else { + compressed + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::Rng; + + #[test] + fn test_distance_to_dist_sym() { + assert_eq!(distance_to_dist_sym(1), 0); + assert_eq!(distance_to_dist_sym(2), 1); + assert_eq!(distance_to_dist_sym(3), 2); + assert_eq!(distance_to_dist_sym(4), 3); + assert_eq!(distance_to_dist_sym(5), 4); + assert_eq!(distance_to_dist_sym(7), 5); + assert_eq!(distance_to_dist_sym(9), 6); + assert_eq!(distance_to_dist_sym(13), 7); + assert_eq!(distance_to_dist_sym(18), 8); + assert_eq!(distance_to_dist_sym(257), 16); + } + + fn roundtrip(data: &[u8]) { + let compressed = compress_to_vec(data); + //let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap(); + let decompressed = crate::decompress_to_vec(&compressed).unwrap(); + assert_eq!(&decompressed, data); + } + + #[test] + fn it_works() { + roundtrip(b"Hello world!"); + } + + #[test] + fn constant() { + roundtrip(&vec![0; 2048]); + roundtrip(&vec![5; 2048]); + roundtrip(&vec![128; 2048]); + roundtrip(&vec![254; 2048]); + } + + #[test] + fn random() { + let mut rng = rand::thread_rng(); + let mut data = vec![0; 2048]; + for _ in 0..10 { + for byte in &mut data { + *byte = rng.gen(); + } + roundtrip(&data); + } + } +} diff --git a/src/compress/slow.rs b/src/compress/slow.rs new file mode 100644 index 0000000..580cf2f --- /dev/null +++ b/src/compress/slow.rs @@ -0,0 +1,148 @@ +use std::io::{self, Write}; + +use super::{BitWriter, Symbol}; + +use super::bt_matchfinder::BTreeMatchFinder; + +pub(super) struct SlowCompressor { + match_finder: BTreeMatchFinder, + + min_match: u8, + skip_ahead_shift: u8, + search_depth: u16, + nice_length: u16, + max_lazy: u16, +} + +impl SlowCompressor { + pub fn new() -> Self { + Self { + match_finder: BTreeMatchFinder::new(3), + + min_match: 4, + skip_ahead_shift: 9, + search_depth: 64, + nice_length: 258, + max_lazy: 32, + } + } + + pub fn compress(&mut self, writer: &mut BitWriter, data: &[u8]) -> io::Result<()> { + let mut ip = 0; + + let mut length = 0; + let mut distance = 0; + let mut match_start = 0; + + while ip < data.len() { + let mut symbols = Vec::new(); + let mut num_symbols = 0; + + let mut last_match = ip; + 'outer: while symbols.len() < 16384 && ip + 8 < data.len() { + let current = u64::from_le_bytes(data[ip..][..8].try_into().unwrap()); + + if length == 0 { + // if current == 0 { + // while ip > last_match && data[ip - 1] == 0 { + // ip -= 1; + // } + + // if ip == 0 || data[ip - 1] != 0 { + // ip += 1; + // } + + // symbols.push(Symbol::LiteralRun { + // start: last_match as u32, + // end: ip as u32, + // }); + // num_symbols += ip - last_match; + + // let mut run_length = 0; + // while ip < data.len() && data[ip] == 0 && run_length < 258 { + // run_length += 1; + // ip += 1; + // } + + // symbols.push(Symbol::Backref { + // length: run_length as u16, + // distance: 1, + // dist_sym: 0, + // }); + // num_symbols += 1; + + // last_match = ip; + + // length = 0; + // continue; + // } + + 
(length, distance, match_start) = + self.match_finder.get_and_insert(&data, ip, current, 4); + } + + if length >= 3 { + if + /*match_start + length as usize > ip + 1 + && length < self.max_lazy + &&*/ + ip + length as usize + 9 <= data.len() { + ip += 1; + let (next_length, next_distance, next_match_start) = self + .match_finder + .get_and_insert(&data, ip, current >> 8, length + 1); + if next_length > 0 && match_start + 1 >= next_match_start { + assert!(next_length > length); + distance = next_distance; + length = next_length; + match_start = next_match_start; + continue; + } + } + assert!(last_match <= match_start); + + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: match_start as u32, + }); + num_symbols += match_start - last_match; + + symbols.push(Symbol::Backref { + length: length as u16, + distance, + dist_sym: super::distance_to_dist_sym(distance), + }); + num_symbols += 1; + + let match_end = match_start + length as usize; + + if match_end + 8 < data.len() { + for j in (ip + 1)..match_end { + let v = u64::from_le_bytes(data[j..][..8].try_into().unwrap()); + self.match_finder.insert(data, v, j); + } + } + + ip = match_end; + last_match = match_end; + + length = 0; + continue 'outer; + } + + ip += 1; + } + if data.len() <= ip + 8 { + symbols.push(Symbol::LiteralRun { + start: last_match as u32, + end: data.len() as u32, + }); + ip = data.len(); + } + + super::write_block(writer, data, &symbols, ip == data.len())?; + } + + Ok(()) + } +} diff --git a/src/compress.rs b/src/compress/ultrafast.rs similarity index 68% rename from src/compress.rs rename to src/compress/ultrafast.rs index b55116e..b7e8ac0 100644 --- a/src/compress.rs +++ b/src/compress/ultrafast.rs @@ -1,18 +1,23 @@ use simd_adler32::Adler32; -use std::io::{self, Seek, SeekFrom, Write}; +use std::io::{self, Write}; use crate::tables::{ BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, }; -/// Compressor that produces fdeflate compressed streams. -pub struct Compressor { +/// Very fast zlib compressor that trades compression ratio for speed. +/// +/// This compressor is designed to be fast and efficient for filtered PNG data pixel data, where it +/// is expected that there will be many long runs of zeros, and the rest of the data is mostly small +/// differences from the previous pixel. On data data that does not match this pattern, it may +/// produce output that is *larger* than the input. +pub struct UltraFastCompressor { checksum: Adler32, buffer: u64, nbits: u8, writer: W, } -impl Compressor { +impl UltraFastCompressor { fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> { debug_assert!(nbits <= 64); @@ -181,97 +186,17 @@ impl Compressor { } } -/// Compressor that only writes the stored blocks. -/// -/// This is useful for writing files that are not compressed, but still need to be wrapped in a -/// zlib stream. -pub struct StoredOnlyCompressor { - writer: W, - checksum: Adler32, - block_bytes: u16, -} -impl StoredOnlyCompressor { - /// Creates a new `StoredOnlyCompressor` that writes to the given writer. 
- pub fn new(mut writer: W) -> io::Result { - writer.write_all(&[0x78, 0x01])?; // zlib header - writer.write_all(&[0; 5])?; // placeholder stored block header - - Ok(Self { - writer, - checksum: Adler32::new(), - block_bytes: 0, - }) - } - - fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> { - self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?; - self.writer.write_all(&[ - last as u8, - (size & 0xFF) as u8, - ((size >> 8) & 0xFF) as u8, - (!size & 0xFF) as u8, - ((!size >> 8) & 0xFF) as u8, - ])?; - self.writer.seek(SeekFrom::Current(size as i64))?; - - Ok(()) - } - - /// Writes the given data to the underlying writer. - pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> { - self.checksum.write(data); - while !data.is_empty() { - if self.block_bytes == u16::MAX { - self.set_block_header(u16::MAX, false)?; - self.writer.write_all(&[0; 5])?; // placeholder stored block header - self.block_bytes = 0; - } - - let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize); - self.writer.write_all(&data[..prefix_bytes])?; - self.block_bytes += prefix_bytes as u16; - data = &data[prefix_bytes..]; - } - - Ok(()) - } - - /// Finish writing the final block and return the underlying writer. - pub fn finish(mut self) -> io::Result { - self.set_block_header(self.block_bytes, true)?; - - // Write Adler32 checksum - let checksum: u32 = self.checksum.finish(); - self.writer - .write_all(checksum.to_be_bytes().as_ref()) - .unwrap(); - - Ok(self.writer) - } -} -impl StoredOnlyCompressor { - /// Return the number of bytes that will be written to the output stream - /// for the given input size. Because this compressor only writes stored blocks, - /// the output size is always slightly *larger* than the input size. - pub fn compressed_size(raw_size: usize) -> usize { - (raw_size.saturating_sub(1) / u16::MAX as usize) * (u16::MAX as usize + 5) - + (raw_size % u16::MAX as usize + 5) - + 6 - } -} - -/// Compresses the given data. 
-pub fn compress_to_vec(input: &[u8]) -> Vec { - let mut compressor = Compressor::new(Vec::with_capacity(input.len() / 4)).unwrap(); - compressor.write_data(input).unwrap(); - compressor.finish().unwrap() -} - #[cfg(test)] mod tests { use super::*; use rand::Rng; + pub fn compress_to_vec(input: &[u8]) -> Vec { + let mut compressor = UltraFastCompressor::new(Vec::with_capacity(input.len() / 4)).unwrap(); + compressor.write_data(input).unwrap(); + compressor.finish().unwrap() + } + fn roundtrip(data: &[u8]) { let compressed = compress_to_vec(data); let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap(); diff --git a/src/decompress.rs b/src/decompress.rs index 2b853a0..48f09f5 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -1309,6 +1309,7 @@ mod tests { } #[test] + #[ignore] fn zero_length() { let mut compressed = crate::compress_to_vec(b"").to_vec(); diff --git a/src/lib.rs b/src/lib.rs index e273699..627dca1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,10 @@ mod decompress; mod huffman; mod tables; -pub use compress::{compress_to_vec, Compressor, StoredOnlyCompressor}; +pub use compress::{ + compress_to_vec, compress_to_vec_with_level, ultrafast::UltraFastCompressor, Compressor, + StoredOnlyCompressor, +}; pub use decompress::{ decompress_to_vec, decompress_to_vec_bounded, BoundedDecompressionError, DecompressionError, Decompressor, diff --git a/src/tables.rs b/src/tables.rs index 567565a..13069c1 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -19,6 +19,7 @@ pub(crate) const HUFFMAN_LENGTHS: [u8; 286] = [ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 9, ]; +#[allow(unused)] pub(crate) const HUFFMAN_CODES: [u16; 286] = match crate::compute_codes(&HUFFMAN_LENGTHS) { Some(codes) => codes, None => panic!("HUFFMAN_LENGTHS is invalid"),