Diffstat (limited to 'third_party/rust/deflate/src')
-rw-r--r--  third_party/rust/deflate/src/bit_reverse.rs            27
-rw-r--r--  third_party/rust/deflate/src/bitstream.rs             265
-rw-r--r--  third_party/rust/deflate/src/chained_hash_table.rs    374
-rw-r--r--  third_party/rust/deflate/src/checksum.rs               72
-rw-r--r--  third_party/rust/deflate/src/compress.rs              366
-rw-r--r--  third_party/rust/deflate/src/compression_options.rs   196
-rw-r--r--  third_party/rust/deflate/src/deflate_state.rs         145
-rw-r--r--  third_party/rust/deflate/src/encoder_state.rs         130
-rw-r--r--  third_party/rust/deflate/src/huffman_lengths.rs       396
-rw-r--r--  third_party/rust/deflate/src/huffman_table.rs        1676
-rw-r--r--  third_party/rust/deflate/src/input_buffer.rs          148
-rw-r--r--  third_party/rust/deflate/src/length_encode.rs        1487
-rw-r--r--  third_party/rust/deflate/src/lib.rs                   495
-rw-r--r--  third_party/rust/deflate/src/lz77.rs                 1304
-rw-r--r--  third_party/rust/deflate/src/lzvalue.rs               121
-rw-r--r--  third_party/rust/deflate/src/matching.rs              425
-rw-r--r--  third_party/rust/deflate/src/output_writer.rs         154
-rw-r--r--  third_party/rust/deflate/src/rle.rs                   107
-rw-r--r--  third_party/rust/deflate/src/stored_block.rs           97
-rw-r--r--  third_party/rust/deflate/src/test_utils.rs             79
-rw-r--r--  third_party/rust/deflate/src/writer.rs                663
-rw-r--r--  third_party/rust/deflate/src/zlib.rs                   87
22 files changed, 8814 insertions, 0 deletions
diff --git a/third_party/rust/deflate/src/bit_reverse.rs b/third_party/rust/deflate/src/bit_reverse.rs
new file mode 100644
index 0000000000..da78736877
--- /dev/null
+++ b/third_party/rust/deflate/src/bit_reverse.rs
@@ -0,0 +1,27 @@
+/// Reverse the first length bits of n.
+/// (Passing more than 16 as length will produce garbage.)
+pub fn reverse_bits(mut n: u16, length: u8) -> u16 {
+ debug_assert!(length <= 16);
+ // Borrowed from http://aggregate.org/MAGIC/#Bit%20Reversal
+ n = ((n & 0xaaaa) >> 1) | ((n & 0x5555) << 1);
+ n = ((n & 0xcccc) >> 2) | ((n & 0x3333) << 2);
+ n = ((n & 0xf0f0) >> 4) | ((n & 0x0f0f) << 4);
+ n = ((n & 0xff00) >> 8) | ((n & 0x00ff) << 8);
+ n >> (16 - length)
+}
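+
+// Worked example (illustrative, not part of the original source): reversing
+// the low 3 bits of 0b110 gives 0b011. The swap/mask steps above reverse all
+// 16 bits, and the final shift drops the low 16 - length bits, which
+// correspond to the unused high bits of the input.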
+
+#[cfg(test)]
+mod test {
+ use super::reverse_bits;
+ #[test]
+ fn test_bit_reverse() {
+ assert_eq!(reverse_bits(0b0111_0100, 8), 0b0010_1110);
+ assert_eq!(
+ reverse_bits(0b1100_1100_1100_1100, 16),
+ 0b0011_0011_0011_0011
+ );
+ // Check that we ignore >16 length
+ // We no longer check for this.
+ // assert_eq!(reverse_bits(0b11001100_11001100, 32), 0b0011001100110011);
+ }
+}
diff --git a/third_party/rust/deflate/src/bitstream.rs b/third_party/rust/deflate/src/bitstream.rs
new file mode 100644
index 0000000000..6aba35d4c1
--- /dev/null
+++ b/third_party/rust/deflate/src/bitstream.rs
@@ -0,0 +1,265 @@
+// This was originally based on code from: https://github.com/nwin/lzw
+// Copyright (c) 2015 nwin
+// which is under both Apache 2.0 and MIT
+
+//! This module provides a bit writer
+use std::io::{self, Write};
+
+#[cfg(target_pointer_width = "64")]
+#[macro_use]
+mod arch_dep {
+ /// The data type of the accumulator.
+ /// A 64-bit value allows us to store more before
+ /// each push to the vector, but is sub-optimal
+ /// on 32-bit platforms.
+ pub type AccType = u64;
+ pub const FLUSH_AT: u8 = 48;
+ /// Push pending bits to vector.
+ /// Using a macro here since an inline function
+ /// didn't optimise properly.
+ macro_rules! push{
+ ($s:ident) => {
+ let len = $s.w.len();
+ $s.w.reserve(6);
+ // Optimization:
+ //
+ // This is basically what `Vec::extend_from_slice` does, but it didn't inline
+ // properly, so we do it manually for now.
+ //
+ // # Unsafe
+ // We reserve enough space right before this, so setting the len manually and doing
+ // unchecked indexing is safe here since we only, and always, write to all of the
+ // uninitialized bytes of the vector.
+ unsafe {
+ $s.w.set_len(len + 6);
+ $s.w.get_unchecked_mut(len..).copy_from_slice(&[$s.acc as u8,
+ ($s.acc >> 8) as u8,
+ ($s.acc >> 16) as u8,
+ ($s.acc >> 24) as u8,
+ ($s.acc >> 32) as u8,
+ ($s.acc >> 40) as u8
+ ][..]);
+ }
+
+ };
+ }
+}
+#[cfg(not(target_pointer_width = "64"))]
+#[macro_use]
+mod arch_dep {
+ pub type AccType = u32;
+ pub const FLUSH_AT: u8 = 16;
+ macro_rules! push{
+ ($s:ident) => {
+ // Unlike the 64-bit case, using copy_from_slice seemed to worsen performance here.
+ // TODO: Needs benching on a 32-bit system to see what works best.
+ $s.w.push($s.acc as u8);
+ $s.w.push(($s.acc >> 8) as u8);
+ };
+ }
+}
+
+use self::arch_dep::*;
+
+/// Writes bits to a byte stream, LSB first.
+pub struct LsbWriter {
+ // Public for now so it can be replaced after initialization.
+ pub w: Vec<u8>,
+ bits: u8,
+ acc: AccType,
+}
+
+impl LsbWriter {
+ /// Creates a new bit writer
+ pub fn new(writer: Vec<u8>) -> LsbWriter {
+ LsbWriter {
+ w: writer,
+ bits: 0,
+ acc: 0,
+ }
+ }
+
+ pub fn pending_bits(&self) -> u8 {
+ self.bits
+ }
+
+ /// Buffer n bits, and write them to the vec if there are enough pending bits.
+ pub fn write_bits(&mut self, v: u16, n: u8) {
+ // NOTE: This outputs garbage data if n is 0, but v is not 0
+ self.acc |= (v as AccType) << self.bits;
+ self.bits += n;
+ // Waiting until we have FLUSH_AT bits and pushing them all in one batch.
+ while self.bits >= FLUSH_AT {
+ push!(self);
+ self.acc >>= FLUSH_AT;
+ self.bits -= FLUSH_AT;
+ }
+ }
+
+ fn write_bits_finish(&mut self, v: u16, n: u8) {
+ // NOTE: This outputs garbage data if n is 0, but v is not 0
+ self.acc |= (v as AccType) << self.bits;
+ self.bits += n % 8;
+ while self.bits >= 8 {
+ self.w.push(self.acc as u8);
+ self.acc >>= 8;
+ self.bits -= 8;
+ }
+ }
+
+ pub fn flush_raw(&mut self) {
+ let missing = FLUSH_AT - self.bits;
+ // Have to test for self.bits > 0 here,
+ // otherwise flush would output an extra byte when flush was called at a byte boundary
+ if missing > 0 && self.bits > 0 {
+ self.write_bits_finish(0, missing);
+ }
+ }
+}
+
+impl Write for LsbWriter {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ if self.acc == 0 {
+ self.w.extend_from_slice(buf)
+ } else {
+ for &byte in buf.iter() {
+ self.write_bits(byte as u16, 8)
+ }
+ }
+ Ok(buf.len())
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ self.flush_raw();
+ Ok(())
+ }
+}
+
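+// Illustrative usage sketch (not part of the original source): values are
+// packed starting from the least significant bit of each output byte, so
+// writing the 3-bit value 0b101 followed by the 2-bit value 0b01 and then
+// flushing produces the single byte 0b000_01_101.
+#[cfg(test)]
+fn _lsb_writer_example() {
+ let mut writer = LsbWriter::new(Vec::new());
+ writer.write_bits(0b101, 3);
+ writer.write_bits(0b01, 2);
+ writer.flush_raw();
+ assert_eq!(writer.w, vec![0b000_01_101u8]);
+}
+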
+#[cfg(test)]
+mod test {
+ use super::LsbWriter;
+
+ #[test]
+ fn write_bits() {
+ let input = [
+ (3, 3),
+ (10, 8),
+ (88, 7),
+ (0, 2),
+ (0, 5),
+ (0, 0),
+ (238, 8),
+ (126, 8),
+ (161, 8),
+ (10, 8),
+ (238, 8),
+ (174, 8),
+ (126, 8),
+ (174, 8),
+ (65, 8),
+ (142, 8),
+ (62, 8),
+ (10, 8),
+ (1, 8),
+ (161, 8),
+ (78, 8),
+ (62, 8),
+ (158, 8),
+ (206, 8),
+ (10, 8),
+ (64, 7),
+ (0, 0),
+ (24, 5),
+ (0, 0),
+ (174, 8),
+ (126, 8),
+ (193, 8),
+ (174, 8),
+ ];
+ let expected = [
+ 83,
+ 192,
+ 2,
+ 220,
+ 253,
+ 66,
+ 21,
+ 220,
+ 93,
+ 253,
+ 92,
+ 131,
+ 28,
+ 125,
+ 20,
+ 2,
+ 66,
+ 157,
+ 124,
+ 60,
+ 157,
+ 21,
+ 128,
+ 216,
+ 213,
+ 47,
+ 216,
+ 21,
+ ];
+ let mut writer = LsbWriter::new(Vec::new());
+ for v in input.iter() {
+ writer.write_bits(v.0, v.1);
+ }
+ writer.flush_raw();
+ assert_eq!(writer.w, expected);
+ }
+}
+
+
+#[cfg(all(test, feature = "benchmarks"))]
+mod bench {
+ use test_std::Bencher;
+ use super::LsbWriter;
+ #[bench]
+ fn bit_writer(b: &mut Bencher) {
+ let input = [
+ (3, 3),
+ (10, 8),
+ (88, 7),
+ (0, 2),
+ (0, 5),
+ (0, 0),
+ (238, 8),
+ (126, 8),
+ (161, 8),
+ (10, 8),
+ (238, 8),
+ (174, 8),
+ (126, 8),
+ (174, 8),
+ (65, 8),
+ (142, 8),
+ (62, 8),
+ (10, 8),
+ (1, 8),
+ (161, 8),
+ (78, 8),
+ (62, 8),
+ (158, 8),
+ (206, 8),
+ (10, 8),
+ (64, 7),
+ (0, 0),
+ (24, 5),
+ (0, 0),
+ (174, 8),
+ (126, 8),
+ (193, 8),
+ (174, 8),
+ ];
+ let mut writer = LsbWriter::new(Vec::with_capacity(100));
+ b.iter(|| for v in input.iter() {
+ let _ = writer.write_bits(v.0, v.1);
+ });
+ }
+}
diff --git a/third_party/rust/deflate/src/chained_hash_table.rs b/third_party/rust/deflate/src/chained_hash_table.rs
new file mode 100644
index 0000000000..fb2b0dfd6e
--- /dev/null
+++ b/third_party/rust/deflate/src/chained_hash_table.rs
@@ -0,0 +1,374 @@
+//use deflate_state::DebugCounter;
+use std::{mem, ptr};
+
+pub const WINDOW_SIZE: usize = 32768;
+pub const WINDOW_MASK: usize = WINDOW_SIZE - 1;
+#[cfg(test)]
+pub const HASH_BYTES: usize = 3;
+const HASH_SHIFT: u16 = 5;
+const HASH_MASK: u16 = WINDOW_MASK as u16;
+
+/// Helper struct to let us allocate both head and prev in the same block.
+struct Tables {
+ /// Starts of hash chains (in prev)
+ pub head: [u16; WINDOW_SIZE],
+ /// Link to the previous occurrence of this hash value
+ pub prev: [u16; WINDOW_SIZE],
+}
+
+impl Default for Tables {
+ #[inline]
+ fn default() -> Tables {
+ // # Unsafe
+ // This struct is not public and is only used in this module, and
+ // the values are immediately filled in after this struct is
+ // created.
+ unsafe {
+ Tables {
+ head: mem::uninitialized(),
+ prev: mem::uninitialized(),
+ }
+ }
+ }
+}
+
+impl Tables {
+ fn fill_prev(&mut self) {
+ assert_eq!(self.head.len(), self.prev.len());
+ // # Unsafe
+ //
+ // The arrays are created with the same length statically, so this should be safe.
+ // We use this rather than copy_from_slice as prev starts out uninitialized.
+ unsafe {
+ ptr::copy_nonoverlapping(self.head.as_ptr(), self.prev.as_mut_ptr(), self.prev.len())
+ }
+ }
+}
+
+/// Create and box the hash chains.
+fn create_tables() -> Box<Tables> {
+ // Using default here is a trick to get around the lack of box syntax on stable rust.
+ //
+ // Box::new([0u16; n]) ends up creating a temporary array on the stack which is not optimised
+ // away, but using default, which simply calls `box value` internally, allows us to get around this.
+ //
+ // We could use vec instead, but using a boxed array helps the compiler optimise
+ // away bounds checks as `n & WINDOW_MASK < WINDOW_SIZE` will always be true.
+ let mut t: Box<Tables> = Box::default();
+
+ for (n, b) in t.head.iter_mut().enumerate() {
+ // # Unsafe
+ //
+ // Using ptr::write here since the values are uninitialised.
+ // u16 is a primitive and doesn't implement drop, so this would be safe either way.
+ unsafe {
+ ptr::write(b, n as u16);
+ }
+ }
+
+ t.fill_prev();
+
+ t
+}
+
+/// Returns a new hash value based on the previous value and the next byte
+#[inline]
+pub fn update_hash(current_hash: u16, to_insert: u8) -> u16 {
+ update_hash_conf(current_hash, to_insert, HASH_SHIFT, HASH_MASK)
+}
+
+#[inline]
+fn update_hash_conf(current_hash: u16, to_insert: u8, shift: u16, mask: u16) -> u16 {
+ ((current_hash << shift) ^ (to_insert as u16)) & mask
+}
+
+#[inline]
+fn reset_array(arr: &mut [u16; WINDOW_SIZE]) {
+ for (n, b) in arr.iter_mut().enumerate() {
+ *b = n as u16;
+ }
+}
+
+pub struct ChainedHashTable {
+ // Current running hash value of the last 3 bytes
+ current_hash: u16,
+ // Hash chains.
+ c: Box<Tables>,
+ // Used for testing
+ // count: DebugCounter,
+}
+
+impl ChainedHashTable {
+ pub fn new() -> ChainedHashTable {
+ ChainedHashTable {
+ current_hash: 0,
+ c: create_tables(),
+ //count: DebugCounter::default(),
+ }
+ }
+
+ #[cfg(test)]
+ pub fn from_starting_values(v1: u8, v2: u8) -> ChainedHashTable {
+ let mut t = ChainedHashTable::new();
+ t.current_hash = update_hash(t.current_hash, v1);
+ t.current_hash = update_hash(t.current_hash, v2);
+ t
+ }
+
+ /// Resets the hash value and hash chains
+ pub fn reset(&mut self) {
+ self.current_hash = 0;
+ reset_array(&mut self.c.head);
+ // Reset the prev table to match the freshly reset head table.
+ reset_array(&mut self.c.prev);
+ /*if cfg!(debug_assertions) {
+ self.count.reset();
+ }*/
+ }
+
+ pub fn add_initial_hash_values(&mut self, v1: u8, v2: u8) {
+ self.current_hash = update_hash(self.current_hash, v1);
+ self.current_hash = update_hash(self.current_hash, v2);
+ }
+
+ /// Insert a byte into the hash table
+ #[inline]
+ pub fn add_hash_value(&mut self, position: usize, value: u8) {
+ // Check that all bytes are input in order and at the correct positions.
+ // Disabled for now as it breaks when sync flushing.
+ /*debug_assert_eq!(
+ position & WINDOW_MASK,
+ self.count.get() as usize & WINDOW_MASK
+ );*/
+ debug_assert!(
+ position < WINDOW_SIZE * 2,
+ "Position is larger than 2 * window size! {}",
+ position
+ );
+ // Storing the hash in a temporary variable here makes the compiler avoid the
+ // bounds checks in this function.
+ let new_hash = update_hash(self.current_hash, value);
+
+ self.add_with_hash(position, new_hash);
+
+ // Update the stored hash value with the new hash.
+ self.current_hash = new_hash;
+ }
+
+ /// Directly set the current hash value
+ #[inline]
+ pub fn set_hash(&mut self, hash: u16) {
+ self.current_hash = hash;
+ }
+
+ /// Update the tables directly, providing the hash.
+ #[inline]
+ pub fn add_with_hash(&mut self, position: usize, hash: u16) {
+ /*if cfg!(debug_assertions) {
+ self.count.add(1);
+ }*/
+
+ self.c.prev[position & WINDOW_MASK] = self.c.head[hash as usize];
+
+ // Ignoring any bits over 16 here is deliberate, as we only concern ourselves about
+ // where in the buffer (which is 64k bytes) we are referring to.
+ self.c.head[hash as usize] = position as u16;
+ }
+
+ // Get the head of the hash chain for the current hash value
+ #[cfg(test)]
+ #[inline]
+ pub fn current_head(&self) -> u16 {
+ self.c.head[self.current_hash as usize]
+ }
+
+ #[inline]
+ pub fn current_hash(&self) -> u16 {
+ self.current_hash
+ }
+
+ #[inline]
+ pub fn get_prev(&self, bytes: usize) -> u16 {
+ self.c.prev[bytes & WINDOW_MASK]
+ }
+
+ #[cfg(test)]
+ #[inline]
+ pub fn farthest_next(&self, match_pos: usize, match_len: usize) -> usize {
+ let to_check = match_len.saturating_sub(2);
+
+ let mut n = 0;
+ let mut smallest_prev =
+ self.get_prev(match_pos);
+ let mut smallest_pos = 0;
+ while n < to_check {
+ let prev =
+ self.get_prev(match_pos + n);
+ if prev < smallest_prev {
+ smallest_prev = prev;
+ smallest_pos = n;
+ }
+ n += 1;
+ }
+ smallest_pos
+ }
+
+ #[inline]
+ fn slide_value(b: u16, pos: u16, bytes: u16) -> u16 {
+ if b >= bytes {
+ b - bytes
+ } else {
+ pos
+ }
+ }
+
+ #[inline]
+ fn slide_table(table: &mut [u16; WINDOW_SIZE], bytes: u16) {
+ for (n, b) in table.iter_mut().enumerate() {
+ *b = ChainedHashTable::slide_value(*b, n as u16, bytes);
+ }
+ }
+
+ pub fn slide(&mut self, bytes: usize) {
+ /*if cfg!(debug_assertions) && bytes != WINDOW_SIZE {
+ // This should only happen in tests in this file.
+ self.count.reset();
+ }*/
+ ChainedHashTable::slide_table(&mut self.c.head, bytes as u16);
+ ChainedHashTable::slide_table(&mut self.c.prev, bytes as u16);
+ }
+}
+
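+// Illustrative sketch (not part of the original source): the rolling hash
+// covers the last HASH_BYTES (3) input bytes, `head` holds the most recent
+// position for each hash value, and `prev` links back to earlier positions
+// with the same hash.
+#[cfg(test)]
+fn _hash_chain_example() {
+ let data = b"abcabc";
+ let mut table = ChainedHashTable::from_starting_values(data[0], data[1]);
+ for (n, &b) in data[2..].iter().enumerate() {
+ table.add_hash_value(n, b);
+ }
+ // The chain head for the current hash ("abc") is the position of the second
+ // occurrence (3); following `prev` leads back to the first occurrence (0).
+ let head = table.current_head() as usize;
+ assert_eq!(table.get_prev(head), head as u16 - 3);
+}
+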
+#[cfg(test)]
+pub fn filled_hash_table(data: &[u8]) -> ChainedHashTable {
+ assert!(data.len() <= (WINDOW_SIZE * 2) + 2);
+ let mut hash_table = ChainedHashTable::from_starting_values(data[0], data[1]);
+ for (n, b) in data[2..].iter().enumerate() {
+ hash_table.add_hash_value(n, *b);
+ }
+ hash_table
+}
+
+#[cfg(test)]
+mod test {
+ use super::{filled_hash_table, ChainedHashTable};
+
+ #[test]
+ fn chained_hash() {
+ use std::str;
+
+ let test_string = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
+ eiusmod tempor. rum. incididunt ut labore et dolore magna aliqua. Ut \
+ enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi \
+ ut aliquip ex ea commodo consequat. rum. Duis aute irure dolor in \
+ reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
+ pariatur. Excepteur sint occaecat cupidatat non proident, sunt in \
+ culpa qui officia deserunt mollit anim id est laborum.";
+
+ let test_data = test_string.as_bytes();
+
+ let current_bytes = &test_data[test_data.len() - super::HASH_BYTES..test_data.len()];
+
+ let num_iters = test_string
+ .matches(str::from_utf8(current_bytes).unwrap())
+ .count();
+
+ let hash_table = filled_hash_table(test_data);
+
+ // Test that the positions in the chain are valid
+ let mut prev_value = hash_table.get_prev(hash_table.current_head() as usize) as usize;
+ let mut count = 0;
+ let mut current = hash_table.current_head() as usize;
+ while current != prev_value {
+ count += 1;
+ current = prev_value;
+ prev_value = hash_table.get_prev(prev_value) as usize;
+ }
+ // There should be at least as many occurrences of the hash of the checked bytes as the
+ // number of occurrences of the checked bytes themselves. As the hashes are not large enough
+ // to store 8 * 3 = 24 bits, there could be more with different input data.
+ assert!(count >= num_iters);
+ }
+
+ #[test]
+ fn table_unique() {
+ let mut test_data = Vec::new();
+ test_data.extend(0u8..255);
+ test_data.extend(255u8..0);
+ let hash_table = filled_hash_table(&test_data);
+ let prev_pos = hash_table.get_prev(hash_table.current_head() as usize);
+ // Since all sequences in the input are unique, there shouldn't be any previous values.
+ assert_eq!(prev_pos, hash_table.current_hash());
+ }
+
+ #[test]
+ fn table_slide() {
+ use std::fs::File;
+ use std::io::Read;
+
+ let window_size = super::WINDOW_SIZE;
+ let window_size16 = super::WINDOW_SIZE as u16;
+
+ let mut input = Vec::new();
+
+ let mut f = File::open("tests/pg11.txt").unwrap();
+
+ f.read_to_end(&mut input).unwrap();
+
+ let mut hash_table = filled_hash_table(&input[..window_size + 2]);
+
+ for (n, b) in input[2..window_size + 2].iter().enumerate() {
+ hash_table.add_hash_value(n + window_size, *b);
+ }
+
+ hash_table.slide(window_size);
+
+ {
+ let max_head = hash_table.c.head.iter().max().unwrap();
+ // After sliding there should be no hashes referring to values
+ // higher than the window size
+ assert!(*max_head < window_size16);
+ assert!(*max_head > 0);
+ let pos = hash_table.get_prev(hash_table.current_head() as usize);
+ // There should be a previous occurrence since we inserted the data 3 times
+ assert!(pos < window_size16);
+ assert!(pos > 0);
+ }
+
+ for (n, b) in input[2..(window_size / 2)].iter().enumerate() {
+ hash_table.add_hash_value(n + window_size, *b);
+ }
+
+ // There should be hashes referring to values in the upper part of the input window
+ // at this point
+ let max_prev = hash_table.c.prev.iter().max().unwrap();
+ assert!(*max_prev > window_size16);
+
+ let mut pos = hash_table.current_head();
+ // There should be a previous occurrence since we inserted the data 3 times
+ assert!(pos > window_size16);
+ let end_byte = input[(window_size / 2) - 1 - 2];
+ let mut iterations = 0;
+ while pos > window_size16 && iterations < 5000 {
+ assert_eq!(input[pos as usize & window_size - 1], end_byte);
+
+ pos = hash_table.get_prev(pos as usize);
+ iterations += 1;
+ }
+ }
+
+ #[test]
+ /// Ensure that the initial hash values are correct.
+ fn initial_chains() {
+ let t = ChainedHashTable::new();
+ for (n, &b) in t.c.head.iter().enumerate() {
+ assert_eq!(n, b as usize);
+ }
+ for (n, &b) in t.c.prev.iter().enumerate() {
+ assert_eq!(n, b as usize);
+ }
+ }
+}
diff --git a/third_party/rust/deflate/src/checksum.rs b/third_party/rust/deflate/src/checksum.rs
new file mode 100644
index 0000000000..e1885a210f
--- /dev/null
+++ b/third_party/rust/deflate/src/checksum.rs
@@ -0,0 +1,72 @@
+use adler32::RollingAdler32;
+
+pub trait RollingChecksum {
+ fn update(&mut self, byte: u8);
+ fn update_from_slice(&mut self, data: &[u8]);
+ fn current_hash(&self) -> u32;
+}
+
+pub struct NoChecksum {}
+
+impl NoChecksum {
+ pub fn new() -> NoChecksum {
+ NoChecksum {}
+ }
+}
+
+impl RollingChecksum for NoChecksum {
+ fn update(&mut self, _: u8) {}
+ fn update_from_slice(&mut self, _: &[u8]) {}
+ fn current_hash(&self) -> u32 {
+ 1
+ }
+}
+
+impl<'a> RollingChecksum for &'a mut NoChecksum {
+ fn update(&mut self, _: u8) {}
+ fn update_from_slice(&mut self, _: &[u8]) {}
+ fn current_hash(&self) -> u32 {
+ 1
+ }
+}
+
+pub struct Adler32Checksum {
+ adler32: RollingAdler32,
+}
+
+impl Adler32Checksum {
+ pub fn new() -> Adler32Checksum {
+ Adler32Checksum {
+ adler32: RollingAdler32::new(),
+ }
+ }
+}
+
+impl RollingChecksum for Adler32Checksum {
+ fn update(&mut self, byte: u8) {
+ self.adler32.update(byte);
+ }
+
+ fn update_from_slice(&mut self, data: &[u8]) {
+ self.adler32.update_buffer(data);
+ }
+
+ fn current_hash(&self) -> u32 {
+ self.adler32.hash()
+ }
+}
+
+
+impl<'a> RollingChecksum for &'a mut Adler32Checksum {
+ fn update(&mut self, byte: u8) {
+ self.adler32.update(byte);
+ }
+
+ fn update_from_slice(&mut self, data: &[u8]) {
+ self.adler32.update_buffer(data);
+ }
+
+ fn current_hash(&self) -> u32 {
+ self.adler32.hash()
+ }
+}
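+
+// Illustrative usage sketch (not part of the original source): the checksum is
+// fed the raw input bytes as they are consumed, and `current_hash` returns the
+// value that ends up in the zlib trailer. The Adler-32 of "abc" is 0x024d0127.
+#[cfg(test)]
+fn _adler32_example() {
+ let mut checksum = Adler32Checksum::new();
+ checksum.update_from_slice(b"abc");
+ assert_eq!(checksum.current_hash(), 0x024d_0127);
+}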
diff --git a/third_party/rust/deflate/src/compress.rs b/third_party/rust/deflate/src/compress.rs
new file mode 100644
index 0000000000..0cfee6e89b
--- /dev/null
+++ b/third_party/rust/deflate/src/compress.rs
@@ -0,0 +1,366 @@
+use std::io::Write;
+use std::io;
+
+use deflate_state::DeflateState;
+use encoder_state::EncoderState;
+use lzvalue::LZValue;
+use lz77::{lz77_compress_block, LZ77Status};
+use huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType};
+use bitstream::LsbWriter;
+use stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH};
+
+const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32;
+
+/// Flush mode to use when compressing input received in multiple steps.
+///
+/// (The more obscure ZLIB flush modes are not implemented.)
+#[derive(Eq, PartialEq, Debug, Copy, Clone)]
+pub enum Flush {
+ // Simply wait for more input when we are out of input data to process.
+ None,
+ // Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and
+ // outputting all pending data, and then outputs an empty stored block.
+ // (That is, the block header indicating a stored block followed by `0000FFFF`).
+ Sync,
+ _Partial,
+ _Block,
+ _Full,
+ // Finish compressing and output all remaining input.
+ Finish,
+}
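+
+// Illustrative note (not from the original source): the empty stored block
+// emitted for `Flush::Sync` is the 3 block header bits (BFINAL = 0,
+// BTYPE = 00) padded to a byte boundary, followed by LEN = 0x0000 and
+// NLEN = 0xFFFF, i.e. the `0000FFFF` marker mentioned above.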
+
+/// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish
+/// with the end of block code.
+pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) {
+ for &b in buffer {
+ state.write_lzvalue(b.value());
+ }
+ state.write_end_of_block()
+}
+
+/// Compress the input data using only fixed huffman codes.
+///
+/// Currently only used in tests.
+#[cfg(test)]
+pub fn compress_data_fixed(input: &[u8]) -> Vec<u8> {
+ use lz77::lz77_compress;
+
+ let mut state = EncoderState::fixed(Vec::new());
+ let compressed = lz77_compress(input).unwrap();
+
+ // We currently don't split blocks here (this function is just used for tests anyhow)
+ state.write_start_of_block(true, true);
+ flush_to_bitstream(&compressed, &mut state);
+
+ state.flush();
+ state.reset(Vec::new())
+}
+
+fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) {
+
+ // If the input is not empty, we write stored blocks for the input data.
+ if !input.is_empty() {
+ let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable();
+
+ while let Some(chunk) = i.next() {
+ let last_chunk = i.peek().is_none();
+ // Write the block header
+ write_stored_header(writer, final_block && last_chunk);
+
+ // Write the actual data.
+ compress_block_stored(chunk, &mut writer).expect("Write error");
+
+ }
+ } else {
+ // If the input length is zero, we output an empty block. This is used for syncing.
+ write_stored_header(writer, final_block);
+ compress_block_stored(&[], &mut writer).expect("Write error");
+ }
+}
+
+/// Inner compression function used by both the writers and the simple compression functions.
+pub fn compress_data_dynamic_n<W: Write>(
+ input: &[u8],
+ deflate_state: &mut DeflateState<W>,
+ flush: Flush,
+) -> io::Result<usize> {
+ let mut bytes_written = 0;
+
+ let mut slice = input;
+
+ loop {
+ let output_buf_len = deflate_state.output_buf().len();
+ let output_buf_pos = deflate_state.output_buf_pos;
+ // If the output buffer has too much data in it already, flush it before doing anything
+ // else.
+ if output_buf_len > LARGEST_OUTPUT_BUF_SIZE {
+ let written = deflate_state
+ .inner
+ .as_mut()
+ .expect("Missing writer!")
+ .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
+
+ if written < output_buf_len.checked_sub(output_buf_pos).unwrap() {
+ // Only some of the data was flushed, so keep track of where we were.
+ deflate_state.output_buf_pos += written;
+ } else {
+ // If we flushed all of the output, reset the output buffer.
+ deflate_state.output_buf_pos = 0;
+ deflate_state.output_buf().clear();
+ }
+
+ if bytes_written == 0 {
+ // If the buffer was already full when the function was called, this has to be
+ // returned rather than Ok(0) to indicate that we didn't write anything, but are
+ // not done yet.
+ return Err(io::Error::new(
+ io::ErrorKind::Interrupted,
+ "Internal buffer full.",
+ ));
+ } else {
+ return Ok(bytes_written);
+ }
+ }
+
+ if deflate_state.lz77_state.is_last_block() {
+ // The last block has already been written, so we don't have anything to compress.
+ break;
+ }
+
+ let (written, status, position) = lz77_compress_block(
+ slice,
+ &mut deflate_state.lz77_state,
+ &mut deflate_state.input_buffer,
+ &mut deflate_state.lz77_writer,
+ flush,
+ );
+
+ // Bytes written in this call
+ bytes_written += written;
+ // Total bytes written since the compression process started
+ // TODO: Should we realistically have to worry about overflowing here?
+ deflate_state.bytes_written += written as u64;
+
+ if status == LZ77Status::NeedInput {
+ // If we've consumed all the data input so far, and we're not
+ // finishing or syncing or ending the block here, simply return
+ // the number of bytes consumed so far.
+ return Ok(bytes_written);
+ }
+
+ // Increment start of input data
+ slice = &slice[written..];
+
+ // We need to check if this is the last block as the header will then be
+ // slightly different to indicate this.
+ let last_block = deflate_state.lz77_state.is_last_block();
+
+ let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes();
+
+ if cfg!(debug_assertions) {
+ deflate_state
+ .bytes_written_control
+ .add(current_block_input_bytes);
+ }
+
+ let partial_bits = deflate_state.encoder_state.writer.pending_bits();
+
+ let res = {
+ let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies();
+ let (l_lengths, d_lengths) =
+ deflate_state.encoder_state.huffman_table.get_lengths_mut();
+
+ gen_huffman_lengths(
+ l_freqs,
+ d_freqs,
+ current_block_input_bytes,
+ partial_bits,
+ l_lengths,
+ d_lengths,
+ &mut deflate_state.length_buffers,
+ )
+ };
+
+ // Check if we've actually managed to compress the input, and output stored blocks
+ // if not.
+ match res {
+ BlockType::Dynamic(header) => {
+ // Write the block header.
+ deflate_state
+ .encoder_state
+ .write_start_of_block(false, last_block);
+
+ // Output the lengths of the huffman codes used in this block.
+ write_huffman_lengths(
+ &header,
+ &deflate_state.encoder_state.huffman_table,
+ &mut deflate_state.length_buffers.length_buf,
+ &mut deflate_state.encoder_state.writer,
+ );
+
+ // Update the huffman codes that will be used to encode the
+ // lz77-compressed data.
+ deflate_state
+ .encoder_state
+ .huffman_table
+ .update_from_lengths();
+
+
+ // Write the huffman compressed data and the end of block marker.
+ flush_to_bitstream(
+ deflate_state.lz77_writer.get_buffer(),
+ &mut deflate_state.encoder_state,
+ );
+ }
+ BlockType::Fixed => {
+ // Write the block header for fixed code blocks.
+ deflate_state
+ .encoder_state
+ .write_start_of_block(true, last_block);
+
+ // Use the pre-defined static huffman codes.
+ deflate_state.encoder_state.set_huffman_to_fixed();
+
+ // Write the compressed data and the end of block marker.
+ flush_to_bitstream(
+ deflate_state.lz77_writer.get_buffer(),
+ &mut deflate_state.encoder_state,
+ );
+ }
+ BlockType::Stored => {
+ // If compression fails, output a stored block instead.
+
+ let start_pos = position.saturating_sub(current_block_input_bytes as usize);
+
+ assert!(
+ position >= current_block_input_bytes as usize,
+ "Error! Trying to output a stored block with forgotten data!\
+ if you encounter this error, please file an issue!"
+ );
+
+ write_stored_block(
+ &deflate_state.input_buffer.get_buffer()[start_pos..position],
+ &mut deflate_state.encoder_state.writer,
+ flush == Flush::Finish && last_block,
+ );
+ }
+ };
+
+ // Clear the current lz77 data in the writer for the next call.
+ deflate_state.lz77_writer.clear();
+ // We are done with the block, so we reset the number of bytes taken
+ // for the next one.
+ deflate_state.lz77_state.reset_input_bytes();
+
+ // We are done for now.
+ if status == LZ77Status::Finished {
+ // This flush mode means that there should be an empty stored block at the end.
+ if flush == Flush::Sync {
+ write_stored_block(&[], &mut deflate_state.encoder_state.writer, false);
+ } else if !deflate_state.lz77_state.is_last_block() {
+ // Make sure a block with the last block header has been output.
+ // Not sure this can actually happen, but we make sure to finish properly
+ // if it somehow does.
+ // An empty fixed block is the shortest.
+ let es = &mut deflate_state.encoder_state;
+ es.set_huffman_to_fixed();
+ es.write_start_of_block(true, true);
+ es.write_end_of_block();
+ }
+ break;
+ }
+ }
+
+ // If we reach this point, the remaining data in the buffers is to be flushed.
+ deflate_state.encoder_state.flush();
+ // Make sure we've output everything, and return the number of bytes written if everything
+ // went well.
+ let output_buf_pos = deflate_state.output_buf_pos;
+ let written_to_writer = deflate_state
+ .inner
+ .as_mut()
+ .expect("Missing writer!")
+ .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
+ if written_to_writer <
+ deflate_state
+ .output_buf()
+ .len()
+ .checked_sub(output_buf_pos)
+ .unwrap()
+ {
+ deflate_state.output_buf_pos += written_to_writer;
+ } else {
+ // If we successfully wrote all the data, we can clear the output buffer.
+ deflate_state.output_buf_pos = 0;
+ deflate_state.output_buf().clear();
+ }
+ Ok(bytes_written)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use test_utils::{get_test_data, decompress_to_end};
+
+ #[test]
+ /// Test compressing a short string using fixed encoding.
+ fn fixed_string_mem() {
+ let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
+ let compressed = compress_data_fixed(&test_data);
+
+ let result = decompress_to_end(&compressed);
+
+ assert_eq!(test_data, result);
+ }
+
+ #[test]
+ fn fixed_data() {
+ let data = vec![190u8; 400];
+ let compressed = compress_data_fixed(&data);
+ let result = decompress_to_end(&compressed);
+
+ assert_eq!(data, result);
+ }
+
+ /// Test deflate example.
+ ///
+ /// Check if the encoder produces the same code as the example given by Mark Adler here:
+ /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
+ #[test]
+ fn fixed_example() {
+ let test_data = b"Deflate late";
+ // let check =
+ // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0];
+ let check = [
+ 0x73,
+ 0x49,
+ 0x4d,
+ 0xcb,
+ 0x49,
+ 0x2c,
+ 0x49,
+ 0x55,
+ 0x00,
+ 0x11,
+ 0x00,
+ ];
+ let compressed = compress_data_fixed(test_data);
+ assert_eq!(&compressed, &check);
+ let decompressed = decompress_to_end(&compressed);
+ assert_eq!(&decompressed, test_data)
+ }
+
+ #[test]
+ /// Test compression from a file.
+ fn fixed_string_file() {
+ let input = get_test_data();
+
+ let compressed = compress_data_fixed(&input);
+ println!("Fixed codes compressed len: {}", compressed.len());
+ let result = decompress_to_end(&compressed);
+
+ assert_eq!(input.len(), result.len());
+ // Not using assert_eq here deliberately to avoid massive amounts of output spam.
+ assert!(input == result);
+ }
+}
diff --git a/third_party/rust/deflate/src/compression_options.rs b/third_party/rust/deflate/src/compression_options.rs
new file mode 100644
index 0000000000..6e823fa2cf
--- /dev/null
+++ b/third_party/rust/deflate/src/compression_options.rs
@@ -0,0 +1,196 @@
+//! This module contains the various options to tweak how compression is performed.
+//!
+//! Note that due to the nature of the `DEFLATE` format, lower compression levels
+//! may for some data compress better than higher compression levels.
+//!
+//! For applications where a maximum level of compression (irrespective of compression
+//! speed) is required, consider using the [`Zopfli`](https://crates.io/crates/zopfli)
+//! compressor, which uses a specialised (but slow) algorithm to figure out the maximum
+//! possible compression for the provided data.
+//!
+use lz77::MatchingType;
+use std::convert::From;
+
+pub const HIGH_MAX_HASH_CHECKS: u16 = 1768;
+pub const HIGH_LAZY_IF_LESS_THAN: u16 = 128;
+/// The maximum number of hash checks that makes sense, as this is the length
+/// of the hash chain.
+pub const MAX_HASH_CHECKS: u16 = 32 * 1024;
+pub const DEFAULT_MAX_HASH_CHECKS: u16 = 128;
+pub const DEFAULT_LAZY_IF_LESS_THAN: u16 = 32;
+
+/// An enum describing the level of compression to be used by the encoder
+///
+/// Higher compression ratios will take longer to encode.
+///
+/// This is a simplified interface to specify a compression level.
+///
+/// [See also `CompressionOptions`](./struct.CompressionOptions.html) which provides for
+/// tweaking the settings more finely.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum Compression {
+ /// Fast minimal compression (`CompressionOptions::fast()`).
+ Fast,
+ /// Default level (`CompressionOptions::default()`).
+ Default,
+ /// Higher compression level (`CompressionOptions::high()`).
+ ///
+ /// Best in this context isn't actually the highest possible level
+ /// the encoder can do, but is meant to emulate the `Best` setting in the `Flate2`
+ /// library.
+ Best,
+}
+
+impl Default for Compression {
+ fn default() -> Compression {
+ Compression::Default
+ }
+}
+
+/// Enum allowing some special options (not implemented yet)!
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+pub enum SpecialOptions {
+ /// Compress normally.
+ Normal,
+ /// Force fixed huffman tables. (Unimplemented!).
+ _ForceFixed,
+ /// Force stored (uncompressed) blocks only. (Unimplemented!).
+ _ForceStored,
+}
+
+impl Default for SpecialOptions {
+ fn default() -> SpecialOptions {
+ SpecialOptions::Normal
+ }
+}
+
+pub const DEFAULT_OPTIONS: CompressionOptions = CompressionOptions {
+ max_hash_checks: DEFAULT_MAX_HASH_CHECKS,
+ lazy_if_less_than: DEFAULT_LAZY_IF_LESS_THAN,
+ matching_type: MatchingType::Lazy,
+ special: SpecialOptions::Normal,
+};
+
+/// A struct describing the options for a compressor or compression function.
+///
+/// These values are not stable and still subject to change!
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub struct CompressionOptions {
+ /// The maximum number of checks to make in the hash table for matches.
+ ///
+ /// Higher numbers mean slower, but better compression. Very high (say `>1024`) values
+ /// will impact compression speed a lot. The maximum match length is 2^15, so values higher than
+ /// this won't make any difference, and will be truncated to 2^15 by the compression
+ /// function/writer.
+ ///
+ /// Default value: `128`
+ pub max_hash_checks: u16,
+ // pub _window_size: u16,
+ /// Only lazy match if we have a length less than this value.
+ ///
+ /// Higher values degrade compression slightly, but improve compression speed.
+ ///
+ /// * `0`: Never lazy match. (Same effect as setting MatchingType to greedy, but may be slower).
+ /// * `1...257`: Only check for a better match if the first match was shorter than this value.
+ /// * `258`: Always lazy match.
+ ///
+ /// As the maximum length of a match is `258`, values higher than this will have
+ /// no further effect.
+ ///
+ /// * Default value: `32`
+ pub lazy_if_less_than: u16,
+
+ // pub _decent_match: u16,
+ /// Whether to use lazy or greedy matching.
+ ///
+ /// Lazy matching will provide better compression, at the expense of compression speed.
+ ///
+ /// As a special case, if max_hash_checks is set to 0, and matching_type is set to lazy,
+ /// compression using only run-length encoding (i.e maximum match distance of 1) is performed.
+ /// (This may be changed in the future but is defined like this at the moment to avoid API
+ /// breakage.)
+ ///
+ /// [See `MatchingType`](./enum.MatchingType.html)
+ ///
+ /// * Default value: `MatchingType::Lazy`
+ pub matching_type: MatchingType,
+ /// Force fixed/stored blocks (Not implemented yet).
+ /// * Default value: `SpecialOptions::Normal`
+ pub special: SpecialOptions,
+}
+
+// Some standard profiles for the compression options.
+// Ord should be implemented at some point, but won't yet until the struct is stabilised.
+impl CompressionOptions {
+ /// Returns compression settings roughly corresponding to the `HIGH(9)` setting in miniz.
+ pub fn high() -> CompressionOptions {
+ CompressionOptions {
+ max_hash_checks: HIGH_MAX_HASH_CHECKS,
+ lazy_if_less_than: HIGH_LAZY_IF_LESS_THAN,
+ matching_type: MatchingType::Lazy,
+ special: SpecialOptions::Normal,
+ }
+ }
+
+ /// Returns a fast set of compression settings
+ ///
+ /// Ideally this should roughly correspond to the `FAST(1)` setting in miniz.
+ /// However, that setting makes miniz use a somewhat different algorithm,
+ /// so currently the fast level in this library is slower, but compresses better,
+ /// than the corresponding level in miniz.
+ pub fn fast() -> CompressionOptions {
+ CompressionOptions {
+ max_hash_checks: 1,
+ lazy_if_less_than: 0,
+ matching_type: MatchingType::Greedy,
+ special: SpecialOptions::Normal,
+ }
+ }
+
+ /// Returns a set of compression settings that makes the compressor only compress using
+ /// huffman coding. (Ignoring any length/distance matching)
+ ///
+ /// This will normally have the worst compression ratio (besides only using uncompressed data),
+ /// but may be the fastest method in some cases.
+ pub fn huffman_only() -> CompressionOptions {
+ CompressionOptions {
+ max_hash_checks: 0,
+ lazy_if_less_than: 0,
+ matching_type: MatchingType::Greedy,
+ special: SpecialOptions::Normal,
+ }
+ }
+
+ /// Returns a set of compression settings that makes the compressor compress only using
+ /// run-length encoding (i.e only looking for matches one byte back).
+ ///
+ /// This is very fast, but tends to compress worse than the slower settings, which look for
+ /// more matches using hash chains.
+ /// Works best on data that has runs of equivalent bytes, like binary or simple images,
+ /// and less well on text.
+ pub fn rle() -> CompressionOptions {
+ CompressionOptions {
+ max_hash_checks: 0,
+ lazy_if_less_than: 0,
+ matching_type: MatchingType::Lazy,
+ special: SpecialOptions::Normal,
+ }
+ }
+}
+
+impl Default for CompressionOptions {
+ /// Returns the options describing the default compression level.
+ fn default() -> CompressionOptions {
+ DEFAULT_OPTIONS
+ }
+}
+
+impl From<Compression> for CompressionOptions {
+ fn from(compression: Compression) -> CompressionOptions {
+ match compression {
+ Compression::Fast => CompressionOptions::fast(),
+ Compression::Default => CompressionOptions::default(),
+ Compression::Best => CompressionOptions::high(),
+ }
+ }
+}
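+
+// Illustrative sketch (not part of the original source): options can be taken
+// from one of the presets or via `From<Compression>`, or spelled out field by
+// field.
+#[cfg(test)]
+fn _options_example() -> CompressionOptions {
+ CompressionOptions {
+ max_hash_checks: 256,
+ lazy_if_less_than: 64,
+ matching_type: MatchingType::Lazy,
+ special: SpecialOptions::Normal,
+ }
+}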
diff --git a/third_party/rust/deflate/src/deflate_state.rs b/third_party/rust/deflate/src/deflate_state.rs
new file mode 100644
index 0000000000..28d18f1449
--- /dev/null
+++ b/third_party/rust/deflate/src/deflate_state.rs
@@ -0,0 +1,145 @@
+use std::io::Write;
+use std::{io, mem, cmp};
+
+use lz77::LZ77State;
+use output_writer::DynamicWriter;
+use encoder_state::EncoderState;
+use input_buffer::InputBuffer;
+use compression_options::{CompressionOptions, MAX_HASH_CHECKS};
+use compress::Flush;
+use length_encode::{LeafVec, EncodedLength};
+use huffman_table::NUM_LITERALS_AND_LENGTHS;
+pub use huffman_table::MAX_MATCH;
+
+/// A counter used for checking values in debug mode.
+/// Does nothing when debug assertions are disabled.
+#[derive(Default)]
+pub struct DebugCounter {
+ #[cfg(debug_assertions)]
+ count: u64,
+}
+
+impl DebugCounter {
+ #[cfg(debug_assertions)]
+ pub fn get(&self) -> u64 {
+ self.count
+ }
+
+ #[cfg(not(debug_assertions))]
+ pub fn get(&self) -> u64 {
+ 0
+ }
+
+ #[cfg(debug_assertions)]
+ pub fn reset(&mut self) {
+ self.count = 0;
+ }
+
+ #[cfg(not(debug_assertions))]
+ pub fn reset(&self) {}
+
+ #[cfg(debug_assertions)]
+ pub fn add(&mut self, val: u64) {
+ self.count += val;
+ }
+
+ #[cfg(not(debug_assertions))]
+ pub fn add(&self, _: u64) {}
+}
+
+pub struct LengthBuffers {
+ pub leaf_buf: LeafVec,
+ pub length_buf: Vec<EncodedLength>,
+}
+
+impl LengthBuffers {
+ #[inline]
+ fn new() -> LengthBuffers {
+ LengthBuffers {
+ leaf_buf: Vec::with_capacity(NUM_LITERALS_AND_LENGTHS),
+ length_buf: Vec::with_capacity(19),
+ }
+ }
+}
+
+/// A struct containing all the stored state used for the encoder.
+pub struct DeflateState<W: Write> {
+ /// State of lz77 compression.
+ pub lz77_state: LZ77State,
+ pub input_buffer: InputBuffer,
+ pub compression_options: CompressionOptions,
+ /// State for the huffman part of the compression and the output buffer.
+ pub encoder_state: EncoderState,
+ /// The buffer containing the raw output of the lz77-encoding.
+ pub lz77_writer: DynamicWriter,
+ /// Buffers used when generating huffman code lengths.
+ pub length_buffers: LengthBuffers,
+ /// Total number of bytes consumed/written to the input buffer.
+ pub bytes_written: u64,
+ /// Wrapped writer.
+ /// Option is used to allow us to implement `Drop` and `finish()` at the same time for the
+ /// writer structs.
+ pub inner: Option<W>,
+ /// The position in the output buffer where data should be flushed from, to keep track of
+ /// what data has been output in case not all data is output when writing to the wrapped
+ /// writer.
+ pub output_buf_pos: usize,
+ pub flush_mode: Flush,
+ /// Number of bytes written as calculated by sum of block input lengths.
+ /// Used to check that they are correct when `debug_assertions` are enabled.
+ pub bytes_written_control: DebugCounter,
+}
+
+impl<W: Write> DeflateState<W> {
+ pub fn new(compression_options: CompressionOptions, writer: W) -> DeflateState<W> {
+ DeflateState {
+ input_buffer: InputBuffer::empty(),
+ lz77_state: LZ77State::new(
+ compression_options.max_hash_checks,
+ cmp::min(compression_options.lazy_if_less_than, MAX_HASH_CHECKS),
+ compression_options.matching_type,
+ ),
+ encoder_state: EncoderState::new(Vec::with_capacity(1024 * 32)),
+ lz77_writer: DynamicWriter::new(),
+ length_buffers: LengthBuffers::new(),
+ compression_options: compression_options,
+ bytes_written: 0,
+ inner: Some(writer),
+ output_buf_pos: 0,
+ flush_mode: Flush::None,
+ bytes_written_control: DebugCounter::default(),
+ }
+ }
+
+ #[inline]
+ pub fn output_buf(&mut self) -> &mut Vec<u8> {
+ self.encoder_state.inner_vec()
+ }
+
+ /// Resets the status of the encoder, leaving the compression options intact.
+ ///
+ /// If flushing the current writer succeeds, it is replaced with the provided one,
+ /// buffers and status (except compression options) are reset and the old writer
+ /// is returned.
+ ///
+ /// If flushing fails, the state is not reset.
+ pub fn reset(&mut self, writer: W) -> io::Result<W> {
+ self.encoder_state.flush();
+ self.inner
+ .as_mut()
+ .expect("Missing writer!")
+ .write_all(self.encoder_state.inner_vec())?;
+ self.encoder_state.inner_vec().clear();
+ self.input_buffer = InputBuffer::empty();
+ self.lz77_writer.clear();
+ self.lz77_state.reset();
+ self.bytes_written = 0;
+ self.output_buf_pos = 0;
+ self.flush_mode = Flush::None;
+ if cfg!(debug_assertions) {
+ self.bytes_written_control.reset();
+ }
+ mem::replace(&mut self.inner, Some(writer))
+ .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Missing writer"))
+ }
+}
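+
+// Illustrative sketch (not part of the original source): the state owns the
+// wrapped writer, so constructing it for an in-memory output looks like this.
+#[cfg(test)]
+fn _deflate_state_example() -> DeflateState<Vec<u8>> {
+ DeflateState::new(CompressionOptions::default(), Vec::new())
+}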
diff --git a/third_party/rust/deflate/src/encoder_state.rs b/third_party/rust/deflate/src/encoder_state.rs
new file mode 100644
index 0000000000..639b7c60da
--- /dev/null
+++ b/third_party/rust/deflate/src/encoder_state.rs
@@ -0,0 +1,130 @@
+#[cfg(test)]
+use std::mem;
+use huffman_table::HuffmanTable;
+use bitstream::LsbWriter;
+use lzvalue::LZType;
+
+// The first bits of each block, which describe the type of the block
+// `-TTF` - TT = type, 00 = stored, 01 = fixed, 10 = dynamic, 11 = reserved, F - 1 if final block
+// `0000`;
+const FIXED_FIRST_BYTE: u16 = 0b010;
+const FIXED_FIRST_BYTE_FINAL: u16 = 0b011;
+const DYNAMIC_FIRST_BYTE: u16 = 0b100;
+const DYNAMIC_FIRST_BYTE_FINAL: u16 = 0b101;
+
+#[allow(dead_code)]
+pub enum BType {
+ NoCompression = 0b00,
+ FixedHuffman = 0b01,
+ DynamicHuffman = 0b10, // Reserved = 0b11, //Error
+}
+
+/// A struct wrapping a writer that writes data compressed using the provided huffman table
+pub struct EncoderState {
+ pub huffman_table: HuffmanTable,
+ pub writer: LsbWriter,
+}
+
+impl EncoderState {
+ /// Creates a new encoder state using the provided huffman table and writer
+ pub fn new(writer: Vec<u8>) -> EncoderState {
+ EncoderState {
+ huffman_table: HuffmanTable::empty(),
+ writer: LsbWriter::new(writer),
+ }
+ }
+
+ #[cfg(test)]
+ /// Creates a new encoder state using the fixed huffman table
+ pub fn fixed(writer: Vec<u8>) -> EncoderState {
+ EncoderState {
+ huffman_table: HuffmanTable::fixed_table(),
+ writer: LsbWriter::new(writer),
+ }
+ }
+
+ pub fn inner_vec(&mut self) -> &mut Vec<u8> {
+ &mut self.writer.w
+ }
+
+ /// Encodes a literal value to the writer
+ fn write_literal(&mut self, value: u8) {
+ let code = self.huffman_table.get_literal(value);
+ debug_assert!(code.length > 0);
+ self.writer.write_bits(code.code, code.length);
+ }
+
+ /// Write an LZ value to the contained writer.
+ pub fn write_lzvalue(&mut self, value: LZType) {
+ match value {
+ LZType::Literal(l) => self.write_literal(l),
+ LZType::StoredLengthDistance(l, d) => {
+ let (code, extra_bits_code) = self.huffman_table.get_length_huffman(l);
+ debug_assert!(
+ code.length != 0,
+ format!("Code: {:?}, Value: {:?}", code, value)
+ );
+ self.writer.write_bits(code.code, code.length);
+ self.writer
+ .write_bits(extra_bits_code.code, extra_bits_code.length);
+
+ let (code, extra_bits_code) = self.huffman_table.get_distance_huffman(d);
+ debug_assert!(
+ code.length != 0,
+ format!("Code: {:?}, Value: {:?}", code, value)
+ );
+
+ self.writer.write_bits(code.code, code.length);
+ self.writer
+ .write_bits(extra_bits_code.code, extra_bits_code.length)
+ }
+ };
+ }
+
+ /// Write the start of a block.
+ pub fn write_start_of_block(&mut self, fixed: bool, final_block: bool) {
+ if final_block {
+ // The final block has one bit flipped to indicate it's
+ // the final one
+ if fixed {
+ self.writer.write_bits(FIXED_FIRST_BYTE_FINAL, 3)
+ } else {
+ self.writer.write_bits(DYNAMIC_FIRST_BYTE_FINAL, 3)
+ }
+ } else if fixed {
+ self.writer.write_bits(FIXED_FIRST_BYTE, 3)
+ } else {
+ self.writer.write_bits(DYNAMIC_FIRST_BYTE, 3)
+ }
+ }
+
+ /// Write the end of block code
+ pub fn write_end_of_block(&mut self) {
+ let code = self.huffman_table.get_end_of_block();
+ self.writer.write_bits(code.code, code.length)
+ }
+
+ /// Flush the contained writer and its bitstream wrapper.
+ pub fn flush(&mut self) {
+ self.writer.flush_raw()
+ }
+
+ pub fn set_huffman_to_fixed(&mut self) {
+ self.huffman_table.set_to_fixed()
+ }
+
+ /// Reset the encoder state with a new writer, returning the old one if flushing
+ /// succeeds.
+ #[cfg(test)]
+ pub fn reset(&mut self, writer: Vec<u8>) -> Vec<u8> {
+ // Make sure the writer is flushed
+ // Ideally this should be done before this function is called, but we
+ // do it here just in case.
+ self.flush();
+ // Reset the huffman table
+ // This probably isn't needed, but again, we do it just in case to avoid leaking any data
+ // If this turns out to be a performance issue, it can probably be ignored later.
+ self.huffman_table = HuffmanTable::empty();
+ mem::replace(&mut self.writer.w, writer)
+ }
+}
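+
+// Illustrative sketch (not part of the original source): the three header bits
+// are written LSB first, so FIXED_FIRST_BYTE_FINAL (0b011) comes out as
+// BFINAL = 1 followed by BTYPE = 01 (fixed huffman codes).
+#[cfg(test)]
+fn _block_header_example() {
+ let mut state = EncoderState::fixed(Vec::new());
+ state.write_start_of_block(true, true);
+ state.flush();
+ // The three bits padded with zeroes up to a full byte.
+ assert_eq!(*state.inner_vec(), vec![0b0000_0011u8]);
+}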
diff --git a/third_party/rust/deflate/src/huffman_lengths.rs b/third_party/rust/deflate/src/huffman_lengths.rs
new file mode 100644
index 0000000000..f9e96169c3
--- /dev/null
+++ b/third_party/rust/deflate/src/huffman_lengths.rs
@@ -0,0 +1,396 @@
+use length_encode::{EncodedLength, encode_lengths_m, huffman_lengths_from_frequency_m,
+ COPY_PREVIOUS, REPEAT_ZERO_3_BITS, REPEAT_ZERO_7_BITS};
+use huffman_table::{HuffmanTable, create_codes_in_place, num_extra_bits_for_length_code,
+ num_extra_bits_for_distance_code, NUM_LITERALS_AND_LENGTHS,
+ NUM_DISTANCE_CODES, MAX_CODE_LENGTH, FIXED_CODE_LENGTHS, LENGTH_BITS_START};
+use bitstream::LsbWriter;
+use output_writer::FrequencyType;
+use stored_block::MAX_STORED_BLOCK_LENGTH;
+use deflate_state::LengthBuffers;
+
+use std::cmp;
+
+// The minimum number of literal/length values
+pub const MIN_NUM_LITERALS_AND_LENGTHS: usize = 257;
+// The minimum number of distances
+pub const MIN_NUM_DISTANCES: usize = 1;
+
+const NUM_HUFFMAN_LENGTHS: usize = 19;
+
+// The output ordering of the lengths for the huffman codes used to encode the lengths
+// used to build the full huffman tree for length/literal codes.
+// http://www.gzip.org/zlib/rfc-deflate.html#dyn
+const HUFFMAN_LENGTH_ORDER: [u8; NUM_HUFFMAN_LENGTHS] = [
+ 16,
+ 17,
+ 18,
+ 0,
+ 8,
+ 7,
+ 9,
+ 6,
+ 10,
+ 5,
+ 11,
+ 4,
+ 12,
+ 3,
+ 13,
+ 2,
+ 14,
+ 1,
+ 15,
+];
+
+// Number of bits used for the values specifying the number of codes
+const HLIT_BITS: u8 = 5;
+const HDIST_BITS: u8 = 5;
+const HCLEN_BITS: u8 = 4;
+
+// The longest a huffman code describing another huffman length can be
+const MAX_HUFFMAN_CODE_LENGTH: usize = 7;
+
+// How many bytes (not including padding and the 3-bit block type) the stored block header takes up.
+const STORED_BLOCK_HEADER_LENGTH: u64 = 4;
+const BLOCK_MARKER_LENGTH: u8 = 3;
+
+/// Creates a new slice from the input slice that stops at the final non-zero value
+pub fn remove_trailing_zeroes<T: From<u8> + PartialEq>(input: &[T], min_length: usize) -> &[T] {
+ let num_zeroes = input.iter().rev().take_while(|&a| *a == T::from(0)).count();
+ &input[0..cmp::max(input.len() - num_zeroes, min_length)]
+}
+
+/// How many extra bits the huffman length code uses to represent a value.
+fn extra_bits_for_huffman_length_code(code: u8) -> u8 {
+ match code {
+ 16...17 => 3,
+ 18 => 7,
+ _ => 0,
+ }
+}
+
+/// Calculate how many bits the huffman-encoded huffman lengths will use.
+fn calculate_huffman_length(frequencies: &[FrequencyType], code_lengths: &[u8]) -> u64 {
+ frequencies.iter().zip(code_lengths).enumerate().fold(
+ 0,
+ |acc, (n, (&f, &l))| {
+ acc +
+ (u64::from(f) *
+ (u64::from(l) + u64::from(extra_bits_for_huffman_length_code(n as u8))))
+ },
+ )
+}
+
+/// Calculate how many bits data with the given frequencies will use when compressed with dynamic
+/// code lengths (first return value) and static code lengths (second return value).
+///
+/// Parameters:
+/// Frequencies, length of dynamic codes, and a function to get how many extra bits in addition
+/// to the length of the huffman code the symbol will use.
+fn calculate_block_length<F>(
+ frequencies: &[FrequencyType],
+ dyn_code_lengths: &[u8],
+ get_num_extra_bits: &F,
+) -> (u64, u64)
+where
+ F: Fn(usize) -> u64,
+{
+ // Length of data represented by dynamic codes.
+ let mut d_ll_length = 0u64;
+ // length of data represented by static codes.
+ let mut s_ll_length = 0u64;
+
+ let iter = frequencies
+ .iter()
+ .zip(dyn_code_lengths.iter().zip(FIXED_CODE_LENGTHS.iter()))
+ .enumerate();
+
+ // This could maybe be optimised a bit by splitting the iteration of codes using extra bits and
+ // codes not using extra bits, but the extra complexity may not be worth it.
+ for (c, (&f, (&l, &fl))) in iter {
+ // Frequency
+ let f = u64::from(f);
+ // How many extra bits the current code number needs.
+ let extra_bits_for_code = get_num_extra_bits(c);
+
+ d_ll_length += f * (u64::from(l) + extra_bits_for_code);
+ s_ll_length += f * (u64::from(fl) + extra_bits_for_code);
+
+ }
+
+ (d_ll_length, s_ll_length)
+}
+
+/// Get how many extra padding bits a stored block would use after the block start header.
+///
+/// # Panics
+/// Panics if `pending_bits > 8`
+fn stored_padding(pending_bits: u8) -> u64 {
+ assert!(pending_bits <= 8);
+ let free_space = 8 - pending_bits;
+ if free_space >= BLOCK_MARKER_LENGTH {
+ // There is space in the current byte for the header.
+ free_space - BLOCK_MARKER_LENGTH
+ } else {
+ // The header will require an extra byte.
+ 8 - (BLOCK_MARKER_LENGTH - free_space)
+ }.into()
+}
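+
+// Worked example (illustrative, not from the original source): with 5 pending
+// bits there are exactly 3 free bits left in the current byte, enough for the
+// block header, so `stored_padding(5)` is 0. With 7 pending bits only 1 bit is
+// free, the header spills 2 bits into the next byte, and `stored_padding(7)`
+// is 8 - 2 = 6.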
+
+/// Calculate the number of bits storing the data in stored blocks will take up, excluding the
+/// first block start code and potential padding bits. As stored blocks have a maximum length,
+/// (as opposed to fixed and dynamic ones), multiple blocks may have to be utilised.
+///
+/// # Panics
+/// Panics if `input_bytes` is 0.
+fn stored_length(input_bytes: u64) -> u64 {
+ // Check how many stored blocks these bytes would take up.
+ // (Integer division rounding up.)
+ let num_blocks = (input_bytes
+ .checked_sub(1)
+ .expect("Underflow calculating stored block length!") /
+ MAX_STORED_BLOCK_LENGTH as u64) + 1;
+ // The length will be the input length and the headers for each block. (Excluding the start
+ // of block code for the first one)
+ (input_bytes + (STORED_BLOCK_HEADER_LENGTH as u64 * num_blocks) + (num_blocks - 1)) * 8
+}
+
+pub enum BlockType {
+ Stored,
+ Fixed,
+ Dynamic(DynamicBlockHeader),
+}
+
+/// A struct containing the different data needed to write the header for a dynamic block.
+///
+/// The code lengths are stored directly in the `HuffmanTable` struct.
+/// TODO: Do the same for other things here.
+pub struct DynamicBlockHeader {
+ /// Length of the run-length encoding symbols.
+ pub huffman_table_lengths: Vec<u8>,
+ /// Number of lengths for values describing the huffman table that encodes the length values
+ /// of the main huffman tables.
+ pub used_hclens: usize,
+}
+
+/// Generate the lengths of the huffman codes we will be using, using the
+/// frequency of the different symbols/lengths/distances, and determine what block type will give
+/// the shortest representation.
+/// TODO: This needs a test
+pub fn gen_huffman_lengths(
+ l_freqs: &[FrequencyType],
+ d_freqs: &[FrequencyType],
+ num_input_bytes: u64,
+ pending_bits: u8,
+ l_lengths: &mut [u8; 288],
+ d_lengths: &mut [u8; 32],
+ length_buffers: &mut LengthBuffers,
+) -> BlockType {
+ // Avoid corner cases and issues if this is called for an empty block.
+ // For blocks this short, a fixed block will be the shortest.
+ // TODO: Find the minimum value it's worth doing calculations for.
+ if num_input_bytes <= 4 {
+ return BlockType::Fixed;
+ };
+
+ let l_freqs = remove_trailing_zeroes(l_freqs, MIN_NUM_LITERALS_AND_LENGTHS);
+ let d_freqs = remove_trailing_zeroes(d_freqs, MIN_NUM_DISTANCES);
+
+ // The huffman spec allows us to exclude zeroes at the end of the
+ // table of huffman lengths.
+ // Since a frequency of 0 will give a huffman
+ // length of 0, we strip off the trailing zeroes before even
+ // generating the lengths to save some work.
+ // There is however a minimum number of values we have to keep
+ // according to the deflate spec.
+ // TODO: We could probably compute some of this in parallel.
+ huffman_lengths_from_frequency_m(
+ l_freqs,
+ MAX_CODE_LENGTH,
+ &mut length_buffers.leaf_buf,
+ l_lengths,
+ );
+ huffman_lengths_from_frequency_m(
+ d_freqs,
+ MAX_CODE_LENGTH,
+ &mut length_buffers.leaf_buf,
+ d_lengths,
+ );
+
+
+ let used_lengths = l_freqs.len();
+ let used_distances = d_freqs.len();
+
+ // Encode length values
+ let mut freqs = [0u16; 19];
+ encode_lengths_m(
+ l_lengths[..used_lengths]
+ .iter()
+ .chain(&d_lengths[..used_distances]),
+ &mut length_buffers.length_buf,
+ &mut freqs,
+ );
+
+ // Create huffman lengths for the length/distance code lengths
+ let mut huffman_table_lengths = vec![0; freqs.len()];
+ huffman_lengths_from_frequency_m(
+ &freqs,
+ MAX_HUFFMAN_CODE_LENGTH,
+ &mut length_buffers.leaf_buf,
+ huffman_table_lengths.as_mut_slice(),
+ );
+
+ // Count how many of these lengths we use.
+ let used_hclens = HUFFMAN_LENGTH_ORDER.len() -
+ HUFFMAN_LENGTH_ORDER
+ .iter()
+ .rev()
+ .take_while(|&&n| huffman_table_lengths[n as usize] == 0)
+ .count();
+
+ // There has to be at least 4 hclens, so if there isn't, something went wrong.
+ debug_assert!(used_hclens >= 4);
+
+ // Calculate how many bytes of space this block will take up with the different block types
+ // (excluding the 3-bit block header since it's used in all block types).
+
+ // Total length of the compressed literals/lengths.
+ let (d_ll_length, s_ll_length) = calculate_block_length(l_freqs, l_lengths, &|c| {
+ num_extra_bits_for_length_code(c.saturating_sub(LENGTH_BITS_START as usize) as u8).into()
+ });
+
+ // Total length of the compressed distances.
+ let (d_dist_length, s_dist_length) = calculate_block_length(d_freqs, d_lengths, &|c| {
+ num_extra_bits_for_distance_code(c as u8).into()
+ });
+
+ // Total length of the compressed huffman code lengths.
+ let huff_table_length = calculate_huffman_length(&freqs, &huffman_table_lengths);
+
+ // For dynamic blocks the huffman tables takes up some extra space.
+ let dynamic_length = d_ll_length + d_dist_length + huff_table_length +
+ (used_hclens as u64 * 3) + u64::from(HLIT_BITS) +
+ u64::from(HDIST_BITS) + u64::from(HCLEN_BITS);
+
+ // Static blocks don't have any extra header data.
+ let static_length = s_ll_length + s_dist_length;
+
+ // Calculate how many bits it will take to store the data in uncompressed (stored) block(s).
+ let stored_length = stored_length(num_input_bytes) + stored_padding(pending_bits % 8);
+
+ let used_length = cmp::min(cmp::min(dynamic_length, static_length), stored_length);
+
+ // Check if the block is actually compressed. If using a dynamic block
+ // increases the length of the block (for instance if the input data is mostly random or
+ // already compressed), we want to output a stored(uncompressed) block instead to avoid wasting
+ // space.
+ if used_length == static_length {
+ BlockType::Fixed
+ } else if used_length == stored_length {
+ BlockType::Stored
+ } else {
+ BlockType::Dynamic(DynamicBlockHeader {
+ huffman_table_lengths: huffman_table_lengths,
+ used_hclens: used_hclens,
+ })
+ }
+}
+
+/// Write the specified huffman lengths to the bit writer
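+/// (A short summary of the layout, per RFC 1951 section 3.2.7: first HLIT, HDIST and HCLEN,
+/// then the 3-bit code lengths for the code length alphabet in `HUFFMAN_LENGTH_ORDER` order,
+/// and finally the run-length encoded lengths for the literal/length and distance tables.)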
+pub fn write_huffman_lengths(
+ header: &DynamicBlockHeader,
+ huffman_table: &HuffmanTable,
+ encoded_lengths: &[EncodedLength],
+ writer: &mut LsbWriter,
+) {
+ // Ignore trailing zero lengths as allowed by the deflate spec.
+ let (literal_len_lengths, distance_lengths) = huffman_table.get_lengths();
+ let literal_len_lengths =
+ remove_trailing_zeroes(literal_len_lengths, MIN_NUM_LITERALS_AND_LENGTHS);
+ let distance_lengths = remove_trailing_zeroes(distance_lengths, MIN_NUM_DISTANCES);
+ let huffman_table_lengths = &header.huffman_table_lengths;
+ let used_hclens = header.used_hclens;
+
+ assert!(literal_len_lengths.len() <= NUM_LITERALS_AND_LENGTHS);
+ assert!(literal_len_lengths.len() >= MIN_NUM_LITERALS_AND_LENGTHS);
+ assert!(distance_lengths.len() <= NUM_DISTANCE_CODES);
+ assert!(distance_lengths.len() >= MIN_NUM_DISTANCES);
+
+ // Number of length codes - 257.
+ let hlit = (literal_len_lengths.len() - MIN_NUM_LITERALS_AND_LENGTHS) as u16;
+ writer.write_bits(hlit, HLIT_BITS);
+ // Number of distance codes - 1.
+ let hdist = (distance_lengths.len() - MIN_NUM_DISTANCES) as u16;
+ writer.write_bits(hdist, HDIST_BITS);
+
+ // Number of huffman table lengths - 4.
+ let hclen = used_hclens.saturating_sub(4);
+
+ // Write HCLEN.
+ // Casting to u16 is safe since the length can never be more than the length of
+ // `HUFFMAN_LENGTH_ORDER` anyhow.
+ writer.write_bits(hclen as u16, HCLEN_BITS);
+
+ // Write the lengths for the huffman table describing the huffman table
+ // Each length is 3 bits
+ for n in &HUFFMAN_LENGTH_ORDER[..used_hclens] {
+ writer.write_bits(huffman_table_lengths[usize::from(*n)] as u16, 3);
+ }
+
+ // Generate codes for the main huffman table using the lengths we just wrote
+ let mut codes = [0u16; NUM_HUFFMAN_LENGTHS];
+ create_codes_in_place(&mut codes[..], huffman_table_lengths);
+
+ // Write the actual huffman lengths
+ for v in encoded_lengths {
+ match *v {
+ EncodedLength::Length(n) => {
+ let (c, l) = (codes[usize::from(n)], huffman_table_lengths[usize::from(n)]);
+ writer.write_bits(c, l);
+ }
+ EncodedLength::CopyPrevious(n) => {
+ let (c, l) = (codes[COPY_PREVIOUS], huffman_table_lengths[COPY_PREVIOUS]);
+ writer.write_bits(c, l);
+ debug_assert!(n >= 3);
+ debug_assert!(n <= 6);
+ writer.write_bits((n - 3).into(), 2);
+ }
+ EncodedLength::RepeatZero3Bits(n) => {
+ let (c, l) = (
+ codes[REPEAT_ZERO_3_BITS],
+ huffman_table_lengths[REPEAT_ZERO_3_BITS],
+ );
+ writer.write_bits(c, l);
+ debug_assert!(n >= 3);
+ writer.write_bits((n - 3).into(), 3);
+ }
+ EncodedLength::RepeatZero7Bits(n) => {
+ let (c, l) = (
+ codes[REPEAT_ZERO_7_BITS],
+ huffman_table_lengths[REPEAT_ZERO_7_BITS],
+ );
+ writer.write_bits(c, l);
+ debug_assert!(n >= 11);
+ debug_assert!(n <= 138);
+ writer.write_bits((n - 11).into(), 7);
+ }
+ }
+ }
+}
+
+
+#[cfg(test)]
+mod test {
+ use super::stored_padding;
+ #[test]
+ fn padding() {
+ assert_eq!(stored_padding(0), 5);
+ assert_eq!(stored_padding(1), 4);
+ assert_eq!(stored_padding(2), 3);
+ assert_eq!(stored_padding(3), 2);
+ assert_eq!(stored_padding(4), 1);
+ assert_eq!(stored_padding(5), 0);
+ assert_eq!(stored_padding(6), 7);
+ assert_eq!(stored_padding(7), 6);
+ }
+}
diff --git a/third_party/rust/deflate/src/huffman_table.rs b/third_party/rust/deflate/src/huffman_table.rs
new file mode 100644
index 0000000000..6d4ca02866
--- /dev/null
+++ b/third_party/rust/deflate/src/huffman_table.rs
@@ -0,0 +1,1676 @@
+use std::fmt;
+use bit_reverse::reverse_bits;
+use lzvalue::StoredLength;
+
+// The number of length codes in the huffman table
+pub const NUM_LENGTH_CODES: usize = 29;
+
+// The number of distance codes in the distance huffman table
+// NOTE: two more codes are actually used when constructing codes
+pub const NUM_DISTANCE_CODES: usize = 30;
+
+// Combined number of literal and length codes
+// NOTE: two more codes are actually used when constructing codes
+pub const NUM_LITERALS_AND_LENGTHS: usize = 286;
+
+
+// The maximum length of a huffman code
+pub const MAX_CODE_LENGTH: usize = 15;
+
+// The minimum and maximum lengths for a match according to the DEFLATE specification
+pub const MIN_MATCH: u16 = 3;
+pub const MAX_MATCH: u16 = 258;
+
+pub const MIN_DISTANCE: u16 = 1;
+pub const MAX_DISTANCE: u16 = 32768;
+
+
+// The position in the literal/length table of the end of block symbol
+pub const END_OF_BLOCK_POSITION: usize = 256;
+
+// Bit lengths for literal and length codes in the fixed huffman table
+// The huffman codes are generated from this and the distance bit length table
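+// (Summarised: symbols 0-143 use 8 bits, 144-255 use 9 bits, 256-279 use 7 bits and
+// 280-287 use 8 bits, as specified in RFC 1951 section 3.2.6.)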
+pub static FIXED_CODE_LENGTHS: [u8; NUM_LITERALS_AND_LENGTHS + 2] = [
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 7,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+];
+
+
+
+// The number of extra bits for the length codes
+const LENGTH_EXTRA_BITS_LENGTH: [u8; NUM_LENGTH_CODES] = [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 2,
+ 2,
+ 2,
+ 2,
+ 3,
+ 3,
+ 3,
+ 3,
+ 4,
+ 4,
+ 4,
+ 4,
+ 5,
+ 5,
+ 5,
+ 5,
+ 0,
+];
+
+// Table used to get a code from a length value (see get_length_code_and_extra_bits)
+const LENGTH_CODE: [u8; 256] = [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 8,
+ 9,
+ 9,
+ 10,
+ 10,
+ 11,
+ 11,
+ 12,
+ 12,
+ 12,
+ 12,
+ 13,
+ 13,
+ 13,
+ 13,
+ 14,
+ 14,
+ 14,
+ 14,
+ 15,
+ 15,
+ 15,
+ 15,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 17,
+ 17,
+ 17,
+ 17,
+ 17,
+ 17,
+ 17,
+ 17,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 19,
+ 19,
+ 19,
+ 19,
+ 19,
+ 19,
+ 19,
+ 19,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 20,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 21,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 28,
+];
+
+// Base values to calculate the value of the bits in length codes
+const BASE_LENGTH: [u8; NUM_LENGTH_CODES] = [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 10,
+ 12,
+ 14,
+ 16,
+ 20,
+ 24,
+ 28,
+ 32,
+ 40,
+ 48,
+ 56,
+ 64,
+ 80,
+ 96,
+ 112,
+ 128,
+ 160,
+ 192,
+ 224,
+ 255,
+]; // 258 - MIN_MATCH
+
+// What number in the literal/length table the lengths start at
+pub const LENGTH_BITS_START: u16 = 257;
+
+// Lengths for the distance codes in the pre-defined/fixed huffman table
+// (All distance codes are 5 bits long)
+pub const FIXED_CODE_LENGTHS_DISTANCE: [u8; NUM_DISTANCE_CODES + 2] = [5; NUM_DISTANCE_CODES + 2];
+
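+// Lookup table for distance codes: the first 256 entries are indexed by `distance - 1`
+// for distances 1-256, while the remaining entries are indexed by
+// `256 + ((distance - 1) >> 7)` for distances 257-32768 (see `get_distance_code`).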
+const DISTANCE_CODES: [u8; 512] = [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 4,
+ 5,
+ 5,
+ 6,
+ 6,
+ 6,
+ 6,
+ 7,
+ 7,
+ 7,
+ 7,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 9,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 11,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 12,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 13,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 14,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 15,
+ 0,
+ 0,
+ 16,
+ 17,
+ 18,
+ 18,
+ 19,
+ 19,
+ 20,
+ 20,
+ 20,
+ 20,
+ 21,
+ 21,
+ 21,
+ 21,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 22,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 23,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 24,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 25,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 26,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 27,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 28,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+ 29,
+];
+
+// Number of extra bits following the distance codes
+#[cfg(test)]
+const DISTANCE_EXTRA_BITS: [u8; NUM_DISTANCE_CODES] = [
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ 2,
+ 2,
+ 3,
+ 3,
+ 4,
+ 4,
+ 5,
+ 5,
+ 6,
+ 6,
+ 7,
+ 7,
+ 8,
+ 8,
+ 9,
+ 9,
+ 10,
+ 10,
+ 11,
+ 11,
+ 12,
+ 12,
+ 13,
+ 13,
+];
+
+const DISTANCE_BASE: [u16; NUM_DISTANCE_CODES] = [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 6,
+ 8,
+ 12,
+ 16,
+ 24,
+ 32,
+ 48,
+ 64,
+ 96,
+ 128,
+ 192,
+ 256,
+ 384,
+ 512,
+ 768,
+ 1024,
+ 1536,
+ 2048,
+ 3072,
+ 4096,
+ 6144,
+ 8192,
+ 12288,
+ 16384,
+ 24576,
+];
+
+pub fn num_extra_bits_for_length_code(code: u8) -> u8 {
+ LENGTH_EXTRA_BITS_LENGTH[code as usize]
+}
+
+/// Get the number of extra bits used for a distance code.
+/// (Code numbers above `NUM_DISTANCE_CODES` will give some garbage
+/// value.)
+pub fn num_extra_bits_for_distance_code(code: u8) -> u8 {
+ // This can be easily calculated without a lookup.
+ //
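+ // Codes 0-3 have no extra bits, and from code 4 onward each pair of codes uses one more
+ // extra bit, i.e. extra = max(code / 2 - 1, 0), which the branchless code below computes.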
+ let mut c = code >> 1;
+ c -= (c != 0) as u8;
+ c
+}
+
+/// A struct representing the data needed to generate the bit codes for
+/// a given value and huffman table.
+#[derive(Copy, Clone)]
+struct ExtraBits {
+ // The position of the length in the huffman table.
+ pub code_number: u16,
+ // Number of extra bits following the code.
+ pub num_bits: u8,
+ // The value of the extra bits, which together with the length/distance code
+ // allow us to calculate the exact length/distance.
+ pub value: u16,
+}
+
+/// Get the length code that corresponds to the length value
+/// Panics if length is out of range.
+pub fn get_length_code(length: u16) -> usize {
+ // Going via a u8 here helps the compiler elide the bounds check.
+ usize::from(LENGTH_CODE[(length.wrapping_sub(MIN_MATCH)) as u8 as usize]) +
+ LENGTH_BITS_START as usize
+}
+
+/// Get the code for the huffman table and the extra bits for the requested length.
+fn get_length_code_and_extra_bits(length: StoredLength) -> ExtraBits {
+ // Length values are stored as unsigned bytes, where the actual length is the value - 3
+ // The `StoredLength` struct takes care of this conversion for us.
+ let n = LENGTH_CODE[length.stored_length() as usize];
+
+ // We can then get the base length from the base length table,
+ // which we use to calculate the value of the extra bits.
+ let base = BASE_LENGTH[n as usize];
+ let num_bits = num_extra_bits_for_length_code(n);
+ ExtraBits {
+ code_number: u16::from(n) + LENGTH_BITS_START,
+ num_bits: num_bits,
+ value: (length.stored_length() - base).into(),
+ }
+
+}
+
+/// Get the spot in the huffman table for distances that `distance` corresponds to.
+/// Returns 0 if the distance is invalid (0 is currently used as a stand-in for `None`).
+/// (Avoiding `Option` here for simplicity and performance, as this being called with an invalid
+/// value would be a bug.)
+pub fn get_distance_code(distance: u16) -> u8 {
+ let distance = distance as usize;
+
+ match distance {
+ // Since the array starts at 0, we need to subtract 1 to get the correct code number.
+ 1...256 => DISTANCE_CODES[distance - 1],
+ // Due to the distribution of the distance codes above 256, we can get away with only
+ // using the top bits to determine the code, rather than having a 32k long table of
+ // distance codes.
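+ // (Each distance code above 256 covers at least 128 consecutive distances, so
+ // dividing `distance - 1` into buckets of 128 is enough to pick the right code.)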
+ 257...32768 => DISTANCE_CODES[256 + ((distance - 1) >> 7)],
+ _ => 0,
+ }
+}
+
+
+fn get_distance_code_and_extra_bits(distance: u16) -> ExtraBits {
+ let distance_code = get_distance_code(distance);
+ let extra = num_extra_bits_for_distance_code(distance_code);
+ // FIXME: We should add 1 to the values in distance_base to avoid having to add one here
+ let base = DISTANCE_BASE[distance_code as usize] + 1;
+ ExtraBits {
+ code_number: distance_code.into(),
+ num_bits: extra,
+ value: distance - base,
+ }
+}
+
+#[derive(Copy, Clone, Default)]
+pub struct HuffmanCode {
+ pub code: u16,
+ pub length: u8,
+}
+
+impl fmt::Debug for HuffmanCode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ f,
+ "HuffmanCode {{ code: {:b}, length: {}}}",
+ self.code,
+ self.length
+ )
+ }
+}
+
+impl HuffmanCode {
+ #[inline]
+ /// Create a huffman code value from a code and length.
+ fn new(code: u16, length: u8) -> HuffmanCode {
+ HuffmanCode {
+ code: code,
+ length: length,
+ }
+ }
+}
+
+#[cfg(test)]
+pub struct LengthAndDistanceBits {
+ pub length_code: HuffmanCode,
+ pub length_extra_bits: HuffmanCode,
+ pub distance_code: HuffmanCode,
+ pub distance_extra_bits: HuffmanCode,
+}
+
+/// Counts the number of values of each length.
+/// Returns a tuple containing the longest length value in the table and its position,
+/// and fills in the counts for each length in the `len_counts` slice.
+/// Panics if `table` is empty, or if any of the lengths exceed 15.
+fn build_length_count_table(table: &[u8], len_counts: &mut [u16; 16]) -> (usize, usize) {
+ // TODO: Validate the length table properly in debug mode.
+ let max_length = (*table.iter().max().expect("BUG! Empty lengths!")).into();
+
+ assert!(max_length <= MAX_CODE_LENGTH);
+
+ let mut max_length_pos = 0;
+
+ for (n, &length) in table.iter().enumerate() {
+ // TODO: Make sure we don't have more of one length than we can make
+ // codes for
+ if length > 0 {
+ len_counts[usize::from(length)] += 1;
+ max_length_pos = n;
+ }
+ }
+ (max_length, max_length_pos)
+}
+
+/// Generates the huffman codes for the given table of bit lengths, writing them into `code_table`.
+/// Panics if any of the lengths are > 15.
+pub fn create_codes_in_place(code_table: &mut [u16], length_table: &[u8]) {
+ let mut len_counts = [0; 16];
+ let (max_length, max_length_pos) = build_length_count_table(length_table, &mut len_counts);
+ let lengths = len_counts;
+
+ let mut code = 0u16;
+ let mut next_code = Vec::with_capacity(length_table.len());
+ next_code.push(code);
+
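+ // Compute the smallest code value for each bit length (the canonical huffman code
+ // construction described in RFC 1951, section 3.2.2).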
+ for bits in 1..max_length + 1 {
+ code = (code + lengths[bits - 1]) << 1;
+ next_code.push(code);
+ }
+
+ for n in 0..max_length_pos + 1 {
+ let length = usize::from(length_table[n]);
+ if length != 0 {
+ // The algorithm generates the code in the reverse bit order, so we need to reverse them
+ // to get the correct codes.
+ code_table[n] = reverse_bits(next_code[length], length as u8);
+ // We use wrapping here as we would otherwise overflow on the last code
+ // This should be okay as we exit the loop after this so the value is ignored
+ next_code[length] = next_code[length].wrapping_add(1);
+ }
+ }
+}
+
+/// A structure containing the tables of huffman codes for lengths, literals and distances
+pub struct HuffmanTable {
+ // Literal, end of block and length codes
+ codes: [u16; 288],
+ code_lengths: [u8; 288],
+ // Distance codes
+ distance_codes: [u16; 32],
+ distance_code_lengths: [u8; 32],
+}
+
+impl HuffmanTable {
+ pub fn empty() -> HuffmanTable {
+ HuffmanTable {
+ codes: [0; 288],
+ code_lengths: [0; 288],
+ distance_codes: [0; 32],
+ distance_code_lengths: [0; 32],
+ }
+ }
+
+ #[cfg(test)]
+ pub fn from_length_tables(
+ literals_and_lengths: &[u8; 288],
+ distances: &[u8; 32],
+ ) -> HuffmanTable {
+ let mut table = HuffmanTable {
+ codes: [0; 288],
+ code_lengths: *literals_and_lengths,
+ distance_codes: [0; 32],
+ distance_code_lengths: *distances,
+ };
+
+ table.update_from_lengths();
+ table
+ }
+
+ /// Get references to the lengths of the current huffman codes.
+ #[inline]
+ pub fn get_lengths(&self) -> (&[u8; 288], &[u8; 32]) {
+ (&self.code_lengths, &self.distance_code_lengths)
+ }
+
+ /// Get mutable references to the lengths of the current huffman codes.
+ ///
+ /// Used for updating the lengths in place.
+ #[inline]
+ pub fn get_lengths_mut(&mut self) -> (&mut [u8; 288], &mut [u8; 32]) {
+ (&mut self.code_lengths, &mut self.distance_code_lengths)
+ }
+
+ /// Update the huffman codes using the existing length values in the huffman table.
+ pub fn update_from_lengths(&mut self) {
+ create_codes_in_place(self.codes.as_mut(), &self.code_lengths[..]);
+ create_codes_in_place(
+ self.distance_codes.as_mut(),
+ &self.distance_code_lengths[..],
+ );
+ }
+
+ pub fn set_to_fixed(&mut self) {
+ self.code_lengths = FIXED_CODE_LENGTHS;
+ self.distance_code_lengths = FIXED_CODE_LENGTHS_DISTANCE;
+ self.update_from_lengths();
+ }
+
+ /// Create a HuffmanTable using the fixed tables specified in the DEFLATE format specification.
+ #[cfg(test)]
+ pub fn fixed_table() -> HuffmanTable {
+ // This should be safe to unwrap; if it were to panic the code is wrong, and
+ // tests should catch it.
+ HuffmanTable::from_length_tables(&FIXED_CODE_LENGTHS, &FIXED_CODE_LENGTHS_DISTANCE)
+ }
+
+ #[inline]
+ fn get_ll_huff(&self, value: usize) -> HuffmanCode {
+ HuffmanCode::new(self.codes[value], self.code_lengths[value])
+ }
+
+ /// Get the huffman code from the corresponding literal value
+ #[inline]
+ pub fn get_literal(&self, value: u8) -> HuffmanCode {
+ let index = usize::from(value);
+ HuffmanCode::new(self.codes[index], self.code_lengths[index])
+ }
+
+ /// Get the huffman code for the end of block value
+ #[inline]
+ pub fn get_end_of_block(&self) -> HuffmanCode {
+ self.get_ll_huff(END_OF_BLOCK_POSITION)
+ }
+
+ /// Get the huffman code and extra bits for the specified length
+ #[inline]
+ pub fn get_length_huffman(&self, length: StoredLength) -> (HuffmanCode, HuffmanCode) {
+
+ let length_data = get_length_code_and_extra_bits(length);
+
+ let length_huffman_code = self.get_ll_huff(length_data.code_number as usize);
+
+ (
+ length_huffman_code,
+ HuffmanCode {
+ code: length_data.value,
+ length: length_data.num_bits,
+ },
+ )
+ }
+
+ /// Get the huffman code and extra bits for the specified distance
+ ///
+ /// The distance must be between `MIN_DISTANCE` and `MAX_DISTANCE` (this is only checked
+ /// with a debug assertion).
+ #[inline]
+ pub fn get_distance_huffman(&self, distance: u16) -> (HuffmanCode, HuffmanCode) {
+ debug_assert!(distance >= MIN_DISTANCE && distance <= MAX_DISTANCE);
+
+ let distance_data = get_distance_code_and_extra_bits(distance);
+
+ let distance_huffman_code = self.distance_codes[distance_data.code_number as usize];
+ let distance_huffman_length =
+ self.distance_code_lengths[distance_data.code_number as usize];
+
+ (
+ HuffmanCode {
+ code: distance_huffman_code,
+ length: distance_huffman_length,
+ },
+ HuffmanCode {
+ code: distance_data.value,
+ length: distance_data.num_bits,
+ },
+ )
+ }
+
+ #[cfg(test)]
+ pub fn get_length_distance_code(&self, length: u16, distance: u16) -> LengthAndDistanceBits {
+ assert!(length >= MIN_MATCH && length < MAX_DISTANCE);
+ let l_codes = self.get_length_huffman(StoredLength::from_actual_length(length));
+ let d_codes = self.get_distance_huffman(distance);
+ LengthAndDistanceBits {
+ length_code: l_codes.0,
+ length_extra_bits: l_codes.1,
+ distance_code: d_codes.0,
+ distance_extra_bits: d_codes.1,
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use super::{get_length_code_and_extra_bits, get_distance_code_and_extra_bits,
+ build_length_count_table};
+
+ use lzvalue::StoredLength;
+
+ fn l(length: u16) -> StoredLength {
+ StoredLength::from_actual_length(length)
+ }
+
+ #[test]
+ fn test_get_length_code() {
+ let extra_bits = get_length_code_and_extra_bits(l(4));
+ assert_eq!(extra_bits.code_number, 258);
+ assert_eq!(extra_bits.num_bits, 0);
+ assert_eq!(extra_bits.value, 0);
+
+ let extra_bits = get_length_code_and_extra_bits(l(165));
+ assert_eq!(extra_bits.code_number, 282);
+ assert_eq!(extra_bits.num_bits, 5);
+ assert_eq!(extra_bits.value, 2);
+
+ let extra_bits = get_length_code_and_extra_bits(l(257));
+ assert_eq!(extra_bits.code_number, 284);
+ assert_eq!(extra_bits.num_bits, 5);
+ assert_eq!(extra_bits.value, 30);
+
+ let extra_bits = get_length_code_and_extra_bits(l(258));
+ assert_eq!(extra_bits.code_number, 285);
+ assert_eq!(extra_bits.num_bits, 0);
+ }
+
+ #[test]
+ fn test_distance_code() {
+ assert_eq!(get_distance_code(1), 0);
+ // Using 0 for None at the moment
+ assert_eq!(get_distance_code(0), 0);
+ assert_eq!(get_distance_code(50000), 0);
+ assert_eq!(get_distance_code(6146), 25);
+ assert_eq!(get_distance_code(256), 15);
+ assert_eq!(get_distance_code(4733), 24);
+ assert_eq!(get_distance_code(257), 16);
+ }
+
+ #[test]
+ fn test_distance_extra_bits() {
+ let extra = get_distance_code_and_extra_bits(527);
+ assert_eq!(extra.value, 0b1110);
+ assert_eq!(extra.code_number, 18);
+ assert_eq!(extra.num_bits, 8);
+ let extra = get_distance_code_and_extra_bits(256);
+ assert_eq!(extra.code_number, 15);
+ assert_eq!(extra.num_bits, 6);
+ let extra = get_distance_code_and_extra_bits(4733);
+ assert_eq!(extra.code_number, 24);
+ assert_eq!(extra.num_bits, 11);
+ }
+
+ #[test]
+ fn test_length_table_fixed() {
+ let _ = build_length_count_table(&FIXED_CODE_LENGTHS, &mut [0; 16]);
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_length_table_max_length() {
+ let table = [16u8; 288];
+ build_length_count_table(&table, &mut [0; 16]);
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_empty_table() {
+ let table = [];
+ build_length_count_table(&table, &mut [0; 16]);
+ }
+
+ #[test]
+ fn make_table_fixed() {
+ let table = HuffmanTable::fixed_table();
+ assert_eq!(table.codes[0], 0b00001100);
+ assert_eq!(table.codes[143], 0b11111101);
+ assert_eq!(table.codes[144], 0b000010011);
+ assert_eq!(table.codes[255], 0b111111111);
+ assert_eq!(table.codes[256], 0b0000000);
+ assert_eq!(table.codes[279], 0b1110100);
+ assert_eq!(table.codes[280], 0b00000011);
+ assert_eq!(table.codes[287], 0b11100011);
+
+ assert_eq!(table.distance_codes[0], 0);
+ assert_eq!(table.distance_codes[5], 20);
+
+ let ld = table.get_length_distance_code(4, 5);
+
+ assert_eq!(ld.length_code.code, 0b00100000);
+ assert_eq!(ld.distance_code.code, 0b00100);
+ assert_eq!(ld.distance_extra_bits.length, 1);
+ assert_eq!(ld.distance_extra_bits.code, 0);
+ }
+
+ #[test]
+ fn extra_bits_distance() {
+ use std::mem::size_of;
+ for i in 0..NUM_DISTANCE_CODES {
+ assert_eq!(
+ num_extra_bits_for_distance_code(i as u8),
+ DISTANCE_EXTRA_BITS[i]
+ );
+ }
+ println!("Size of huffmanCode struct: {}", size_of::<HuffmanCode>());
+ }
+
+}
diff --git a/third_party/rust/deflate/src/input_buffer.rs b/third_party/rust/deflate/src/input_buffer.rs
new file mode 100644
index 0000000000..d63bc40ffa
--- /dev/null
+++ b/third_party/rust/deflate/src/input_buffer.rs
@@ -0,0 +1,148 @@
+use std::cmp;
+
+use chained_hash_table::WINDOW_SIZE;
+use huffman_table;
+
+const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize;
+
+/// The maximum size of the buffer.
+pub const BUFFER_SIZE: usize = (WINDOW_SIZE * 2) + MAX_MATCH;
+
+pub struct InputBuffer {
+ buffer: Vec<u8>,
+}
+
+impl InputBuffer {
+ #[cfg(test)]
+ pub fn new<'a>(data: &'a [u8]) -> (InputBuffer, Option<&[u8]>) {
+ let mut b = InputBuffer::empty();
+ let rem = b.add_data(data);
+ (b, rem)
+ }
+
+ pub fn empty() -> InputBuffer {
+ InputBuffer {
+ buffer: Vec::with_capacity(BUFFER_SIZE),
+ }
+ }
+
+ /// Add data to the buffer.
+ ///
+ /// Returns a slice of the data that was not added (including the lookahead if any).
+ pub fn add_data<'a>(&mut self, data: &'a [u8]) -> Option<&'a [u8]> {
+ debug_assert!(self.current_end() <= BUFFER_SIZE);
+ if self.current_end() + data.len() > BUFFER_SIZE {
+ // Add data and return how much was left.
+ let consumed = {
+ let space_left = BUFFER_SIZE - self.buffer.len();
+ self.buffer.extend_from_slice(&data[..space_left]);
+ space_left
+ };
+ Some(&data[consumed..])
+ } else {
+ // There's space for all of the data.
+ self.buffer.extend_from_slice(data);
+ None
+ }
+ }
+
+ /// Get the current amount of data in the buffer.
+ pub fn current_end(&self) -> usize {
+ self.buffer.len()
+ }
+
+ /// Slide the input window and add new data.
+ ///
+ /// Returns a slice containing the data that did not fit, or None if all data was consumed.
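+ ///
+ /// Sliding discards the oldest `WINDOW_SIZE` bytes, moves the most recent window down to
+ /// the start of the buffer, keeps any lookahead bytes after it, and then appends `data`.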
+ pub fn slide<'a>(&mut self, data: &'a [u8]) -> Option<&'a [u8]> {
+ // This should only be used when the buffer is full
+ assert!(self.buffer.len() > WINDOW_SIZE * 2);
+
+ // Do this in a block to end the borrow of buffer.
+ let (final_len, upper_len, end) = {
+ // Split into lower window and upper window + lookahead
+ let (lower, upper) = self.buffer.split_at_mut(WINDOW_SIZE);
+ // Copy the upper window to the lower window
+ lower.copy_from_slice(&upper[..WINDOW_SIZE]);
+ let lookahead_len = {
+ // Copy the lookahead to the start of the upper window
+ let (upper_2, lookahead) = upper.split_at_mut(WINDOW_SIZE);
+ let lookahead_len = lookahead.len();
+ debug_assert!(lookahead_len <= MAX_MATCH);
+ upper_2[..lookahead_len].copy_from_slice(lookahead);
+ lookahead_len
+ };
+
+ // Length of the upper window minus the lookahead bytes
+ let upper_len = upper.len() - lookahead_len;
+ let end = cmp::min(data.len(), upper_len);
+ upper[lookahead_len..lookahead_len + end].copy_from_slice(&data[..end]);
+ // Remove unused data if any.
+ (lower.len() + lookahead_len + end, upper_len, end)
+ };
+ // Remove unused space.
+ self.buffer.truncate(final_len);
+
+ if data.len() > upper_len {
+ // Return a slice of the data that was not added
+ Some(&data[end..])
+ } else {
+ None
+ }
+ }
+
+ /// Get a mutable slice of the used part of the buffer.
+ pub fn get_buffer(&mut self) -> &mut [u8] {
+ &mut self.buffer
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::MAX_MATCH;
+ use chained_hash_table::WINDOW_SIZE;
+ use super::*;
+ #[test]
+ pub fn buffer_add_full() {
+ let data = [10u8; BUFFER_SIZE + 10];
+ let (mut buf, extra) = InputBuffer::new(&data[..]);
+ assert!(extra.unwrap() == &[10; 10]);
+ let to_add = [2, 5, 3];
+ let not_added = buf.add_data(&to_add);
+ assert_eq!(not_added.unwrap(), to_add);
+ }
+
+ #[test]
+ pub fn buffer_add_not_full() {
+ let data = [10u8; BUFFER_SIZE - 5];
+ let (mut buf, extra) = InputBuffer::new(&data[..]);
+ assert_eq!(buf.current_end(), data.len());
+ assert_eq!(extra, None);
+ let to_add = [2, 5, 3];
+ {
+ let not_added = buf.add_data(&to_add);
+ assert!(not_added.is_none());
+ }
+ let not_added = buf.add_data(&to_add);
+ assert_eq!(not_added.unwrap()[0], 3);
+ }
+
+ #[test]
+ fn slide() {
+ let data = [10u8; BUFFER_SIZE];
+ let (mut buf, extra) = InputBuffer::new(&data[..]);
+ assert_eq!(extra, None);
+ let to_add = [5; 5];
+ let rem = buf.slide(&to_add);
+ assert!(rem.is_none());
+ {
+ let slice = buf.get_buffer();
+ assert!(slice[..WINDOW_SIZE + MAX_MATCH] == data[WINDOW_SIZE..]);
+ assert_eq!(
+ slice[WINDOW_SIZE + MAX_MATCH..WINDOW_SIZE + MAX_MATCH + 5],
+ to_add
+ );
+ }
+ assert_eq!(buf.current_end(), WINDOW_SIZE + MAX_MATCH + to_add.len());
+ }
+}
diff --git a/third_party/rust/deflate/src/length_encode.rs b/third_party/rust/deflate/src/length_encode.rs
new file mode 100644
index 0000000000..fded50fdd3
--- /dev/null
+++ b/third_party/rust/deflate/src/length_encode.rs
@@ -0,0 +1,1487 @@
+use std::iter::Iterator;
+use std::clone::Clone;
+
+/// An enum representing the different types in the run-length encoded data used to encode
+/// huffman table lengths
+#[derive(Debug, PartialEq, Eq)]
+pub enum EncodedLength {
+ // An actual length value
+ Length(u8),
+ // Copy the previous value n times
+ CopyPrevious(u8),
+ // Repeat zero n times (with n represented by 3 bits)
+ RepeatZero3Bits(u8),
+ // Repeat zero n times (with n represented by 7 bits)
+ RepeatZero7Bits(u8),
+}
+
+impl EncodedLength {
+ fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
+ match prev {
+ 0 => {
+ if repeat <= 10 {
+ EncodedLength::RepeatZero3Bits(repeat)
+ } else {
+ EncodedLength::RepeatZero7Bits(repeat)
+ }
+ }
+ 1...15 => EncodedLength::CopyPrevious(repeat),
+ _ => panic!(),
+ }
+ }
+}
+
+pub const COPY_PREVIOUS: usize = 16;
+pub const REPEAT_ZERO_3_BITS: usize = 17;
+pub const REPEAT_ZERO_7_BITS: usize = 18;
+
+const MIN_REPEAT: u8 = 3;
+
+/// Push an `EncodedLength` to the vector and update the frequency table.
+fn update_out_and_freq(
+ encoded: EncodedLength,
+ output: &mut Vec<EncodedLength>,
+ frequencies: &mut [u16; 19],
+) {
+ let index = match encoded {
+ EncodedLength::Length(l) => usize::from(l),
+ EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
+ EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
+ EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
+ };
+
+ frequencies[index] += 1;
+
+ output.push(encoded);
+}
+
+/// Convenience function to check if the repeat counter should be incremented further
+fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
+ (length_value == 0 && repeats < 138) || repeats < 6
+}
+
+/// Convenience version for unit tests.
+#[cfg(test)]
+pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
+where
+ I: Iterator<Item = &'a u8> + Clone,
+{
+ let mut freqs = [0u16; 19];
+ let mut encoded: Vec<EncodedLength> = Vec::new();
+ encode_lengths_m(lengths, &mut encoded, &mut freqs);
+ (encoded, freqs)
+}
+
+/// Run-length encodes the lengths of the values in `lengths` according to the deflate
+/// specification. This is used for writing the code lengths for the huffman tables for
+/// the deflate stream.
+///
+/// Populates the supplied vec with the encoded lengths, and the supplied array with the
+/// frequency of the different encoded length values.
+/// The output buffers are taken as parameters rather than returned to avoid
+/// excessive memcpying.
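+///
+/// As a small example (mirroring the unit tests below), the input lengths `[0, 0, 0, 5, 2]`
+/// encode to `[RepeatZero3Bits(3), Length(5), Length(2)]`.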
+pub fn encode_lengths_m<'a, I>(
+ lengths: I,
+ mut out: &mut Vec<EncodedLength>,
+ mut frequencies: &mut [u16; 19],
+) where
+ I: Iterator<Item = &'a u8> + Clone,
+{
+ out.clear();
+ // Number of repetitions of the current value
+ let mut repeat = 0;
+ let mut iter = lengths.clone().enumerate().peekable();
+ // Previous value
+ // We set it to the complement of the first value to simplify the code.
+ let mut prev = !iter.peek().expect("No length values!").1;
+
+ while let Some((n, &l)) = iter.next() {
+ if l == prev && not_max_repetitions(l, repeat) {
+ repeat += 1;
+ }
+ if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
+ if repeat >= MIN_REPEAT {
+ // The previous value has been repeated enough times to write out a repeat code.
+
+ let val = EncodedLength::from_prev_and_repeat(prev, repeat);
+ update_out_and_freq(val, &mut out, &mut frequencies);
+ repeat = 0;
+ // If we have a new length value, output l unless the last value is 0 or l is the
+ // last byte.
+ if l != prev {
+ if l != 0 || iter.peek().is_none() {
+ update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
+ repeat = 0;
+ } else {
+ // If we have a zero, we start repeat at one instead of outputting, as
+ // there are separate codes for repeats of zero so we don't need a literal
+ // to define what byte to repeat.
+ repeat = 1;
+ }
+ }
+ } else {
+ // There haven't been enough repetitions of the previous value,
+ // so we just output the lengths directly.
+
+ // If we are at the end, and we have a value that is repeated, we need to
+ // skip a byte and output the last one.
+ let extra_skip = if iter.peek().is_none() && l == prev {
+ 1
+ } else {
+ 0
+ };
+
+ // Get to the position of the next byte to output by starting at zero and skipping.
+ let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
+
+ // As repeats of zeroes have separate codes, we don't need to output a literal here
+ // if we have a zero (unless we are at the end).
+ let extra = if l != 0 || iter.peek().is_none() {
+ 1
+ } else {
+ 0
+ };
+
+ for &i in b_iter.take(repeat as usize + extra) {
+ update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
+ }
+
+ // If the current byte is zero we start repeat at 1 as we didn't output the literal
+ // directly.
+ repeat = 1 - extra as u8;
+ }
+ }
+ prev = l;
+ }
+}
+
+#[cfg(test)]
+pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
+ in_place::gen_lengths(frequencies, max_len)
+}
+
+pub type LeafVec = Vec<in_place::Node>;
+
+/// Generate a set of canonical huffman lengths from the given frequencies, with a maximum length
+/// of `max_len`. The lengths are put in the lens slice parameter. Unused lengths are set to 0.
+///
+/// The leaf buffer is passed in to avoid allocating it every time this function is called.
+/// The existing data contained in it is not preserved.
+pub fn huffman_lengths_from_frequency_m(
+ frequencies: &[u16],
+ max_len: usize,
+ leaf_buffer: &mut LeafVec,
+ lens: &mut [u8],
+) {
+ in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
+}
+
+mod in_place {
+ type WeightType = u32;
+
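+ /// Check that the lengths form a valid (non over-subscribed) prefix code, i.e. that the
+ /// Kraft sum of 2^(-length) over all non-zero lengths does not exceed 1.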
+ pub fn validate_lengths(lengths: &[u8]) -> bool {
+ // Avoid issue with floating point on mips: https://github.com/oyvindln/deflate-rs/issues/23
+ if cfg!(any(target_arch = "mips", target_arch = "mipsel",
+ target_arch = "mips64", target_arch = "mipsel64")) {
+ true
+ } else {
+ let v = lengths.iter().fold(0f64, |acc, &n| {
+ acc + if n != 0 { 2f64.powi(-(n as i32)) } else { 0f64 }
+ });
+ !(v > 1.0)
+ }
+ }
+
+ #[derive(Eq, PartialEq, Debug)]
+ pub struct Node {
+ value: WeightType,
+ symbol: u16,
+ }
+
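+ // The first pass of the Moffat-Katajainen in-place algorithm: repeatedly combine the two
+ // smallest weights, reusing the (sorted) leaf array to hold both internal-node weights
+ // and parent indices.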
+ fn step_1(leaves: &mut [Node]) {
+ // If there are less than 2 non-zero frequencies, this function should not have been
+ // called and we should not have gotten to this point.
+ debug_assert!(leaves.len() >= 2);
+ let mut root = 0;
+ let mut leaf = 2;
+
+ leaves[0].value += leaves[1].value;
+
+ for next in 1..leaves.len() - 1 {
+ if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
+ leaves[next].value = leaves[root].value;
+ leaves[root].value = next as WeightType;
+ root += 1;
+ } else {
+ leaves[next].value = leaves[leaf].value;
+ leaf += 1;
+ }
+
+ if (leaf >= leaves.len()) ||
+ (root < next && (leaves[root].value < leaves[leaf].value))
+ {
+ leaves[next].value += leaves[root].value;
+ leaves[root].value = next as WeightType;
+ root += 1;
+ } else {
+ leaves[next].value += leaves[leaf].value;
+ leaf += 1;
+ }
+ }
+ }
+
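+ // The second pass: convert the parent indices produced by `step_1` into node depths, then
+ // hand out those depths (the code lengths) to the leaves, deepest codes first.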
+ fn step_2(leaves: &mut [Node]) {
+ debug_assert!(leaves.len() >= 2);
+ let n = leaves.len();
+
+ leaves[n - 2].value = 0;
+ for t in (0..(n + 1 - 3)).rev() {
+ leaves[t].value = leaves[leaves[t].value as usize].value + 1;
+ }
+
+ let mut available = 1 as usize;
+ let mut used = 0;
+ let mut depth = 0;
+ let mut root = n as isize - 2;
+ let mut next = n as isize - 1;
+
+ while available > 0 {
+ while root >= 0 && leaves[root as usize].value == depth {
+ used += 1;
+ root -= 1;
+ }
+ while available > used {
+ leaves[next as usize].value = depth;
+ next -= 1;
+ available -= 1;
+ }
+ available = 2 * used;
+ depth += 1;
+ used = 0;
+ }
+ }
+
+ const MAX_NUMBER_OF_CODES: usize = 32;
+ const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
+
+ /// Checks if any of the lengths exceed `max_len`, and if that is the case, alters the length
+ /// table so that no codes exceed `max_len`.
+ /// This is ported from miniz (which is released as public domain by Rich Geldreich
+ /// https://github.com/richgel999/miniz/blob/master/miniz.c)
+ ///
+ /// This will not generate optimal (minimum-redundancy) codes, however in most cases
+ /// this won't make a large difference.
+ pub fn enforce_max_code_lengths(
+ num_codes: &mut [u16; NUM_CODES_LENGTH],
+ num_used: usize,
+ max_len: usize,
+ ) {
+ debug_assert!(max_len <= 15);
+
+ if num_used <= 1 {
+ return;
+ } else {
+ let mut num_above_max = 0u16;
+ for &l in num_codes[(max_len as usize + 1)..].iter() {
+ num_above_max += l;
+ }
+
+ num_codes[max_len] += num_above_max;
+
+ let mut total = 0u32;
+ for i in (1..max_len + 1).rev() {
+ // This should be safe as max_len won't be higher than 15, and num_codes[i] can't
+ // be higher than 288,
+ // and 288 << 15 will not be anywhere close to overflowing 32 bits
+ total += (num_codes[i] as u32) << (max_len - i);
+ }
+
+ // miniz uses unsigned long here. 32-bits should be sufficient though,
+ // as max_len won't be longer than 15 anyhow.
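+ // `total` is the Kraft sum scaled by 2^max_len. Each iteration removes a code from depth
+ // max_len and turns one code at a shallower depth i into two codes at depth i + 1, which
+ // keeps the number of codes unchanged while reducing the scaled sum by one, until the
+ // tree is exactly full.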
+ while total != 1u32 << max_len {
+ num_codes[max_len] -= 1;
+ for i in (1..max_len).rev() {
+ if num_codes[i] != 0 {
+ num_codes[i] -= 1;
+ num_codes[i + 1] += 2;
+ break;
+ }
+ }
+ total -= 1;
+ }
+ }
+ }
+
+
+ #[cfg(test)]
+ /// Convenience wrapper for tests.
+ pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
+ let mut lens = vec![0u8; frequencies.len()];
+ let mut leaves = Vec::new();
+ in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
+ lens
+ }
+
+ /// Generate huffman code lengths using the algorithm described by
+ /// Moffat and Katajainen in "In-Place Calculation of Minimum-Redundancy Codes"
+ /// (http://people.eng.unimelb.edu.au/ammoffat/abstracts/mk95wads.html)
+ /// and its accompanying implementation.
+ ///
+ /// This is significantly faster, and seems to generally create lengths that result in length
+ /// tables that compress better than those from the algorithm used previously. The downside of
+ /// this algorithm is that it's not length-limited, so if too-long code lengths are generated,
+ /// it might result in sub-optimal tables as the length-restricting function isn't optimal.
+ pub fn in_place_lengths(
+ frequencies: &[u16],
+ max_len: usize,
+ mut leaves: &mut Vec<Node>,
+ lengths: &mut [u8],
+ ) {
+
+ debug_assert!(lengths.len() >= frequencies.len());
+
+ for l in lengths.iter_mut() {
+ *l = 0;
+ }
+
+ // Clear any previous leaves in the leaf buffer.
+ leaves.clear();
+
+ // Discard zero length nodes as they won't be given a code and thus don't need to
+ // participate in code length generation and create a new vec of the remaining
+ // symbols and weights.
+ leaves.extend(frequencies.iter().enumerate().filter_map(
+ |(n, f)| if *f > 0 {
+ Some(Node {
+ value: *f as WeightType,
+ symbol: n as u16,
+ })
+ } else {
+ None
+ },
+ ));
+
+ // Special cases with zero or 1 value having a non-zero frequency
+ if leaves.len() == 1 {
+ lengths[leaves[0].symbol as usize] = 1;
+ return;
+ } else if leaves.is_empty() {
+ return;
+ }
+
+ // Sort the leaves by value. As the sort in the standard library is stable, we don't
+ // have to worry about the symbol code here.
+ leaves.sort_by(|a, b| a.value.cmp(&b.value));
+
+ step_1(&mut leaves);
+ step_2(&mut leaves);
+
+ // Count how many codes of each length used, for usage in the next section.
+ let mut num_codes = [0u16; NUM_CODES_LENGTH];
+ for l in leaves.iter() {
+ num_codes[l.value as usize] += 1;
+ }
+
+ // As the algorithm used here doesn't limit the maximum length that can be generated
+ // we need to make sure none of the lengths exceed `max_len`
+ enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);
+
+ // Output the actual lengths
+ let mut leaf_it = leaves.iter().rev();
+ // Start at 1 since the length table is already filled with zeroes.
+ for (&n_codes, i) in num_codes[1..max_len + 1].iter().zip(1..(max_len as u8) + 1) {
+ for _ in 0..n_codes {
+ lengths[leaf_it.next().unwrap().symbol as usize] = i;
+ }
+ }
+
+ debug_assert_eq!(leaf_it.next(), None);
+ debug_assert!(
+ validate_lengths(lengths),
+ "The generated length codes were not valid!"
+ );
+ }
+
+
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::u16;
+ use huffman_table::NUM_LITERALS_AND_LENGTHS;
+
+ fn lit(value: u8) -> EncodedLength {
+ EncodedLength::Length(value)
+ }
+
+ fn zero(repeats: u8) -> EncodedLength {
+ match repeats {
+ 0...1 => EncodedLength::Length(0),
+ 2...10 => EncodedLength::RepeatZero3Bits(repeats),
+ _ => EncodedLength::RepeatZero7Bits(repeats),
+ }
+ }
+
+ fn copy(copies: u8) -> EncodedLength {
+ EncodedLength::CopyPrevious(copies)
+ }
+
+ #[test]
+ fn test_encode_lengths() {
+ use huffman_table::FIXED_CODE_LENGTHS;
+ let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
+ // There are no lengths lower than 6 in the fixed table
+ assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
+ // Neither are there any lengths above 9
+ assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
+ // Also there are no zero-length codes so there shouldn't be any repetitions of zero
+ assert_eq!(enc.1[17..19], [0, 0]);
+
+ let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
+ let enc = encode_lengths(test_lengths.iter()).0;
+ assert_eq!(
+ enc,
+ vec![
+ lit(0),
+ lit(0),
+ lit(5),
+ lit(0),
+ lit(15),
+ lit(1),
+ zero(3),
+ lit(2),
+ lit(4),
+ copy(3),
+ lit(3),
+ lit(5),
+ copy(3),
+ ]
+ );
+ let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
+ let enc = encode_lengths(test_lengths.iter()).0;
+ assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
+
+ let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
+ let enc = encode_lengths(test_lengths.iter()).0;
+ assert_eq!(
+ enc,
+ vec![
+ zero(3),
+ lit(3),
+ lit(3),
+ lit(3),
+ lit(5),
+ lit(4),
+ copy(3),
+ lit(0),
+ lit(0),
+ ]
+ );
+
+
+ let lens = [
+ 0,
+ 0,
+ 4,
+ 0,
+ 0,
+ 4,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 4,
+ 4,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 3,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 4,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 4,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ ];
+
+ let _ = encode_lengths(lens.iter()).0;
+
+ let lens = [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 9,
+ 0,
+ 0,
+ 9,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 6,
+ 0,
+ 0,
+ 0,
+ 8,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8,
+ 0,
+ 0,
+ 7,
+ 8,
+ 7,
+ 8,
+ 6,
+ 6,
+ 8,
+ 0,
+ 7,
+ 6,
+ 7,
+ 8,
+ 7,
+ 7,
+ 8,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8,
+ 8,
+ 0,
+ 8,
+ 7,
+ 0,
+ 10,
+ 8,
+ 0,
+ 8,
+ 0,
+ 10,
+ 10,
+ 8,
+ 8,
+ 10,
+ 8,
+ 0,
+ 8,
+ 7,
+ 0,
+ 10,
+ 0,
+ 7,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 6,
+ 7,
+ 7,
+ 7,
+ 6,
+ 7,
+ 8,
+ 8,
+ 6,
+ 0,
+ 0,
+ 8,
+ 8,
+ 7,
+ 8,
+ 8,
+ 0,
+ 7,
+ 6,
+ 6,
+ 8,
+ 8,
+ 8,
+ 10,
+ 10,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 10,
+ 4,
+ 3,
+ 3,
+ 4,
+ 4,
+ 5,
+ 5,
+ 5,
+ 5,
+ 5,
+ 8,
+ 8,
+ 6,
+ 7,
+ 8,
+ 10,
+ 10,
+ 0,
+ 9, /* litlen */
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 8,
+ 8,
+ 8,
+ 8,
+ 6,
+ 6,
+ 5,
+ 5,
+ 5,
+ 5,
+ 6,
+ 5,
+ 5,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 3,
+ 4,
+ 3,
+ 4,
+ ];
+
+ let enc = encode_lengths(lens.iter()).0;
+
+ assert_eq!(
+ &enc[..10],
+ &[
+ zero(10),
+ lit(9),
+ lit(0),
+ lit(0),
+ lit(9),
+ zero(18),
+ lit(6),
+ zero(3),
+ lit(8),
+ zero(4)
+ ]
+ );
+ assert_eq!(
+ &enc[10..20],
+ &[
+ lit(8),
+ lit(0),
+ lit(0),
+ lit(7),
+ lit(8),
+ lit(7),
+ lit(8),
+ lit(6),
+ lit(6),
+ lit(8)
+ ]
+ );
+
+ let enc = encode_lengths([1, 1, 1, 2].iter()).0;
+ assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
+ let enc = encode_lengths([0, 0, 3].iter()).0;
+ assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
+ let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
+ assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
+
+ let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
+ assert!(*enc.last().unwrap() != lit(5));
+
+ let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
+ assert_eq!(*enc.last().unwrap(), zero(0));
+ }
+
+ #[test]
+ fn test_lengths_from_frequencies() {
+ let frequencies = [1, 1, 5, 7, 10, 14];
+
+ let expected = [4, 4, 3, 2, 2, 2];
+ let res = huffman_lengths_from_frequency(&frequencies, 4);
+
+ assert_eq!(expected, res.as_slice());
+
+ let frequencies = [1, 5, 1, 7, 10, 14];
+ let expected = [4, 3, 4, 2, 2, 2];
+
+ let res = huffman_lengths_from_frequency(&frequencies, 4);
+
+ assert_eq!(expected, res.as_slice());
+
+ let frequencies = [0, 25, 0, 10, 2, 4];
+
+ let res = huffman_lengths_from_frequency(&frequencies, 4);
+ assert_eq!(res[0], 0);
+ assert_eq!(res[2], 0);
+ assert!(res[1] < 4);
+
+ // Only one value
+ let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
+ let res = huffman_lengths_from_frequency(&frequencies, 5);
+ let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
+ assert_eq!(expected, res.as_slice());
+
+ // No values
+ let frequencies = [0; 30];
+ let res = huffman_lengths_from_frequency(&frequencies, 5);
+ for (a, b) in frequencies.iter().zip(res.iter()) {
+ assert_eq!(*a, (*b).into());
+ }
+ // assert_eq!(frequencies, res.as_slice());
+
+ let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
+ frequencies[55] = u16::MAX / 3;
+ frequencies[125] = u16::MAX / 3;
+
+ let res = huffman_lengths_from_frequency(&frequencies, 15);
+ assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
+ assert!(res[55] < 3);
+ assert!(res[125] < 3);
+ }
+
+ #[test]
+ /// Test if the bit lengths for a set of frequencies are optimal (give the best compression
+ /// give the provided frequencies).
+ fn optimal_lengths() {
+ let freqs = [
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 44,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 68,
+ 0,
+ 14,
+ 0,
+ 0,
+ 0,
+ 0,
+ 3,
+ 7,
+ 6,
+ 1,
+ 0,
+ 12,
+ 14,
+ 9,
+ 2,
+ 6,
+ 9,
+ 4,
+ 1,
+ 1,
+ 4,
+ 1,
+ 1,
+ 0,
+ 0,
+ 1,
+ 3,
+ 0,
+ 6,
+ 0,
+ 0,
+ 0,
+ 4,
+ 4,
+ 1,
+ 2,
+ 5,
+ 3,
+ 2,
+ 2,
+ 9,
+ 0,
+ 0,
+ 3,
+ 1,
+ 5,
+ 5,
+ 8,
+ 0,
+ 6,
+ 10,
+ 5,
+ 2,
+ 0,
+ 0,
+ 1,
+ 2,
+ 0,
+ 8,
+ 11,
+ 4,
+ 0,
+ 1,
+ 3,
+ 31,
+ 13,
+ 23,
+ 22,
+ 56,
+ 22,
+ 8,
+ 11,
+ 43,
+ 0,
+ 7,
+ 33,
+ 15,
+ 45,
+ 40,
+ 16,
+ 1,
+ 28,
+ 37,
+ 35,
+ 26,
+ 3,
+ 7,
+ 11,
+ 9,
+ 1,
+ 1,
+ 0,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 126,
+ 114,
+ 66,
+ 31,
+ 41,
+ 25,
+ 15,
+ 21,
+ 20,
+ 16,
+ 15,
+ 10,
+ 7,
+ 5,
+ 1,
+ 1,
+ ];
+
+
+ let lens = huffman_lengths_from_frequency(&freqs, 15);
+
+ // Lengths produced by miniz for this frequency table for comparison
+ // the number of total bits encoded with these huffman codes is 7701
+ // NOTE: There can be more than one set of optimal lengths for a set of frequencies,
+ // (though there may be a difference in how well the table itself can be represented)
+ // so testing for a specific length table is not ideal since different algorithms
+ // may produce different length tables.
+ // let lens3 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0, 0, 0, 0, 0,
+ // 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 0, 0, 0, 9, 8, 8, 10, 0, 7, 7, 7, 10, 8, 7, 8,
+ // 10, 10, 8, 10, 10, 0, 0, 10, 9, 0, 8, 0, 0, 0, 8, 8, 10, 9, 8, 9, 9, 9, 7, 0,
+ // 0, 9, 10, 8, 8, 7, 0, 8, 7, 8, 9, 0, 0, 10, 9, 0, 7, 7, 8, 0, 10, 9, 6, 7, 6,
+ // 6, 5, 6, 7, 7, 5, 0, 8, 5, 7, 5, 5, 6, 10, 6, 5, 5, 6, 9, 8, 7, 7, 10, 10, 0,
+ // 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // 0, 0, 10, 4, 4, 4, 5, 5, 6, 7, 6, 6, 6, 6, 7, 8, 8, 10, 10];
+ //
+
+
+ let num_bits = lens.iter()
+ .zip(freqs.iter())
+ .fold(0, |a, (&f, &l)| a + (f as u16 * l));
+ assert_eq!(num_bits, 7701);
+ }
+}
diff --git a/third_party/rust/deflate/src/lib.rs b/third_party/rust/deflate/src/lib.rs
new file mode 100644
index 0000000000..13da49785b
--- /dev/null
+++ b/third_party/rust/deflate/src/lib.rs
@@ -0,0 +1,495 @@
+//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
+//! compression algorithm in pure Rust.
+//!
+//! This library provides functions to compress data using the DEFLATE algorithm,
+//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
+//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
+//! The current implementation is still a bit lacking speed-wise compared to C-libraries
+//! like zlib and miniz.
+//!
+//! The deflate algorithm is an older compression algorithm that is still widely used today,
+//! e.g. in HTTP compression, the `.png` image format, the unix `gzip` program and commonly in `.zip`
+//! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing
+//! some extra metadata and a checksum to validate the integrity of the raw data.
+//!
+//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
+//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
+//! the `DEFLATE` format (with or without wrappers) is not required.
+//!
+//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default,
+//! but can be enabled with the `gzip` feature.
+//!
+//! As this library is still in development, the compression output may change slightly
+//! between versions.
+//!
+//!
+//! # Examples:
+//! ## Simple compression function:
+//! ``` rust
+//! use deflate::deflate_bytes;
+//!
+//! let data = b"Some data";
+//! let compressed = deflate_bytes(data);
+//! # let _ = compressed;
+//! ```
+//!
+//! ## Using a writer:
+//! ``` rust
+//! use std::io::Write;
+//!
+//! use deflate::Compression;
+//! use deflate::write::ZlibEncoder;
+//!
+//! let data = b"This is some test data";
+//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
+//! encoder.write_all(data).expect("Write error!");
+//! let compressed_data = encoder.finish().expect("Failed to finish compression!");
+//! # let _ = compressed_data;
+//! ```
+
+#![cfg_attr(all(feature = "benchmarks", test), feature(test))]
+
+#[cfg(all(test, feature = "benchmarks"))]
+extern crate test as test_std;
+
+#[cfg(test)]
+extern crate flate2;
+// #[cfg(test)]
+// extern crate inflate;
+
+extern crate adler32;
+extern crate byteorder;
+#[cfg(feature = "gzip")]
+extern crate gzip_header;
+
+mod compression_options;
+mod huffman_table;
+mod lz77;
+mod lzvalue;
+mod chained_hash_table;
+mod length_encode;
+mod output_writer;
+mod stored_block;
+mod huffman_lengths;
+mod zlib;
+mod checksum;
+mod bit_reverse;
+mod bitstream;
+mod encoder_state;
+mod matching;
+mod input_buffer;
+mod deflate_state;
+mod compress;
+mod rle;
+mod writer;
+#[cfg(test)]
+mod test_utils;
+
+use std::io::Write;
+use std::io;
+
+use byteorder::BigEndian;
+#[cfg(feature = "gzip")]
+use gzip_header::GzBuilder;
+#[cfg(feature = "gzip")]
+use gzip_header::Crc;
+#[cfg(feature = "gzip")]
+use byteorder::LittleEndian;
+
+use checksum::RollingChecksum;
+use deflate_state::DeflateState;
+
+pub use compression_options::{CompressionOptions, SpecialOptions, Compression};
+use compress::Flush;
+pub use lz77::MatchingType;
+
+use writer::compress_until_done;
+
+/// Encoders implementing a `Write` interface.
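+///
+/// A minimal usage sketch for `DeflateEncoder` (it is assumed here to mirror the
+/// `new`/`finish` pattern of the `ZlibEncoder` example in the crate-level docs):
+///
+/// ``` rust
+/// use std::io::Write;
+///
+/// use deflate::Compression;
+/// use deflate::write::DeflateEncoder;
+///
+/// let data = b"Some example data";
+/// // Collect the raw DEFLATE stream into a Vec<u8>.
+/// let mut encoder = DeflateEncoder::new(Vec::new(), Compression::Default);
+/// encoder.write_all(data).expect("Write error!");
+/// let raw_deflate = encoder.finish().expect("Failed to finish compression!");
+/// # let _ = raw_deflate;
+/// ```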
+pub mod write {
+ pub use writer::{DeflateEncoder, ZlibEncoder};
+ #[cfg(feature = "gzip")]
+ pub use writer::gzip::GzEncoder;
+}
+
+
+fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
+ input: &[u8],
+ writer: &mut W,
+ mut checksum: RC,
+ compression_options: CompressionOptions,
+) -> io::Result<()> {
+ checksum.update_from_slice(input);
+ // We use a box here to avoid putting the buffers on the stack
+ // It's done here rather than in the structs themselves for now to
+ // keep the data close in memory.
+ let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
+ compress_until_done(input, &mut deflate_state, Flush::Finish)
+}
+
+/// Compress the given slice of bytes with DEFLATE compression.
+///
+/// Returns a `Vec<u8>` of the compressed data.
+///
+/// # Examples
+///
+/// ```
+/// use deflate::{deflate_bytes_conf, Compression};
+///
+/// let data = b"This is some test data";
+/// let compressed_data = deflate_bytes_conf(data, Compression::Best);
+/// # let _ = compressed_data;
+/// ```
+pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
+ let mut writer = Vec::with_capacity(input.len() / 3);
+ compress_data_dynamic(
+ input,
+ &mut writer,
+ checksum::NoChecksum::new(),
+ options.into(),
+ ).expect("Write error!");
+ writer
+}
+
+/// Compress the given slice of bytes with DEFLATE compression using the default compression
+/// level.
+///
+/// Returns a `Vec<u8>` of the compressed data.
+///
+/// # Examples
+///
+/// ```
+/// use deflate::deflate_bytes;
+///
+/// let data = b"This is some test data";
+/// let compressed_data = deflate_bytes(data);
+/// # let _ = compressed_data;
+/// ```
+pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
+ deflate_bytes_conf(input, Compression::Default)
+}
+
+/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
+///
+/// Returns a `Vec<u8>` of the compressed data.
+///
+/// Zlib dictionaries are not yet supported.
+///
+/// # Examples
+///
+/// ```
+/// use deflate::{deflate_bytes_zlib_conf, Compression};
+///
+/// let data = b"This is some test data";
+/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
+/// # let _ = compressed_data;
+/// ```
+pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
+ use byteorder::WriteBytesExt;
+ let mut writer = Vec::with_capacity(input.len() / 3);
+ // Write header
+ zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
+ .expect("Write error when writing zlib header!");
+
+ let mut checksum = checksum::Adler32Checksum::new();
+ compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
+ .expect("Write error when writing compressed data!");
+
+ let hash = checksum.current_hash();
+
+ writer
+ .write_u32::<BigEndian>(hash)
+ .expect("Write error when writing checksum!");
+ writer
+}
+
+/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
+/// using the default compression level.
+///
+/// Returns a `Vec<u8>` of the compressed data.
+///
+/// Zlib dictionaries are not yet supported.
+///
+/// # Examples
+///
+/// ```
+/// use deflate::deflate_bytes_zlib;
+///
+/// let data = b"This is some test data";
+/// let compressed_data = deflate_bytes_zlib(data);
+/// # let _ = compressed_data;
+/// ```
+pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
+ deflate_bytes_zlib_conf(input, Compression::Default)
+}
+
+/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer
+/// using the given gzip header and compression options.
+///
+/// Returns a `Vec<u8>` of the compressed data.
+///
+///
+/// # Examples
+///
+/// ```
+/// extern crate gzip_header;
+/// extern crate deflate;
+///
+/// # fn main() {
+/// use deflate::{deflate_bytes_gzip_conf, Compression};
+/// use gzip_header::GzBuilder;
+///
+/// let data = b"This is some test data";
+/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
+/// # let _ = compressed_data;
+/// # }
+/// ```
+#[cfg(feature = "gzip")]
+pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
+ input: &[u8],
+ options: O,
+ gzip_header: GzBuilder,
+) -> Vec<u8> {
+ use byteorder::WriteBytesExt;
+ let mut writer = Vec::with_capacity(input.len() / 3);
+
+ // Write header
+ writer
+ .write_all(&gzip_header.into_header())
+ .expect("Write error when writing header!");
+ let mut checksum = checksum::NoChecksum::new();
+ compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
+ .expect("Write error when writing compressed data!");
+
+ let mut crc = Crc::new();
+ crc.update(input);
+
+ writer
+ .write_u32::<LittleEndian>(crc.sum())
+ .expect("Write error when writing checksum!");
+ writer
+ .write_u32::<LittleEndian>(crc.amt_as_u32())
+ .expect("Write error when writing amt!");
+ writer
+}
+
+/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer,
+/// using the default compression level, and a gzip header with default values.
+///
+/// Returns a `Vec<u8>` of the compressed data.
+///
+///
+/// # Examples
+///
+/// ```
+/// use deflate::deflate_bytes_gzip;
+/// let data = b"This is some test data";
+/// let compressed_data = deflate_bytes_gzip(data);
+/// # let _ = compressed_data;
+/// ```
+#[cfg(feature = "gzip")]
+pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
+ deflate_bytes_gzip_conf(input, Compression::Default, GzBuilder::new())
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::io::Write;
+
+ use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
+ #[cfg(feature = "gzip")]
+ use test_utils::decompress_gzip;
+
+ type CO = CompressionOptions;
+
+ /// Write data to the writer in chunks of chunk_size.
+ fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
+ for chunk in data.chunks(chunk_size) {
+ writer.write_all(&chunk).unwrap();
+ }
+ }
+
+ #[test]
+ fn dynamic_string_mem() {
+ let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
+ let compressed = deflate_bytes(&test_data);
+
+ assert!(compressed.len() < test_data.len());
+
+ let result = decompress_to_end(&compressed);
+ assert_eq!(test_data, result);
+ }
+
+ #[test]
+ fn dynamic_string_file() {
+ let input = get_test_data();
+ let compressed = deflate_bytes(&input);
+
+ let result = decompress_to_end(&compressed);
+ for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
+ if a != b {
+ println!("First difference at {}, input: {}, output: {}", n, a, b);
+ println!(
+ "input: {:?}, output: {:?}",
+ &input[n - 3..n + 3],
+ &result[n - 3..n + 3]
+ );
+ break;
+ }
+ }
+ // Not using assert_eq here deliberately to avoid massive amounts of output spam
+ assert!(input == result);
+ // Check that we actually managed to compress the input
+ assert!(compressed.len() < input.len());
+ }
+
+ #[test]
+ fn file_rle() {
+ let input = get_test_data();
+ let compressed = deflate_bytes_conf(&input, CO::rle());
+
+ let result = decompress_to_end(&compressed);
+ assert!(input == result);
+ }
+
+ #[test]
+ fn file_zlib() {
+ let test_data = get_test_data();
+
+ let compressed = deflate_bytes_zlib(&test_data);
+ // {
+ // use std::fs::File;
+ // use std::io::Write;
+ // let mut f = File::create("out.zlib").unwrap();
+ // f.write_all(&compressed).unwrap();
+ // }
+
+ println!("file_zlib compressed(default) length: {}", compressed.len());
+
+ let result = decompress_zlib(&compressed);
+
+ assert!(&test_data == &result);
+ assert!(compressed.len() < test_data.len());
+ }
+
+ #[test]
+ fn zlib_short() {
+ let test_data = [10, 10, 10, 10, 10, 55];
+ roundtrip_zlib(&test_data, CO::default());
+ }
+
+ #[test]
+ fn zlib_last_block() {
+ let mut test_data = vec![22; 32768];
+ test_data.extend(&[5, 2, 55, 11, 12]);
+ roundtrip_zlib(&test_data, CO::default());
+ }
+
+ #[test]
+ fn deflate_short() {
+ let test_data = [10, 10, 10, 10, 10, 55];
+ let compressed = deflate_bytes(&test_data);
+
+ let result = decompress_to_end(&compressed);
+ assert_eq!(&test_data, result.as_slice());
+ // If block type and compression are selected correctly, this should only take 5 bytes.
+ assert_eq!(compressed.len(), 5);
+ }
+
+ #[cfg(feature = "gzip")]
+ #[test]
+ fn gzip() {
+ let data = get_test_data();
+ let comment = b"Test";
+ let compressed = deflate_bytes_gzip_conf(
+ &data,
+ Compression::Default,
+ GzBuilder::new().comment(&comment[..]),
+ );
+ let (dec, decompressed) = decompress_gzip(&compressed);
+ assert_eq!(dec.header().comment().unwrap(), comment);
+ assert!(data == decompressed);
+ }
+
+ fn chunk_test(chunk_size: usize, level: CompressionOptions) {
+ let mut compressed = Vec::with_capacity(32000);
+ let data = get_test_data();
+ {
+ let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
+ chunked_write(&mut compressor, &data, chunk_size);
+ compressor.finish().unwrap();
+ }
+ let compressed2 = deflate_bytes_zlib_conf(&data, level);
+ let res = decompress_zlib(&compressed);
+ assert!(res == data);
+ assert_eq!(compressed.len(), compressed2.len());
+ assert!(compressed == compressed2);
+ }
+
+ fn writer_chunks_level(level: CompressionOptions) {
+ use input_buffer::BUFFER_SIZE;
+ let ct = |n| chunk_test(n, level);
+ ct(1);
+ ct(50);
+ ct(400);
+ ct(32768);
+ ct(BUFFER_SIZE);
+ ct(50000);
+ ct((32768 * 2) + 258);
+ }
+
+ #[ignore]
+ #[test]
+ /// Test the writer by inputting data one chunk at a time.
+ fn zlib_writer_chunks() {
+ writer_chunks_level(CompressionOptions::default());
+ writer_chunks_level(CompressionOptions::fast());
+ writer_chunks_level(CompressionOptions::rle());
+ }
+
+ /// Check that the frequency values don't overflow.
+ #[test]
+ fn frequency_overflow() {
+ let _ = deflate_bytes_conf(
+ &vec![5; 100000],
+ compression_options::CompressionOptions::default(),
+ );
+ }
+
+ fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
+ let compressed = deflate_bytes_zlib_conf(data, level);
+ let res = decompress_zlib(&compressed);
+ if data.len() <= 32 {
+ assert_eq!(res, data, "Failed with level: {:?}", level);
+ } else {
+ assert!(res == data, "Failed with level: {:?}", level);
+ }
+ }
+
+ fn check_zero(level: CompressionOptions) {
+ roundtrip_zlib(&[], level);
+ }
+
+ /// Compress with an empty slice.
+ #[test]
+ fn empty_input() {
+ check_zero(CompressionOptions::default());
+ check_zero(CompressionOptions::fast());
+ check_zero(CompressionOptions::rle());
+ }
+
+ #[test]
+ fn one_and_two_values() {
+ let one = &[1][..];
+ roundtrip_zlib(one, CO::rle());
+ roundtrip_zlib(one, CO::fast());
+ roundtrip_zlib(one, CO::default());
+ let two = &[5, 6, 7, 8][..];
+ roundtrip_zlib(two, CO::rle());
+ roundtrip_zlib(two, CO::fast());
+ roundtrip_zlib(two, CO::default());
+ }
+
+
+}
diff --git a/third_party/rust/deflate/src/lz77.rs b/third_party/rust/deflate/src/lz77.rs
new file mode 100644
index 0000000000..36ebdbeeea
--- /dev/null
+++ b/third_party/rust/deflate/src/lz77.rs
@@ -0,0 +1,1304 @@
+//! This module contains functionality for doing lz77 compression of data.
+#![macro_use]
+use std::cmp;
+use std::ops::{Range, RangeFrom};
+use std::iter::{self, Iterator};
+use std::slice::Iter;
+use std::fmt;
+
+use input_buffer::InputBuffer;
+use matching::longest_match;
+#[cfg(test)]
+use lzvalue::{LZValue, LZType};
+use huffman_table;
+use chained_hash_table::{ChainedHashTable, update_hash};
+#[cfg(test)]
+use compression_options::{HIGH_MAX_HASH_CHECKS, HIGH_LAZY_IF_LESS_THAN};
+use output_writer::{BufferStatus, DynamicWriter};
+use compress::Flush;
+use rle::process_chunk_greedy_rle;
+
+const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize;
+const MIN_MATCH: usize = huffman_table::MIN_MATCH as usize;
+
+const NO_RLE: u16 = 43212;
+
+/// An enum describing whether we use lazy or greedy matching.
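+///
+/// A minimal configuration sketch (this assumes the public `matching_type` field
+/// on `CompressionOptions`, as used by this crate's option presets):
+///
+/// ``` rust
+/// use deflate::{deflate_bytes_conf, CompressionOptions, MatchingType};
+///
+/// // Start from the default options and switch to greedy matching.
+/// let mut options = CompressionOptions::default();
+/// options.matching_type = MatchingType::Greedy;
+/// let compressed = deflate_bytes_conf(b"Some example data", options);
+/// # let _ = compressed;
+/// ```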
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum MatchingType {
+ /// Use greedy matching: the matching algorithm simply uses a match right away
+ /// if found.
+ Greedy,
+ /// Use lazy matching: after finding a match, the next input byte is checked, to see
+ /// if there is a better match starting at that byte.
+ ///
+ /// As a special case, if max_hash_checks is set to 0, compression using only run-length
+ /// (i.e maximum match distance of 1) is performed instead.
+ Lazy,
+}
+
+impl fmt::Display for MatchingType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ MatchingType::Greedy => write!(f, "Greedy matching"),
+ MatchingType::Lazy => write!(f, "Lazy matching"),
+ }
+ }
+}
+
+/// A struct that contains the hash table, and keeps track of where we are in the input data
+pub struct LZ77State {
+ /// Struct containing hash chains that will be used to find matches.
+ hash_table: ChainedHashTable,
+ /// True if this is the first window that is being processed.
+ is_first_window: bool,
+ /// Set to true when the last block has been processed.
+ is_last_block: bool,
+ /// How many bytes the last match in the previous window extended into the current one.
+ overlap: usize,
+ /// How many bytes of input the current block contains.
+ current_block_input_bytes: u64,
+ /// The maximum number of hash entries to search.
+ max_hash_checks: u16,
+ /// Only lazy match if we have a match length less than this.
+ lazy_if_less_than: u16,
+ /// Whether to use greedy or lazy parsing
+ matching_type: MatchingType,
+ /// Keep track of the previous match and byte in case the buffer is full when lazy matching.
+ match_state: ChunkState,
+ /// Keep track of how many bytes in the lookahead were part of a match, but have not been
+ /// added to the hash chain yet.
+ bytes_to_hash: usize,
+ /// Keep track of whether a sync flush was used. If it was, the first two bytes need to be
+ /// hashed.
+ was_synced: bool,
+}
+
+impl LZ77State {
+ /// Creates a new LZ77 state
+ pub fn new(
+ max_hash_checks: u16,
+ lazy_if_less_than: u16,
+ matching_type: MatchingType,
+ ) -> LZ77State {
+ LZ77State {
+ hash_table: ChainedHashTable::new(),
+ is_first_window: true,
+ is_last_block: false,
+ overlap: 0,
+ current_block_input_bytes: 0,
+ max_hash_checks: max_hash_checks,
+ lazy_if_less_than: lazy_if_less_than,
+ matching_type: matching_type,
+ match_state: ChunkState::new(),
+ bytes_to_hash: 0,
+ was_synced: false,
+ }
+ }
+
+ /// Resets the state excluding max_hash_checks and lazy_if_less_than
+ pub fn reset(&mut self) {
+ self.hash_table.reset();
+ self.is_first_window = true;
+ self.is_last_block = false;
+ self.overlap = 0;
+ self.current_block_input_bytes = 0;
+ self.match_state = ChunkState::new();
+ self.bytes_to_hash = 0
+ }
+
+ pub fn set_last(&mut self) {
+ self.is_last_block = true;
+ }
+
+ /// Is this the last block we are outputting?
+ pub fn is_last_block(&self) -> bool {
+ self.is_last_block
+ }
+
+ /// How many bytes of input the current block contains.
+ pub fn current_block_input_bytes(&self) -> u64 {
+ self.current_block_input_bytes
+ }
+
+ /// Sets the number of input bytes for the current block to 0.
+ pub fn reset_input_bytes(&mut self) {
+ self.current_block_input_bytes = 0;
+ }
+
+ /// Is there a buffered byte that has not been output yet?
+ pub fn pending_byte(&self) -> bool {
+ self.match_state.add
+ }
+
+ /// Returns 1 if pending_byte is true, 0 otherwise.
+ pub fn pending_byte_as_num(&self) -> usize {
+ // This could be implemented by using `as usize` as the documentation states this would give
+ // the same result, but not sure if that should be relied upon.
+ if self.match_state.add {
+ 1
+ } else {
+ 0
+ }
+ }
+}
+
+const DEFAULT_WINDOW_SIZE: usize = 32768;
+
+#[derive(Debug)]
+/// Status after calling `process_chunk`.
+pub enum ProcessStatus {
+ /// All the input data was processed.
+ Ok,
+ /// The output buffer was full.
+ ///
+ /// The argument is the position of the next byte to be checked by `process_chunk`.
+ BufferFull(usize),
+}
+
+#[derive(Debug)]
+/// A struct to keep track of status between calls of `process_chunk_lazy`
+///
+/// This is needed as the output buffer might become full before having output all pending data.
+pub struct ChunkState {
+ /// Length of the last match that was found, if any.
+ current_length: u16,
+ /// Distance of the last match that was found, if any.
+ current_distance: u16,
+ /// The previous byte checked in process_chunk.
+ prev_byte: u8,
+ /// The current byte being checked in process_chunk.
+ cur_byte: u8,
+ /// Whether prev_byte still needs to be output.
+ add: bool,
+}
+
+impl ChunkState {
+ pub fn new() -> ChunkState {
+ ChunkState {
+ current_length: 0,
+ current_distance: 0,
+ prev_byte: 0,
+ cur_byte: 0,
+ add: false,
+ }
+ }
+}
+
+pub fn buffer_full(position: usize) -> ProcessStatus {
+ ProcessStatus::BufferFull(position)
+}
+
+fn process_chunk(
+ data: &[u8],
+ iterated_data: &Range<usize>,
+ mut match_state: &mut ChunkState,
+ hash_table: &mut ChainedHashTable,
+ writer: &mut DynamicWriter,
+ max_hash_checks: u16,
+ lazy_if_less_than: usize,
+ matching_type: MatchingType,
+) -> (usize, ProcessStatus) {
+ let avoid_rle = if cfg!(test) {
+ // Avoid RLE if lazy_if_less_than is a specific value.
+ // This is used in some tests, ideally we should probably do this in a less clunky way,
+ // but we use a value here that is higher than the maximum sensible one anyhow, and will
+ // be truncated by deflate_state for calls from outside the library.
+ lazy_if_less_than == NO_RLE as usize
+ } else {
+ false
+ };
+ match matching_type {
+ MatchingType::Greedy => {
+ process_chunk_greedy(data, iterated_data, hash_table, writer, max_hash_checks)
+ }
+ MatchingType::Lazy => {
+ if max_hash_checks > 0 || avoid_rle {
+ process_chunk_lazy(
+ data,
+ iterated_data,
+ &mut match_state,
+ hash_table,
+ writer,
+ max_hash_checks,
+ lazy_if_less_than,
+ )
+ } else {
+ // Use the RLE method if max_hash_checks is set to 0.
+ process_chunk_greedy_rle(data, iterated_data, writer)
+ }
+ }
+ }
+}
+
+/// Add the specified number of bytes to the hash table from the iterators
+/// adding `start` to the position supplied to the hash table.
+fn add_to_hash_table(
+ bytes_to_add: usize,
+ insert_it: &mut iter::Zip<RangeFrom<usize>, Iter<u8>>,
+ hash_it: &mut Iter<u8>,
+ hash_table: &mut ChainedHashTable,
+) {
+ let taker = insert_it.by_ref().take(bytes_to_add);
+ let mut hash_taker = hash_it.by_ref().take(bytes_to_add);
+ // Update the hash manually here to keep it in a register.
+ let mut hash = hash_table.current_hash();
+ // Advance the iterators and add the bytes we jump over to the hash table and
+ // checksum
+ for (ipos, _) in taker {
+ if let Some(&i_hash_byte) = hash_taker.next() {
+ hash = update_hash(hash, i_hash_byte);
+ hash_table.add_with_hash(ipos, hash);
+ }
+ }
+ // Write the hash back once we are done.
+ hash_table.set_hash(hash);
+}
+
+/// Write the specified literal `byte` to the writer `w`, and return
+/// `ProcessStatus::BufferFull($pos)` if the buffer is full after writing.
+///
+/// `pos` should indicate the byte to start at in the next call to `process_chunk`,
+/// `is_hashed` should be set to true if the byte at `pos` has been added to the hash chain.
+macro_rules! write_literal{
+ ($w:ident, $byte:expr, $pos:expr) => {
+ let b_status = $w.write_literal($byte);
+
+ if let BufferStatus::Full = b_status {
+ return (0, buffer_full($pos));
+ }
+ };
+}
+
+/// If the match is only 3 bytes long and the distance is more than 8 * 1024, it's likely to take
+/// up more space than it would save.
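+/// (Rough, illustrative numbers: such a match costs a length code plus a distance code with up to
+/// 13 extra bits, typically 25 bits or more, while three literals cost roughly 24 bits with the
+/// fixed Huffman codes.)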
+#[inline]
+fn match_too_far(match_len: usize, match_dist: usize) -> bool {
+ const TOO_FAR: usize = 8 * 1024;
+ match_len == MIN_MATCH && match_dist > TOO_FAR
+}
+
+/// Create the iterators used when processing through a chunk of data.
+fn create_iterators<'a>(
+ data: &'a [u8],
+ iterated_data: &Range<usize>,
+) -> (
+ usize,
+ iter::Zip<RangeFrom<usize>, Iter<'a, u8>>,
+ Iter<'a, u8>,
+) {
+ let end = cmp::min(data.len(), iterated_data.end);
+ let start = iterated_data.start;
+ let current_chunk = &data[start..end];
+
+ let insert_it = (start..).zip(current_chunk.iter());
+ let hash_it = {
+ let hash_start = if data.len() - start > 2 {
+ start + 2
+ } else {
+ data.len()
+ };
+ (&data[hash_start..]).iter()
+ };
+ (end, insert_it, hash_it)
+}
+
+fn process_chunk_lazy(
+ data: &[u8],
+ iterated_data: &Range<usize>,
+ state: &mut ChunkState,
+ mut hash_table: &mut ChainedHashTable,
+ writer: &mut DynamicWriter,
+ max_hash_checks: u16,
+ lazy_if_less_than: usize,
+) -> (usize, ProcessStatus) {
+
+ let (end, mut insert_it, mut hash_it) = create_iterators(data, iterated_data);
+
+ const NO_LENGTH: u16 = 0;
+
+ // The previous match length, if any.
+ let mut prev_length = state.current_length;
+ // The distance of the previous match if any.
+ let mut prev_distance = state.current_distance;
+
+ state.current_length = 0;
+ state.current_distance = 0;
+
+ // The number of bytes past end that was added due to finding a match that extends into
+ // the lookahead window.
+ let mut overlap = 0;
+
+
+ // Set to true if we found a match that is equal to or longer than `lazy_if_less_than`,
+ // indicating that we won't lazy match (check for a better match at the next byte).
+ // If we had a good match, carry this over from the previous call.
+ let mut ignore_next = prev_length as usize >= lazy_if_less_than;
+
+ // This is to output the correct byte in case there is one pending to be output
+ // from the previous call.
+ state.prev_byte = state.cur_byte;
+
+ // Iterate through the slice, adding literals or length/distance pairs
+ while let Some((position, &b)) = insert_it.next() {
+ state.cur_byte = b;
+ if let Some(&hash_byte) = hash_it.next() {
+ hash_table.add_hash_value(position, hash_byte);
+
+ // Only lazy match if we have a match shorter than a set value
+ // TODO: This should be cleaned up a bit
+ if !ignore_next {
+ let (mut match_len, match_dist) = {
+ // If there already was a decent match at the previous byte
+ // and we are lazy matching, do less match checks in this step.
+ let max_hash_checks = if prev_length >= 32 {
+ max_hash_checks >> 2
+ } else {
+ max_hash_checks
+ };
+
+ // Check if we can find a better match here than the one we had at
+ // the previous byte.
+ longest_match(
+ data,
+ hash_table,
+ position,
+ prev_length as usize,
+ max_hash_checks,
+ )
+ };
+
+ // If the match is only 3 bytes long and very far back, it's probably not worth
+ // outputting.
+ if match_too_far(match_len, match_dist) {
+ match_len = NO_LENGTH as usize;
+ };
+
+ if match_len >= lazy_if_less_than {
+ // We found a decent match, so we won't check for a better one at the next byte.
+ ignore_next = true;
+ }
+ state.current_length = match_len as u16;
+ state.current_distance = match_dist as u16;
+ } else {
+ // We already had a decent match, so we don't bother checking for another one.
+ state.current_length = NO_LENGTH;
+ state.current_distance = 0;
+ // Make sure we check again next time.
+ ignore_next = false;
+ };
+
+ if prev_length >= state.current_length && prev_length >= MIN_MATCH as u16 {
+ // The previous match was better so we add it.
+ // Casting note: length and distance are already bounded by the longest match
+ // function. Usize is just used for convenience.
+ let b_status =
+ writer.write_length_distance(prev_length as u16, prev_distance as u16);
+
+ // We add the bytes to the hash table and checksum.
+ // Since we've already added two of them, we need to add two less than
+ // the length.
+ let bytes_to_add = prev_length - 2;
+
+ add_to_hash_table(
+ bytes_to_add as usize,
+ &mut insert_it,
+ &mut hash_it,
+ &mut hash_table,
+ );
+
+ // If the match is longer than the current window, we have to note how many
+ // bytes we overlap, since we don't need to do any matching on these bytes
+ // in the next call of this function.
+ // We don't have to worry about setting overlap to 0 if this is false, as the
+ // function will stop after this condition is true, and overlap is not altered
+ // elsewhere.
+ if position + prev_length as usize > end {
+ // We need to subtract 1 since the byte at pos is also included.
+ overlap = position + prev_length as usize - end - 1;
+ };
+
+ state.add = false;
+
+ // Note that there is no current match.
+ state.current_length = 0;
+ state.current_distance = 0;
+
+ if let BufferStatus::Full = b_status {
+ // MATCH(lazy)
+ return (overlap, buffer_full(position + prev_length as usize - 1));
+ }
+
+ ignore_next = false;
+
+ } else if state.add {
+ // We found a better match (or there was no previous match)
+ // so output the previous byte.
+ // BETTER OR NO MATCH
+ write_literal!(writer, state.prev_byte, position + 1);
+ } else {
+ state.add = true
+ }
+
+ prev_length = state.current_length;
+ prev_distance = state.current_distance;
+ state.prev_byte = b;
+ } else {
+ // If there is a match at this point, it will not have been added, so we need to add it.
+ if prev_length >= MIN_MATCH as u16 {
+ let b_status =
+ writer.write_length_distance(prev_length as u16, prev_distance as u16);
+
+ state.current_length = 0;
+ state.current_distance = 0;
+ state.add = false;
+
+ // As this will be a 3-length match at the end of the input data, there can't be any
+ // overlap.
+ // TODO: Not sure if we need to signal that the buffer is full here.
+ // It's only needed in the case of syncing.
+ if let BufferStatus::Full = b_status {
+ // TODO: These bytes should be hashed when doing a sync flush.
+ // This can't be done here as the new input data does not exist yet.
+ return (0, buffer_full(end));
+ } else {
+ return (0, ProcessStatus::Ok);
+ }
+ };
+
+ if state.add {
+ // We may still have a leftover byte at this point, so we add it here if needed.
+ state.add = false;
+
+ // ADD
+ write_literal!(writer, state.prev_byte, position + 1);
+
+ };
+
+ // We are at the last two bytes we want to add, so there is no point
+ // searching for matches here.
+
+ // AFTER ADD
+ write_literal!(writer, b, position + 1);
+ }
+ }
+ (overlap, ProcessStatus::Ok)
+}
+
+fn process_chunk_greedy(
+ data: &[u8],
+ iterated_data: &Range<usize>,
+ mut hash_table: &mut ChainedHashTable,
+ writer: &mut DynamicWriter,
+ max_hash_checks: u16,
+) -> (usize, ProcessStatus) {
+
+ let (end, mut insert_it, mut hash_it) = create_iterators(data, iterated_data);
+
+ const NO_LENGTH: usize = 0;
+
+ // The number of bytes past end that was added due to finding a match that extends into
+ // the lookahead window.
+ let mut overlap = 0;
+
+ // Iterate through the slice, adding literals or length/distance pairs.
+ while let Some((position, &b)) = insert_it.next() {
+ if let Some(&hash_byte) = hash_it.next() {
+ hash_table.add_hash_value(position, hash_byte);
+
+ // TODO: This should be cleaned up a bit.
+ let (match_len, match_dist) =
+ { longest_match(data, hash_table, position, NO_LENGTH, max_hash_checks) };
+
+ if match_len >= MIN_MATCH as usize && !match_too_far(match_len, match_dist) {
+ // Casting note: length and distance are already bounded by the longest match
+ // function. Usize is just used for convenience.
+ let b_status = writer.write_length_distance(match_len as u16, match_dist as u16);
+
+ // We add the bytes to the hash table and checksum.
+ // Since we've already added one of them, we need to add one less than
+ // the length.
+ let bytes_to_add = match_len - 1;
+ add_to_hash_table(
+ bytes_to_add,
+ &mut insert_it,
+ &mut hash_it,
+ &mut hash_table,
+ );
+
+ // If the match is longer than the current window, we have to note how many
+ // bytes we overlap, since we don't need to do any matching on these bytes
+ // in the next call of this function.
+ if position + match_len > end {
+ // We need to subtract 1 since the byte at pos is also included.
+ overlap = position + match_len - end;
+ };
+
+ if let BufferStatus::Full = b_status {
+ // MATCH
+ return (overlap, buffer_full(position + match_len));
+ }
+
+ } else {
+ // NO MATCH
+ write_literal!(writer, b, position + 1);
+ }
+ } else {
+ // We are at the last two bytes we want to add, so there is no point
+ // searching for matches here.
+ // END
+ write_literal!(writer, b, position + 1);
+ }
+ }
+ (overlap, ProcessStatus::Ok)
+}
+
+
+#[derive(Eq, PartialEq, Clone, Copy, Debug)]
+pub enum LZ77Status {
+ /// Waiting for more input before doing any processing
+ NeedInput,
+ /// The output buffer is full, so the current block needs to be ended so the
+ /// buffer can be flushed.
+ EndBlock,
+ /// All pending data has been processed.
+ Finished,
+}
+
+#[cfg(test)]
+pub fn lz77_compress_block_finish(
+ data: &[u8],
+ state: &mut LZ77State,
+ buffer: &mut InputBuffer,
+ mut writer: &mut DynamicWriter,
+) -> (usize, LZ77Status) {
+ let (consumed, status, _) =
+ lz77_compress_block(data, state, buffer, &mut writer, Flush::Finish);
+ (consumed, status)
+}
+
+/// Compress a slice with lz77 compression.
+///
+/// This function processes one window at a time, and returns when there is no input left,
+/// or it determines it's time to end a block.
+///
+/// Returns the number of bytes of the input that were consumed, a status describing
+/// whether there is no input, it's time to finish, or it's time to end the block, and the position
+/// of the first byte in the input buffer that has not been output (but may have been checked for
+/// matches).
+pub fn lz77_compress_block(
+ data: &[u8],
+ state: &mut LZ77State,
+ buffer: &mut InputBuffer,
+ mut writer: &mut DynamicWriter,
+ flush: Flush,
+) -> (usize, LZ77Status, usize) {
+ // Currently we only support the maximum window size
+ let window_size = DEFAULT_WINDOW_SIZE;
+
+ // Indicates whether we should try to process all the data including the lookahead, or if we
+ // should wait until we have at least one window size of data before doing anything.
+ let finish = flush == Flush::Finish || flush == Flush::Sync;
+ let sync = flush == Flush::Sync;
+
+ let mut current_position = 0;
+
+ // The current status of the encoding.
+ let mut status = LZ77Status::EndBlock;
+
+ // Whether to warm up the hash chain with the first two values.
+ let mut add_initial = true;
+
+ // If we have synced, add the first two bytes to the hash as they couldn't be added before.
+ if state.was_synced {
+ if buffer.current_end() > 2 {
+ let pos_add = buffer.current_end() - 2;
+ for (n, &b) in data.iter().take(2).enumerate() {
+ state.hash_table.add_hash_value(n + pos_add, b);
+ }
+ add_initial = false;
+ }
+ state.was_synced = false;
+ }
+
+ // Add data to the input buffer and keep a reference to the slice of data not added yet.
+ let mut remaining_data = buffer.add_data(data);
+
+ loop {
+ // Note if there is a pending byte from the previous call to process_chunk,
+ // so we get the block input size right.
+ let pending_previous = state.pending_byte_as_num();
+
+ assert!(writer.buffer_length() <= (window_size * 2));
+ // The process is a bit different for the first 32k bytes.
+ // TODO: There is a lot of duplicate code between the two branches here, we should be able
+ // to simplify this.
+ if state.is_first_window {
+ // Don't do anything until we are either flushing, or we have at least one window of
+ // data.
+ if buffer.current_end() >= (window_size * 2) + MAX_MATCH || finish {
+
+ if buffer.get_buffer().len() >= 2 && add_initial &&
+ state.current_block_input_bytes == 0
+ {
+ let b = buffer.get_buffer();
+ // Warm up the hash with the two first values, so we can find matches at
+ // index 0.
+ state.hash_table.add_initial_hash_values(b[0], b[1]);
+ add_initial = false;
+ }
+
+ let first_chunk_end = cmp::min(window_size, buffer.current_end());
+
+ let start = state.overlap;
+
+ let (overlap, p_status) = process_chunk(
+ buffer.get_buffer(),
+ &(start..first_chunk_end),
+ &mut state.match_state,
+ &mut state.hash_table,
+ &mut writer,
+ state.max_hash_checks,
+ state.lazy_if_less_than as usize,
+ state.matching_type,
+ );
+
+ state.overlap = overlap;
+ state.bytes_to_hash = overlap;
+
+ // If the buffer is full, we want to end the block.
+ if let ProcessStatus::BufferFull(written) = p_status {
+ state.overlap = if overlap > 0 { overlap } else { written };
+ status = LZ77Status::EndBlock;
+ current_position = written - state.pending_byte_as_num();
+ state.current_block_input_bytes +=
+ (written - start + overlap + pending_previous -
+ state.pending_byte_as_num()) as u64;
+ break;
+ }
+
+
+ // Update the length of how many input bytes the current block is representing,
+ // taking into account pending bytes.
+ state.current_block_input_bytes +=
+ (first_chunk_end - start + overlap + pending_previous -
+ state.pending_byte_as_num()) as u64;
+
+ // We are at the first window so we don't need to slide the hash table yet.
+ // If finishing or syncing, we stop here.
+ if first_chunk_end >= buffer.current_end() && finish {
+ current_position = first_chunk_end - state.pending_byte_as_num();
+ if !sync {
+ state.set_last();
+ state.is_first_window = false;
+ } else {
+ state.overlap = first_chunk_end;
+ state.was_synced = true;
+ }
+ debug_assert!(
+ !state.pending_byte(),
+ "Bug! Ended compression with a pending byte!"
+ );
+ status = LZ77Status::Finished;
+ break;
+ }
+ // Otherwise, continue.
+ state.is_first_window = false;
+ } else {
+ status = LZ77Status::NeedInput;
+ break;
+ }
+ } else if buffer.current_end() >= (window_size * 2) + MAX_MATCH || finish {
+ if buffer.current_end() >= window_size + 2 {
+ for (n, &h) in buffer.get_buffer()[window_size + 2..]
+ .iter()
+ .enumerate()
+ .take(state.bytes_to_hash)
+ {
+ state.hash_table.add_hash_value(window_size + n, h);
+ }
+ state.bytes_to_hash = 0;
+ }
+ // This isn't the first chunk, so we start reading one window into the
+ // buffer plus any additional overlap from earlier.
+ let start = window_size + state.overlap;
+
+ // Determine where we have to stop iterating to slide the buffer and hash,
+ // or stop because we are at the end of the input data.
+ let end = cmp::min(window_size * 2, buffer.current_end());
+
+ let (overlap, p_status) = process_chunk(
+ buffer.get_buffer(),
+ &(start..end),
+ &mut state.match_state,
+ &mut state.hash_table,
+ &mut writer,
+ state.max_hash_checks,
+ state.lazy_if_less_than as usize,
+ state.matching_type,
+ );
+
+ state.bytes_to_hash = overlap;
+
+
+ if let ProcessStatus::BufferFull(written) = p_status {
+ state.current_block_input_bytes +=
+ (written - start + pending_previous - state.pending_byte_as_num()) as u64;
+
+ // If the buffer is full, return and end the block.
+ // If overlap is non-zero, the buffer was full after outputting the last byte,
+ // otherwise we have to skip to the point in the buffer where we stopped in the
+ // next call.
+ state.overlap = if overlap > 0 {
+ // If we are at the end of the window, make sure we slide the buffer and the
+ // hash table.
+ if state.max_hash_checks > 0 {
+ state.hash_table.slide(window_size);
+ }
+ remaining_data = buffer.slide(remaining_data.unwrap_or(&[]));
+ overlap
+ } else {
+ written - window_size
+ };
+
+ current_position = written - state.pending_byte_as_num();
+
+
+ // Status is already EndBlock at this point.
+ // status = LZ77Status::EndBlock;
+ break;
+ }
+
+ state.current_block_input_bytes +=
+ (end - start + overlap + pending_previous - state.pending_byte_as_num()) as u64;
+
+ // The buffer is not full, but we still need to note if there is any overlap into the
+ // next window.
+ state.overlap = overlap;
+
+ if remaining_data.is_none() && finish && end == buffer.current_end() {
+ current_position = buffer.current_end();
+ debug_assert!(
+ !state.pending_byte(),
+ "Bug! Ended compression with a pending byte!"
+ );
+
+ // We stopped before or at the window size, so we are at the end.
+ if !sync {
+ // If we are finishing and not syncing, we simply indicate that we are done.
+ state.set_last();
+ } else {
+ // For sync flushing we need to slide the buffer and the hash chains so that the
+ // next call to this function starts at the right place.
+
+ // There won't be any overlap, since when syncing, we process to the end of the
+ // pending data.
+ state.overlap = buffer.current_end() - window_size;
+ state.was_synced = true;
+ }
+ status = LZ77Status::Finished;
+ break;
+ } else {
+ // We are not at the end, so slide and continue.
+ // We slide the hash table back to make space for new hash values
+ // We only need to remember 2^15 bytes back (the maximum distance allowed by the
+ // deflate spec).
+ if state.max_hash_checks > 0 {
+ state.hash_table.slide(window_size);
+ }
+
+ // Also slide the buffer, discarding data we no longer need and adding new data.
+ remaining_data = buffer.slide(remaining_data.unwrap_or(&[]));
+ }
+ } else {
+ // The caller has not indicated that they want to finish or flush, and there is less
+ // than a window + lookahead of new data, so we wait for more.
+ status = LZ77Status::NeedInput;
+ break;
+ }
+
+ }
+
+ (
+ data.len() - remaining_data.unwrap_or(&[]).len(),
+ status,
+ current_position,
+ )
+}
+
+#[cfg(test)]
+pub fn decompress_lz77(input: &[LZValue]) -> Vec<u8> {
+ decompress_lz77_with_backbuffer(input, &[])
+}
+
+#[cfg(test)]
+pub fn decompress_lz77_with_backbuffer(input: &[LZValue], back_buffer: &[u8]) -> Vec<u8> {
+ let mut output = Vec::new();
+ for p in input {
+ match p.value() {
+ LZType::Literal(l) => output.push(l),
+ LZType::StoredLengthDistance(l, d) => {
+ // We found a match, so we have to get the data that the match refers to.
+ let d = d as usize;
+ let prev_output_len = output.len();
+ // Get match data from the back buffer if the match extends that far.
+ let consumed = if d > output.len() {
+ let into_back_buffer = d - output.len();
+
+ assert!(
+ into_back_buffer <= back_buffer.len(),
+ "ERROR: Attempted to refer to a match in non-existing data!\
+ into_back_buffer: {}, back_buffer len {}, d {}, l {:?}",
+ into_back_buffer,
+ back_buffer.len(),
+ d,
+ l
+ );
+ let start = back_buffer.len() - into_back_buffer;
+ let end = cmp::min(back_buffer.len(), start + l.actual_length() as usize);
+ output.extend_from_slice(&back_buffer[start..end]);
+
+ end - start
+ } else {
+ 0
+ };
+
+ // Get match data from the currently decompressed data.
+ let start = prev_output_len.saturating_sub(d);
+ let mut n = 0;
+ while n < (l.actual_length() as usize).saturating_sub(consumed) {
+ let b = output[start + n];
+ output.push(b);
+ n += 1;
+ }
+ }
+ }
+ }
+ output
+}
+
+#[cfg(test)]
+pub struct TestStruct {
+ state: LZ77State,
+ buffer: InputBuffer,
+ writer: DynamicWriter,
+}
+
+#[cfg(test)]
+impl TestStruct {
+ fn new() -> TestStruct {
+ TestStruct::with_config(
+ HIGH_MAX_HASH_CHECKS,
+ HIGH_LAZY_IF_LESS_THAN,
+ MatchingType::Lazy,
+ )
+ }
+
+ fn with_config(
+ max_hash_checks: u16,
+ lazy_if_less_than: u16,
+ matching_type: MatchingType,
+ ) -> TestStruct {
+ TestStruct {
+ state: LZ77State::new(max_hash_checks, lazy_if_less_than, matching_type),
+ buffer: InputBuffer::empty(),
+ writer: DynamicWriter::new(),
+ }
+ }
+
+ fn compress_block(&mut self, data: &[u8], flush: bool) -> (usize, LZ77Status, usize) {
+ lz77_compress_block(
+ data,
+ &mut self.state,
+ &mut self.buffer,
+ &mut self.writer,
+ if flush { Flush::Finish } else { Flush::None },
+ )
+ }
+}
+
+#[cfg(test)]
+pub fn lz77_compress(data: &[u8]) -> Option<Vec<LZValue>> {
+ lz77_compress_conf(
+ data,
+ HIGH_MAX_HASH_CHECKS,
+ HIGH_LAZY_IF_LESS_THAN,
+ MatchingType::Lazy,
+ )
+}
+
+/// Compress a slice, not storing frequency information
+///
+/// This is a convenience function for compression with fixed huffman values
+/// Only used in tests for now
+#[cfg(test)]
+pub fn lz77_compress_conf(
+ data: &[u8],
+ max_hash_checks: u16,
+ lazy_if_less_than: u16,
+ matching_type: MatchingType,
+) -> Option<Vec<LZValue>> {
+ let mut test_boxed = Box::new(TestStruct::with_config(
+ max_hash_checks,
+ lazy_if_less_than,
+ matching_type,
+ ));
+ let mut out = Vec::<LZValue>::with_capacity(data.len() / 3);
+ {
+ let test = test_boxed.as_mut();
+ let mut slice = data;
+
+ while !test.state.is_last_block {
+ let bytes_written = lz77_compress_block_finish(
+ slice,
+ &mut test.state,
+ &mut test.buffer,
+ &mut test.writer,
+ ).0;
+ slice = &slice[bytes_written..];
+ out.extend(test.writer.get_buffer());
+ test.writer.clear();
+ }
+
+ }
+
+ Some(out)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ use lzvalue::{LZValue, LZType, lit, ld};
+ use chained_hash_table::WINDOW_SIZE;
+ use compression_options::DEFAULT_LAZY_IF_LESS_THAN;
+ use test_utils::get_test_data;
+ use output_writer::MAX_BUFFER_LENGTH;
+
+ /// Helper function to print the output from the lz77 compression function
+ fn print_output(input: &[LZValue]) {
+ let mut output = vec![];
+ for l in input {
+ match l.value() {
+ LZType::Literal(l) => output.push(l),
+ LZType::StoredLengthDistance(l, d) => {
+ output.extend(format!("<L {}>", l.actual_length()).into_bytes());
+ output.extend(format!("<D {}>", d).into_bytes())
+ }
+ }
+ }
+
+ println!("\"{}\"", String::from_utf8(output).unwrap());
+ }
+
+ /// Test that a short string from an example on SO compresses correctly
+ #[test]
+ fn compress_short() {
+ let test_bytes = String::from("Deflate late").into_bytes();
+ let res = lz77_compress(&test_bytes).unwrap();
+
+ let decompressed = decompress_lz77(&res);
+
+ assert_eq!(test_bytes, decompressed);
+ assert_eq!(*res.last().unwrap(), LZValue::length_distance(4, 5));
+ }
+
+ /// Test that compression is working for a longer file
+ #[test]
+ fn compress_long() {
+ let input = get_test_data();
+ let compressed = lz77_compress(&input).unwrap();
+ assert!(compressed.len() < input.len());
+
+ let decompressed = decompress_lz77(&compressed);
+ println!("compress_long length: {}", input.len());
+
+ // This is to check where the compression fails, if it were to
+ for (n, (&a, &b)) in input.iter().zip(decompressed.iter()).enumerate() {
+ if a != b {
+ println!("First difference at {}, input: {}, output: {}", n, a, b);
+ break;
+ }
+ }
+ assert_eq!(input.len(), decompressed.len());
+ assert!(&decompressed == &input);
+ }
+
+ /// Check that lazy matching is working as intended
+ #[test]
+ fn lazy() {
+ // We want to match on `badger` rather than `nba` as it is longer
+ // let data = b" nba nbadg badger nbadger";
+ let data = b"nba badger nbadger";
+ let compressed = lz77_compress(data).unwrap();
+ let test = compressed[compressed.len() - 1];
+ if let LZType::StoredLengthDistance(l, _) = test.value() {
+ assert_eq!(l.actual_length(), 6);
+ } else {
+ print_output(&compressed);
+ panic!();
+ }
+ }
+
+ fn roundtrip(data: &[u8]) {
+ let compressed = super::lz77_compress(&data).unwrap();
+ let decompressed = decompress_lz77(&compressed);
+ assert!(decompressed == data);
+ }
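+
+ /// Illustrative sanity check: greedy matching should roundtrip as well.
+ /// The hash-check and laziness values used here are arbitrary example parameters.
+ #[test]
+ fn greedy_roundtrip() {
+ let data = b"Deflate late Deflate late Deflate late";
+ // Compress with greedy matching and make sure the lz77 stream decodes back to the input.
+ let compressed = lz77_compress_conf(data, 4096, 32, MatchingType::Greedy).unwrap();
+ let decompressed = decompress_lz77(&compressed);
+ assert!(decompressed == &data[..]);
+ }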
+
+ // Check that data with the exact window size is working properly
+ #[test]
+ #[allow(unused)]
+ fn exact_window_size() {
+ use std::io::Write;
+ let mut data = vec![0; WINDOW_SIZE];
+ roundtrip(&data);
+ {
+ data.write(&[22; WINDOW_SIZE]);
+ }
+ roundtrip(&data);
+ {
+ data.write(&[55; WINDOW_SIZE]);
+ }
+ roundtrip(&data);
+ }
+
+ /// Test that matches at the window border are working correctly
+ #[test]
+ fn border() {
+ use chained_hash_table::WINDOW_SIZE;
+ let mut data = vec![35; WINDOW_SIZE];
+ data.extend(b"Test");
+ let compressed = super::lz77_compress(&data).unwrap();
+ assert!(compressed.len() < data.len());
+ let decompressed = decompress_lz77(&compressed);
+
+ assert_eq!(decompressed.len(), data.len());
+ assert!(decompressed == data);
+ }
+
+ #[test]
+ fn border_multiple_blocks() {
+ use chained_hash_table::WINDOW_SIZE;
+ let mut data = vec![0; (WINDOW_SIZE * 2) + 50];
+ data.push(1);
+ let compressed = super::lz77_compress(&data).unwrap();
+ assert!(compressed.len() < data.len());
+ let decompressed = decompress_lz77(&compressed);
+ assert_eq!(decompressed.len(), data.len());
+ assert!(decompressed == data);
+ }
+
+ #[test]
+ fn compress_block_status() {
+ use input_buffer::InputBuffer;
+
+ let data = b"Test data data";
+ let mut writer = DynamicWriter::new();
+
+ let mut buffer = InputBuffer::empty();
+ let mut state = LZ77State::new(4096, DEFAULT_LAZY_IF_LESS_THAN, MatchingType::Lazy);
+ let status = lz77_compress_block_finish(data, &mut state, &mut buffer, &mut writer);
+ assert_eq!(status.1, LZ77Status::Finished);
+ assert!(&buffer.get_buffer()[..data.len()] == data);
+ assert_eq!(buffer.current_end(), data.len());
+ }
+
+ #[test]
+ fn compress_block_multiple_windows() {
+ use input_buffer::InputBuffer;
+ use output_writer::DynamicWriter;
+
+ let data = get_test_data();
+ assert!(data.len() > (WINDOW_SIZE * 2) + super::MAX_MATCH);
+ let mut writer = DynamicWriter::new();
+
+ let mut buffer = InputBuffer::empty();
+ let mut state = LZ77State::new(0, DEFAULT_LAZY_IF_LESS_THAN, MatchingType::Lazy);
+ let (bytes_consumed, status) =
+ lz77_compress_block_finish(&data, &mut state, &mut buffer, &mut writer);
+ assert_eq!(
+ buffer.get_buffer().len(),
+ (WINDOW_SIZE * 2) + super::MAX_MATCH
+ );
+ assert_eq!(status, LZ77Status::EndBlock);
+ let buf_len = buffer.get_buffer().len();
+ assert!(buffer.get_buffer()[..] == data[..buf_len]);
+
+ writer.clear();
+ let (_, status) = lz77_compress_block_finish(
+ &data[bytes_consumed..],
+ &mut state,
+ &mut buffer,
+ &mut writer,
+ );
+ assert_eq!(status, LZ77Status::EndBlock);
+
+ }
+
+ #[test]
+ fn multiple_inputs() {
+ let data = b"Badger badger bababa test data 25 asfgestghresjkgh";
+ let comp1 = lz77_compress(data).unwrap();
+ let comp2 = {
+ const SPLIT: usize = 25;
+ let first_part = &data[..SPLIT];
+ let second_part = &data[SPLIT..];
+ let mut state = TestStruct::new();
+ let (bytes_written, status, _) = state.compress_block(first_part, false);
+ assert_eq!(bytes_written, first_part.len());
+ assert_eq!(status, LZ77Status::NeedInput);
+ let (bytes_written, status, _) = state.compress_block(second_part, true);
+ assert_eq!(bytes_written, second_part.len());
+ assert_eq!(status, LZ77Status::Finished);
+ Vec::from(state.writer.get_buffer())
+ };
+ assert!(comp1 == comp2);
+ }
+
+
+ #[test]
+ /// Test that the exit from process_chunk when buffer is full is working correctly.
+ fn buffer_fill() {
+ let data = get_test_data();
+ // The comments above these calls refer to the positions with the
+ // corresponding comments in process_chunk_{greedy/lazy}.
+ // POS BETTER OR NO MATCH
+ buffer_test_literals(&data);
+ // POS END
+ // POS NO MATCH
+ buffer_test_last_bytes(MatchingType::Greedy, &data);
+ // POS ADD
+ // POS AFTER ADD
+ buffer_test_last_bytes(MatchingType::Lazy, &data);
+
+ // POS MATCH
+ buffer_test_match(MatchingType::Greedy);
+ // POS MATCH(lazy)
+ buffer_test_match(MatchingType::Lazy);
+
+ // POS END
+ buffer_test_add_end(&data);
+ }
+
+ /// Test buffer fill when a byte is added due to no match being found.
+ fn buffer_test_literals(data: &[u8]) {
+ let mut state = TestStruct::with_config(0, NO_RLE, MatchingType::Lazy);
+ let (bytes_consumed, status, position) = state.compress_block(&data, false);
+
+ // There should be enough data for the block to have ended.
+ assert_eq!(status, LZ77Status::EndBlock);
+ assert!(bytes_consumed <= (WINDOW_SIZE * 2) + MAX_MATCH);
+
+ // The buffer should be full.
+ assert_eq!(state.writer.get_buffer().len(), MAX_BUFFER_LENGTH);
+ assert_eq!(position, state.writer.get_buffer().len());
+ // Since all literals have been input, the block should have the exact number of litlens
+ // as there were input bytes.
+ assert_eq!(
+ state.state.current_block_input_bytes() as usize,
+ MAX_BUFFER_LENGTH
+ );
+ state.state.reset_input_bytes();
+
+ let mut out = decompress_lz77(state.writer.get_buffer());
+
+ state.writer.clear();
+ // The buffer should now be cleared.
+ assert_eq!(state.writer.get_buffer().len(), 0);
+
+ assert!(data[..out.len()] == out[..]);
+
+ let _ = state.compress_block(&data[bytes_consumed..], false);
+ // We should have some new data in the buffer at this point.
+ assert!(state.writer.get_buffer().len() > 0);
+ assert_eq!(
+ state.state.current_block_input_bytes() as usize,
+ MAX_BUFFER_LENGTH
+ );
+
+ out.extend_from_slice(&decompress_lz77(state.writer.get_buffer()));
+ assert!(data[..out.len()] == out[..]);
+ }
+
+ /// Test buffer fill at the last two bytes that are not hashed.
+ fn buffer_test_last_bytes(matching_type: MatchingType, data: &[u8]) {
+ const BYTES_USED: usize = MAX_BUFFER_LENGTH;
+ assert!(
+ &data[..BYTES_USED] ==
+ &decompress_lz77(&lz77_compress_conf(
+ &data[..BYTES_USED],
+ 0,
+ NO_RLE,
+ matching_type,
+ ).unwrap())
+ [..]
+ );
+ assert!(
+ &data[..BYTES_USED + 1] ==
+ &decompress_lz77(&lz77_compress_conf(
+ &data[..BYTES_USED + 1],
+ 0,
+ NO_RLE,
+ matching_type,
+ ).unwrap())
+ [..]
+ );
+ }
+
+ /// Test buffer fill when buffer is full at a match.
+ fn buffer_test_match(matching_type: MatchingType) {
+ // TODO: Also test this for the second block to make sure
+ // buffer is slid.
+ let mut state = TestStruct::with_config(1, 0, matching_type);
+ for _ in 0..MAX_BUFFER_LENGTH - 4 {
+ assert!(state.writer.write_literal(0) == BufferStatus::NotFull);
+ }
+ state.compress_block(&[1, 2, 3, 1, 2, 3, 4], true);
+ assert!(*state.writer.get_buffer().last().unwrap() == LZValue::length_distance(3, 3));
+
+ }
+
+ /// Test buffer fill for the lazy match algorithm when adding a pending byte at the end.
+ fn buffer_test_add_end(_data: &[u8]) {
+ // This is disabled while the buffer size has not been stabilized.
+ /*
+ let mut state = TestStruct::with_config(DEFAULT_MAX_HASH_CHECKS,
+ DEFAULT_LAZY_IF_LESS_THAN,
+ MatchingType::Lazy);
+ // For the test file, this is how much data needs to be added to get the buffer
+ // full at the right spot to test that this buffer full exit is working correctly.
+ for i in 0..31743 {
+ assert!(state.writer.write_literal(0) == BufferStatus::NotFull, "Buffer pos: {}", i);
+ }
+
+ let dec = decompress_lz77(&state.writer.get_buffer()[pos..]);
+ assert!(dec.len() > 0);
+ assert!(dec[..] == data[..dec.len()]);
+ */
+ }
+
+ /// Check that decompressing lz77-data that refers to the back-buffer works.
+ #[test]
+ fn test_decompress_with_backbuffer() {
+ let bb = vec![5; WINDOW_SIZE];
+ let lz_compressed = [lit(2), lit(4), ld(4, 20), lit(1), lit(1), ld(5, 10)];
+ let dec = decompress_lz77_with_backbuffer(&lz_compressed, &bb);
+
+ // ------------l2 l4 <-ld4,20-> l1 l1 <---ld5,10-->
+ assert!(dec == [2, 4, 5, 5, 5, 5, 1, 1, 5, 5, 2, 4, 5]);
+ }
+
+}
+
+#[cfg(all(test, feature = "benchmarks"))]
+mod bench {
+ use test_std::Bencher;
+ use test_utils::get_test_data;
+ use super::lz77_compress;
+ #[bench]
+ fn test_file_zlib_lz77_only(b: &mut Bencher) {
+ let test_data = get_test_data();
+ b.iter(|| lz77_compress(&test_data));
+ }
+}
diff --git a/third_party/rust/deflate/src/lzvalue.rs b/third_party/rust/deflate/src/lzvalue.rs
new file mode 100644
index 0000000000..acbbde8b20
--- /dev/null
+++ b/third_party/rust/deflate/src/lzvalue.rs
@@ -0,0 +1,121 @@
+use huffman_table::{MAX_DISTANCE, MIN_MATCH};
+#[cfg(test)]
+use huffman_table::MAX_MATCH;
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub struct StoredLength {
+ length: u8,
+}
+
+impl StoredLength {
+ #[cfg(test)]
+ pub fn from_actual_length(length: u16) -> StoredLength {
+ assert!(length <= MAX_MATCH && length >= MIN_MATCH);
+ StoredLength {
+ length: (length - MIN_MATCH) as u8,
+ }
+ }
+
+ pub fn new(stored_length: u8) -> StoredLength {
+ StoredLength {
+ length: stored_length,
+ }
+ }
+
+ pub fn stored_length(&self) -> u8 {
+ self.length
+ }
+
+ #[cfg(test)]
+ pub fn actual_length(&self) -> u16 {
+ u16::from(self.length) + MIN_MATCH
+ }
+}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum LZType {
+ Literal(u8),
+ StoredLengthDistance(StoredLength, u16),
+}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub struct LZValue {
+ litlen: u8,
+ distance: u16,
+}
+
+impl LZValue {
+ #[inline]
+ pub fn literal(value: u8) -> LZValue {
+ LZValue {
+ litlen: value,
+ distance: 0,
+ }
+ }
+
+ #[inline]
+ pub fn length_distance(length: u16, distance: u16) -> LZValue {
+ debug_assert!(distance > 0 && distance <= MAX_DISTANCE);
+ let stored_length = (length - MIN_MATCH) as u8;
+ LZValue {
+ litlen: stored_length,
+ distance: distance,
+ }
+ }
+
+ #[inline]
+ pub fn value(&self) -> LZType {
+ if self.distance != 0 {
+ LZType::StoredLengthDistance(StoredLength::new(self.litlen), self.distance)
+ } else {
+ LZType::Literal(self.litlen)
+ }
+ }
+}
+
+#[cfg(test)]
+pub fn lit(l: u8) -> LZValue {
+ LZValue::literal(l)
+}
+
+#[cfg(test)]
+pub fn ld(l: u16, d: u16) -> LZValue {
+ LZValue::length_distance(l, d)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use huffman_table::{MIN_MATCH, MIN_DISTANCE, MAX_MATCH, MAX_DISTANCE};
+ #[test]
+ fn lzvalue() {
+ for i in 0..255 as usize + 1 {
+ let v = LZValue::literal(i as u8);
+ if let LZType::Literal(n) = v.value() {
+ assert_eq!(n as usize, i);
+ } else {
+ panic!();
+ }
+ }
+
+ for i in MIN_MATCH..MAX_MATCH + 1 {
+ let v = LZValue::length_distance(i, 5);
+ if let LZType::StoredLengthDistance(l, _) = v.value() {
+ assert_eq!(l.actual_length(), i);
+ } else {
+ panic!();
+ }
+ }
+
+ for i in MIN_DISTANCE..MAX_DISTANCE + 1 {
+ let v = LZValue::length_distance(5, i);
+
+ if let LZType::StoredLengthDistance(_, d) = v.value() {
+ assert_eq!(d, i);
+ } else {
+ panic!("Failed to get distance {}", i);
+ }
+ }
+
+ }
+}
diff --git a/third_party/rust/deflate/src/matching.rs b/third_party/rust/deflate/src/matching.rs
new file mode 100644
index 0000000000..6cdcf3211a
--- /dev/null
+++ b/third_party/rust/deflate/src/matching.rs
@@ -0,0 +1,425 @@
+use std::cmp;
+
+use chained_hash_table::{ChainedHashTable, WINDOW_SIZE};
+use huffman_table;
+
+const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize;
+#[cfg(test)]
+const MIN_MATCH: usize = huffman_table::MIN_MATCH as usize;
+
+
+/// Get the length of the checked match
+/// The function returns the number of bytes at and including `current_pos` that are the same as the
+/// ones at `pos_to_check`
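+/// For example (illustrative): `get_match_length(b"abcabca", 3, 0)` compares `"abca"` against
+/// `"abcabca"` and returns 4, since the first four bytes from each position are equal.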
+#[inline]
+pub fn get_match_length(data: &[u8], current_pos: usize, pos_to_check: usize) -> usize {
+ // Unsafe version using unaligned loads for comparison.
+ // Faster when benching the matching function alone,
+ // but not as significant when running the full thing.
+/*
+ type Comp = u64;
+
+ use std::mem::size_of;
+
+ let max = cmp::min(data.len() - current_pos, MAX_MATCH);
+ let mut left = max;
+ let s = size_of::<Comp>();
+
+ unsafe {
+ let mut cur = data.as_ptr().offset(current_pos as isize);
+ let mut tc = data.as_ptr().offset(pos_to_check as isize);
+ while left >= s &&
+ (*(cur as *const Comp) == *(tc as *const Comp)) {
+ left -= s;
+ cur = cur.offset(s as isize);
+ tc = tc.offset(s as isize);
+ }
+ while left > 0 && *cur == *tc {
+ left -= 1;
+ cur = cur.offset(1);
+ tc = tc.offset(1);
+ }
+ }
+
+ max - left
+*/
+
+ // Slightly faster than naive in single bench.
+ // Does not use unaligned loads.
+ // let l = cmp::min(MAX_MATCH, data.len() - current_pos);
+
+ // let a = unsafe{&data.get_unchecked(current_pos..current_pos + l)};
+ // let b = unsafe{&data.get_unchecked(pos_to_check..)};
+
+ // let mut len = 0;
+
+ // for (l, r) in a
+ // .iter()
+ // .zip(b.iter()) {
+ // if *l == *r {
+ // len += 1;
+ // continue;
+ // } else {
+ // break;
+ // }
+ // }
+ // len as usize
+
+ // Naive version
+ data[current_pos..]
+ .iter()
+ .zip(data[pos_to_check..].iter())
+ .take(MAX_MATCH)
+ .take_while(|&(&a, &b)| a == b)
+ .count()
+}
+
+/// Try finding the position and length of the longest match in the input data.
+/// # Returns
+/// (length, distance from position)
+/// If no match is found that is better than `prev_length`, or we are at the start,
+/// a length and distance of 0 is returned.
+///
+/// # Arguments:
+/// `data`: The data to search in.
+/// `hash_table`: Hash table to use for searching.
+/// `position`: The position in the data to match against.
+/// `prev_length`: The length of the previous `longest_match` check to compare against.
+/// `max_hash_checks`: The maximum number of matching hash chain positions to check.
+pub fn longest_match(
+ data: &[u8],
+ hash_table: &ChainedHashTable,
+ position: usize,
+ prev_length: usize,
+ max_hash_checks: u16,
+) -> (usize, usize) {
+
+ // debug_assert_eq!(position, hash_table.current_head() as usize);
+
+ // If we already have a match at the maximum length,
+ // or we can't grow further, we stop here.
+ if prev_length >= MAX_MATCH || position + prev_length >= data.len() {
+ return (0, 0);
+ }
+
+ let limit = if position > WINDOW_SIZE {
+ position - WINDOW_SIZE
+ } else {
+ 0
+ };
+
+ // Make sure the length is at least one to simplify the matching code, as
+ // otherwise the matching code might underflow.
+ let prev_length = cmp::max(prev_length, 1);
+
+ let max_length = cmp::min(data.len() - position, MAX_MATCH);
+
+ // The position in the hash chain we are currently checking.
+ let mut current_head = position;
+
+ // The best match length we've found so far, and its distance.
+ let mut best_length = prev_length;
+ let mut best_distance = 0;
+
+ // The position of the previous value in the hash chain.
+ let mut prev_head;
+
+ for _ in 0..max_hash_checks {
+ prev_head = current_head;
+ current_head = hash_table.get_prev(current_head) as usize;
+ if current_head >= prev_head || current_head < limit {
+ // If the current hash chain value refers to itself, or is referring to
+ // a value that's higher (we only move backwards through the chain),
+ // we are at the end and can stop.
+ break;
+ }
+
+ // We only check further if the match length can actually increase.
+ // Checking whether the end byte and the potential next byte match is generally
+ // more likely to give a quick answer than checking from the start first, given
+ // that the hashes match.
+ // If there is no previous match, best_length will be 1 and the first two bytes will
+ // be checked instead.
+ // Since we've made sure best_length is always at least 1, this shouldn't underflow.
+ if data[position + best_length - 1..position + best_length + 1] ==
+ data[current_head + best_length - 1..current_head + best_length + 1]
+ {
+ // Actually check how many bytes match.
+ // At the moment this will check the two bytes we just checked again,
+ // though adding code for skipping these bytes may not result in any speed
+ // gain due to the added complexity.
+ let length = get_match_length(data, position, current_head);
+ if length > best_length {
+ best_length = length;
+ best_distance = position - current_head;
+ if length == max_length {
+ // We are at the max length, so there is no point
+ // searching any longer
+ break;
+ }
+ }
+ }
+ }
+
+ if best_length > prev_length {
+ (best_length, best_distance)
+ } else {
+ (0, 0)
+ }
+}
+
+/// Try finding the position and length of the longest match in the input data using the fast
+/// zlib hash-skipping algorithm.
+/// # Returns
+/// (length, distance from position)
+/// If no match is found that was better than `prev_length`, or no match is found at all, or we
+/// are at the start, the length value returned will be 0.
+///
+/// # Arguments:
+/// `data`: The data to search in.
+/// `hash_table`: Hash table to use for searching.
+/// `position`: The position in the data to match against.
+/// `prev_length`: The length of the previous `longest_match` check to compare against.
+/// `max_hash_checks`: The maximum number of matching hash chain positions to check.
+#[cfg(test)]
+pub fn longest_match_fast(
+ data: &[u8],
+ hash_table: &ChainedHashTable,
+ position: usize,
+ prev_length: usize,
+ max_hash_checks: u16,
+) -> (usize, usize) {
+
+ // debug_assert_eq!(position, hash_table.current_head() as usize);
+
+ // If we already have a match at the maximum length,
+ // or we can't grow further, we stop here.
+ if prev_length >= MAX_MATCH || position + prev_length >= data.len() {
+ return (0, 0);
+ }
+
+ let limit = if position > WINDOW_SIZE {
+ position - WINDOW_SIZE
+ } else {
+ 0
+ };
+
+ // Make sure the length is at least one to simplify the matching code, as
+ // otherwise the matching code might underflow.
+ let prev_length = cmp::max(prev_length, 1);
+
+ let max_length = cmp::min(data.len() - position, MAX_MATCH);
+
+ // The position in the hash chain we are currently checking.
+ let mut current_head = position;
+
+ // The best match length we've found so far, and its distance.
+ let mut best_length = prev_length;
+ let mut best_distance = 0;
+ // The offset from the start of the match of the hash chain we are traversing.
+ let mut offset = 0;
+
+ // The position of the previous value in the hash chain.
+ let mut prev_head;
+
+ for _ in 0..max_hash_checks {
+ prev_head = current_head;
+ current_head = hash_table.get_prev(current_head) as usize;
+ if current_head >= prev_head || current_head < limit + offset {
+ // If the current hash chain value refers to itself, or is referring to
+ // a value that's higher (we only move backwards through the chain),
+ // we are at the end and can stop.
+ break;
+ }
+
+ let offset_head = current_head - offset;
+
+ // We only check further if the match length can actually increase.
+ // Checking whether the end byte and the potential next byte match is generally
+ // more likely to give a quick answer than checking from the start first, given
+ // that the hashes match.
+ // If there is no previous match, best_length will be 1 and the first two bytes will
+ // be checked instead.
+ // Since we've made sure best_length is always at least 1, this shouldn't underflow.
+ if data[position + best_length - 1..position + best_length + 1] ==
+ data[offset_head + best_length - 1..offset_head + best_length + 1]
+ {
+ // Actually check how many bytes match.
+ // At the moment this will check the two bytes we just checked again,
+ // though adding code for skipping these bytes may not result in any speed
+ // gain due to the added complexity.
+ let length = get_match_length(data, position, offset_head);
+ if length > best_length {
+ best_length = length;
+ best_distance = position - offset_head;
+ if length == max_length {
+ // We are at the max length, so there is no point
+ // searching any longer
+ break;
+ }
+
+ // Find the position in the match where the next hash position is the furthest away.
+ // By moving to a different hash chain we can potentially skip a lot of checks,
+ // saving time.
+ // We avoid doing this for matches that extend past the starting position, as
+ // those will contain positions that are not in the hash table yet.
+ if best_distance > best_length {
+ offset = hash_table.farthest_next(offset_head, length);
+ current_head = offset_head + offset;
+ }
+ }
+ }
+ }
+
+ if best_length > prev_length {
+ (best_length, best_distance)
+ } else {
+ (0, 0)
+ }
+}
+
+// Get the longest match from the current position of the hash table.
+#[inline]
+#[cfg(test)]
+pub fn longest_match_current(data: &[u8], hash_table: &ChainedHashTable) -> (usize, usize) {
+ use compression_options::MAX_HASH_CHECKS;
+ longest_match(
+ data,
+ hash_table,
+ hash_table.current_head() as usize,
+ MIN_MATCH as usize - 1,
+ MAX_HASH_CHECKS,
+ )
+}
+
+#[cfg(test)]
+mod test {
+ use chained_hash_table::{filled_hash_table, HASH_BYTES, ChainedHashTable};
+ use super::{get_match_length, longest_match, longest_match_fast};
+
+ /// Test that match lengths are calculated correctly
+ #[test]
+ fn match_length() {
+ let test_arr = [5u8, 5, 5, 5, 5, 9, 9, 2, 3, 5, 5, 5, 5, 5];
+ let l = get_match_length(&test_arr, 9, 0);
+ assert_eq!(l, 5);
+ let l2 = get_match_length(&test_arr, 9, 7);
+ assert_eq!(l2, 0);
+ let l3 = get_match_length(&test_arr, 10, 0);
+ assert_eq!(l3, 4);
+ }
+
+ /// Test that we get the longest of the matches
+ #[test]
+ fn get_longest_match() {
+ let test_data = b"xTest data, Test_data,zTest data";
+ let hash_table = filled_hash_table(&test_data[..23 + 1 + HASH_BYTES - 1]);
+
+ let (length, distance) = super::longest_match_current(test_data, &hash_table);
+
+ // We check that we get the longest match, rather than the shorter, but closer one.
+ assert_eq!(distance, 22);
+ assert_eq!(length, 9);
+ let test_arr2 = [
+ 10u8,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ 2,
+ 3,
+ 5,
+ 10,
+ 10,
+ 10,
+ 10,
+ 10,
+ ];
+ let hash_table = filled_hash_table(&test_arr2[..HASH_BYTES + 1 + 1 + 2]);
+ let (length, distance) = super::longest_match_current(&test_arr2, &hash_table);
+
+ assert_eq!(distance, 1);
+ assert_eq!(length, 4);
+ }
+
+ /// Make sure we can get a match at index zero
+ #[test]
+ fn match_index_zero() {
+ let test_data = b"AAAAAAA";
+
+ let mut hash_table = ChainedHashTable::from_starting_values(test_data[0], test_data[1]);
+ for (n, &b) in test_data[2..5].iter().enumerate() {
+ hash_table.add_hash_value(n, b);
+ }
+
+ let (match_length, match_dist) = longest_match(test_data, &hash_table, 1, 0, 4096);
+
+ assert_eq!(match_dist, 1);
+ assert!(match_length == 6);
+ }
+
+ /// Test for fast_zlib algorithm.
+ /// Check that it doesn't give worse matches than the default one.
+ /// Ignored by default as it's slow, and best run in release mode.
+ #[ignore]
+ #[test]
+ fn fast_match_at_least_equal() {
+ use test_utils::get_test_data;
+ for start_pos in 10000..50000 {
+ const NUM_CHECKS: u16 = 400;
+ let data = get_test_data();
+ let hash_table = filled_hash_table(&data[..start_pos + 1]);
+ let pos = hash_table.current_head() as usize;
+
+ let naive_match = longest_match(&data[..], &hash_table, pos, 0, NUM_CHECKS);
+ let fast_match = longest_match_fast(&data[..], &hash_table, pos, 0, NUM_CHECKS);
+
+ if fast_match.0 > naive_match.0 {
+ println!("Fast match found better match!");
+ }
+
+ assert!(fast_match.0 >= naive_match.0,
+ "naive match had better length! start_pos: {}, naive: {:?}, fast {:?}"
+ , start_pos, naive_match, fast_match);
+ assert!(fast_match.1 >= naive_match.1,
+ "naive match had better dist! start_pos: {} naive {:?}, fast {:?}"
+ , start_pos, naive_match, fast_match);
+ }
+
+ }
+}
+
+
+#[cfg(all(test, feature = "benchmarks"))]
+mod bench {
+ use test_std::Bencher;
+ use test_utils::get_test_data;
+ use chained_hash_table::filled_hash_table;
+ use super::{longest_match, longest_match_fast};
+ #[bench]
+ fn matching(b: &mut Bencher) {
+ const POS: usize = 29000;
+ let data = get_test_data();
+ let hash_table = filled_hash_table(&data[..POS + 1]);
+ let pos = hash_table.current_head() as usize;
+ println!("M: {:?}", longest_match(&data[..], &hash_table, pos, 0, 4096));
+ b.iter( ||
+ longest_match(&data[..], &hash_table, pos, 0, 4096)
+ );
+ }
+
+ #[bench]
+ fn fast_matching(b: &mut Bencher) {
+ const POS: usize = 29000;
+ let data = get_test_data();
+ let hash_table = filled_hash_table(&data[..POS + 1]);
+ let pos = hash_table.current_head() as usize;
+ println!("M: {:?}", longest_match_fast(&data[..], &hash_table, pos, 0, 4096));
+ b.iter( ||
+ longest_match_fast(&data[..], &hash_table, pos, 0, 4096)
+ );
+ }
+}
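
The doc comments in matching.rs describe the (length, distance) pair that `longest_match` reports and the byte-by-byte comparison done by `get_match_length`. Below is a minimal, self-contained sketch of that idea using only the standard library; `match_length` and the example data are hypothetical and not part of the crate.

const MAX_MATCH: usize = 258; // DEFLATE's maximum match length.

// Count how many bytes starting at `cur` equal the bytes starting at `prev`,
// capped at MAX_MATCH, mirroring the naive version of get_match_length above.
fn match_length(data: &[u8], cur: usize, prev: usize) -> usize {
    data[cur..]
        .iter()
        .zip(data[prev..].iter())
        .take(MAX_MATCH)
        .take_while(|&(&a, &b)| a == b)
        .count()
}

fn main() {
    // In "abcabc" the bytes at position 3 repeat the bytes at position 0,
    // so a match of length 3 at distance 3 could be emitted.
    let data = b"abcabc";
    let length = match_length(data, 3, 0);
    let distance = 3 - 0; // current position minus the earlier position
    assert_eq!((length, distance), (3, 3));
}
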
diff --git a/third_party/rust/deflate/src/output_writer.rs b/third_party/rust/deflate/src/output_writer.rs
new file mode 100644
index 0000000000..508ba5c675
--- /dev/null
+++ b/third_party/rust/deflate/src/output_writer.rs
@@ -0,0 +1,154 @@
+use std::u16;
+
+use lzvalue::LZValue;
+use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
+ get_distance_code, get_length_code};
+
+/// The type used for representing how many times a literal, length or distance code has been output
+/// to the current buffer.
+/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
+/// 16-bit values.
+pub type FrequencyType = u16;
+
+/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
+/// number of literals/lengths output before a new block is started.
+/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
+/// overflowing (which would degrade, or in the worst case break compression).
+pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
+
+#[derive(Debug, PartialEq)]
+pub enum BufferStatus {
+ NotFull,
+ Full,
+}
+
+/// Struct that buffers lz77 data and keeps track of the usage of different codes
+pub struct DynamicWriter {
+ buffer: Vec<LZValue>,
+ // The last two length codes are not actually used, but only participate in code construction.
+ // Therefore, we ignore them to get the correct number of lengths.
+ frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
+ distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
+}
+
+impl DynamicWriter {
+ #[inline]
+ pub fn check_buffer_length(&self) -> BufferStatus {
+ if self.buffer.len() >= MAX_BUFFER_LENGTH {
+ BufferStatus::Full
+ } else {
+ BufferStatus::NotFull
+ }
+ }
+
+ #[inline]
+ pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
+ debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
+ self.buffer.push(LZValue::literal(literal));
+ self.frequencies[usize::from(literal)] += 1;
+ self.check_buffer_length()
+ }
+
+ #[inline]
+ pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
+ self.buffer.push(LZValue::length_distance(length, distance));
+ let l_code_num = get_length_code(length);
+ // As we limit the buffer to 2^16 values, this should be safe from overflowing.
+ if cfg!(debug_assertions) {
+ self.frequencies[l_code_num] += 1;
+ } else {
+ // #Safety
+ // None of the values in the table of length code numbers will give a value
+ // that is out of bounds.
+ // There is a test to ensure that these functions can not produce too large values.
+ unsafe {
+ *self.frequencies.get_unchecked_mut(l_code_num) += 1;
+ }
+ }
+ let d_code_num = get_distance_code(distance);
+ // The compiler seems to be able to elide the bounds check here somehow.
+ self.distance_frequencies[usize::from(d_code_num)] += 1;
+ self.check_buffer_length()
+ }
+
+ pub fn buffer_length(&self) -> usize {
+ self.buffer.len()
+ }
+
+ pub fn get_buffer(&self) -> &[LZValue] {
+ &self.buffer
+ }
+
+ pub fn new() -> DynamicWriter {
+ let mut w = DynamicWriter {
+ buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
+ frequencies: [0; NUM_LITERALS_AND_LENGTHS],
+ distance_frequencies: [0; NUM_DISTANCE_CODES],
+ };
+ // This will always be 1,
+ // since there will always only be one end of block marker in each block
+ w.frequencies[END_OF_BLOCK_POSITION] = 1;
+ w
+ }
+
+ /// Special output function used with RLE compression
+ /// that avoids bothering to look up a distance code.
+ #[inline]
+ pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
+ self.buffer.push(LZValue::length_distance(length, 1));
+ let l_code_num = get_length_code(length);
+ // As we limit the buffer to 2^16 values, this should be safe from overflowing.
+ if cfg!(debug_assertions) {
+ self.frequencies[l_code_num] += 1;
+ } else {
+ // #Safety
+ // None of the values in the table of length code numbers will give a value
+ // that is out of bounds.
+ // There is a test to ensure that these functions won't produce too large values.
+ unsafe {
+ *self.frequencies.get_unchecked_mut(l_code_num) += 1;
+ }
+ }
+ self.distance_frequencies[0] += 1;
+ self.check_buffer_length()
+ }
+
+ pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
+ (&self.frequencies, &self.distance_frequencies)
+ }
+
+ pub fn clear_frequencies(&mut self) {
+ self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
+ self.distance_frequencies = [0; NUM_DISTANCE_CODES];
+ self.frequencies[END_OF_BLOCK_POSITION] = 1;
+ }
+
+ pub fn clear_data(&mut self) {
+ self.buffer.clear()
+ }
+
+ pub fn clear(&mut self) {
+ self.clear_frequencies();
+ self.clear_data();
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use huffman_table::{get_length_code, get_distance_code};
+ #[test]
+ /// Ensure that these functions won't produce values that would overflow the output_writer
+ /// tables since we use some unsafe indexing.
+ fn array_bounds() {
+ let w = DynamicWriter::new();
+
+ for i in 0..u16::max_value() {
+ assert!(get_length_code(i) < w.frequencies.len());
+ }
+
+ for i in 0..u16::max_value() {
+ assert!(get_distance_code(i) < w.distance_frequencies.len() as u8);
+ }
+ }
+}
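
The comments in output_writer.rs argue that because `MAX_BUFFER_LENGTH` (1024 * 31) stays below what a `u16` can represent, the per-code frequency counters cannot overflow. A stripped-down sketch of that frequency-tracking idea follows; `LiteralCounter` is a hypothetical name and only handles literals, not length/distance codes.

// A literal-only frequency counter with the same overflow argument as
// DynamicWriter: the buffer is capped below u16::MAX, so each u16 counter
// can be incremented at most MAX_BUFFER_LENGTH times before a new block starts.
const MAX_BUFFER_LENGTH: usize = 1024 * 31; // < u16::MAX (65535)

struct LiteralCounter {
    buffer: Vec<u8>,
    frequencies: [u16; 256],
}

impl LiteralCounter {
    fn new() -> LiteralCounter {
        LiteralCounter {
            buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
            frequencies: [0; 256],
        }
    }

    /// Returns true when the buffer is full and a new block should be started.
    fn push(&mut self, literal: u8) -> bool {
        self.buffer.push(literal);
        self.frequencies[usize::from(literal)] += 1;
        self.buffer.len() >= MAX_BUFFER_LENGTH
    }
}

fn main() {
    let mut counter = LiteralCounter::new();
    for &b in b"aab" {
        counter.push(b);
    }
    assert_eq!(counter.frequencies[usize::from(b'a')], 2);
    assert_eq!(counter.frequencies[usize::from(b'b')], 1);
}
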
diff --git a/third_party/rust/deflate/src/rle.rs b/third_party/rust/deflate/src/rle.rs
new file mode 100644
index 0000000000..d7c38078dd
--- /dev/null
+++ b/third_party/rust/deflate/src/rle.rs
@@ -0,0 +1,107 @@
+use lz77::{ProcessStatus, buffer_full};
+use output_writer::{BufferStatus, DynamicWriter};
+use huffman_table;
+
+use std::ops::Range;
+use std::cmp;
+
+const MIN_MATCH: usize = huffman_table::MIN_MATCH as usize;
+const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize;
+
+
+/// Simple match function for run-length encoding.
+///
+/// Checks how many of the next bytes from the start of the slice `data` matches prev.
+fn get_match_length_rle(data: &[u8], prev: u8) -> usize {
+ data.iter()
+ .take(MAX_MATCH)
+ .take_while(|&&b| b == prev)
+ .count()
+}
+
+/// LZ77-compress data using the RLE (run-length encoding) strategy.
+///
+/// This function simply looks for runs of data of at least length 3.
+pub fn process_chunk_greedy_rle(
+ data: &[u8],
+ iterated_data: &Range<usize>,
+ writer: &mut DynamicWriter,
+) -> (usize, ProcessStatus) {
+ if data.is_empty() {
+ return (0, ProcessStatus::Ok);
+ };
+
+ let end = cmp::min(data.len(), iterated_data.end);
+ // Start on at least byte 1.
+ let start = cmp::max(iterated_data.start, 1);
+ // The previous byte.
+ let mut prev = data[start - 1];
+ // Iterate through the requested range, but avoid going off the end.
+ let current_chunk = &data[cmp::min(start, end)..end];
+ let mut insert_it = current_chunk.iter().enumerate();
+ let mut overlap = 0;
+ // Make sure to output the first byte
+ if iterated_data.start == 0 && !data.is_empty() {
+ write_literal!(writer, data[0], 1);
+ }
+
+ while let Some((n, &b)) = insert_it.next() {
+ let position = n + start;
+ let match_len = if prev == b {
+ //TODO: Avoid comparing with self here.
+ // Would use as_slice() but that doesn't work on an enumerated iterator.
+ get_match_length_rle(&data[position..], prev)
+ } else {
+ 0
+ };
+ if match_len >= MIN_MATCH {
+ if position + match_len > end {
+ overlap = position + match_len - end;
+ };
+ let b_status = writer.write_length_rle(match_len as u16);
+ if b_status == BufferStatus::Full {
+ return (overlap, buffer_full(position + match_len));
+ }
+ insert_it.nth(match_len - 2);
+ } else {
+ write_literal!(writer, b, position + 1);
+ }
+ prev = b;
+ }
+
+ (overlap, ProcessStatus::Ok)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use lzvalue::{LZValue, lit, ld};
+
+ fn l(c: char) -> LZValue {
+ lit(c as u8)
+ }
+
+ #[test]
+ fn rle_compress() {
+ let input = b"textaaaaaaaaatext";
+ let mut w = DynamicWriter::new();
+ let r = 0..input.len();
+ let (overlap, _) = process_chunk_greedy_rle(input, &r, &mut w);
+ let expected = [
+ l('t'),
+ l('e'),
+ l('x'),
+ l('t'),
+ l('a'),
+ ld(8, 1),
+ l('t'),
+ l('e'),
+ l('x'),
+ l('t'),
+ ];
+ //println!("expected: {:?}", expected);
+ //println!("actual: {:?}", w.get_buffer());
+ assert!(w.get_buffer() == expected);
+ assert_eq!(overlap, 0);
+ }
+}
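
`process_chunk_greedy_rle` above only ever matches against the single previous byte, which is what makes the RLE strategy cheap compared to full LZ77 matching. The following is a small standalone sketch of that run detection; the `run_length` helper is hypothetical, and the MIN_MATCH/MAX_MATCH values are the usual DEFLATE limits.

const MIN_MATCH: usize = 3; // Shortest run worth encoding as a length/distance pair.
const MAX_MATCH: usize = 258; // Longest length DEFLATE can encode.

// Count how many of the next bytes repeat `prev`, as get_match_length_rle does.
fn run_length(data: &[u8], prev: u8) -> usize {
    data.iter().take(MAX_MATCH).take_while(|&&b| b == prev).count()
}

fn main() {
    // In "xaaaay", after the 'a' at index 1 has been output, the next three
    // bytes repeat it: long enough (>= MIN_MATCH) to emit a length-3,
    // distance-1 pair.
    let data = b"xaaaay";
    let run = run_length(&data[2..], data[1]);
    assert_eq!(run, 3);
    assert!(run >= MIN_MATCH);
}
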
diff --git a/third_party/rust/deflate/src/stored_block.rs b/third_party/rust/deflate/src/stored_block.rs
new file mode 100644
index 0000000000..4cf765703c
--- /dev/null
+++ b/third_party/rust/deflate/src/stored_block.rs
@@ -0,0 +1,97 @@
+use std::io::Write;
+use std::io;
+use std::u16;
+use bitstream::LsbWriter;
+use byteorder::{LittleEndian, WriteBytesExt};
+
+#[cfg(test)]
+const BLOCK_SIZE: u16 = 32000;
+
+const STORED_FIRST_BYTE: u8 = 0b0000_0000;
+pub const STORED_FIRST_BYTE_FINAL: u8 = 0b0000_0001;
+pub const MAX_STORED_BLOCK_LENGTH: usize = (u16::MAX as usize) / 2;
+
+pub fn write_stored_header(writer: &mut LsbWriter, final_block: bool) {
+ let header = if final_block {
+ STORED_FIRST_BYTE_FINAL
+ } else {
+ STORED_FIRST_BYTE
+ };
+ // Write the block header
+ writer.write_bits(header.into(), 3);
+ // Flush the writer to make sure we are aligned to the byte boundary.
+ writer.flush_raw();
+}
+
+// Compress one stored block (excluding the header)
+pub fn compress_block_stored<W: Write>(input: &[u8], writer: &mut W) -> io::Result<usize> {
+ if input.len() > u16::max_value() as usize {
+ return Err(io::Error::new(
+ io::ErrorKind::InvalidInput,
+ "Stored block too long!",
+ ));
+ };
+ // The header is written before this function.
+ // The next two bytes indicate the length.
+ writer.write_u16::<LittleEndian>(input.len() as u16)?;
+ // The next two bytes after the length are the one's complement of the length.
+ writer.write_u16::<LittleEndian>(!input.len() as u16)?;
+ // After this the data is written directly with no compression
+ writer.write(input)
+}
+
+#[cfg(test)]
+pub fn compress_data_stored(input: &[u8]) -> Vec<u8> {
+ let block_length = BLOCK_SIZE as usize;
+
+ let mut output = Vec::with_capacity(input.len() + 2);
+ let mut i = input.chunks(block_length).peekable();
+ while let Some(chunk) = i.next() {
+ let last_chunk = i.peek().is_none();
+ // The first bit tells us if this is the final chunk,
+ // the next two bits detail the compression type (none in this case).
+ let first_byte = if last_chunk {
+ STORED_FIRST_BYTE_FINAL
+ } else {
+ STORED_FIRST_BYTE
+ };
+ output.write(&[first_byte]).unwrap();
+
+ compress_block_stored(chunk, &mut output).unwrap();
+ }
+ output
+}
+
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use test_utils::decompress_to_end;
+
+ #[test]
+ fn no_compression_one_chunk() {
+ let test_data = vec![1u8, 2, 3, 4, 5, 6, 7, 8];
+ let compressed = compress_data_stored(&test_data);
+ let result = decompress_to_end(&compressed);
+ assert_eq!(test_data, result);
+ }
+
+ #[test]
+ fn no_compression_multiple_chunks() {
+ let test_data = vec![32u8; 40000];
+ let compressed = compress_data_stored(&test_data);
+ let result = decompress_to_end(&compressed);
+ assert_eq!(test_data, result);
+ }
+
+ #[test]
+ fn no_compression_string() {
+ let test_data = String::from(
+ "This is some text, this is some more text, this is even \
+ more text, lots of text here.",
+ ).into_bytes();
+ let compressed = compress_data_stored(&test_data);
+ let result = decompress_to_end(&compressed);
+ assert_eq!(test_data, result);
+ }
+}
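
`compress_block_stored` writes the block length as a little-endian u16 followed by its one's complement, then the raw bytes, as required by RFC 1951. Below is a minimal sketch of that body layout using only the standard library (the byteorder calls above produce the same bytes); `stored_block_body` is a hypothetical helper.

// Body of a stored block per RFC 1951: LEN (little endian), NLEN = !LEN,
// then the uncompressed data. The 3 header bits are written separately.
fn stored_block_body(input: &[u8]) -> Vec<u8> {
    assert!(input.len() <= u16::max_value() as usize);
    let len = input.len() as u16;
    let nlen = !len;
    let mut out = Vec::with_capacity(input.len() + 4);
    out.extend_from_slice(&[len as u8, (len >> 8) as u8]);
    out.extend_from_slice(&[nlen as u8, (nlen >> 8) as u8]);
    out.extend_from_slice(input);
    out
}

fn main() {
    let body = stored_block_body(b"abc");
    // LEN = 3 -> 03 00, NLEN = !3 = 0xFFFC -> FC FF, then the data itself.
    assert_eq!(body[..4], [0x03, 0x00, 0xFC, 0xFF]);
    assert_eq!(&body[4..], b"abc");
}
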
diff --git a/third_party/rust/deflate/src/test_utils.rs b/third_party/rust/deflate/src/test_utils.rs
new file mode 100644
index 0000000000..c0ef355e1a
--- /dev/null
+++ b/third_party/rust/deflate/src/test_utils.rs
@@ -0,0 +1,79 @@
+#![cfg(test)]
+
+#[cfg(feature = "gzip")]
+use flate2::read::GzDecoder;
+
+fn get_test_file_data(name: &str) -> Vec<u8> {
+ use std::fs::File;
+ use std::io::Read;
+ let mut input = Vec::new();
+ let mut f = File::open(name).unwrap();
+
+ f.read_to_end(&mut input).unwrap();
+ input
+}
+
+pub fn get_test_data() -> Vec<u8> {
+ use std::env;
+ let path = env::var("TEST_FILE").unwrap_or("tests/pg11.txt".to_string());
+ get_test_file_data(&path)
+}
+
+/// Helper function to decompress into a `Vec<u8>`
+pub fn decompress_to_end(input: &[u8]) -> Vec<u8> {
+ // use std::str;
+ // let mut inflater = super::inflate::InflateStream::new();
+ // let mut out = Vec::new();
+ // let mut n = 0;
+ // println!("input len {}", input.len());
+ // while n < input.len() {
+ // let res = inflater.update(&input[n..]) ;
+ // if let Ok((num_bytes_read, result)) = res {
+ // println!("result len {}, bytes_read {}", result.len(), num_bytes_read);
+ // n += num_bytes_read;
+ // out.extend(result);
+ // } else {
+ // println!("Output: `{}`", str::from_utf8(&out).unwrap());
+ // println!("Output decompressed: {}", out.len());
+ // res.unwrap();
+ // }
+ //
+ // }
+ // out
+
+ use std::io::Read;
+ use flate2::read::DeflateDecoder;
+
+ let mut result = Vec::new();
+ let i = &input[..];
+ let mut e = DeflateDecoder::new(i);
+
+ let res = e.read_to_end(&mut result);
+ if let Ok(_) = res {
+ // println!("{} bytes decompressed successfully", n);
+ } else {
+ println!("result size: {}", result.len());
+ res.unwrap();
+ }
+ result
+}
+
+#[cfg(feature = "gzip")]
+pub fn decompress_gzip(compressed: &[u8]) -> (GzDecoder<&[u8]>, Vec<u8>) {
+ use std::io::Read;
+ let mut e = GzDecoder::new(&compressed[..]).unwrap();
+
+ let mut result = Vec::new();
+ e.read_to_end(&mut result).unwrap();
+ (e, result)
+}
+
+pub fn decompress_zlib(compressed: &[u8]) -> Vec<u8> {
+ use std::io::Read;
+ use flate2::read::ZlibDecoder;
+ let mut e = ZlibDecoder::new(&compressed[..]);
+
+ let mut result = Vec::new();
+ e.read_to_end(&mut result).unwrap();
+ result
+}
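
The helpers in test_utils.rs decompress with `flate2` to check output produced by this crate. A hedged round-trip sketch of the same pattern is shown below; it assumes the crate's top-level `deflate_bytes` convenience function and `flate2` as a dev-dependency, as these tests do.

extern crate deflate;
extern crate flate2;

use std::io::Read;

use flate2::read::DeflateDecoder;

fn main() {
    let original = b"This is some test data";
    // Compress with this crate, then decode with flate2 and compare.
    let compressed = deflate::deflate_bytes(original);
    let mut decoder = DeflateDecoder::new(&compressed[..]);
    let mut decompressed = Vec::new();
    decoder.read_to_end(&mut decompressed).unwrap();
    assert_eq!(&decompressed[..], &original[..]);
}
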
diff --git a/third_party/rust/deflate/src/writer.rs b/third_party/rust/deflate/src/writer.rs
new file mode 100644
index 0000000000..8ecb4e4a43
--- /dev/null
+++ b/third_party/rust/deflate/src/writer.rs
@@ -0,0 +1,663 @@
+use std::io::Write;
+use std::{thread, io};
+
+use byteorder::{WriteBytesExt, BigEndian};
+
+use checksum::{Adler32Checksum, RollingChecksum};
+use compress::compress_data_dynamic_n;
+use compress::Flush;
+use deflate_state::DeflateState;
+use compression_options::CompressionOptions;
+use zlib::{write_zlib_header, CompressionLevel};
+
+const ERR_STR: &'static str = "Error! The wrapped writer is missing. \
+ This is a bug, please file an issue.";
+
+/// Keep compressing until all the input has been compressed and output or the writer returns `Err`.
+pub fn compress_until_done<W: Write>(
+ mut input: &[u8],
+ deflate_state: &mut DeflateState<W>,
+ flush_mode: Flush,
+) -> io::Result<()> {
+ // This should only be used for flushing.
+ assert!(flush_mode != Flush::None);
+ loop {
+ match compress_data_dynamic_n(input, deflate_state, flush_mode) {
+ Ok(0) => {
+ if deflate_state.output_buf().is_empty() {
+ break;
+ } else {
+ // If the output buffer isn't empty, keep going until it is, as there is still
+ // data to be flushed.
+ input = &[];
+ }
+ }
+ Ok(n) => {
+ if n < input.len() {
+ input = &input[n..]
+ } else {
+ input = &[];
+ }
+ }
+ Err(e) => {
+ match e.kind() {
+ // This error means that there may still be data to flush.
+ // This could possibly get stuck if the underlying writer keeps returning this
+ // error.
+ io::ErrorKind::Interrupted => (),
+ _ => return Err(e),
+ }
+ }
+ }
+ }
+
+ debug_assert_eq!(
+ deflate_state.bytes_written,
+ deflate_state.bytes_written_control.get()
+ );
+
+ Ok(())
+}
+
+/// A DEFLATE encoder/compressor.
+///
+/// A struct implementing a [`Write`] interface that takes unencoded data and compresses it to
+/// the provided writer using DEFLATE compression.
+///
+/// # Examples
+///
+/// ```rust
+/// # use std::io;
+/// #
+/// # fn try_main() -> io::Result<Vec<u8>> {
+/// #
+/// use std::io::Write;
+///
+/// use deflate::Compression;
+/// use deflate::write::DeflateEncoder;
+///
+/// let data = b"This is some test data";
+/// let mut encoder = DeflateEncoder::new(Vec::new(), Compression::Default);
+/// encoder.write_all(data)?;
+/// let compressed_data = encoder.finish()?;
+/// # Ok(compressed_data)
+/// #
+/// # }
+/// # fn main() {
+/// # try_main().unwrap();
+/// # }
+/// ```
+/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
+pub struct DeflateEncoder<W: Write> {
+ deflate_state: DeflateState<W>,
+}
+
+impl<W: Write> DeflateEncoder<W> {
+ /// Creates a new encoder using the provided compression options.
+ pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> DeflateEncoder<W> {
+ DeflateEncoder {
+ deflate_state: DeflateState::new(options.into(), writer),
+ }
+ }
+
+ /// Encode all pending data to the contained writer, consume this `DeflateEncoder`,
+ /// and return the contained writer if writing succeeds.
+ pub fn finish(mut self) -> io::Result<W> {
+ self.output_all()?;
+ // We have to move the inner writer out of the encoder, and replace it with `None`
+ // to let the `DeflateEncoder` drop safely.
+ Ok(self.deflate_state.inner.take().expect(ERR_STR))
+ }
+
+ /// Resets the encoder (except the compression options), replacing the current writer
+ /// with a new one, returning the old one.
+ pub fn reset(&mut self, w: W) -> io::Result<W> {
+ self.output_all()?;
+ self.deflate_state.reset(w)
+ }
+
+ /// Output all pending data as if encoding is done, but without resetting anything
+ fn output_all(&mut self) -> io::Result<()> {
+ compress_until_done(&[], &mut self.deflate_state, Flush::Finish)
+ }
+}
+
+impl<W: Write> io::Write for DeflateEncoder<W> {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ let flush_mode = self.deflate_state.flush_mode;
+ compress_data_dynamic_n(buf, &mut self.deflate_state, flush_mode)
+ }
+
+ /// Flush the encoder.
+ ///
+ /// This will flush the encoder, emulating the Sync flush method from Zlib.
+ /// This essentially finishes the current block, and sends an additional empty stored block to
+ /// the writer.
+ fn flush(&mut self) -> io::Result<()> {
+ compress_until_done(&[], &mut self.deflate_state, Flush::Sync)
+ }
+}
+
+impl<W: Write> Drop for DeflateEncoder<W> {
+ /// When the encoder is dropped, output the rest of the data.
+ ///
+ /// WARNING: This may silently fail if writing fails, so using this to finish encoding
+ /// for writers where writing might fail is not recommended; for that, call
+ /// [`finish()`](#method.finish) instead.
+ fn drop(&mut self) {
+ // Not sure if implementing drop is a good idea or not, but we follow flate2 for now.
+ // We only do this if we are not panicking, to avoid a double panic.
+ if self.deflate_state.inner.is_some() && !thread::panicking() {
+ let _ = self.output_all();
+ }
+ }
+}
+
+
+/// A Zlib encoder/compressor.
+///
+/// A struct implementing a [`Write`] interface that takes unencoded data and compresses it to
+/// the provided writer using DEFLATE compression with Zlib headers and trailers.
+///
+/// # Examples
+///
+/// ```rust
+/// # use std::io;
+/// #
+/// # fn try_main() -> io::Result<Vec<u8>> {
+/// #
+/// use std::io::Write;
+///
+/// use deflate::Compression;
+/// use deflate::write::ZlibEncoder;
+///
+/// let data = b"This is some test data";
+/// let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
+/// encoder.write_all(data)?;
+/// let compressed_data = encoder.finish()?;
+/// # Ok(compressed_data)
+/// #
+/// # }
+/// # fn main() {
+/// # try_main().unwrap();
+/// # }
+/// ```
+/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
+pub struct ZlibEncoder<W: Write> {
+ deflate_state: DeflateState<W>,
+ checksum: Adler32Checksum,
+ header_written: bool,
+}
+
+impl<W: Write> ZlibEncoder<W> {
+ /// Create a new `ZlibEncoder` using the provided compression options.
+ pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> ZlibEncoder<W> {
+ ZlibEncoder {
+ deflate_state: DeflateState::new(options.into(), writer),
+ checksum: Adler32Checksum::new(),
+ header_written: false,
+ }
+ }
+
+ /// Output all pending data, including the trailer (checksum), as if encoding is done,
+ /// but without resetting anything.
+ fn output_all(&mut self) -> io::Result<()> {
+ self.check_write_header()?;
+ compress_until_done(&[], &mut self.deflate_state, Flush::Finish)?;
+ self.write_trailer()
+ }
+
+ /// Encode all pending data to the contained writer, consume this `ZlibEncoder`,
+ /// and return the contained writer if writing succeeds.
+ pub fn finish(mut self) -> io::Result<W> {
+ self.output_all()?;
+ // We have to move the inner writer out of the encoder, and replace it with `None`
+ // to let the `ZlibEncoder` drop safely.
+ Ok(self.deflate_state.inner.take().expect(ERR_STR))
+ }
+
+ /// Resets the encoder (except the compression options), replacing the current writer
+ /// with a new one, returning the old one.
+ pub fn reset(&mut self, writer: W) -> io::Result<W> {
+ self.output_all()?;
+ self.header_written = false;
+ self.checksum = Adler32Checksum::new();
+ self.deflate_state.reset(writer)
+ }
+
+ /// Check if a zlib header should be written.
+ fn check_write_header(&mut self) -> io::Result<()> {
+ if !self.header_written {
+ write_zlib_header(self.deflate_state.output_buf(), CompressionLevel::Default)?;
+ self.header_written = true;
+ }
+ Ok(())
+ }
+
+ /// Write the trailer, which for zlib is the Adler32 checksum.
+ fn write_trailer(&mut self) -> io::Result<()> {
+ let hash = self.checksum.current_hash();
+
+ self.deflate_state
+ .inner
+ .as_mut()
+ .expect(ERR_STR)
+ .write_u32::<BigEndian>(hash)
+ }
+
+ /// Return the adler32 checksum of the currently consumed data.
+ pub fn checksum(&self) -> u32 {
+ self.checksum.current_hash()
+ }
+}
+
+impl<W: Write> io::Write for ZlibEncoder<W> {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ self.check_write_header()?;
+ let flush_mode = self.deflate_state.flush_mode;
+ let res = compress_data_dynamic_n(buf, &mut self.deflate_state, flush_mode);
+ match res {
+ // If this is returned, the whole buffer was consumed
+ Ok(0) => self.checksum.update_from_slice(buf),
+ // Otherwise, only part of it was consumed, so only that part
+ // added to the checksum.
+ Ok(n) => self.checksum.update_from_slice(&buf[0..n]),
+ _ => (),
+ };
+ res
+ }
+
+ /// Flush the encoder.
+ ///
+ /// This will flush the encoder, emulating the Sync flush method from Zlib.
+ /// This essentially finishes the current block, and sends an additional empty stored block to
+ /// the writer.
+ fn flush(&mut self) -> io::Result<()> {
+ compress_until_done(&[], &mut self.deflate_state, Flush::Sync)
+ }
+}
+
+impl<W: Write> Drop for ZlibEncoder<W> {
+ /// When the encoder is dropped, output the rest of the data.
+ ///
+ /// WARNING: This may silently fail if writing fails, so using this to finish encoding
+ /// for writers where writing might fail is not recommended; for that, call
+ /// [`finish()`](#method.finish) instead.
+ fn drop(&mut self) {
+ if self.deflate_state.inner.is_some() && !thread::panicking() {
+ let _ = self.output_all();
+ }
+ }
+}
+
+#[cfg(feature = "gzip")]
+pub mod gzip {
+
+ use std::io::{Write, Cursor};
+ use std::{thread, io};
+
+ use super::*;
+
+ use byteorder::{WriteBytesExt, LittleEndian};
+ use gzip_header::{Crc, GzBuilder};
+
+ /// A Gzip encoder/compressor.
+ ///
+ /// A struct implementing a [`Write`] interface that takes unencoded data and compresses it to
+ /// the provided writer using DEFLATE compression with Gzip headers and trailers.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// # use std::io;
+ /// #
+ /// # fn try_main() -> io::Result<Vec<u8>> {
+ /// #
+ /// use std::io::Write;
+ ///
+ /// use deflate::Compression;
+ /// use deflate::write::GzEncoder;
+ ///
+ /// let data = b"This is some test data";
+ /// let mut encoder = GzEncoder::new(Vec::new(), Compression::Default);
+ /// encoder.write_all(data)?;
+ /// let compressed_data = encoder.finish()?;
+ /// # Ok(compressed_data)
+ /// #
+ /// # }
+ /// # fn main() {
+ /// # try_main().unwrap();
+ /// # }
+ /// ```
+ /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
+ pub struct GzEncoder<W: Write> {
+ inner: DeflateEncoder<W>,
+ checksum: Crc,
+ header: Vec<u8>,
+ }
+
+ impl<W: Write> GzEncoder<W> {
+ /// Create a new `GzEncoder` writing deflate-compressed data to the underlying writer when
+ /// written to, wrapped in a gzip header and trailer. The header details will be blank.
+ pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> GzEncoder<W> {
+ GzEncoder::from_builder(GzBuilder::new(), writer, options)
+ }
+
+ /// Create a new GzEncoder from the provided `GzBuilder`. This allows customising
+ /// the details of the header, such as the filename and comment fields.
+ pub fn from_builder<O: Into<CompressionOptions>>(
+ builder: GzBuilder,
+ writer: W,
+ options: O,
+ ) -> GzEncoder<W> {
+ GzEncoder {
+ inner: DeflateEncoder::new(writer, options),
+ checksum: Crc::new(),
+ header: builder.into_header(),
+ }
+ }
+
+ /// Write header to the output buffer if it hasn't been done yet.
+ fn check_write_header(&mut self) {
+ if !self.header.is_empty() {
+ self.inner
+ .deflate_state
+ .output_buf()
+ .extend_from_slice(&self.header);
+ self.header.clear();
+ }
+ }
+
+ /// Output all pending data, including the trailer (checksum + count), as if encoding is done,
+ /// but without resetting anything.
+ fn output_all(&mut self) -> io::Result<()> {
+ self.check_write_header();
+ self.inner.output_all()?;
+ self.write_trailer()
+ }
+
+ /// Encode all pending data to the contained writer, consume this `GzEncoder`,
+ /// and return the contained writer if writing succeeds.
+ pub fn finish(mut self) -> io::Result<W> {
+ self.output_all()?;
+ // We have to move the inner writer out of the encoder, and replace it with `None`
+ // to let the `DeflateEncoder` drop safely.
+ Ok(self.inner.deflate_state.inner.take().expect(ERR_STR))
+ }
+
+ fn reset_no_header(&mut self, writer: W) -> io::Result<W> {
+ self.output_all()?;
+ self.checksum = Crc::new();
+ self.inner.deflate_state.reset(writer)
+ }
+
+ /// Resets the encoder (except the compression options), replacing the current writer
+ /// with a new one, returning the old one. (Using a blank header).
+ pub fn reset(&mut self, writer: W) -> io::Result<W> {
+ let w = self.reset_no_header(writer);
+ self.header = GzBuilder::new().into_header();
+ w
+ }
+
+ /// Resets the encoder (except the compression options), replacing the current writer
+ /// with a new one, returning the old one, and using the provided `GzBuilder` to
+ /// create the header.
+ pub fn reset_with_builder(&mut self, writer: W, builder: GzBuilder) -> io::Result<W> {
+ let w = self.reset_no_header(writer);
+ self.header = builder.into_header();
+ w
+ }
+
+ /// Write the checksum and number of bytes mod 2^32 to the output writer.
+ fn write_trailer(&mut self) -> io::Result<()> {
+ let crc = self.checksum.sum();
+ let amount = self.checksum.amt_as_u32();
+
+ // We use a buffer here to make sure we don't end up writing only half the trailer if
+ // writing fails.
+ let mut buf = [0u8; 8];
+ let mut temp = Cursor::new(&mut buf[..]);
+ temp.write_u32::<LittleEndian>(crc).unwrap();
+ temp.write_u32::<LittleEndian>(amount).unwrap();
+ self.inner
+ .deflate_state
+ .inner
+ .as_mut()
+ .expect(ERR_STR)
+ .write_all(temp.into_inner())
+ }
+
+ /// Get the crc32 checksum of the data consumed so far.
+ pub fn checksum(&self) -> u32 {
+ self.checksum.sum()
+ }
+ }
+
+ impl<W: Write> io::Write for GzEncoder<W> {
+ fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+ self.check_write_header();
+ let res = self.inner.write(buf);
+ match res {
+ Ok(0) => self.checksum.update(buf),
+ Ok(n) => self.checksum.update(&buf[0..n]),
+ _ => (),
+ };
+ res
+ }
+
+ /// Flush the encoder.
+ ///
+ /// This will flush the encoder, emulating the Sync flush method from Zlib.
+ /// This essentially finishes the current block, and sends an additional empty stored
+ /// block to the writer.
+ fn flush(&mut self) -> io::Result<()> {
+ self.inner.flush()
+ }
+ }
+
+ impl<W: Write> Drop for GzEncoder<W> {
+ /// When the encoder is dropped, output the rest of the data.
+ ///
+ /// WARNING: This may silently fail if writing fails, so using this to finish encoding
+ /// for writers where writing might fail is not recommended; for that, call
+ /// [`finish()`](#method.finish) instead.
+ fn drop(&mut self) {
+ if self.inner.deflate_state.inner.is_some() && !thread::panicking() {
+ let _ = self.output_all();
+ }
+ }
+ }
+
+ #[cfg(test)]
+ mod test {
+ use super::*;
+ use test_utils::{get_test_data, decompress_gzip};
+ #[test]
+ fn gzip_writer() {
+ let data = get_test_data();
+ let comment = b"Comment";
+ let compressed = {
+ let mut compressor = GzEncoder::from_builder(
+ GzBuilder::new().comment(&comment[..]),
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::default(),
+ );
+ compressor.write_all(&data[0..data.len() / 2]).unwrap();
+ compressor.write_all(&data[data.len() / 2..]).unwrap();
+ compressor.finish().unwrap()
+ };
+
+ let (dec, res) = decompress_gzip(&compressed);
+ assert_eq!(dec.header().comment().unwrap(), comment);
+ assert!(res == data);
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
+ use compression_options::CompressionOptions;
+ use std::io::Write;
+
+ #[test]
+ fn deflate_writer() {
+ let data = get_test_data();
+ let compressed = {
+ let mut compressor = DeflateEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::high(),
+ );
+ // Write in multiple steps to see if this works as it's supposed to.
+ compressor.write_all(&data[0..data.len() / 2]).unwrap();
+ compressor.write_all(&data[data.len() / 2..]).unwrap();
+ compressor.finish().unwrap()
+ };
+
+ let res = decompress_to_end(&compressed);
+ assert!(res == data);
+ }
+
+ #[test]
+ fn zlib_writer() {
+ let data = get_test_data();
+ let compressed = {
+ let mut compressor = ZlibEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::high(),
+ );
+ compressor.write_all(&data[0..data.len() / 2]).unwrap();
+ compressor.write_all(&data[data.len() / 2..]).unwrap();
+ compressor.finish().unwrap()
+ };
+
+ let res = decompress_zlib(&compressed);
+ assert!(res == data);
+ }
+
+ #[test]
+ /// Check if the result of compressing after resetting is the same as before.
+ fn writer_reset() {
+ let data = get_test_data();
+ let mut compressor = DeflateEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::default(),
+ );
+ compressor.write_all(&data).unwrap();
+ let res1 = compressor
+ .reset(Vec::with_capacity(data.len() / 3))
+ .unwrap();
+ compressor.write_all(&data).unwrap();
+ let res2 = compressor.finish().unwrap();
+ assert!(res1 == res2);
+ }
+
+ #[test]
+ fn writer_reset_zlib() {
+ let data = get_test_data();
+ let mut compressor = ZlibEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::default(),
+ );
+ compressor.write_all(&data).unwrap();
+ let res1 = compressor
+ .reset(Vec::with_capacity(data.len() / 3))
+ .unwrap();
+ compressor.write_all(&data).unwrap();
+ let res2 = compressor.finish().unwrap();
+ assert!(res1 == res2);
+ }
+
+ #[test]
+ fn writer_sync() {
+ let data = get_test_data();
+ let compressed = {
+ let mut compressor = DeflateEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::default(),
+ );
+ let split = data.len() / 2;
+ compressor.write_all(&data[..split]).unwrap();
+ compressor.flush().unwrap();
+ {
+ let buf = &mut compressor.deflate_state.inner.as_mut().unwrap();
+ let buf_len = buf.len();
+ // Check for the sync marker (excluding the header, as it might not line
+ // up with the byte boundary).
+ assert_eq!(buf[buf_len - 4..], [0, 0, 255, 255]);
+ }
+ compressor.write_all(&data[split..]).unwrap();
+ compressor.finish().unwrap()
+ };
+
+ let decompressed = decompress_to_end(&compressed);
+
+ assert!(decompressed == data);
+ }
+
+ #[test]
+ /// Make sure compression works with the writer when the input is between 1 and 2 window sizes.
+ fn issue_18() {
+ use compression_options::Compression;
+ let data = vec![0; 61000];
+ let compressed = {
+ let mut compressor = ZlibEncoder::new(Vec::new(), Compression::Default);
+ compressor.write_all(&data[..]).unwrap();
+ compressor.finish().unwrap()
+ };
+ let decompressed = decompress_zlib(&compressed);
+ assert!(decompressed == data);
+ }
+
+ #[test]
+ fn writer_sync_multiple() {
+ use std::cmp;
+ let data = get_test_data();
+ let compressed = {
+ let mut compressor = DeflateEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::default(),
+ );
+ let split = data.len() / 2;
+ compressor.write_all(&data[..split]).unwrap();
+ compressor.flush().unwrap();
+ compressor.flush().unwrap();
+ {
+ let buf = &mut compressor.deflate_state.inner.as_mut().unwrap();
+ let buf_len = buf.len();
+ // Check for the sync marker (excluding the header, as it might not line
+ // up with the byte boundary).
+ assert_eq!(buf[buf_len - 4..], [0, 0, 255, 255]);
+ }
+ compressor
+ .write_all(&data[split..cmp::min(split + 2, data.len())])
+ .unwrap();
+ compressor.flush().unwrap();
+ compressor
+ .write_all(&data[cmp::min(split + 2, data.len())..])
+ .unwrap();
+ compressor.finish().unwrap()
+ };
+
+ let decompressed = decompress_to_end(&compressed);
+
+ assert!(decompressed == data);
+
+ let mut compressor = DeflateEncoder::new(
+ Vec::with_capacity(data.len() / 3),
+ CompressionOptions::default(),
+ );
+
+ compressor.flush().unwrap();
+ compressor.write_all(&[1, 2]).unwrap();
+ compressor.flush().unwrap();
+ compressor.write_all(&[3]).unwrap();
+ compressor.flush().unwrap();
+ let compressed = compressor.finish().unwrap();
+
+ let decompressed = decompress_to_end(&compressed);
+
+ assert_eq!(decompressed, [1, 2, 3]);
+ }
+}
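
The writer_sync tests above look for the trailing bytes [0, 0, 255, 255] after a flush. That marker is simply the body of an empty stored block: once the header bits are padded to a byte boundary, LEN = 0x0000 and NLEN = !LEN = 0xFFFF follow. A small sketch of that check (the `ends_with_sync_marker` helper is hypothetical):

// A sync flush ends with an empty stored block, whose body is
// LEN = 0x0000 followed by NLEN = !LEN = 0xFFFF: the 00 00 FF FF marker.
fn ends_with_sync_marker(stream: &[u8]) -> bool {
    stream.len() >= 4 && stream[stream.len() - 4..] == [0x00, 0x00, 0xFF, 0xFF]
}

fn main() {
    let len: u16 = 0;
    let nlen = !len;
    let marker = [len as u8, (len >> 8) as u8, nlen as u8, (nlen >> 8) as u8];
    assert!(ends_with_sync_marker(&marker));
}
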
diff --git a/third_party/rust/deflate/src/zlib.rs b/third_party/rust/deflate/src/zlib.rs
new file mode 100644
index 0000000000..8400b8fe6c
--- /dev/null
+++ b/third_party/rust/deflate/src/zlib.rs
@@ -0,0 +1,87 @@
+//! This module contains functionality for generating a [zlib](https://tools.ietf.org/html/rfc1950)
+//! header.
+//!
+//! The Zlib header contains some metadata (a window size and a compression level), and optionally
+//! a block of data serving as an extra dictionary for the compressor/decompressor.
+//! The dictionary is not implemented in this library.
+//! The data in the header aside from the dictionary doesn't actually have any effect on the
+//! decompressed data; it only offers some hints for the decompressor on how the data was
+//! compressed.
+
+use std::io::{Write, Result};
+
+// CM = 8 means to use the DEFLATE compression method.
+const DEFAULT_CM: u8 = 8;
+// CINFO = 7 Indicates a 32k window size.
+const DEFAULT_CINFO: u8 = 7 << 4;
+const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO;
+
+// No dict by default.
+#[cfg(test)]
+const DEFAULT_FDICT: u8 = 0;
+// FLEVEL = 0 means fastest compression algorithm.
+const _DEFAULT_FLEVEL: u8 = 0 << 7;
+
+// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid.
+const FCHECK_DIVISOR: u8 = 31;
+
+#[allow(dead_code)]
+#[repr(u8)]
+pub enum CompressionLevel {
+ Fastest = 0 << 6,
+ Fast = 1 << 6,
+ Default = 2 << 6,
+ Maximum = 3 << 6,
+}
+
+/// Generate FCHECK from CMF and FLG (without FCHECK) so that they are correct according to the
+/// specification, i.e. (CMF*256 + FLG) % 31 == 0.
+/// Returns flg with the FCHECK bits added (any existing FCHECK bits are ignored).
+fn add_fcheck(cmf: u8, flg: u8) -> u8 {
+ let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR);
+
+ // Clear existing FCHECK if any
+ let flg = flg & 0b11100000;
+
+ // Casting is safe as rem can't overflow since it is a value mod 31
+ // We can simply add the value to flg as (31 - rem) will never be above 2^5
+ flg + (FCHECK_DIVISOR - rem as u8)
+}
+
+/// Write a zlib header with an empty dictionary to the writer using the specified
+/// compression level preset.
+pub fn write_zlib_header<W: Write>(writer: &mut W, level: CompressionLevel) -> Result<()> {
+ writer.write_all(&get_zlib_header(level))
+}
+
+/// Get the zlib header for the `CompressionLevel` level using the default window size and no
+/// dictionary.
+pub fn get_zlib_header(level: CompressionLevel) -> [u8; 2] {
+ let cmf = DEFAULT_CMF;
+ [cmf, add_fcheck(cmf, level as u8)]
+}
+
+#[cfg(test)]
+mod test {
+ use super::DEFAULT_CMF;
+ use super::*;
+
+ #[test]
+ fn test_gen_fcheck() {
+ let cmf = DEFAULT_CMF;
+ let flg = super::add_fcheck(
+ DEFAULT_CMF,
+ CompressionLevel::Default as u8 | super::DEFAULT_FDICT,
+ );
+ assert_eq!(((usize::from(cmf) * 256) + usize::from(flg)) % 31, 0);
+ }
+
+ #[test]
+ fn test_header() {
+ let header = get_zlib_header(CompressionLevel::Fastest);
+ assert_eq!(
+ ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31,
+ 0
+ );
+ }
+}
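
As a worked example of the FCHECK arithmetic in `add_fcheck`: with the default CMF (0x78, the DEFLATE method with a 32k window) and FLEVEL set to `Default`, the computation lands on the familiar `78 9C` zlib header. A hedged sketch reproducing the calculation outside the module:

// FCHECK calculation from RFC 1950: the 16-bit value CMF*256 + FLG must be
// divisible by 31.
fn add_fcheck(cmf: u8, flg: u8) -> u8 {
    let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % 31;
    let flg = flg & 0b1110_0000; // clear any existing FCHECK bits
    flg + (31 - rem as u8)
}

fn main() {
    let cmf: u8 = 8 | (7 << 4); // DEFLATE method, 32k window -> 0x78
    let flg: u8 = 2 << 6; // FLEVEL = 2 ("default"), no dictionary
    let header = [cmf, add_fcheck(cmf, flg)];
    assert_eq!(header, [0x78, 0x9C]); // the common default zlib header
    assert_eq!((usize::from(header[0]) * 256 + usize::from(header[1])) % 31, 0);
}
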