diff options
Diffstat (limited to 'third_party/rust/deflate/src/lib.rs')
-rw-r--r-- | third_party/rust/deflate/src/lib.rs | 495 |
1 files changed, 495 insertions, 0 deletions
diff --git a/third_party/rust/deflate/src/lib.rs b/third_party/rust/deflate/src/lib.rs new file mode 100644 index 0000000000..13da49785b --- /dev/null +++ b/third_party/rust/deflate/src/lib.rs @@ -0,0 +1,495 @@ +//! An implementation of an encoder using [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html) +//! compression algorithm in pure Rust. +//! +//! This library provides functions to compress data using the DEFLATE algorithm, +//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or +//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats. +//! The current implementation is still a bit lacking speed-wise compared to C-libraries +//! like zlib and miniz. +//! +//! The deflate algorithm is an older compression algorithm that is still widely used today, +//! by e.g html headers, the `.png` image format, the unix `gzip` program and commonly in `.zip` +//! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing +//! some extra metadata and a checksum to validate the integrity of the raw data. +//! +//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as +//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where +//! the `DEFLATE` format (with or without wrappers) is not required. +//! +//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default, +//! but can be enabled with the `gzip` feature. +//! +//! As this library is still in development, the compression output may change slightly +//! between versions. +//! +//! +//! # Examples: +//! ## Simple compression function: +//! ``` rust +//! use deflate::deflate_bytes; +//! +//! let data = b"Some data"; +//! let compressed = deflate_bytes(data); +//! # let _ = compressed; +//! ``` +//! +//! ## Using a writer: +//! ``` rust +//! use std::io::Write; +//! +//! use deflate::Compression; +//! use deflate::write::ZlibEncoder; +//! +//! 
let data = b"This is some test data"; +//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); +//! encoder.write_all(data).expect("Write error!"); +//! let compressed_data = encoder.finish().expect("Failed to finish compression!"); +//! # let _ = compressed_data; +//! ``` + +#![cfg_attr(all(feature = "benchmarks", test), feature(test))] + +#[cfg(all(test, feature = "benchmarks"))] +extern crate test as test_std; + +#[cfg(test)] +extern crate flate2; +// #[cfg(test)] +// extern crate inflate; + +extern crate adler32; +extern crate byteorder; +#[cfg(feature = "gzip")] +extern crate gzip_header; + +mod compression_options; +mod huffman_table; +mod lz77; +mod lzvalue; +mod chained_hash_table; +mod length_encode; +mod output_writer; +mod stored_block; +mod huffman_lengths; +mod zlib; +mod checksum; +mod bit_reverse; +mod bitstream; +mod encoder_state; +mod matching; +mod input_buffer; +mod deflate_state; +mod compress; +mod rle; +mod writer; +#[cfg(test)] +mod test_utils; + +use std::io::Write; +use std::io; + +use byteorder::BigEndian; +#[cfg(feature = "gzip")] +use gzip_header::GzBuilder; +#[cfg(feature = "gzip")] +use gzip_header::Crc; +#[cfg(feature = "gzip")] +use byteorder::LittleEndian; + +use checksum::RollingChecksum; +use deflate_state::DeflateState; + +pub use compression_options::{CompressionOptions, SpecialOptions, Compression}; +use compress::Flush; +pub use lz77::MatchingType; + +use writer::compress_until_done; + +/// Encoders implementing a `Write` interface. 
+pub mod write { + pub use writer::{DeflateEncoder, ZlibEncoder}; + #[cfg(feature = "gzip")] + pub use writer::gzip::GzEncoder; +} + + +fn compress_data_dynamic<RC: RollingChecksum, W: Write>( + input: &[u8], + writer: &mut W, + mut checksum: RC, + compression_options: CompressionOptions, +) -> io::Result<()> { + checksum.update_from_slice(input); + // We use a box here to avoid putting the buffers on the stack + // It's done here rather than in the structs themselves for now to + // keep the data close in memory. + let mut deflate_state = Box::new(DeflateState::new(compression_options, writer)); + compress_until_done(input, &mut deflate_state, Flush::Finish) +} + +/// Compress the given slice of bytes with DEFLATE compression. +/// +/// Returns a `Vec<u8>` of the compressed data. +/// +/// # Examples +/// +/// ``` +/// use deflate::{deflate_bytes_conf, Compression}; +/// +/// let data = b"This is some test data"; +/// let compressed_data = deflate_bytes_conf(data, Compression::Best); +/// # let _ = compressed_data; +/// ``` +pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> { + let mut writer = Vec::with_capacity(input.len() / 3); + compress_data_dynamic( + input, + &mut writer, + checksum::NoChecksum::new(), + options.into(), + ).expect("Write error!"); + writer +} + +/// Compress the given slice of bytes with DEFLATE compression using the default compression +/// level. +/// +/// Returns a `Vec<u8>` of the compressed data. +/// +/// # Examples +/// +/// ``` +/// use deflate::deflate_bytes; +/// +/// let data = b"This is some test data"; +/// let compressed_data = deflate_bytes(data); +/// # let _ = compressed_data; +/// ``` +pub fn deflate_bytes(input: &[u8]) -> Vec<u8> { + deflate_bytes_conf(input, Compression::Default) +} + +/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer. +/// +/// Returns a `Vec<u8>` of the compressed data. 
+/// +/// Zlib dictionaries are not yet suppored. +/// +/// # Examples +/// +/// ``` +/// use deflate::{deflate_bytes_zlib_conf, Compression}; +/// +/// let data = b"This is some test data"; +/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best); +/// # let _ = compressed_data; +/// ``` +pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> { + use byteorder::WriteBytesExt; + let mut writer = Vec::with_capacity(input.len() / 3); + // Write header + zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default) + .expect("Write error when writing zlib header!"); + + let mut checksum = checksum::Adler32Checksum::new(); + compress_data_dynamic(input, &mut writer, &mut checksum, options.into()) + .expect("Write error when writing compressed data!"); + + let hash = checksum.current_hash(); + + writer + .write_u32::<BigEndian>(hash) + .expect("Write error when writing checksum!"); + writer +} + +/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer, +/// using the default compression level. +/// +/// Returns a Vec<u8> of the compressed data. +/// +/// Zlib dictionaries are not yet suppored. +/// +/// # Examples +/// +/// ``` +/// use deflate::deflate_bytes_zlib; +/// +/// let data = b"This is some test data"; +/// let compressed_data = deflate_bytes_zlib(data); +/// # let _ = compressed_data; +/// ``` +pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> { + deflate_bytes_zlib_conf(input, Compression::Default) +} + +/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer +/// using the given gzip header and compression options. +/// +/// Returns a `Vec<u8>` of the compressed data. 
+/// +/// +/// # Examples +/// +/// ``` +/// extern crate gzip_header; +/// extern crate deflate; +/// +/// # fn main() { +/// use deflate::{deflate_bytes_gzip_conf, Compression}; +/// use gzip_header::GzBuilder; +/// +/// let data = b"This is some test data"; +/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new()); +/// # let _ = compressed_data; +/// # } +/// ``` +#[cfg(feature = "gzip")] +pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>( + input: &[u8], + options: O, + gzip_header: GzBuilder, +) -> Vec<u8> { + use byteorder::WriteBytesExt; + let mut writer = Vec::with_capacity(input.len() / 3); + + // Write header + writer + .write_all(&gzip_header.into_header()) + .expect("Write error when writing header!"); + let mut checksum = checksum::NoChecksum::new(); + compress_data_dynamic(input, &mut writer, &mut checksum, options.into()) + .expect("Write error when writing compressed data!"); + + let mut crc = Crc::new(); + crc.update(input); + + writer + .write_u32::<LittleEndian>(crc.sum()) + .expect("Write error when writing checksum!"); + writer + .write_u32::<LittleEndian>(crc.amt_as_u32()) + .expect("Write error when writing amt!"); + writer +} + +/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer, +/// using the default compression level, and a gzip header with default values. +/// +/// Returns a `Vec<u8>` of the compressed data. 
///
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    deflate_bytes_gzip_conf(input, Compression::Default, GzBuilder::new())
}

/// Round-trip tests for the one-shot compression functions and the writer interface.
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;

    type CO = CompressionOptions;

    /// Write data to the writer in chunks of chunk_size.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(chunk).unwrap();
        }
    }

    /// Compress a short in-memory string and check that it shrinks and round-trips.
    #[test]
    fn dynamic_string_mem() {
        // NOTE(review): the leading whitespace in this literal may have been collapsed by
        // the diff rendering this file was taken from — confirm against upstream, since
        // the size assertion below depends on the input being compressible.
        let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    /// Compress the shared test data with default settings and check the round trip.
    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);
        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                // Clamp the context window to the bounds of both buffers, so that a
                // mismatch near the start or end prints the diagnostic instead of
                // panicking on the slice and hiding the real failure.
                let start = n.saturating_sub(3);
                let end = (n + 3).min(input.len()).min(result.len());
                println!(
                    "input: {:?}, output: {:?}",
                    &input[start..end],
                    &result[start..end]
                );
                break;
            }
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    /// Round-trip the shared test data using the RLE options.
    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    /// Round-trip the shared test data through the zlib wrapper and check that it shrinks.
    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(test_data == result);
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    /// Round-trip 32768 identical bytes followed by a few extra bytes.
    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    /// Round-trip the shared test data through the gzip wrapper with a header comment.
    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.header().comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Compress the test data through `ZlibEncoder` in `chunk_size` pieces and check that
    /// the output round-trips and is identical to the one-shot zlib function's output.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` at `level` for a range of chunk sizes.
    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    /// Test the writer by inputting data one chunk at a time.
    #[test]
    #[ignore]
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(
            &vec![5; 100000],
            compression_options::CompressionOptions::default(),
        );
    }

    /// Compress `data` with the zlib wrapper at `level` and assert that decompressing
    /// gives back the input.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        // Only short inputs go through assert_eq, which prints both sides on failure.
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        // NOTE(review): despite its name, this slice holds four values.
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}