summaryrefslogtreecommitdiffstats
path: root/third_party/rust/deflate/src/compress.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/deflate/src/compress.rs')
-rw-r--r--third_party/rust/deflate/src/compress.rs366
1 files changed, 366 insertions, 0 deletions
diff --git a/third_party/rust/deflate/src/compress.rs b/third_party/rust/deflate/src/compress.rs
new file mode 100644
index 0000000000..0cfee6e89b
--- /dev/null
+++ b/third_party/rust/deflate/src/compress.rs
@@ -0,0 +1,366 @@
+use std::io::Write;
+use std::io;
+
+use deflate_state::DeflateState;
+use encoder_state::EncoderState;
+use lzvalue::LZValue;
+use lz77::{lz77_compress_block, LZ77Status};
+use huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType};
+use bitstream::LsbWriter;
+use stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH};
+
+const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32;
+
+/// Flush mode to use when compressing input received in multiple steps.
+///
+/// (The more obscure ZLIB flush modes are not implemented.)
+#[derive(Eq, PartialEq, Debug, Copy, Clone)]
+pub enum Flush {
+ // Simply wait for more input when we are out of input data to process.
+ None,
+ // Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and
+ // outputting all pending data, and then outputs an empty stored block.
+ // (That is, the block header indicating a stored block followed by `0000FFFF`).
+ Sync,
+ _Partial,
+ _Block,
+ _Full,
+ // Finish compressing and output all remaining input.
+ Finish,
+}
+
+/// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish
+/// with the end of block code.
+pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) {
+ for &b in buffer {
+ state.write_lzvalue(b.value());
+ }
+ state.write_end_of_block()
+}
+
+/// Compress the input data using only fixed huffman codes.
+///
+/// Currently only used in tests.
+#[cfg(test)]
+pub fn compress_data_fixed(input: &[u8]) -> Vec<u8> {
+ use lz77::lz77_compress;
+
+ let mut state = EncoderState::fixed(Vec::new());
+ let compressed = lz77_compress(input).unwrap();
+
+ // We currently don't split blocks here(this function is just used for tests anyhow)
+ state.write_start_of_block(true, true);
+ flush_to_bitstream(&compressed, &mut state);
+
+ state.flush();
+ state.reset(Vec::new())
+}
+
+fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) {
+
+ // If the input is not zero, we write stored blocks for the input data.
+ if !input.is_empty() {
+ let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable();
+
+ while let Some(chunk) = i.next() {
+ let last_chunk = i.peek().is_none();
+ // Write the block header
+ write_stored_header(writer, final_block && last_chunk);
+
+ // Write the actual data.
+ compress_block_stored(chunk, &mut writer).expect("Write error");
+
+ }
+ } else {
+ // If the input length is zero, we output an empty block. This is used for syncing.
+ write_stored_header(writer, final_block);
+ compress_block_stored(&[], &mut writer).expect("Write error");
+ }
+}
+
+/// Inner compression function used by both the writers and the simple compression functions.
+pub fn compress_data_dynamic_n<W: Write>(
+ input: &[u8],
+ deflate_state: &mut DeflateState<W>,
+ flush: Flush,
+) -> io::Result<usize> {
+ let mut bytes_written = 0;
+
+ let mut slice = input;
+
+ loop {
+ let output_buf_len = deflate_state.output_buf().len();
+ let output_buf_pos = deflate_state.output_buf_pos;
+ // If the output buffer has too much data in it already, flush it before doing anything
+ // else.
+ if output_buf_len > LARGEST_OUTPUT_BUF_SIZE {
+ let written = deflate_state
+ .inner
+ .as_mut()
+ .expect("Missing writer!")
+ .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
+
+ if written < output_buf_len.checked_sub(output_buf_pos).unwrap() {
+ // Only some of the data was flushed, so keep track of where we were.
+ deflate_state.output_buf_pos += written;
+ } else {
+ // If we flushed all of the output, reset the output buffer.
+ deflate_state.output_buf_pos = 0;
+ deflate_state.output_buf().clear();
+ }
+
+ if bytes_written == 0 {
+ // If the buffer was already full when the function was called, this has to be
+ // returned rather than Ok(0) to indicate that we didn't write anything, but are
+ // not done yet.
+ return Err(io::Error::new(
+ io::ErrorKind::Interrupted,
+ "Internal buffer full.",
+ ));
+ } else {
+ return Ok(bytes_written);
+ }
+ }
+
+ if deflate_state.lz77_state.is_last_block() {
+ // The last block has already been written, so we don't ave anything to compress.
+ break;
+ }
+
+ let (written, status, position) = lz77_compress_block(
+ slice,
+ &mut deflate_state.lz77_state,
+ &mut deflate_state.input_buffer,
+ &mut deflate_state.lz77_writer,
+ flush,
+ );
+
+ // Bytes written in this call
+ bytes_written += written;
+ // Total bytes written since the compression process started
+ // TODO: Should we realistically have to worry about overflowing here?
+ deflate_state.bytes_written += written as u64;
+
+ if status == LZ77Status::NeedInput {
+ // If we've consumed all the data input so far, and we're not
+ // finishing or syncing or ending the block here, simply return
+ // the number of bytes consumed so far.
+ return Ok(bytes_written);
+ }
+
+ // Increment start of input data
+ slice = &slice[written..];
+
+ // We need to check if this is the last block as the header will then be
+ // slightly different to indicate this.
+ let last_block = deflate_state.lz77_state.is_last_block();
+
+ let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes();
+
+ if cfg!(debug_assertions) {
+ deflate_state
+ .bytes_written_control
+ .add(current_block_input_bytes);
+ }
+
+ let partial_bits = deflate_state.encoder_state.writer.pending_bits();
+
+ let res = {
+ let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies();
+ let (l_lengths, d_lengths) =
+ deflate_state.encoder_state.huffman_table.get_lengths_mut();
+
+ gen_huffman_lengths(
+ l_freqs,
+ d_freqs,
+ current_block_input_bytes,
+ partial_bits,
+ l_lengths,
+ d_lengths,
+ &mut deflate_state.length_buffers,
+ )
+ };
+
+ // Check if we've actually managed to compress the input, and output stored blocks
+ // if not.
+ match res {
+ BlockType::Dynamic(header) => {
+ // Write the block header.
+ deflate_state
+ .encoder_state
+ .write_start_of_block(false, last_block);
+
+ // Output the lengths of the huffman codes used in this block.
+ write_huffman_lengths(
+ &header,
+ &deflate_state.encoder_state.huffman_table,
+ &mut deflate_state.length_buffers.length_buf,
+ &mut deflate_state.encoder_state.writer,
+ );
+
+ // Uupdate the huffman codes that will be used to encode the
+ // lz77-compressed data.
+ deflate_state
+ .encoder_state
+ .huffman_table
+ .update_from_lengths();
+
+
+ // Write the huffman compressed data and the end of block marker.
+ flush_to_bitstream(
+ deflate_state.lz77_writer.get_buffer(),
+ &mut deflate_state.encoder_state,
+ );
+ }
+ BlockType::Fixed => {
+ // Write the block header for fixed code blocks.
+ deflate_state
+ .encoder_state
+ .write_start_of_block(true, last_block);
+
+ // Use the pre-defined static huffman codes.
+ deflate_state.encoder_state.set_huffman_to_fixed();
+
+ // Write the compressed data and the end of block marker.
+ flush_to_bitstream(
+ deflate_state.lz77_writer.get_buffer(),
+ &mut deflate_state.encoder_state,
+ );
+ }
+ BlockType::Stored => {
+ // If compression fails, output a stored block instead.
+
+ let start_pos = position.saturating_sub(current_block_input_bytes as usize);
+
+ assert!(
+ position >= current_block_input_bytes as usize,
+ "Error! Trying to output a stored block with forgotten data!\
+ if you encounter this error, please file an issue!"
+ );
+
+ write_stored_block(
+ &deflate_state.input_buffer.get_buffer()[start_pos..position],
+ &mut deflate_state.encoder_state.writer,
+ flush == Flush::Finish && last_block,
+ );
+ }
+ };
+
+ // Clear the current lz77 data in the writer for the next call.
+ deflate_state.lz77_writer.clear();
+ // We are done with the block, so we reset the number of bytes taken
+ // for the next one.
+ deflate_state.lz77_state.reset_input_bytes();
+
+ // We are done for now.
+ if status == LZ77Status::Finished {
+ // This flush mode means that there should be an empty stored block at the end.
+ if flush == Flush::Sync {
+ write_stored_block(&[], &mut deflate_state.encoder_state.writer, false);
+ } else if !deflate_state.lz77_state.is_last_block() {
+ // Make sure a block with the last block header has been output.
+ // Not sure this can actually happen, but we make sure to finish properly
+ // if it somehow does.
+ // An empty fixed block is the shortest.
+ let es = &mut deflate_state.encoder_state;
+ es.set_huffman_to_fixed();
+ es.write_start_of_block(true, true);
+ es.write_end_of_block();
+ }
+ break;
+ }
+ }
+
+ // If we reach this point, the remaining data in the buffers is to be flushed.
+ deflate_state.encoder_state.flush();
+ // Make sure we've output everything, and return the number of bytes written if everything
+ // went well.
+ let output_buf_pos = deflate_state.output_buf_pos;
+ let written_to_writer = deflate_state
+ .inner
+ .as_mut()
+ .expect("Missing writer!")
+ .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?;
+ if written_to_writer <
+ deflate_state
+ .output_buf()
+ .len()
+ .checked_sub(output_buf_pos)
+ .unwrap()
+ {
+ deflate_state.output_buf_pos += written_to_writer;
+ } else {
+ // If we sucessfully wrote all the data, we can clear the output buffer.
+ deflate_state.output_buf_pos = 0;
+ deflate_state.output_buf().clear();
+ }
+ Ok(bytes_written)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use test_utils::{get_test_data, decompress_to_end};
+
+ #[test]
+ /// Test compressing a short string using fixed encoding.
+ fn fixed_string_mem() {
+ let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
+ let compressed = compress_data_fixed(&test_data);
+
+ let result = decompress_to_end(&compressed);
+
+ assert_eq!(test_data, result);
+ }
+
+ #[test]
+ fn fixed_data() {
+ let data = vec![190u8; 400];
+ let compressed = compress_data_fixed(&data);
+ let result = decompress_to_end(&compressed);
+
+ assert_eq!(data, result);
+ }
+
+ /// Test deflate example.
+ ///
+ /// Check if the encoder produces the same code as the example given by Mark Adler here:
+ /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203
+ #[test]
+ fn fixed_example() {
+ let test_data = b"Deflate late";
+ // let check =
+ // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0];
+ let check = [
+ 0x73,
+ 0x49,
+ 0x4d,
+ 0xcb,
+ 0x49,
+ 0x2c,
+ 0x49,
+ 0x55,
+ 0x00,
+ 0x11,
+ 0x00,
+ ];
+ let compressed = compress_data_fixed(test_data);
+ assert_eq!(&compressed, &check);
+ let decompressed = decompress_to_end(&compressed);
+ assert_eq!(&decompressed, test_data)
+ }
+
+ #[test]
+ /// Test compression from a file.
+ fn fixed_string_file() {
+ let input = get_test_data();
+
+ let compressed = compress_data_fixed(&input);
+ println!("Fixed codes compressed len: {}", compressed.len());
+ let result = decompress_to_end(&compressed);
+
+ assert_eq!(input.len(), result.len());
+ // Not using assert_eq here deliberately to avoid massive amounts of output spam.
+ assert!(input == result);
+ }
+}