diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/miniz_oxide/src | |
parent | Initial commit. (diff) | |
download | firefox-esr-upstream.tar.xz firefox-esr-upstream.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/miniz_oxide/src')
-rw-r--r-- | third_party/rust/miniz_oxide/src/deflate/buffer.rs | 58 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/deflate/core.rs | 2463 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/deflate/mod.rs | 227 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/deflate/stream.rs | 121 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/core.rs | 1932 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/mod.rs | 337 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/output_buffer.rs | 60 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/stream.rs | 418 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/lib.rs | 210 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/shared.rs | 25 |
10 files changed, 5851 insertions, 0 deletions
diff --git a/third_party/rust/miniz_oxide/src/deflate/buffer.rs b/third_party/rust/miniz_oxide/src/deflate/buffer.rs new file mode 100644 index 0000000000..f246c07dfb --- /dev/null +++ b/third_party/rust/miniz_oxide/src/deflate/buffer.rs @@ -0,0 +1,58 @@ +//! Buffer wrappers implementing default so we can allocate the buffers with `Box::default()` +//! to avoid stack copies. Box::new() doesn't at the moment, and using a vec means we would lose +//! static length info. + +use crate::deflate::core::{LZ_DICT_SIZE, MAX_MATCH_LEN}; + +/// Size of the buffer of lz77 encoded data. +pub const LZ_CODE_BUF_SIZE: usize = 64 * 1024; +/// Size of the output buffer. +pub const OUT_BUF_SIZE: usize = (LZ_CODE_BUF_SIZE * 13) / 10; +pub const LZ_DICT_FULL_SIZE: usize = LZ_DICT_SIZE + MAX_MATCH_LEN - 1 + 1; + +/// Size of hash values in the hash chains. +pub const LZ_HASH_BITS: i32 = 15; +/// How many bits to shift when updating the current hash value. +pub const LZ_HASH_SHIFT: i32 = (LZ_HASH_BITS + 2) / 3; +/// Size of the chained hash tables. +pub const LZ_HASH_SIZE: usize = 1 << LZ_HASH_BITS; + +#[inline] +pub fn update_hash(current_hash: u16, byte: u8) -> u16 { + ((current_hash << LZ_HASH_SHIFT) ^ u16::from(byte)) & (LZ_HASH_SIZE as u16 - 1) +} + +pub struct HashBuffers { + pub dict: [u8; LZ_DICT_FULL_SIZE], + pub next: [u16; LZ_DICT_SIZE], + pub hash: [u16; LZ_DICT_SIZE], +} + +impl HashBuffers { + #[inline] + pub fn reset(&mut self) { + *self = HashBuffers::default(); + } +} + +impl Default for HashBuffers { + fn default() -> HashBuffers { + HashBuffers { + dict: [0; LZ_DICT_FULL_SIZE], + next: [0; LZ_DICT_SIZE], + hash: [0; LZ_DICT_SIZE], + } + } +} + +pub struct LocalBuf { + pub b: [u8; OUT_BUF_SIZE], +} + +impl Default for LocalBuf { + fn default() -> LocalBuf { + LocalBuf { + b: [0; OUT_BUF_SIZE], + } + } +} diff --git a/third_party/rust/miniz_oxide/src/deflate/core.rs b/third_party/rust/miniz_oxide/src/deflate/core.rs new file mode 100644 index 0000000000..91a9bf8b85 --- /dev/null +++ b/third_party/rust/miniz_oxide/src/deflate/core.rs @@ -0,0 +1,2463 @@ +//! Streaming compression functionality. + +use alloc::boxed::Box; +use core::convert::TryInto; +use core::{cmp, mem}; + +use super::super::*; +use super::deflate_flags::*; +use super::CompressionLevel; +use crate::deflate::buffer::{ + update_hash, HashBuffers, LocalBuf, LZ_CODE_BUF_SIZE, LZ_DICT_FULL_SIZE, LZ_HASH_BITS, + LZ_HASH_SHIFT, LZ_HASH_SIZE, OUT_BUF_SIZE, +}; +use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER, MZ_ADLER32_INIT}; +use crate::DataFormat; + +// Currently not bubbled up outside this module, so can fill in with more +// context eventually if needed. +type Result<T, E = Error> = core::result::Result<T, E>; +struct Error {} + +const MAX_PROBES_MASK: i32 = 0xFFF; + +const MAX_SUPPORTED_HUFF_CODESIZE: usize = 32; + +/// Length code for length values. +#[rustfmt::skip] +const LEN_SYM: [u16; 256] = [ + 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, + 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, + 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, + 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, + 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, + 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, + 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 +]; + +/// Number of extra bits for length values. +#[rustfmt::skip] +const LEN_EXTRA: [u8; 256] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 +]; + +/// Distance codes for distances smaller than 512. +#[rustfmt::skip] +const SMALL_DIST_SYM: [u8; 512] = [ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 +]; + +/// Number of extra bits for distances smaller than 512. +#[rustfmt::skip] +const SMALL_DIST_EXTRA: [u8; 512] = [ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +]; + +/// Base values to calculate distances above 512. +#[rustfmt::skip] +const LARGE_DIST_SYM: [u8; 128] = [ + 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +]; + +/// Number of extra bits distances above 512. +#[rustfmt::skip] +const LARGE_DIST_EXTRA: [u8; 128] = [ + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 +]; + +#[rustfmt::skip] +const BITMASKS: [u32; 17] = [ + 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, + 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF +]; + +/// The maximum number of checks for matches in the hash table the compressor will make for each +/// compression level. +const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500]; + +#[derive(Copy, Clone)] +struct SymFreq { + key: u16, + sym_index: u16, +} + +pub mod deflate_flags { + /// Whether to use a zlib wrapper. + pub const TDEFL_WRITE_ZLIB_HEADER: u32 = 0x0000_1000; + /// Should we compute the adler32 checksum. + pub const TDEFL_COMPUTE_ADLER32: u32 = 0x0000_2000; + /// Should we use greedy parsing (as opposed to lazy parsing where look ahead one or more + /// bytes to check for better matches.) + pub const TDEFL_GREEDY_PARSING_FLAG: u32 = 0x0000_4000; + /// Used in miniz to skip zero-initializing hash and dict. We don't do this here, so + /// this flag is ignored. + pub const TDEFL_NONDETERMINISTIC_PARSING_FLAG: u32 = 0x0000_8000; + /// Only look for matches with a distance of 0. + pub const TDEFL_RLE_MATCHES: u32 = 0x0001_0000; + /// Only use matches that are at least 6 bytes long. + pub const TDEFL_FILTER_MATCHES: u32 = 0x0002_0000; + /// Force the compressor to only output static blocks. (Blocks using the default huffman codes + /// specified in the deflate specification.) + pub const TDEFL_FORCE_ALL_STATIC_BLOCKS: u32 = 0x0004_0000; + /// Force the compressor to only output raw/uncompressed blocks. + pub const TDEFL_FORCE_ALL_RAW_BLOCKS: u32 = 0x0008_0000; +} + +/// Strategy setting for compression. +/// +/// The non-default settings offer some special-case compression variants. +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum CompressionStrategy { + /// Don't use any of the special strategies. + Default = 0, + /// Only use matches that are at least 5 bytes long. + Filtered = 1, + /// Don't look for matches, only huffman encode the literals. + HuffmanOnly = 2, + /// Only look for matches with a distance of 1, i.e do run-length encoding only. + RLE = 3, + /// Only use static/fixed blocks. (Blocks using the default huffman codes + /// specified in the deflate specification.) + Fixed = 4, +} + +/// A list of deflate flush types. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum TDEFLFlush { + /// Normal operation. + /// + /// Compress as much as there is space for, and then return waiting for more input. + None = 0, + + /// Try to flush all the current data and output an empty raw block. + Sync = 2, + + /// Same as [`Sync`][Self::Sync], but reset the dictionary so that the following data does not + /// depend on previous data. + Full = 3, + + /// Try to flush everything and end the deflate stream. + /// + /// On success this will yield a [`TDEFLStatus::Done`] return status. + Finish = 4, +} + +impl From<MZFlush> for TDEFLFlush { + fn from(flush: MZFlush) -> Self { + match flush { + MZFlush::None => TDEFLFlush::None, + MZFlush::Sync => TDEFLFlush::Sync, + MZFlush::Full => TDEFLFlush::Full, + MZFlush::Finish => TDEFLFlush::Finish, + _ => TDEFLFlush::None, // TODO: ??? What to do ??? + } + } +} + +impl TDEFLFlush { + pub fn new(flush: i32) -> Result<Self, MZError> { + match flush { + 0 => Ok(TDEFLFlush::None), + 2 => Ok(TDEFLFlush::Sync), + 3 => Ok(TDEFLFlush::Full), + 4 => Ok(TDEFLFlush::Finish), + _ => Err(MZError::Param), + } + } +} + +/// Return status of compression. +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum TDEFLStatus { + /// Usage error. + /// + /// This indicates that either the [`CompressorOxide`] experienced a previous error, or the + /// stream has already been [`TDEFLFlush::Finish`]'d. + BadParam = -2, + + /// Error putting data into output buffer. + /// + /// This usually indicates a too-small buffer. + PutBufFailed = -1, + + /// Compression succeeded normally. + Okay = 0, + + /// Compression succeeded and the deflate stream was ended. + /// + /// This is the result of calling compression with [`TDEFLFlush::Finish`]. + Done = 1, +} + +const MAX_HUFF_SYMBOLS: usize = 288; +/// Size of hash chain for fast compression mode. +const LEVEL1_HASH_SIZE_MASK: u32 = 4095; +/// The number of huffman tables used by the compressor. +/// Literal/length, Distances and Length of the huffman codes for the other two tables. +const MAX_HUFF_TABLES: usize = 3; +/// Literal/length codes +const MAX_HUFF_SYMBOLS_0: usize = 288; +/// Distance codes. +const MAX_HUFF_SYMBOLS_1: usize = 32; +/// Huffman length values. +const MAX_HUFF_SYMBOLS_2: usize = 19; +/// Size of the chained hash table. +pub(crate) const LZ_DICT_SIZE: usize = 32_768; +/// Mask used when stepping through the hash chains. +const LZ_DICT_SIZE_MASK: usize = (LZ_DICT_SIZE as u32 - 1) as usize; +/// The minimum length of a match. +const MIN_MATCH_LEN: u8 = 3; +/// The maximum length of a match. +pub(crate) const MAX_MATCH_LEN: usize = 258; + +const DEFAULT_FLAGS: u32 = NUM_PROBES[4] | TDEFL_WRITE_ZLIB_HEADER; + +mod zlib { + const DEFAULT_CM: u8 = 8; + const DEFAULT_CINFO: u8 = 7 << 4; + const _DEFAULT_FDICT: u8 = 0; + const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO; + /// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid. + const FCHECK_DIVISOR: u8 = 31; + + /// Generate FCHECK from CMF and FLG (without FCKECH )so that they are correct according to the + /// specification, i.e (CMF*256 + FCHK) % 31 = 0. + /// Returns flg with the FCHKECK bits added (any existing FCHECK bits are ignored). + fn add_fcheck(cmf: u8, flg: u8) -> u8 { + let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR); + + // Clear existing FCHECK if any + let flg = flg & 0b11100000; + + // Casting is safe as rem can't overflow since it is a value mod 31 + // We can simply add the value to flg as (31 - rem) will never be above 2^5 + flg + (FCHECK_DIVISOR - rem as u8) + } + + fn zlib_level_from_flags(flags: u32) -> u8 { + use super::NUM_PROBES; + + let num_probes = flags & (super::MAX_PROBES_MASK as u32); + if flags & super::TDEFL_GREEDY_PARSING_FLAG != 0 { + if num_probes <= 1 { + 0 + } else { + 1 + } + } else if num_probes >= NUM_PROBES[9] { + 3 + } else { + 2 + } + } + + /// Get the zlib header for the level using the default window size and no + /// dictionary. + fn header_from_level(level: u8) -> [u8; 2] { + let cmf = DEFAULT_CMF; + [cmf, add_fcheck(cmf, (level as u8) << 6)] + } + + /// Create a zlib header from the given compression flags. + /// Only level is considered. + pub fn header_from_flags(flags: u32) -> [u8; 2] { + let level = zlib_level_from_flags(flags); + header_from_level(level) + } + + #[cfg(test)] + mod test { + #[test] + fn zlib() { + use super::super::*; + use super::*; + + let test_level = |level, expected| { + let flags = create_comp_flags_from_zip_params( + level, + MZ_DEFAULT_WINDOW_BITS, + CompressionStrategy::Default as i32, + ); + assert_eq!(zlib_level_from_flags(flags), expected); + }; + + assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2); + test_level(0, 0); + test_level(1, 0); + test_level(2, 1); + test_level(3, 1); + for i in 4..=8 { + test_level(i, 2) + } + test_level(9, 3); + test_level(10, 3); + } + + #[test] + fn test_header() { + let header = super::header_from_level(3); + assert_eq!( + ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31, + 0 + ); + } + } +} + +fn memset<T: Copy>(slice: &mut [T], val: T) { + for x in slice { + *x = val + } +} + +#[cfg(test)] +#[inline] +fn write_u16_le(val: u16, slice: &mut [u8], pos: usize) { + slice[pos] = val as u8; + slice[pos + 1] = (val >> 8) as u8; +} + +// Read the two bytes starting at pos and interpret them as an u16. +#[inline] +const fn read_u16_le(slice: &[u8], pos: usize) -> u16 { + // The compiler is smart enough to optimize this into an unaligned load. + slice[pos] as u16 | ((slice[pos + 1] as u16) << 8) +} + +/// Main compression struct. +pub struct CompressorOxide { + lz: LZOxide, + params: ParamsOxide, + huff: Box<HuffmanOxide>, + dict: DictOxide, +} + +impl CompressorOxide { + /// Create a new `CompressorOxide` with the given flags. + /// + /// # Notes + /// This function may be changed to take different parameters in the future. + pub fn new(flags: u32) -> Self { + CompressorOxide { + lz: LZOxide::new(), + params: ParamsOxide::new(flags), + /// Put HuffmanOxide on the heap with default trick to avoid + /// excessive stack copies. + huff: Box::default(), + dict: DictOxide::new(flags), + } + } + + /// Get the adler32 checksum of the currently encoded data. + pub const fn adler32(&self) -> u32 { + self.params.adler32 + } + + /// Get the return status of the previous [`compress`](fn.compress.html) + /// call with this compressor. + pub const fn prev_return_status(&self) -> TDEFLStatus { + self.params.prev_return_status + } + + /// Get the raw compressor flags. + /// + /// # Notes + /// This function may be deprecated or changed in the future to use more rust-style flags. + pub const fn flags(&self) -> i32 { + self.params.flags as i32 + } + + /// Returns whether the compressor is wrapping the data in a zlib format or not. + pub fn data_format(&self) -> DataFormat { + if (self.params.flags & TDEFL_WRITE_ZLIB_HEADER) != 0 { + DataFormat::Zlib + } else { + DataFormat::Raw + } + } + + /// Reset the state of the compressor, keeping the same parameters. + /// + /// This avoids re-allocating data. + pub fn reset(&mut self) { + // LZ buf and huffman has no settings or dynamic memory + // that needs to be saved, so we simply replace them. + self.lz = LZOxide::new(); + self.params.reset(); + *self.huff = HuffmanOxide::default(); + self.dict.reset(); + } + + /// Set the compression level of the compressor. + /// + /// Using this to change level after compression has started is supported. + /// # Notes + /// The compression strategy will be reset to the default one when this is called. + pub fn set_compression_level(&mut self, level: CompressionLevel) { + let format = self.data_format(); + self.set_format_and_level(format, level as u8); + } + + /// Set the compression level of the compressor using an integer value. + /// + /// Using this to change level after compression has started is supported. + /// # Notes + /// The compression strategy will be reset to the default one when this is called. + pub fn set_compression_level_raw(&mut self, level: u8) { + let format = self.data_format(); + self.set_format_and_level(format, level); + } + + /// Update the compression settings of the compressor. + /// + /// Changing the `DataFormat` after compression has started will result in + /// a corrupted stream. + /// + /// # Notes + /// This function mainly intended for setting the initial settings after e.g creating with + /// `default` or after calling `CompressorOxide::reset()`, and behaviour may be changed + /// to disallow calling it after starting compression in the future. + pub fn set_format_and_level(&mut self, data_format: DataFormat, level: u8) { + let flags = create_comp_flags_from_zip_params( + level.into(), + data_format.to_window_bits(), + CompressionStrategy::Default as i32, + ); + self.params.update_flags(flags); + self.dict.update_flags(flags); + } +} + +impl Default for CompressorOxide { + /// Initialize the compressor with a level of 4, zlib wrapper and + /// the default strategy. + #[inline(always)] + fn default() -> Self { + CompressorOxide { + lz: LZOxide::new(), + params: ParamsOxide::new(DEFAULT_FLAGS), + /// Put HuffmanOxide on the heap with default trick to avoid + /// excessive stack copies. + huff: Box::default(), + dict: DictOxide::new(DEFAULT_FLAGS), + } + } +} + +/// Callback function and user used in `compress_to_output`. +pub struct CallbackFunc<'a> { + pub put_buf_func: &'a mut dyn FnMut(&[u8]) -> bool, +} + +impl<'a> CallbackFunc<'a> { + fn flush_output( + &mut self, + saved_output: SavedOutputBufferOxide, + params: &mut ParamsOxide, + ) -> i32 { + // TODO: As this could be unsafe since + // we can't verify the function pointer + // this whole function should maybe be unsafe as well. + let call_success = (self.put_buf_func)(¶ms.local_buf.b[0..saved_output.pos as usize]); + + if !call_success { + params.prev_return_status = TDEFLStatus::PutBufFailed; + return params.prev_return_status as i32; + } + + params.flush_remaining as i32 + } +} + +struct CallbackBuf<'a> { + pub out_buf: &'a mut [u8], +} + +impl<'a> CallbackBuf<'a> { + fn flush_output( + &mut self, + saved_output: SavedOutputBufferOxide, + params: &mut ParamsOxide, + ) -> i32 { + if saved_output.local { + let n = cmp::min( + saved_output.pos as usize, + self.out_buf.len() - params.out_buf_ofs, + ); + (&mut self.out_buf[params.out_buf_ofs..params.out_buf_ofs + n]) + .copy_from_slice(¶ms.local_buf.b[..n]); + + params.out_buf_ofs += n; + if saved_output.pos != n { + params.flush_ofs = n as u32; + params.flush_remaining = (saved_output.pos - n) as u32; + } + } else { + params.out_buf_ofs += saved_output.pos; + } + + params.flush_remaining as i32 + } +} + +enum CallbackOut<'a> { + Func(CallbackFunc<'a>), + Buf(CallbackBuf<'a>), +} + +impl<'a> CallbackOut<'a> { + fn new_output_buffer<'b>( + &'b mut self, + local_buf: &'b mut [u8], + out_buf_ofs: usize, + ) -> OutputBufferOxide<'b> { + let is_local; + let buf_len = OUT_BUF_SIZE - 16; + let chosen_buffer = match *self { + CallbackOut::Buf(ref mut cb) if cb.out_buf.len() - out_buf_ofs >= OUT_BUF_SIZE => { + is_local = false; + &mut cb.out_buf[out_buf_ofs..out_buf_ofs + buf_len] + } + _ => { + is_local = true; + &mut local_buf[..buf_len] + } + }; + + OutputBufferOxide { + inner: chosen_buffer, + inner_pos: 0, + local: is_local, + bit_buffer: 0, + bits_in: 0, + } + } +} + +struct CallbackOxide<'a> { + in_buf: Option<&'a [u8]>, + in_buf_size: Option<&'a mut usize>, + out_buf_size: Option<&'a mut usize>, + out: CallbackOut<'a>, +} + +impl<'a> CallbackOxide<'a> { + fn new_callback_buf(in_buf: &'a [u8], out_buf: &'a mut [u8]) -> Self { + CallbackOxide { + in_buf: Some(in_buf), + in_buf_size: None, + out_buf_size: None, + out: CallbackOut::Buf(CallbackBuf { out_buf }), + } + } + + fn new_callback_func(in_buf: &'a [u8], callback_func: CallbackFunc<'a>) -> Self { + CallbackOxide { + in_buf: Some(in_buf), + in_buf_size: None, + out_buf_size: None, + out: CallbackOut::Func(callback_func), + } + } + + fn update_size(&mut self, in_size: Option<usize>, out_size: Option<usize>) { + if let (Some(in_size), Some(size)) = (in_size, self.in_buf_size.as_mut()) { + **size = in_size; + } + + if let (Some(out_size), Some(size)) = (out_size, self.out_buf_size.as_mut()) { + **size = out_size + } + } + + fn flush_output( + &mut self, + saved_output: SavedOutputBufferOxide, + params: &mut ParamsOxide, + ) -> i32 { + if saved_output.pos == 0 { + return params.flush_remaining as i32; + } + + self.update_size(Some(params.src_pos), None); + match self.out { + CallbackOut::Func(ref mut cf) => cf.flush_output(saved_output, params), + CallbackOut::Buf(ref mut cb) => cb.flush_output(saved_output, params), + } + } +} + +struct OutputBufferOxide<'a> { + pub inner: &'a mut [u8], + pub inner_pos: usize, + pub local: bool, + + pub bit_buffer: u32, + pub bits_in: u32, +} + +impl<'a> OutputBufferOxide<'a> { + fn put_bits(&mut self, bits: u32, len: u32) { + assert!(bits <= ((1u32 << len) - 1u32)); + self.bit_buffer |= bits << self.bits_in; + self.bits_in += len; + while self.bits_in >= 8 { + self.inner[self.inner_pos] = self.bit_buffer as u8; + self.inner_pos += 1; + self.bit_buffer >>= 8; + self.bits_in -= 8; + } + } + + const fn save(&self) -> SavedOutputBufferOxide { + SavedOutputBufferOxide { + pos: self.inner_pos, + bit_buffer: self.bit_buffer, + bits_in: self.bits_in, + local: self.local, + } + } + + fn load(&mut self, saved: SavedOutputBufferOxide) { + self.inner_pos = saved.pos; + self.bit_buffer = saved.bit_buffer; + self.bits_in = saved.bits_in; + self.local = saved.local; + } + + fn pad_to_bytes(&mut self) { + if self.bits_in != 0 { + let len = 8 - self.bits_in; + self.put_bits(0, len); + } + } +} + +struct SavedOutputBufferOxide { + pub pos: usize, + pub bit_buffer: u32, + pub bits_in: u32, + pub local: bool, +} + +struct BitBuffer { + pub bit_buffer: u64, + pub bits_in: u32, +} + +impl BitBuffer { + fn put_fast(&mut self, bits: u64, len: u32) { + self.bit_buffer |= bits << self.bits_in; + self.bits_in += len; + } + + fn flush(&mut self, output: &mut OutputBufferOxide) -> Result<()> { + let pos = output.inner_pos; + { + // isolation to please borrow checker + let inner = &mut output.inner[pos..pos + 8]; + let bytes = u64::to_le_bytes(self.bit_buffer); + inner.copy_from_slice(&bytes); + } + match output.inner_pos.checked_add((self.bits_in >> 3) as usize) { + Some(n) if n <= output.inner.len() => output.inner_pos = n, + _ => return Err(Error {}), + } + self.bit_buffer >>= self.bits_in & !7; + self.bits_in &= 7; + Ok(()) + } +} + +/// A struct containing data about huffman codes and symbol frequencies. +/// +/// NOTE: Only the literal/lengths have enough symbols to actually use +/// the full array. It's unclear why it's defined like this in miniz, +/// it could be for cache/alignment reasons. +struct HuffmanOxide { + /// Number of occurrences of each symbol. + pub count: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], + /// The bits of the huffman code assigned to the symbol + pub codes: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], + /// The length of the huffman code assigned to the symbol. + pub code_sizes: [[u8; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], +} + +/// Tables used for literal/lengths in `HuffmanOxide`. +const LITLEN_TABLE: usize = 0; +/// Tables for distances. +const DIST_TABLE: usize = 1; +/// Tables for the run-length encoded huffman lengths for literals/lengths/distances. +const HUFF_CODES_TABLE: usize = 2; + +/// Status of RLE encoding of huffman code lengths. +struct Rle { + pub z_count: u32, + pub repeat_count: u32, + pub prev_code_size: u8, +} + +impl Rle { + fn prev_code_size( + &mut self, + packed_code_sizes: &mut [u8], + packed_pos: &mut usize, + h: &mut HuffmanOxide, + ) -> Result<()> { + let mut write = |buf| write(buf, packed_code_sizes, packed_pos); + let counts = &mut h.count[HUFF_CODES_TABLE]; + if self.repeat_count != 0 { + if self.repeat_count < 3 { + counts[self.prev_code_size as usize] = + counts[self.prev_code_size as usize].wrapping_add(self.repeat_count as u16); + let code = self.prev_code_size; + write(&[code, code, code][..self.repeat_count as usize])?; + } else { + counts[16] = counts[16].wrapping_add(1); + write(&[16, (self.repeat_count - 3) as u8][..])?; + } + self.repeat_count = 0; + } + + Ok(()) + } + + fn zero_code_size( + &mut self, + packed_code_sizes: &mut [u8], + packed_pos: &mut usize, + h: &mut HuffmanOxide, + ) -> Result<()> { + let mut write = |buf| write(buf, packed_code_sizes, packed_pos); + let counts = &mut h.count[HUFF_CODES_TABLE]; + if self.z_count != 0 { + if self.z_count < 3 { + counts[0] = counts[0].wrapping_add(self.z_count as u16); + write(&[0, 0, 0][..self.z_count as usize])?; + } else if self.z_count <= 10 { + counts[17] = counts[17].wrapping_add(1); + write(&[17, (self.z_count - 3) as u8][..])?; + } else { + counts[18] = counts[18].wrapping_add(1); + write(&[18, (self.z_count - 11) as u8][..])?; + } + self.z_count = 0; + } + + Ok(()) + } +} + +fn write(src: &[u8], dst: &mut [u8], dst_pos: &mut usize) -> Result<()> { + match dst.get_mut(*dst_pos..*dst_pos + src.len()) { + Some(s) => s.copy_from_slice(src), + None => return Err(Error {}), + } + *dst_pos += src.len(); + Ok(()) +} + +impl Default for HuffmanOxide { + fn default() -> Self { + HuffmanOxide { + count: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], + codes: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], + code_sizes: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], + } + } +} + +impl HuffmanOxide { + fn radix_sort_symbols<'a>( + symbols0: &'a mut [SymFreq], + symbols1: &'a mut [SymFreq], + ) -> &'a mut [SymFreq] { + let mut hist = [[0; 256]; 2]; + + for freq in symbols0.iter() { + hist[0][(freq.key & 0xFF) as usize] += 1; + hist[1][((freq.key >> 8) & 0xFF) as usize] += 1; + } + + let mut n_passes = 2; + if symbols0.len() == hist[1][0] { + n_passes -= 1; + } + + let mut current_symbols = symbols0; + let mut new_symbols = symbols1; + + for (pass, hist_item) in hist.iter().enumerate().take(n_passes) { + let mut offsets = [0; 256]; + let mut offset = 0; + for i in 0..256 { + offsets[i] = offset; + offset += hist_item[i]; + } + + for sym in current_symbols.iter() { + let j = ((sym.key >> (pass * 8)) & 0xFF) as usize; + new_symbols[offsets[j]] = *sym; + offsets[j] += 1; + } + + mem::swap(&mut current_symbols, &mut new_symbols); + } + + current_symbols + } + + fn calculate_minimum_redundancy(symbols: &mut [SymFreq]) { + match symbols.len() { + 0 => (), + 1 => symbols[0].key = 1, + n => { + symbols[0].key += symbols[1].key; + let mut root = 0; + let mut leaf = 2; + for next in 1..n - 1 { + if (leaf >= n) || (symbols[root].key < symbols[leaf].key) { + symbols[next].key = symbols[root].key; + symbols[root].key = next as u16; + root += 1; + } else { + symbols[next].key = symbols[leaf].key; + leaf += 1; + } + + if (leaf >= n) || (root < next && symbols[root].key < symbols[leaf].key) { + symbols[next].key = symbols[next].key.wrapping_add(symbols[root].key); + symbols[root].key = next as u16; + root += 1; + } else { + symbols[next].key = symbols[next].key.wrapping_add(symbols[leaf].key); + leaf += 1; + } + } + + symbols[n - 2].key = 0; + for next in (0..n - 2).rev() { + symbols[next].key = symbols[symbols[next].key as usize].key + 1; + } + + let mut avbl = 1; + let mut used = 0; + let mut dpth = 0; + let mut root = (n - 2) as i32; + let mut next = (n - 1) as i32; + while avbl > 0 { + while (root >= 0) && (symbols[root as usize].key == dpth) { + used += 1; + root -= 1; + } + while avbl > used { + symbols[next as usize].key = dpth; + next -= 1; + avbl -= 1; + } + avbl = 2 * used; + dpth += 1; + used = 0; + } + } + } + } + + fn enforce_max_code_size(num_codes: &mut [i32], code_list_len: usize, max_code_size: usize) { + if code_list_len <= 1 { + return; + } + + num_codes[max_code_size] += num_codes[max_code_size + 1..].iter().sum::<i32>(); + let total = num_codes[1..=max_code_size] + .iter() + .rev() + .enumerate() + .fold(0u32, |total, (i, &x)| total + ((x as u32) << i)); + + for _ in (1 << max_code_size)..total { + num_codes[max_code_size] -= 1; + for i in (1..max_code_size).rev() { + if num_codes[i] != 0 { + num_codes[i] -= 1; + num_codes[i + 1] += 2; + break; + } + } + } + } + + fn optimize_table( + &mut self, + table_num: usize, + table_len: usize, + code_size_limit: usize, + static_table: bool, + ) { + let mut num_codes = [0i32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; + let mut next_code = [0u32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; + + if static_table { + for &code_size in &self.code_sizes[table_num][..table_len] { + num_codes[code_size as usize] += 1; + } + } else { + let mut symbols0 = [SymFreq { + key: 0, + sym_index: 0, + }; MAX_HUFF_SYMBOLS]; + let mut symbols1 = [SymFreq { + key: 0, + sym_index: 0, + }; MAX_HUFF_SYMBOLS]; + + let mut num_used_symbols = 0; + for i in 0..table_len { + if self.count[table_num][i] != 0 { + symbols0[num_used_symbols] = SymFreq { + key: self.count[table_num][i], + sym_index: i as u16, + }; + num_used_symbols += 1; + } + } + + let symbols = Self::radix_sort_symbols( + &mut symbols0[..num_used_symbols], + &mut symbols1[..num_used_symbols], + ); + Self::calculate_minimum_redundancy(symbols); + + for symbol in symbols.iter() { + num_codes[symbol.key as usize] += 1; + } + + Self::enforce_max_code_size(&mut num_codes, num_used_symbols, code_size_limit); + + memset(&mut self.code_sizes[table_num][..], 0); + memset(&mut self.codes[table_num][..], 0); + + let mut last = num_used_symbols; + for (i, &num_item) in num_codes + .iter() + .enumerate() + .take(code_size_limit + 1) + .skip(1) + { + let first = last - num_item as usize; + for symbol in &symbols[first..last] { + self.code_sizes[table_num][symbol.sym_index as usize] = i as u8; + } + last = first; + } + } + + let mut j = 0; + next_code[1] = 0; + for i in 2..=code_size_limit { + j = (j + num_codes[i - 1]) << 1; + next_code[i] = j as u32; + } + + for (&code_size, huff_code) in self.code_sizes[table_num] + .iter() + .take(table_len) + .zip(self.codes[table_num].iter_mut().take(table_len)) + { + if code_size == 0 { + continue; + } + + let mut code = next_code[code_size as usize]; + next_code[code_size as usize] += 1; + + let mut rev_code = 0; + for _ in 0..code_size { + rev_code = (rev_code << 1) | (code & 1); + code >>= 1; + } + *huff_code = rev_code as u16; + } + } + + fn start_static_block(&mut self, output: &mut OutputBufferOxide) { + memset(&mut self.code_sizes[LITLEN_TABLE][0..144], 8); + memset(&mut self.code_sizes[LITLEN_TABLE][144..256], 9); + memset(&mut self.code_sizes[LITLEN_TABLE][256..280], 7); + memset(&mut self.code_sizes[LITLEN_TABLE][280..288], 8); + + memset(&mut self.code_sizes[DIST_TABLE][..32], 5); + + self.optimize_table(LITLEN_TABLE, 288, 15, true); + self.optimize_table(DIST_TABLE, 32, 15, true); + + output.put_bits(0b01, 2) + } + + fn start_dynamic_block(&mut self, output: &mut OutputBufferOxide) -> Result<()> { + // There will always be one, and only one end of block code. + self.count[0][256] = 1; + + self.optimize_table(0, MAX_HUFF_SYMBOLS_0, 15, false); + self.optimize_table(1, MAX_HUFF_SYMBOLS_1, 15, false); + + let num_lit_codes = 286 + - &self.code_sizes[0][257..286] + .iter() + .rev() + .take_while(|&x| *x == 0) + .count(); + + let num_dist_codes = 30 + - &self.code_sizes[1][1..30] + .iter() + .rev() + .take_while(|&x| *x == 0) + .count(); + + let mut code_sizes_to_pack = [0u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1]; + let mut packed_code_sizes = [0u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1]; + + let total_code_sizes_to_pack = num_lit_codes + num_dist_codes; + + code_sizes_to_pack[..num_lit_codes].copy_from_slice(&self.code_sizes[0][..num_lit_codes]); + + code_sizes_to_pack[num_lit_codes..total_code_sizes_to_pack] + .copy_from_slice(&self.code_sizes[1][..num_dist_codes]); + + let mut rle = Rle { + z_count: 0, + repeat_count: 0, + prev_code_size: 0xFF, + }; + + memset(&mut self.count[HUFF_CODES_TABLE][..MAX_HUFF_SYMBOLS_2], 0); + + let mut packed_pos = 0; + for &code_size in &code_sizes_to_pack[..total_code_sizes_to_pack] { + if code_size == 0 { + rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + rle.z_count += 1; + if rle.z_count == 138 { + rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + } + } else { + rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + if code_size != rle.prev_code_size { + rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + self.count[HUFF_CODES_TABLE][code_size as usize] = + self.count[HUFF_CODES_TABLE][code_size as usize].wrapping_add(1); + write(&[code_size], &mut packed_code_sizes, &mut packed_pos)?; + } else { + rle.repeat_count += 1; + if rle.repeat_count == 6 { + rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + } + } + } + rle.prev_code_size = code_size; + } + + if rle.repeat_count != 0 { + rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + } else { + rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; + } + + self.optimize_table(2, MAX_HUFF_SYMBOLS_2, 7, false); + + output.put_bits(2, 2); + + output.put_bits((num_lit_codes - 257) as u32, 5); + output.put_bits((num_dist_codes - 1) as u32, 5); + + let mut num_bit_lengths = 18 + - HUFFMAN_LENGTH_ORDER + .iter() + .rev() + .take_while(|&swizzle| self.code_sizes[HUFF_CODES_TABLE][*swizzle as usize] == 0) + .count(); + + num_bit_lengths = cmp::max(4, num_bit_lengths + 1); + output.put_bits(num_bit_lengths as u32 - 4, 4); + for &swizzle in &HUFFMAN_LENGTH_ORDER[..num_bit_lengths] { + output.put_bits( + u32::from(self.code_sizes[HUFF_CODES_TABLE][swizzle as usize]), + 3, + ); + } + + let mut packed_code_size_index = 0; + while packed_code_size_index < packed_pos { + let code = packed_code_sizes[packed_code_size_index] as usize; + packed_code_size_index += 1; + assert!(code < MAX_HUFF_SYMBOLS_2); + output.put_bits( + u32::from(self.codes[HUFF_CODES_TABLE][code]), + u32::from(self.code_sizes[HUFF_CODES_TABLE][code]), + ); + if code >= 16 { + output.put_bits( + u32::from(packed_code_sizes[packed_code_size_index]), + [2, 3, 7][code - 16], + ); + packed_code_size_index += 1; + } + } + + Ok(()) + } +} + +struct DictOxide { + /// The maximum number of checks in the hash chain, for the initial, + /// and the lazy match respectively. + pub max_probes: [u32; 2], + /// Buffer of input data. + /// Padded with 1 byte to simplify matching code in `compress_fast`. + pub b: Box<HashBuffers>, + + pub code_buf_dict_pos: usize, + pub lookahead_size: usize, + pub lookahead_pos: usize, + pub size: usize, +} + +const fn probes_from_flags(flags: u32) -> [u32; 2] { + [ + 1 + ((flags & 0xFFF) + 2) / 3, + 1 + (((flags & 0xFFF) >> 2) + 2) / 3, + ] +} + +impl DictOxide { + fn new(flags: u32) -> Self { + DictOxide { + max_probes: probes_from_flags(flags), + b: Box::default(), + code_buf_dict_pos: 0, + lookahead_size: 0, + lookahead_pos: 0, + size: 0, + } + } + + fn update_flags(&mut self, flags: u32) { + self.max_probes = probes_from_flags(flags); + } + + fn reset(&mut self) { + self.b.reset(); + self.code_buf_dict_pos = 0; + self.lookahead_size = 0; + self.lookahead_pos = 0; + self.size = 0; + } + + /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of + /// type T. + #[inline] + fn read_unaligned_u32(&self, pos: usize) -> u32 { + // Masking the value here helps avoid bounds checks. + let pos = (pos & LZ_DICT_SIZE_MASK) as usize; + let end = pos + 4; + // Somehow this assertion makes things faster. + assert!(end < LZ_DICT_FULL_SIZE); + + let bytes: [u8; 4] = self.b.dict[pos..end].try_into().unwrap(); + u32::from_le_bytes(bytes) + } + + /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of + /// type T. + #[inline] + fn read_unaligned_u64(&self, pos: usize) -> u64 { + let pos = pos as usize; + let bytes: [u8; 8] = self.b.dict[pos..pos + 8].try_into().unwrap(); + u64::from_le_bytes(bytes) + } + + /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of + /// type T. + #[inline] + fn read_as_u16(&self, pos: usize) -> u16 { + read_u16_le(&self.b.dict[..], pos) + } + + /// Try to find a match for the data at lookahead_pos in the dictionary that is + /// longer than `match_len`. + /// Returns a tuple containing (match_distance, match_length). Will be equal to the input + /// values if no better matches were found. + fn find_match( + &self, + lookahead_pos: usize, + max_dist: usize, + max_match_len: u32, + mut match_dist: u32, + mut match_len: u32, + ) -> (u32, u32) { + // Clamp the match len and max_match_len to be valid. (It should be when this is called, but + // do it for now just in case for safety reasons.) + // This should normally end up as at worst conditional moves, + // so it shouldn't slow us down much. + // TODO: Statically verify these so we don't need to do this. + let max_match_len = cmp::min(MAX_MATCH_LEN as u32, max_match_len); + match_len = cmp::max(match_len, 1); + + let pos = lookahead_pos as usize & LZ_DICT_SIZE_MASK; + let mut probe_pos = pos; + // Number of probes into the hash chains. + let mut num_probes_left = self.max_probes[(match_len >= 32) as usize]; + + // If we already have a match of the full length don't bother searching for another one. + if max_match_len <= match_len { + return (match_dist, match_len); + } + + // Read the last byte of the current match, and the next one, used to compare matches. + let mut c01: u16 = self.read_as_u16(pos as usize + match_len as usize - 1); + // Read the two bytes at the end position of the current match. + let s01: u16 = self.read_as_u16(pos as usize); + + 'outer: loop { + let mut dist; + 'found: loop { + num_probes_left -= 1; + if num_probes_left == 0 { + // We have done as many probes in the hash chain as the current compression + // settings allow, so return the best match we found, if any. + return (match_dist, match_len); + } + + for _ in 0..3 { + let next_probe_pos = self.b.next[probe_pos as usize] as usize; + + dist = (lookahead_pos - next_probe_pos) & 0xFFFF; + if next_probe_pos == 0 || dist > max_dist { + // We reached the end of the hash chain, or the next value is further away + // than the maximum allowed distance, so return the best match we found, if + // any. + return (match_dist, match_len); + } + + // Mask the position value to get the position in the hash chain of the next + // position to match against. + probe_pos = next_probe_pos & LZ_DICT_SIZE_MASK; + + if self.read_as_u16((probe_pos + match_len as usize - 1) as usize) == c01 { + break 'found; + } + } + } + + if dist == 0 { + // We've looked through the whole match range, so return the best match we + // found. + return (match_dist, match_len); + } + + // Check if the two first bytes match. + if self.read_as_u16(probe_pos as usize) != s01 { + continue; + } + + let mut p = pos + 2; + let mut q = probe_pos + 2; + // The first two bytes matched, so check the full length of the match. + for _ in 0..32 { + let p_data: u64 = self.read_unaligned_u64(p); + let q_data: u64 = self.read_unaligned_u64(q); + // Compare of 8 bytes at a time by using unaligned loads of 64-bit integers. + let xor_data = p_data ^ q_data; + if xor_data == 0 { + p += 8; + q += 8; + } else { + // If not all of the last 8 bytes matched, check how may of them did. + let trailing = xor_data.trailing_zeros(); + + let probe_len = p - pos + (trailing as usize >> 3); + if probe_len > match_len as usize { + match_dist = dist as u32; + match_len = cmp::min(max_match_len, probe_len as u32); + if match_len == max_match_len { + // We found a match that had the maximum allowed length, + // so there is now point searching further. + return (match_dist, match_len); + } + // We found a better match, so save the last two bytes for further match + // comparisons. + c01 = self.read_as_u16(pos + match_len as usize - 1) + } + continue 'outer; + } + } + + return (dist as u32, cmp::min(max_match_len, MAX_MATCH_LEN as u32)); + } + } +} + +struct ParamsOxide { + pub flags: u32, + pub greedy_parsing: bool, + pub block_index: u32, + + pub saved_match_dist: u32, + pub saved_match_len: u32, + pub saved_lit: u8, + + pub flush: TDEFLFlush, + pub flush_ofs: u32, + pub flush_remaining: u32, + pub finished: bool, + + pub adler32: u32, + + pub src_pos: usize, + + pub out_buf_ofs: usize, + pub prev_return_status: TDEFLStatus, + + pub saved_bit_buffer: u32, + pub saved_bits_in: u32, + + pub local_buf: Box<LocalBuf>, +} + +impl ParamsOxide { + fn new(flags: u32) -> Self { + ParamsOxide { + flags, + greedy_parsing: flags & TDEFL_GREEDY_PARSING_FLAG != 0, + block_index: 0, + saved_match_dist: 0, + saved_match_len: 0, + saved_lit: 0, + flush: TDEFLFlush::None, + flush_ofs: 0, + flush_remaining: 0, + finished: false, + adler32: MZ_ADLER32_INIT, + src_pos: 0, + out_buf_ofs: 0, + prev_return_status: TDEFLStatus::Okay, + saved_bit_buffer: 0, + saved_bits_in: 0, + local_buf: Box::default(), + } + } + + fn update_flags(&mut self, flags: u32) { + self.flags = flags; + self.greedy_parsing = self.flags & TDEFL_GREEDY_PARSING_FLAG != 0; + } + + /// Reset state, saving settings. + fn reset(&mut self) { + self.block_index = 0; + self.saved_match_len = 0; + self.saved_match_dist = 0; + self.saved_lit = 0; + self.flush = TDEFLFlush::None; + self.flush_ofs = 0; + self.flush_remaining = 0; + self.finished = false; + self.adler32 = MZ_ADLER32_INIT; + self.src_pos = 0; + self.out_buf_ofs = 0; + self.prev_return_status = TDEFLStatus::Okay; + self.saved_bit_buffer = 0; + self.saved_bits_in = 0; + self.local_buf.b = [0; OUT_BUF_SIZE]; + } +} + +struct LZOxide { + pub codes: [u8; LZ_CODE_BUF_SIZE], + pub code_position: usize, + pub flag_position: usize, + + // The total number of bytes in the current block. + // (Could maybe use usize, but it's not possible to exceed a block size of ) + pub total_bytes: u32, + pub num_flags_left: u32, +} + +impl LZOxide { + const fn new() -> Self { + LZOxide { + codes: [0; LZ_CODE_BUF_SIZE], + code_position: 1, + flag_position: 0, + total_bytes: 0, + num_flags_left: 8, + } + } + + fn write_code(&mut self, val: u8) { + self.codes[self.code_position] = val; + self.code_position += 1; + } + + fn init_flag(&mut self) { + if self.num_flags_left == 8 { + *self.get_flag() = 0; + self.code_position -= 1; + } else { + *self.get_flag() >>= self.num_flags_left; + } + } + + fn get_flag(&mut self) -> &mut u8 { + &mut self.codes[self.flag_position] + } + + fn plant_flag(&mut self) { + self.flag_position = self.code_position; + self.code_position += 1; + } + + fn consume_flag(&mut self) { + self.num_flags_left -= 1; + if self.num_flags_left == 0 { + self.num_flags_left = 8; + self.plant_flag(); + } + } +} + +fn compress_lz_codes( + huff: &HuffmanOxide, + output: &mut OutputBufferOxide, + lz_code_buf: &[u8], +) -> Result<bool> { + let mut flags = 1; + let mut bb = BitBuffer { + bit_buffer: u64::from(output.bit_buffer), + bits_in: output.bits_in, + }; + + let mut i: usize = 0; + while i < lz_code_buf.len() { + if flags == 1 { + flags = u32::from(lz_code_buf[i]) | 0x100; + i += 1; + } + + // The lz code was a length code + if flags & 1 == 1 { + flags >>= 1; + + let sym; + let num_extra_bits; + + let match_len = lz_code_buf[i] as usize; + + let match_dist = read_u16_le(lz_code_buf, i + 1); + + i += 3; + + debug_assert!(huff.code_sizes[0][LEN_SYM[match_len] as usize] != 0); + bb.put_fast( + u64::from(huff.codes[0][LEN_SYM[match_len] as usize]), + u32::from(huff.code_sizes[0][LEN_SYM[match_len] as usize]), + ); + bb.put_fast( + match_len as u64 & u64::from(BITMASKS[LEN_EXTRA[match_len] as usize]), + u32::from(LEN_EXTRA[match_len]), + ); + + if match_dist < 512 { + sym = SMALL_DIST_SYM[match_dist as usize] as usize; + num_extra_bits = SMALL_DIST_EXTRA[match_dist as usize] as usize; + } else { + sym = LARGE_DIST_SYM[(match_dist >> 8) as usize] as usize; + num_extra_bits = LARGE_DIST_EXTRA[(match_dist >> 8) as usize] as usize; + } + + debug_assert!(huff.code_sizes[1][sym] != 0); + bb.put_fast( + u64::from(huff.codes[1][sym]), + u32::from(huff.code_sizes[1][sym]), + ); + bb.put_fast( + u64::from(match_dist) & u64::from(BITMASKS[num_extra_bits as usize]), + num_extra_bits as u32, + ); + } else { + // The lz code was a literal + for _ in 0..3 { + flags >>= 1; + let lit = lz_code_buf[i]; + i += 1; + + debug_assert!(huff.code_sizes[0][lit as usize] != 0); + bb.put_fast( + u64::from(huff.codes[0][lit as usize]), + u32::from(huff.code_sizes[0][lit as usize]), + ); + + if flags & 1 == 1 || i >= lz_code_buf.len() { + break; + } + } + } + + bb.flush(output)?; + } + + output.bits_in = 0; + output.bit_buffer = 0; + while bb.bits_in != 0 { + let n = cmp::min(bb.bits_in, 16); + output.put_bits(bb.bit_buffer as u32 & BITMASKS[n as usize], n); + bb.bit_buffer >>= n; + bb.bits_in -= n; + } + + // Output the end of block symbol. + output.put_bits( + u32::from(huff.codes[0][256]), + u32::from(huff.code_sizes[0][256]), + ); + + Ok(true) +} + +fn compress_block( + huff: &mut HuffmanOxide, + output: &mut OutputBufferOxide, + lz: &LZOxide, + static_block: bool, +) -> Result<bool> { + if static_block { + huff.start_static_block(output); + } else { + huff.start_dynamic_block(output)?; + } + + compress_lz_codes(huff, output, &lz.codes[..lz.code_position]) +} + +fn flush_block( + d: &mut CompressorOxide, + callback: &mut CallbackOxide, + flush: TDEFLFlush, +) -> Result<i32> { + let mut saved_buffer; + { + let mut output = callback + .out + .new_output_buffer(&mut d.params.local_buf.b, d.params.out_buf_ofs); + output.bit_buffer = d.params.saved_bit_buffer; + output.bits_in = d.params.saved_bits_in; + + let use_raw_block = (d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0) + && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos) <= d.dict.size; + + assert!(d.params.flush_remaining == 0); + d.params.flush_ofs = 0; + d.params.flush_remaining = 0; + + d.lz.init_flag(); + + // If we are at the start of the stream, write the zlib header if requested. + if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 && d.params.block_index == 0 { + let header = zlib::header_from_flags(d.params.flags as u32); + output.put_bits(header[0].into(), 8); + output.put_bits(header[1].into(), 8); + } + + // Output the block header. + output.put_bits((flush == TDEFLFlush::Finish) as u32, 1); + + saved_buffer = output.save(); + + let comp_success = if !use_raw_block { + let use_static = + (d.params.flags & TDEFL_FORCE_ALL_STATIC_BLOCKS != 0) || (d.lz.total_bytes < 48); + compress_block(&mut d.huff, &mut output, &d.lz, use_static)? + } else { + false + }; + + // If we failed to compress anything and the output would take up more space than the output + // data, output a stored block instead, which has at most 5 bytes of overhead. + // We only use some simple heuristics for now. + // A stored block will have an overhead of at least 4 bytes containing the block length + // but usually more due to the length parameters having to start at a byte boundary and thus + // requiring up to 5 bytes of padding. + // As a static block will have an overhead of at most 1 bit per byte + // (as literals are either 8 or 9 bytes), a raw block will + // never take up less space if the number of input bytes are less than 32. + let expanded = (d.lz.total_bytes > 32) + && (output.inner_pos - saved_buffer.pos + 1 >= (d.lz.total_bytes as usize)) + && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos <= d.dict.size); + + if use_raw_block || expanded { + output.load(saved_buffer); + + // Block header. + output.put_bits(0, 2); + + // Block length has to start on a byte boundary, s opad. + output.pad_to_bytes(); + + // Block length and ones complement of block length. + output.put_bits(d.lz.total_bytes & 0xFFFF, 16); + output.put_bits(!d.lz.total_bytes & 0xFFFF, 16); + + // Write the actual bytes. + for i in 0..d.lz.total_bytes { + let pos = (d.dict.code_buf_dict_pos + i as usize) & LZ_DICT_SIZE_MASK; + output.put_bits(u32::from(d.dict.b.dict[pos as usize]), 8); + } + } else if !comp_success { + output.load(saved_buffer); + compress_block(&mut d.huff, &mut output, &d.lz, true)?; + } + + if flush != TDEFLFlush::None { + if flush == TDEFLFlush::Finish { + output.pad_to_bytes(); + if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 { + let mut adler = d.params.adler32; + for _ in 0..4 { + output.put_bits((adler >> 24) & 0xFF, 8); + adler <<= 8; + } + } + } else { + // Sync or Full flush. + // Output an empty raw block. + output.put_bits(0, 3); + output.pad_to_bytes(); + output.put_bits(0, 16); + output.put_bits(0xFFFF, 16); + } + } + + memset(&mut d.huff.count[0][..MAX_HUFF_SYMBOLS_0], 0); + memset(&mut d.huff.count[1][..MAX_HUFF_SYMBOLS_1], 0); + + d.lz.code_position = 1; + d.lz.flag_position = 0; + d.lz.num_flags_left = 8; + d.dict.code_buf_dict_pos += d.lz.total_bytes as usize; + d.lz.total_bytes = 0; + d.params.block_index += 1; + + saved_buffer = output.save(); + + d.params.saved_bit_buffer = saved_buffer.bit_buffer; + d.params.saved_bits_in = saved_buffer.bits_in; + } + + Ok(callback.flush_output(saved_buffer, &mut d.params)) +} + +fn record_literal(h: &mut HuffmanOxide, lz: &mut LZOxide, lit: u8) { + lz.total_bytes += 1; + lz.write_code(lit); + + *lz.get_flag() >>= 1; + lz.consume_flag(); + + h.count[0][lit as usize] += 1; +} + +fn record_match(h: &mut HuffmanOxide, lz: &mut LZOxide, mut match_len: u32, mut match_dist: u32) { + assert!(match_len >= MIN_MATCH_LEN.into()); + assert!(match_dist >= 1); + assert!(match_dist as usize <= LZ_DICT_SIZE); + + lz.total_bytes += match_len; + match_dist -= 1; + match_len -= u32::from(MIN_MATCH_LEN); + lz.write_code(match_len as u8); + lz.write_code(match_dist as u8); + lz.write_code((match_dist >> 8) as u8); + + *lz.get_flag() >>= 1; + *lz.get_flag() |= 0x80; + lz.consume_flag(); + + let symbol = if match_dist < 512 { + SMALL_DIST_SYM[match_dist as usize] + } else { + LARGE_DIST_SYM[((match_dist >> 8) & 127) as usize] + } as usize; + h.count[1][symbol] += 1; + h.count[0][LEN_SYM[match_len as usize] as usize] += 1; +} + +fn compress_normal(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { + let mut src_pos = d.params.src_pos; + let in_buf = match callback.in_buf { + None => return true, + Some(in_buf) => in_buf, + }; + + let mut lookahead_size = d.dict.lookahead_size; + let mut lookahead_pos = d.dict.lookahead_pos; + let mut saved_lit = d.params.saved_lit; + let mut saved_match_dist = d.params.saved_match_dist; + let mut saved_match_len = d.params.saved_match_len; + + while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) { + let src_buf_left = in_buf.len() - src_pos; + let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size as usize); + + if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1 + && num_bytes_to_process > 0 + { + let dictb = &mut d.dict.b; + + let mut dst_pos = (lookahead_pos + lookahead_size as usize) & LZ_DICT_SIZE_MASK; + let mut ins_pos = lookahead_pos + lookahead_size as usize - 2; + // Start the hash value from the first two bytes + let mut hash = update_hash( + u16::from(dictb.dict[(ins_pos & LZ_DICT_SIZE_MASK) as usize]), + dictb.dict[((ins_pos + 1) & LZ_DICT_SIZE_MASK) as usize], + ); + + lookahead_size += num_bytes_to_process; + + for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { + // Add byte to input buffer. + dictb.dict[dst_pos as usize] = c; + if (dst_pos as usize) < MAX_MATCH_LEN - 1 { + dictb.dict[LZ_DICT_SIZE + dst_pos as usize] = c; + } + + // Generate hash from the current byte, + hash = update_hash(hash, c); + dictb.next[(ins_pos & LZ_DICT_SIZE_MASK) as usize] = dictb.hash[hash as usize]; + // and insert it into the hash chain. + dictb.hash[hash as usize] = ins_pos as u16; + dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK; + ins_pos += 1; + } + src_pos += num_bytes_to_process; + } else { + let dictb = &mut d.dict.b; + for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { + let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; + dictb.dict[dst_pos as usize] = c; + if (dst_pos as usize) < MAX_MATCH_LEN - 1 { + dictb.dict[LZ_DICT_SIZE + dst_pos as usize] = c; + } + + lookahead_size += 1; + if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() { + let ins_pos = lookahead_pos + lookahead_size - 3; + let hash = ((u32::from(dictb.dict[(ins_pos & LZ_DICT_SIZE_MASK) as usize]) + << (LZ_HASH_SHIFT * 2)) + ^ ((u32::from(dictb.dict[((ins_pos + 1) & LZ_DICT_SIZE_MASK) as usize]) + << LZ_HASH_SHIFT) + ^ u32::from(c))) + & (LZ_HASH_SIZE as u32 - 1); + + dictb.next[(ins_pos & LZ_DICT_SIZE_MASK) as usize] = dictb.hash[hash as usize]; + dictb.hash[hash as usize] = ins_pos as u16; + } + } + + src_pos += num_bytes_to_process; + } + + d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); + if d.params.flush == TDEFLFlush::None && (lookahead_size as usize) < MAX_MATCH_LEN { + break; + } + + let mut len_to_move = 1; + let mut cur_match_dist = 0; + let mut cur_match_len = if saved_match_len != 0 { + saved_match_len + } else { + u32::from(MIN_MATCH_LEN) - 1 + }; + let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; + if d.params.flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS) != 0 { + // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte. + if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 { + let c = d.dict.b.dict[((cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK) as usize]; + cur_match_len = d.dict.b.dict[cur_pos as usize..(cur_pos + lookahead_size) as usize] + .iter() + .take_while(|&x| *x == c) + .count() as u32; + if cur_match_len < MIN_MATCH_LEN.into() { + cur_match_len = 0 + } else { + cur_match_dist = 1 + } + } + } else { + // Try to find a match for the bytes at the current position. + let dist_len = d.dict.find_match( + lookahead_pos, + d.dict.size, + lookahead_size as u32, + cur_match_dist, + cur_match_len, + ); + cur_match_dist = dist_len.0; + cur_match_len = dist_len.1; + } + + let far_and_small = cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024; + let filter_small = d.params.flags & TDEFL_FILTER_MATCHES != 0 && cur_match_len <= 5; + if far_and_small || filter_small || cur_pos == cur_match_dist as usize { + cur_match_dist = 0; + cur_match_len = 0; + } + + if saved_match_len != 0 { + if cur_match_len > saved_match_len { + record_literal(&mut d.huff, &mut d.lz, saved_lit); + if cur_match_len >= 128 { + record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); + saved_match_len = 0; + len_to_move = cur_match_len as usize; + } else { + saved_lit = d.dict.b.dict[cur_pos as usize]; + saved_match_dist = cur_match_dist; + saved_match_len = cur_match_len; + } + } else { + record_match(&mut d.huff, &mut d.lz, saved_match_len, saved_match_dist); + len_to_move = (saved_match_len - 1) as usize; + saved_match_len = 0; + } + } else if cur_match_dist == 0 { + record_literal( + &mut d.huff, + &mut d.lz, + d.dict.b.dict[cmp::min(cur_pos as usize, d.dict.b.dict.len() - 1)], + ); + } else if d.params.greedy_parsing + || (d.params.flags & TDEFL_RLE_MATCHES != 0) + || cur_match_len >= 128 + { + // If we are using lazy matching, check for matches at the next byte if the current + // match was shorter than 128 bytes. + record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); + len_to_move = cur_match_len as usize; + } else { + saved_lit = d.dict.b.dict[cmp::min(cur_pos as usize, d.dict.b.dict.len() - 1)]; + saved_match_dist = cur_match_dist; + saved_match_len = cur_match_len; + } + + lookahead_pos += len_to_move; + assert!(lookahead_size >= len_to_move); + lookahead_size -= len_to_move; + d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE); + + let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8; + let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0; + let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize; + let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw); + + if lz_buf_tight || fat_or_raw { + d.params.src_pos = src_pos; + // These values are used in flush_block, so we need to write them back here. + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + + let n = flush_block(d, callback, TDEFLFlush::None) + .unwrap_or(TDEFLStatus::PutBufFailed as i32); + if n != 0 { + d.params.saved_lit = saved_lit; + d.params.saved_match_dist = saved_match_dist; + d.params.saved_match_len = saved_match_len; + return n > 0; + } + } + } + + d.params.src_pos = src_pos; + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + d.params.saved_lit = saved_lit; + d.params.saved_match_dist = saved_match_dist; + d.params.saved_match_len = saved_match_len; + true +} + +const COMP_FAST_LOOKAHEAD_SIZE: usize = 4096; + +fn compress_fast(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { + let mut src_pos = d.params.src_pos; + let mut lookahead_size = d.dict.lookahead_size; + let mut lookahead_pos = d.dict.lookahead_pos; + + let mut cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; + let in_buf = match callback.in_buf { + None => return true, + Some(in_buf) => in_buf, + }; + + debug_assert!(d.lz.code_position < LZ_CODE_BUF_SIZE - 2); + + while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size > 0) { + let mut dst_pos = ((lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK) as usize; + let mut num_bytes_to_process = cmp::min( + in_buf.len() - src_pos, + (COMP_FAST_LOOKAHEAD_SIZE - lookahead_size) as usize, + ); + lookahead_size += num_bytes_to_process; + + while num_bytes_to_process != 0 { + let n = cmp::min(LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + d.dict.b.dict[dst_pos..dst_pos + n].copy_from_slice(&in_buf[src_pos..src_pos + n]); + + if dst_pos < MAX_MATCH_LEN - 1 { + let m = cmp::min(n, MAX_MATCH_LEN - 1 - dst_pos); + d.dict.b.dict[dst_pos + LZ_DICT_SIZE..dst_pos + LZ_DICT_SIZE + m] + .copy_from_slice(&in_buf[src_pos..src_pos + m]); + } + + src_pos += n; + dst_pos = (dst_pos + n) & LZ_DICT_SIZE_MASK as usize; + num_bytes_to_process -= n; + } + + d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); + if d.params.flush == TDEFLFlush::None && lookahead_size < COMP_FAST_LOOKAHEAD_SIZE { + break; + } + + while lookahead_size >= 4 { + let mut cur_match_len = 1; + + let first_trigram = d.dict.read_unaligned_u32(cur_pos) & 0xFF_FFFF; + + let hash = (first_trigram ^ (first_trigram >> (24 - (LZ_HASH_BITS - 8)))) + & LEVEL1_HASH_SIZE_MASK; + + let mut probe_pos = usize::from(d.dict.b.hash[hash as usize]); + d.dict.b.hash[hash as usize] = lookahead_pos as u16; + + let mut cur_match_dist = (lookahead_pos - probe_pos as usize) as u16; + if cur_match_dist as usize <= d.dict.size { + probe_pos &= LZ_DICT_SIZE_MASK; + + let trigram = d.dict.read_unaligned_u32(probe_pos) & 0xFF_FFFF; + + if first_trigram == trigram { + // Trigram was tested, so we can start with "+ 3" displacement. + let mut p = cur_pos + 3; + let mut q = probe_pos + 3; + cur_match_len = (|| { + for _ in 0..32 { + let p_data: u64 = d.dict.read_unaligned_u64(p); + let q_data: u64 = d.dict.read_unaligned_u64(q); + let xor_data = p_data ^ q_data; + if xor_data == 0 { + p += 8; + q += 8; + } else { + let trailing = xor_data.trailing_zeros(); + return p as u32 - cur_pos as u32 + (trailing >> 3); + } + } + + if cur_match_dist == 0 { + 0 + } else { + MAX_MATCH_LEN as u32 + } + })(); + + if cur_match_len < MIN_MATCH_LEN.into() + || (cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024) + { + let lit = first_trigram as u8; + cur_match_len = 1; + d.lz.write_code(lit); + *d.lz.get_flag() >>= 1; + d.huff.count[0][lit as usize] += 1; + } else { + // Limit the match to the length of the lookahead so we don't create a match + // that ends after the end of the input data. + cur_match_len = cmp::min(cur_match_len, lookahead_size as u32); + debug_assert!(cur_match_len >= MIN_MATCH_LEN.into()); + debug_assert!(cur_match_dist >= 1); + debug_assert!(cur_match_dist as usize <= LZ_DICT_SIZE); + cur_match_dist -= 1; + + d.lz.write_code((cur_match_len - u32::from(MIN_MATCH_LEN)) as u8); + d.lz.write_code(cur_match_dist as u8); + d.lz.write_code((cur_match_dist >> 8) as u8); + + *d.lz.get_flag() >>= 1; + *d.lz.get_flag() |= 0x80; + if cur_match_dist < 512 { + d.huff.count[1][SMALL_DIST_SYM[cur_match_dist as usize] as usize] += 1; + } else { + d.huff.count[1] + [LARGE_DIST_SYM[(cur_match_dist >> 8) as usize] as usize] += 1; + } + + d.huff.count[0][LEN_SYM[(cur_match_len - u32::from(MIN_MATCH_LEN)) as usize] + as usize] += 1; + } + } else { + d.lz.write_code(first_trigram as u8); + *d.lz.get_flag() >>= 1; + d.huff.count[0][first_trigram as u8 as usize] += 1; + } + + d.lz.consume_flag(); + d.lz.total_bytes += cur_match_len; + lookahead_pos += cur_match_len as usize; + d.dict.size = cmp::min(d.dict.size + cur_match_len as usize, LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len as usize) & LZ_DICT_SIZE_MASK; + lookahead_size -= cur_match_len as usize; + + if d.lz.code_position > LZ_CODE_BUF_SIZE - 8 { + // These values are used in flush_block, so we need to write them back here. + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + + let n = match flush_block(d, callback, TDEFLFlush::None) { + Err(_) => { + d.params.src_pos = src_pos; + d.params.prev_return_status = TDEFLStatus::PutBufFailed; + return false; + } + Ok(status) => status, + }; + if n != 0 { + d.params.src_pos = src_pos; + return n > 0; + } + debug_assert!(d.lz.code_position < LZ_CODE_BUF_SIZE - 2); + + lookahead_size = d.dict.lookahead_size; + lookahead_pos = d.dict.lookahead_pos; + } + } + } + + while lookahead_size != 0 { + let lit = d.dict.b.dict[cur_pos as usize]; + d.lz.total_bytes += 1; + d.lz.write_code(lit); + *d.lz.get_flag() >>= 1; + d.lz.consume_flag(); + + d.huff.count[0][lit as usize] += 1; + lookahead_pos += 1; + d.dict.size = cmp::min(d.dict.size + 1, LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & LZ_DICT_SIZE_MASK; + lookahead_size -= 1; + + if d.lz.code_position > LZ_CODE_BUF_SIZE - 8 { + // These values are used in flush_block, so we need to write them back here. + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + + let n = match flush_block(d, callback, TDEFLFlush::None) { + Err(_) => { + d.params.prev_return_status = TDEFLStatus::PutBufFailed; + d.params.src_pos = src_pos; + return false; + } + Ok(status) => status, + }; + if n != 0 { + d.params.src_pos = src_pos; + return n > 0; + } + + lookahead_size = d.dict.lookahead_size; + lookahead_pos = d.dict.lookahead_pos; + } + } + } + + d.params.src_pos = src_pos; + d.dict.lookahead_size = lookahead_size; + d.dict.lookahead_pos = lookahead_pos; + true +} + +fn flush_output_buffer(c: &mut CallbackOxide, p: &mut ParamsOxide) -> (TDEFLStatus, usize, usize) { + let mut res = (TDEFLStatus::Okay, p.src_pos, 0); + if let CallbackOut::Buf(ref mut cb) = c.out { + let n = cmp::min(cb.out_buf.len() - p.out_buf_ofs, p.flush_remaining as usize); + if n != 0 { + (&mut cb.out_buf[p.out_buf_ofs..p.out_buf_ofs + n]) + .copy_from_slice(&p.local_buf.b[p.flush_ofs as usize..p.flush_ofs as usize + n]); + } + p.flush_ofs += n as u32; + p.flush_remaining -= n as u32; + p.out_buf_ofs += n; + res.2 = p.out_buf_ofs; + } + + if p.finished && p.flush_remaining == 0 { + res.0 = TDEFLStatus::Done + } + res +} + +/// Main compression function. Tries to compress as much as possible from `in_buf` and +/// puts compressed output into `out_buf`. +/// +/// The value of `flush` determines if the compressor should attempt to flush all output +/// and alternatively try to finish the stream. +/// +/// Use [`TDEFLFlush::Finish`] on the final call to signal that the stream is finishing. +/// +/// Note that this function does not keep track of whether a flush marker has been output, so +/// if called using [`TDEFLFlush::Sync`], the caller needs to ensure there is enough space in the +/// output buffer if they want to avoid repeated flush markers. +/// See #105 for details. +/// +/// # Returns +/// Returns a tuple containing the current status of the compressor, the current position +/// in the input buffer and the current position in the output buffer. +pub fn compress( + d: &mut CompressorOxide, + in_buf: &[u8], + out_buf: &mut [u8], + flush: TDEFLFlush, +) -> (TDEFLStatus, usize, usize) { + compress_inner( + d, + &mut CallbackOxide::new_callback_buf(in_buf, out_buf), + flush, + ) +} + +/// Main compression function. Callbacks output. +/// +/// # Returns +/// Returns a tuple containing the current status of the compressor, the current position +/// in the input buffer. +/// +/// The caller is responsible for ensuring the `CallbackFunc` struct will not cause undefined +/// behaviour. +pub fn compress_to_output( + d: &mut CompressorOxide, + in_buf: &[u8], + flush: TDEFLFlush, + mut callback_func: impl FnMut(&[u8]) -> bool, +) -> (TDEFLStatus, usize) { + let res = compress_inner( + d, + &mut CallbackOxide::new_callback_func( + in_buf, + CallbackFunc { + put_buf_func: &mut callback_func, + }, + ), + flush, + ); + + (res.0, res.1) +} + +fn compress_inner( + d: &mut CompressorOxide, + callback: &mut CallbackOxide, + flush: TDEFLFlush, +) -> (TDEFLStatus, usize, usize) { + d.params.out_buf_ofs = 0; + d.params.src_pos = 0; + + let prev_ok = d.params.prev_return_status == TDEFLStatus::Okay; + let flush_finish_once = d.params.flush != TDEFLFlush::Finish || flush == TDEFLFlush::Finish; + + d.params.flush = flush; + if !prev_ok || !flush_finish_once { + d.params.prev_return_status = TDEFLStatus::BadParam; + return (d.params.prev_return_status, 0, 0); + } + + if d.params.flush_remaining != 0 || d.params.finished { + let res = flush_output_buffer(callback, &mut d.params); + d.params.prev_return_status = res.0; + return res; + } + + let one_probe = d.params.flags & MAX_PROBES_MASK as u32 == 1; + let greedy = d.params.flags & TDEFL_GREEDY_PARSING_FLAG != 0; + let filter_or_rle_or_raw = d.params.flags + & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES) + != 0; + + let compress_success = if one_probe && greedy && !filter_or_rle_or_raw { + compress_fast(d, callback) + } else { + compress_normal(d, callback) + }; + + if !compress_success { + return ( + d.params.prev_return_status, + d.params.src_pos, + d.params.out_buf_ofs, + ); + } + + if let Some(in_buf) = callback.in_buf { + if d.params.flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32) != 0 { + d.params.adler32 = update_adler32(d.params.adler32, &in_buf[..d.params.src_pos]); + } + } + + let flush_none = d.params.flush == TDEFLFlush::None; + let in_left = callback.in_buf.map_or(0, |buf| buf.len()) - d.params.src_pos; + let remaining = in_left != 0 || d.params.flush_remaining != 0; + if !flush_none && d.dict.lookahead_size == 0 && !remaining { + let flush = d.params.flush; + match flush_block(d, callback, flush) { + Err(_) => { + d.params.prev_return_status = TDEFLStatus::PutBufFailed; + return ( + d.params.prev_return_status, + d.params.src_pos, + d.params.out_buf_ofs, + ); + } + Ok(x) if x < 0 => { + return ( + d.params.prev_return_status, + d.params.src_pos, + d.params.out_buf_ofs, + ) + } + _ => { + d.params.finished = d.params.flush == TDEFLFlush::Finish; + if d.params.flush == TDEFLFlush::Full { + memset(&mut d.dict.b.hash[..], 0); + memset(&mut d.dict.b.next[..], 0); + d.dict.size = 0; + } + } + } + } + + let res = flush_output_buffer(callback, &mut d.params); + d.params.prev_return_status = res.0; + + res +} + +/// Create a set of compression flags using parameters used by zlib and other compressors. +/// Mainly intended for use with transition from c libraries as it deals with raw integers. +/// +/// # Parameters +/// `level` determines compression level. Clamped to maximum of 10. Negative values result in +/// `CompressionLevel::DefaultLevel`. +/// `window_bits`: Above 0, wraps the stream in a zlib wrapper, 0 or negative for a raw deflate +/// stream. +/// `strategy`: Sets the strategy if this conforms to any of the values in `CompressionStrategy`. +/// +/// # Notes +/// This function may be removed or moved to the `miniz_oxide_c_api` in the future. +pub fn create_comp_flags_from_zip_params(level: i32, window_bits: i32, strategy: i32) -> u32 { + let num_probes = (if level >= 0 { + cmp::min(10, level) + } else { + CompressionLevel::DefaultLevel as i32 + }) as usize; + let greedy = if level <= 3 { + TDEFL_GREEDY_PARSING_FLAG + } else { + 0 + }; + let mut comp_flags = NUM_PROBES[num_probes] | greedy; + + if window_bits > 0 { + comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + } + + if level == 0 { + comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + } else if strategy == CompressionStrategy::Filtered as i32 { + comp_flags |= TDEFL_FILTER_MATCHES; + } else if strategy == CompressionStrategy::HuffmanOnly as i32 { + comp_flags &= !MAX_PROBES_MASK as u32; + } else if strategy == CompressionStrategy::Fixed as i32 { + comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + } else if strategy == CompressionStrategy::RLE as i32 { + comp_flags |= TDEFL_RLE_MATCHES; + } + + comp_flags +} + +#[cfg(test)] +mod test { + use super::{ + compress_to_output, create_comp_flags_from_zip_params, read_u16_le, write_u16_le, + CompressionStrategy, CompressorOxide, TDEFLFlush, TDEFLStatus, DEFAULT_FLAGS, + MZ_DEFAULT_WINDOW_BITS, + }; + use crate::inflate::decompress_to_vec; + use alloc::vec; + + #[test] + fn u16_to_slice() { + let mut slice = [0, 0]; + write_u16_le(2000, &mut slice, 0); + assert_eq!(slice, [208, 7]); + } + + #[test] + fn u16_from_slice() { + let mut slice = [208, 7]; + assert_eq!(read_u16_le(&mut slice, 0), 2000); + } + + #[test] + fn compress_output() { + assert_eq!( + DEFAULT_FLAGS, + create_comp_flags_from_zip_params( + 4, + MZ_DEFAULT_WINDOW_BITS, + CompressionStrategy::Default as i32 + ) + ); + + let slice = [ + 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, + ]; + let mut encoded = vec![]; + let flags = create_comp_flags_from_zip_params(6, 0, 0); + let mut d = CompressorOxide::new(flags); + let (status, in_consumed) = + compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { + encoded.extend_from_slice(out); + true + }); + + assert_eq!(status, TDEFLStatus::Done); + assert_eq!(in_consumed, slice.len()); + + let decoded = decompress_to_vec(&encoded[..]).unwrap(); + assert_eq!(&decoded[..], &slice[..]); + } + + #[test] + /// Check fast compress mode + fn compress_fast() { + let slice = [ + 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, + ]; + let mut encoded = vec![]; + let flags = create_comp_flags_from_zip_params(1, 0, 0); + let mut d = CompressorOxide::new(flags); + let (status, in_consumed) = + compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { + encoded.extend_from_slice(out); + true + }); + + assert_eq!(status, TDEFLStatus::Done); + assert_eq!(in_consumed, slice.len()); + + // Needs to be altered if algorithm improves. + assert_eq!( + &encoded[..], + [99, 100, 98, 102, 1, 98, 48, 98, 3, 147, 204, 76, 204, 140, 76, 204, 0] + ); + + let decoded = decompress_to_vec(&encoded[..]).unwrap(); + assert_eq!(&decoded[..], &slice[..]); + } +} diff --git a/third_party/rust/miniz_oxide/src/deflate/mod.rs b/third_party/rust/miniz_oxide/src/deflate/mod.rs new file mode 100644 index 0000000000..471b94b9d8 --- /dev/null +++ b/third_party/rust/miniz_oxide/src/deflate/mod.rs @@ -0,0 +1,227 @@ +//! This module contains functionality for compression. + +use crate::alloc::vec; +use crate::alloc::vec::Vec; + +mod buffer; +pub mod core; +pub mod stream; +use self::core::*; + +/// How much processing the compressor should do to compress the data. +/// `NoCompression` and `Bestspeed` have special meanings, the other levels determine the number +/// of checks for matches in the hash chains and whether to use lazy or greedy parsing. +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum CompressionLevel { + /// Don't do any compression, only output uncompressed blocks. + NoCompression = 0, + /// Fast compression. Uses a special compression routine that is optimized for speed. + BestSpeed = 1, + /// Slow/high compression. Do a lot of checks to try to find good matches. + BestCompression = 9, + /// Even more checks, can be very slow. + UberCompression = 10, + /// Default compromise between speed and compression. + DefaultLevel = 6, + /// Use the default compression level. + DefaultCompression = -1, +} + +// Missing safe rust analogue (this and mem-to-mem are quite similar) +/* +fn tdefl_compress( + d: Option<&mut CompressorOxide>, + in_buf: *const c_void, + in_size: Option<&mut usize>, + out_buf: *mut c_void, + out_size: Option<&mut usize>, + flush: TDEFLFlush, +) -> TDEFLStatus { + let res = match d { + None => { + in_size.map(|size| *size = 0); + out_size.map(|size| *size = 0); + (TDEFLStatus::BadParam, 0, 0) + }, + Some(compressor) => { + let callback_res = CallbackOxide::new( + compressor.callback_func.clone(), + in_buf, + in_size, + out_buf, + out_size, + ); + + if let Ok(mut callback) = callback_res { + let res = compress(compressor, &mut callback, flush); + callback.update_size(Some(res.1), Some(res.2)); + res + } else { + (TDEFLStatus::BadParam, 0, 0) + } + } + }; + res.0 +}*/ + +// Missing safe rust analogue +/* +fn tdefl_init( + d: Option<&mut CompressorOxide>, + put_buf_func: PutBufFuncPtr, + put_buf_user: *mut c_void, + flags: c_int, +) -> TDEFLStatus { + if let Some(d) = d { + *d = CompressorOxide::new( + put_buf_func.map(|func| + CallbackFunc { put_buf_func: func, put_buf_user: put_buf_user } + ), + flags as u32, + ); + TDEFLStatus::Okay + } else { + TDEFLStatus::BadParam + } +}*/ + +// Missing safe rust analogue (though maybe best served by flate2 front-end instead) +/* +fn tdefl_compress_mem_to_output( + buf: *const c_void, + buf_len: usize, + put_buf_func: PutBufFuncPtr, + put_buf_user: *mut c_void, + flags: c_int, +) -> bool*/ + +// Missing safe Rust analogue +/* +fn tdefl_compress_mem_to_mem( + out_buf: *mut c_void, + out_buf_len: usize, + src_buf: *const c_void, + src_buf_len: usize, + flags: c_int, +) -> usize*/ + +/// Compress the input data to a vector, using the specified compression level (0-10). +pub fn compress_to_vec(input: &[u8], level: u8) -> Vec<u8> { + compress_to_vec_inner(input, level, 0, 0) +} + +/// Compress the input data to a vector, using the specified compression level (0-10), and with a +/// zlib wrapper. +pub fn compress_to_vec_zlib(input: &[u8], level: u8) -> Vec<u8> { + compress_to_vec_inner(input, level, 1, 0) +} + +/// Simple function to compress data to a vec. +fn compress_to_vec_inner(input: &[u8], level: u8, window_bits: i32, strategy: i32) -> Vec<u8> { + // The comp flags function sets the zlib flag if the window_bits parameter is > 0. + let flags = create_comp_flags_from_zip_params(level.into(), window_bits, strategy); + let mut compressor = CompressorOxide::new(flags); + let mut output = vec![0; ::core::cmp::max(input.len() / 2, 2)]; + + let mut in_pos = 0; + let mut out_pos = 0; + loop { + let (status, bytes_in, bytes_out) = compress( + &mut compressor, + &input[in_pos..], + &mut output[out_pos..], + TDEFLFlush::Finish, + ); + + out_pos += bytes_out; + in_pos += bytes_in; + + match status { + TDEFLStatus::Done => { + output.truncate(out_pos); + break; + } + TDEFLStatus::Okay => { + // We need more space, so resize the vector. + if output.len().saturating_sub(out_pos) < 30 { + output.resize(output.len() * 2, 0) + } + } + // Not supposed to happen unless there is a bug. + _ => panic!("Bug! Unexpectedly failed to compress!"), + } + } + + output +} + +#[cfg(test)] +mod test { + use super::{compress_to_vec, compress_to_vec_inner, CompressionStrategy}; + use crate::inflate::decompress_to_vec; + use alloc::vec; + + /// Test deflate example. + /// + /// Check if the encoder produces the same code as the example given by Mark Adler here: + /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203 + #[test] + fn compress_small() { + let test_data = b"Deflate late"; + let check = [ + 0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00, + ]; + + let res = compress_to_vec(test_data, 1); + assert_eq!(&check[..], res.as_slice()); + + let res = compress_to_vec(test_data, 9); + assert_eq!(&check[..], res.as_slice()); + } + + #[test] + fn compress_huff_only() { + let test_data = b"Deflate late"; + + let res = compress_to_vec_inner(test_data, 1, 0, CompressionStrategy::HuffmanOnly as i32); + let d = decompress_to_vec(res.as_slice()).expect("Failed to decompress!"); + assert_eq!(test_data, d.as_slice()); + } + + /// Test that a raw block compresses fine. + #[test] + fn compress_raw() { + let text = b"Hello, zlib!"; + let encoded = { + let len = text.len(); + let notlen = !len; + let mut encoded = vec![ + 1, + len as u8, + (len >> 8) as u8, + notlen as u8, + (notlen >> 8) as u8, + ]; + encoded.extend_from_slice(&text[..]); + encoded + }; + + let res = compress_to_vec(text, 0); + assert_eq!(encoded, res.as_slice()); + } + + #[test] + fn short() { + let test_data = [10, 10, 10, 10, 10, 55]; + let c = compress_to_vec(&test_data, 9); + + let d = decompress_to_vec(c.as_slice()).expect("Failed to decompress!"); + assert_eq!(&test_data, d.as_slice()); + // Check that a static block is used here, rather than a raw block + // , so the data is actually compressed. + // (The optimal compressed length would be 5, but neither miniz nor zlib manages that either + // as neither checks matches against the byte at index 0.) + assert!(c.len() <= 6); + } +} diff --git a/third_party/rust/miniz_oxide/src/deflate/stream.rs b/third_party/rust/miniz_oxide/src/deflate/stream.rs new file mode 100644 index 0000000000..39aa82d924 --- /dev/null +++ b/third_party/rust/miniz_oxide/src/deflate/stream.rs @@ -0,0 +1,121 @@ +//! Extra streaming compression functionality. +//! +//! As of now this is mainly intended for use to build a higher-level wrapper. +//! +//! There is no DeflateState as the needed state is contained in the compressor struct itself. + +use crate::deflate::core::{compress, CompressorOxide, TDEFLFlush, TDEFLStatus}; +use crate::{MZError, MZFlush, MZStatus, StreamResult}; + +/// Try to compress from input to output with the given [`CompressorOxide`]. +/// +/// # Errors +/// +/// Returns [`MZError::Buf`] If the size of the `output` slice is empty or no progress was made due +/// to lack of expected input data, or if called without [`MZFlush::Finish`] after the compression +/// was already finished. +/// +/// Returns [`MZError::Param`] if the compressor parameters are set wrong. +/// +/// Returns [`MZError::Stream`] when lower-level decompressor returns a +/// [`TDEFLStatus::PutBufFailed`]; may not actually be possible. +pub fn deflate( + compressor: &mut CompressorOxide, + input: &[u8], + output: &mut [u8], + flush: MZFlush, +) -> StreamResult { + if output.is_empty() { + return StreamResult::error(MZError::Buf); + } + + if compressor.prev_return_status() == TDEFLStatus::Done { + return if flush == MZFlush::Finish { + StreamResult { + bytes_written: 0, + bytes_consumed: 0, + status: Ok(MZStatus::StreamEnd), + } + } else { + StreamResult::error(MZError::Buf) + }; + } + + let mut bytes_written = 0; + let mut bytes_consumed = 0; + + let mut next_in = input; + let mut next_out = output; + + let status = loop { + let in_bytes; + let out_bytes; + let defl_status = { + let res = compress(compressor, next_in, next_out, TDEFLFlush::from(flush)); + in_bytes = res.1; + out_bytes = res.2; + res.0 + }; + + next_in = &next_in[in_bytes..]; + next_out = &mut next_out[out_bytes..]; + bytes_consumed += in_bytes; + bytes_written += out_bytes; + + // Check if we are done, or compression failed. + match defl_status { + TDEFLStatus::BadParam => break Err(MZError::Param), + // Don't think this can happen as we're not using a custom callback. + TDEFLStatus::PutBufFailed => break Err(MZError::Stream), + TDEFLStatus::Done => break Ok(MZStatus::StreamEnd), + _ => (), + }; + + // All the output space was used, so wait for more. + if next_out.is_empty() { + break Ok(MZStatus::Ok); + } + + if next_in.is_empty() && (flush != MZFlush::Finish) { + let total_changed = bytes_written > 0 || bytes_consumed > 0; + + break if (flush != MZFlush::None) || total_changed { + // We wrote or consumed something, and/or did a flush (sync/partial etc.). + Ok(MZStatus::Ok) + } else { + // No more input data, not flushing, and nothing was consumed or written, + // so couldn't make any progress. + Err(MZError::Buf) + }; + } + }; + StreamResult { + bytes_consumed, + bytes_written, + status, + } +} + +#[cfg(test)] +mod test { + use super::deflate; + use crate::deflate::CompressorOxide; + use crate::inflate::decompress_to_vec_zlib; + use crate::{MZFlush, MZStatus}; + use alloc::boxed::Box; + use alloc::vec; + + #[test] + fn test_state() { + let data = b"Hello zlib!"; + let mut compressed = vec![0; 50]; + let mut compressor = Box::<CompressorOxide>::default(); + let res = deflate(&mut compressor, data, &mut compressed, MZFlush::Finish); + let status = res.status.expect("Failed to compress!"); + let decomp = + decompress_to_vec_zlib(&compressed).expect("Failed to decompress compressed data"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(decomp[..], data[..]); + assert_eq!(res.bytes_consumed, data.len()); + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/core.rs b/third_party/rust/miniz_oxide/src/inflate/core.rs new file mode 100644 index 0000000000..630e5e6fdb --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/core.rs @@ -0,0 +1,1932 @@ +//! Streaming decompression functionality. + +use super::*; +use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER}; + +use ::core::convert::TryInto; +use ::core::{cmp, slice}; + +use self::output_buffer::OutputBuffer; + +pub const TINFL_LZ_DICT_SIZE: usize = 32_768; + +/// A struct containing huffman code lengths and the huffman code tree used by the decompressor. +struct HuffmanTable { + /// Length of the code at each index. + pub code_size: [u8; MAX_HUFF_SYMBOLS_0], + /// Fast lookup table for shorter huffman codes. + /// + /// See `HuffmanTable::fast_lookup`. + pub look_up: [i16; FAST_LOOKUP_SIZE as usize], + /// Full huffman tree. + /// + /// Positive values are edge nodes/symbols, negative values are + /// parent nodes/references to other nodes. + pub tree: [i16; MAX_HUFF_TREE_SIZE], +} + +impl HuffmanTable { + const fn new() -> HuffmanTable { + HuffmanTable { + code_size: [0; MAX_HUFF_SYMBOLS_0], + look_up: [0; FAST_LOOKUP_SIZE as usize], + tree: [0; MAX_HUFF_TREE_SIZE], + } + } + + /// Look for a symbol in the fast lookup table. + /// The symbol is stored in the lower 9 bits, the length in the next 6. + /// If the returned value is negative, the code wasn't found in the + /// fast lookup table and the full tree has to be traversed to find the code. + #[inline] + fn fast_lookup(&self, bit_buf: BitBuffer) -> i16 { + self.look_up[(bit_buf & BitBuffer::from(FAST_LOOKUP_SIZE - 1)) as usize] + } + + /// Get the symbol and the code length from the huffman tree. + #[inline] + fn tree_lookup(&self, fast_symbol: i32, bit_buf: BitBuffer, mut code_len: u32) -> (i32, u32) { + let mut symbol = fast_symbol; + // We step through the tree until we encounter a positive value, which indicates a + // symbol. + loop { + // symbol here indicates the position of the left (0) node, if the next bit is 1 + // we add 1 to the lookup position to get the right node. + symbol = i32::from(self.tree[(!symbol + ((bit_buf >> code_len) & 1) as i32) as usize]); + code_len += 1; + if symbol >= 0 { + break; + } + } + (symbol, code_len) + } + + #[inline] + /// Look up a symbol and code length from the bits in the provided bit buffer. + /// + /// Returns Some(symbol, length) on success, + /// None if the length is 0. + /// + /// It's possible we could avoid checking for 0 if we can guarantee a sane table. + /// TODO: Check if a smaller type for code_len helps performance. + fn lookup(&self, bit_buf: BitBuffer) -> Option<(i32, u32)> { + let symbol = self.fast_lookup(bit_buf).into(); + if symbol >= 0 { + if (symbol >> 9) as u32 != 0 { + Some((symbol, (symbol >> 9) as u32)) + } else { + // Zero-length code. + None + } + } else { + // We didn't get a symbol from the fast lookup table, so check the tree instead. + Some(self.tree_lookup(symbol, bit_buf, FAST_LOOKUP_BITS.into())) + } + } +} + +/// The number of huffman tables used. +const MAX_HUFF_TABLES: usize = 3; +/// The length of the first (literal/length) huffman table. +const MAX_HUFF_SYMBOLS_0: usize = 288; +/// The length of the second (distance) huffman table. +const MAX_HUFF_SYMBOLS_1: usize = 32; +/// The length of the last (huffman code length) huffman table. +const _MAX_HUFF_SYMBOLS_2: usize = 19; +/// The maximum length of a code that can be looked up in the fast lookup table. +const FAST_LOOKUP_BITS: u8 = 10; +/// The size of the fast lookup table. +const FAST_LOOKUP_SIZE: u32 = 1 << FAST_LOOKUP_BITS; +const MAX_HUFF_TREE_SIZE: usize = MAX_HUFF_SYMBOLS_0 * 2; +const LITLEN_TABLE: usize = 0; +const DIST_TABLE: usize = 1; +const HUFFLEN_TABLE: usize = 2; + +/// Flags to [`decompress()`] to control how inflation works. +/// +/// These define bits for a bitmask argument. +pub mod inflate_flags { + /// Should we try to parse a zlib header? + /// + /// If unset, the function will expect an RFC1951 deflate stream. If set, it will expect a + /// RFC1950 zlib wrapper around the deflate stream. + pub const TINFL_FLAG_PARSE_ZLIB_HEADER: u32 = 1; + + /// There will be more input that hasn't been given to the decompressor yet. + /// + /// This is useful when you want to decompress what you have so far, + /// even if you know there is probably more input that hasn't gotten here yet (_e.g._, over a + /// network connection). When [`decompress()`][super::decompress] reaches the end of the input + /// without finding the end of the compressed stream, it will return + /// [`TINFLStatus::NeedsMoreInput`][super::TINFLStatus::NeedsMoreInput] if this is set, + /// indicating that you should get more data before calling again. If not set, it will return + /// [`TINFLStatus::FailedCannotMakeProgress`][super::TINFLStatus::FailedCannotMakeProgress] + /// suggesting the stream is corrupt, since you claimed it was all there. + pub const TINFL_FLAG_HAS_MORE_INPUT: u32 = 2; + + /// The output buffer should not wrap around. + pub const TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: u32 = 4; + + /// Calculate the adler32 checksum of the output data even if we're not inflating a zlib stream. + /// + /// If [`TINFL_FLAG_IGNORE_ADLER32`] is specified, it will override this. + /// + /// NOTE: Enabling/disabling this between calls to decompress will result in an incorect + /// checksum. + pub const TINFL_FLAG_COMPUTE_ADLER32: u32 = 8; + + /// Ignore adler32 checksum even if we are inflating a zlib stream. + /// + /// Overrides [`TINFL_FLAG_COMPUTE_ADLER32`] if both are enabled. + /// + /// NOTE: This flag does not exist in miniz as it does not support this and is a + /// custom addition for miniz_oxide. + /// + /// NOTE: Should not be changed from enabled to disabled after decompression has started, + /// this will result in checksum failure (outside the unlikely event where the checksum happens + /// to match anyway). + pub const TINFL_FLAG_IGNORE_ADLER32: u32 = 64; +} + +use self::inflate_flags::*; + +const MIN_TABLE_SIZES: [u16; 3] = [257, 1, 4]; + +#[cfg(target_pointer_width = "64")] +type BitBuffer = u64; + +#[cfg(not(target_pointer_width = "64"))] +type BitBuffer = u32; + +/// Main decompression struct. +/// +pub struct DecompressorOxide { + /// Current state of the decompressor. + state: core::State, + /// Number of bits in the bit buffer. + num_bits: u32, + /// Zlib CMF + z_header0: u32, + /// Zlib FLG + z_header1: u32, + /// Adler32 checksum from the zlib header. + z_adler32: u32, + /// 1 if the current block is the last block, 0 otherwise. + finish: u32, + /// The type of the current block. + block_type: u32, + /// 1 if the adler32 value should be checked. + check_adler32: u32, + /// Last match distance. + dist: u32, + /// Variable used for match length, symbols, and a number of other things. + counter: u32, + /// Number of extra bits for the last length or distance code. + num_extra: u32, + /// Number of entries in each huffman table. + table_sizes: [u32; MAX_HUFF_TABLES], + /// Buffer of input data. + bit_buf: BitBuffer, + /// Huffman tables. + tables: [HuffmanTable; MAX_HUFF_TABLES], + /// Raw block header. + raw_header: [u8; 4], + /// Huffman length codes. + len_codes: [u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137], +} + +impl DecompressorOxide { + /// Create a new tinfl_decompressor with all fields set to 0. + pub fn new() -> DecompressorOxide { + DecompressorOxide::default() + } + + /// Set the current state to `Start`. + #[inline] + pub fn init(&mut self) { + // The rest of the data is reset or overwritten when used. + self.state = core::State::Start; + } + + /// Returns the adler32 checksum of the currently decompressed data. + /// Note: Will return Some(1) if decompressing zlib but ignoring adler32. + #[inline] + pub fn adler32(&self) -> Option<u32> { + if self.state != State::Start && !self.state.is_failure() && self.z_header0 != 0 { + Some(self.check_adler32) + } else { + None + } + } + + /// Returns the adler32 that was read from the zlib header if it exists. + #[inline] + pub fn adler32_header(&self) -> Option<u32> { + if self.state != State::Start && self.state != State::BadZlibHeader && self.z_header0 != 0 { + Some(self.z_adler32) + } else { + None + } + } +} + +impl Default for DecompressorOxide { + /// Create a new tinfl_decompressor with all fields set to 0. + #[inline(always)] + fn default() -> Self { + DecompressorOxide { + state: core::State::Start, + num_bits: 0, + z_header0: 0, + z_header1: 0, + z_adler32: 0, + finish: 0, + block_type: 0, + check_adler32: 0, + dist: 0, + counter: 0, + num_extra: 0, + table_sizes: [0; MAX_HUFF_TABLES], + bit_buf: 0, + // TODO:(oyvindln) Check that copies here are optimized out in release mode. + tables: [ + HuffmanTable::new(), + HuffmanTable::new(), + HuffmanTable::new(), + ], + raw_header: [0; 4], + len_codes: [0; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137], + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +enum State { + Start = 0, + ReadZlibCmf, + ReadZlibFlg, + ReadBlockHeader, + BlockTypeNoCompression, + RawHeader, + RawMemcpy1, + RawMemcpy2, + ReadTableSizes, + ReadHufflenTableCodeSize, + ReadLitlenDistTablesCodeSize, + ReadExtraBitsCodeSize, + DecodeLitlen, + WriteSymbol, + ReadExtraBitsLitlen, + DecodeDistance, + ReadExtraBitsDistance, + RawReadFirstByte, + RawStoreFirstByte, + WriteLenBytesToEnd, + BlockDone, + HuffDecodeOuterLoop1, + HuffDecodeOuterLoop2, + ReadAdler32, + + DoneForever, + + // Failure states. + BlockTypeUnexpected, + BadCodeSizeSum, + BadTotalSymbols, + BadZlibHeader, + DistanceOutOfBounds, + BadRawLength, + BadCodeSizeDistPrevLookup, + InvalidLitlen, + InvalidDist, + InvalidCodeLen, +} + +impl State { + fn is_failure(self) -> bool { + match self { + BlockTypeUnexpected => true, + BadCodeSizeSum => true, + BadTotalSymbols => true, + BadZlibHeader => true, + DistanceOutOfBounds => true, + BadRawLength => true, + BadCodeSizeDistPrevLookup => true, + InvalidLitlen => true, + InvalidDist => true, + _ => false, + } + } + + #[inline] + fn begin(&mut self, new_state: State) { + *self = new_state; + } +} + +use self::State::*; + +// Not sure why miniz uses 32-bit values for these, maybe alignment/cache again? +// # Optimization +// We add a extra value at the end and make the tables 32 elements long +// so we can use a mask to avoid bounds checks. +// The invalid values are set to something high enough to avoid underflowing +// the match length. +/// Base length for each length code. +/// +/// The base is used together with the value of the extra bits to decode the actual +/// length/distance values in a match. +#[rustfmt::skip] +const LENGTH_BASE: [u16; 32] = [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 512, 512, 512 +]; + +/// Number of extra bits for each length code. +#[rustfmt::skip] +const LENGTH_EXTRA: [u8; 32] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0 +]; + +/// Base length for each distance code. +#[rustfmt::skip] +const DIST_BASE: [u16; 32] = [ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, + 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, + 2049, 3073, 4097, 6145, 8193, 12_289, 16_385, 24_577, 32_768, 32_768 +]; + +/// Number of extra bits for each distance code. +#[rustfmt::skip] +const DIST_EXTRA: [u8; 32] = [ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13 +]; + +/// The mask used when indexing the base/extra arrays. +const BASE_EXTRA_MASK: usize = 32 - 1; + +/// Sets the value of all the elements of the slice to `val`. +#[inline] +fn memset<T: Copy>(slice: &mut [T], val: T) { + for x in slice { + *x = val + } +} + +/// Read an le u16 value from the slice iterator. +/// +/// # Panics +/// Panics if there are less than two bytes left. +#[inline] +fn read_u16_le(iter: &mut slice::Iter<u8>) -> u16 { + let ret = { + let two_bytes = iter.as_ref()[..2].try_into().unwrap(); + u16::from_le_bytes(two_bytes) + }; + iter.nth(1); + ret +} + +/// Read an le u32 value from the slice iterator. +/// +/// # Panics +/// Panics if there are less than four bytes left. +#[inline(always)] +#[cfg(target_pointer_width = "64")] +fn read_u32_le(iter: &mut slice::Iter<u8>) -> u32 { + let ret = { + let four_bytes: [u8; 4] = iter.as_ref()[..4].try_into().unwrap(); + u32::from_le_bytes(four_bytes) + }; + iter.nth(3); + ret +} + +/// Ensure that there is data in the bit buffer. +/// +/// On 64-bit platform, we use a 64-bit value so this will +/// result in there being at least 32 bits in the bit buffer. +/// This function assumes that there is at least 4 bytes left in the input buffer. +#[inline(always)] +#[cfg(target_pointer_width = "64")] +fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>) { + // Read four bytes into the buffer at once. + if l.num_bits < 30 { + l.bit_buf |= BitBuffer::from(read_u32_le(in_iter)) << l.num_bits; + l.num_bits += 32; + } +} + +/// Same as previous, but for non-64-bit platforms. +/// Ensures at least 16 bits are present, requires at least 2 bytes in the in buffer. +#[inline(always)] +#[cfg(not(target_pointer_width = "64"))] +fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>) { + // If the buffer is 32-bit wide, read 2 bytes instead. + if l.num_bits < 15 { + l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits; + l.num_bits += 16; + } +} + +/// Check that the zlib header is correct and that there is enough space in the buffer +/// for the window size specified in the header. +/// +/// See https://tools.ietf.org/html/rfc1950 +#[inline] +fn validate_zlib_header(cmf: u32, flg: u32, flags: u32, mask: usize) -> Action { + let mut failed = + // cmf + flg should be divisible by 31. + (((cmf * 256) + flg) % 31 != 0) || + // If this flag is set, a dictionary was used for this zlib compressed data. + // This is currently not supported by miniz or miniz-oxide + ((flg & 0b0010_0000) != 0) || + // Compression method. Only 8(DEFLATE) is defined by the standard. + ((cmf & 15) != 8); + + let window_size = 1 << ((cmf >> 4) + 8); + if (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) == 0 { + // Bail if the buffer is wrapping and the window size is larger than the buffer. + failed |= (mask + 1) < window_size; + } + + // Zlib doesn't allow window sizes above 32 * 1024. + failed |= window_size > 32_768; + + if failed { + Action::Jump(BadZlibHeader) + } else { + Action::Jump(ReadBlockHeader) + } +} + +enum Action { + None, + Jump(State), + End(TINFLStatus), +} + +/// Try to decode the next huffman code, and puts it in the counter field of the decompressor +/// if successful. +/// +/// # Returns +/// The specified action returned from `f` on success, +/// `Action::End` if there are not enough data left to decode a symbol. +fn decode_huffman_code<F>( + r: &mut DecompressorOxide, + l: &mut LocalVars, + table: usize, + flags: u32, + in_iter: &mut slice::Iter<u8>, + f: F, +) -> Action +where + F: FnOnce(&mut DecompressorOxide, &mut LocalVars, i32) -> Action, +{ + // As the huffman codes can be up to 15 bits long we need at least 15 bits + // ready in the bit buffer to start decoding the next huffman code. + if l.num_bits < 15 { + // First, make sure there is enough data in the bit buffer to decode a huffman code. + if in_iter.len() < 2 { + // If there is less than 2 bytes left in the input buffer, we try to look up + // the huffman code with what's available, and return if that doesn't succeed. + // Original explanation in miniz: + // /* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes + // * remaining in the input buffer falls below 2. */ + // /* It reads just enough bytes from the input stream that are needed to decode + // * the next Huffman code (and absolutely no more). It works by trying to fully + // * decode a */ + // /* Huffman code by using whatever bits are currently present in the bit buffer. + // * If this fails, it reads another byte, and tries again until it succeeds or + // * until the */ + // /* bit buffer contains >=15 bits (deflate's max. Huffman code size). */ + loop { + let mut temp = i32::from(r.tables[table].fast_lookup(l.bit_buf)); + + if temp >= 0 { + let code_len = (temp >> 9) as u32; + if (code_len != 0) && (l.num_bits >= code_len) { + break; + } + } else if l.num_bits > FAST_LOOKUP_BITS.into() { + let mut code_len = u32::from(FAST_LOOKUP_BITS); + loop { + temp = i32::from( + r.tables[table].tree + [(!temp + ((l.bit_buf >> code_len) & 1) as i32) as usize], + ); + code_len += 1; + if temp >= 0 || l.num_bits < code_len + 1 { + break; + } + } + if temp >= 0 { + break; + } + } + + // TODO: miniz jumps straight to here after getting here again after failing to read + // a byte. + // Doing that lets miniz avoid re-doing the lookup that that was done in the + // previous call. + let mut byte = 0; + if let a @ Action::End(_) = read_byte(in_iter, flags, |b| { + byte = b; + Action::None + }) { + return a; + }; + + // Do this outside closure for now to avoid borrowing r. + l.bit_buf |= BitBuffer::from(byte) << l.num_bits; + l.num_bits += 8; + + if l.num_bits >= 15 { + break; + } + } + } else { + // There is enough data in the input buffer, so read the next two bytes + // and add them to the bit buffer. + // Unwrapping here is fine since we just checked that there are at least two + // bytes left. + l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits; + l.num_bits += 16; + } + } + + // We now have at least 15 bits in the input buffer. + let mut symbol = i32::from(r.tables[table].fast_lookup(l.bit_buf)); + let code_len; + // If the symbol was found in the fast lookup table. + if symbol >= 0 { + // Get the length value from the top bits. + // As we shift down the sign bit, converting to an unsigned value + // shouldn't overflow. + code_len = (symbol >> 9) as u32; + // Mask out the length value. + symbol &= 511; + } else { + let res = r.tables[table].tree_lookup(symbol, l.bit_buf, u32::from(FAST_LOOKUP_BITS)); + symbol = res.0; + code_len = res.1 as u32; + }; + + if code_len == 0 { + return Action::Jump(InvalidCodeLen); + } + + l.bit_buf >>= code_len as u32; + l.num_bits -= code_len; + f(r, l, symbol) +} + +/// Try to read one byte from `in_iter` and call `f` with the read byte as an argument, +/// returning the result. +/// If reading fails, `Action::End is returned` +#[inline] +fn read_byte<F>(in_iter: &mut slice::Iter<u8>, flags: u32, f: F) -> Action +where + F: FnOnce(u8) -> Action, +{ + match in_iter.next() { + None => end_of_input(flags), + Some(&byte) => f(byte), + } +} + +// TODO: `l: &mut LocalVars` may be slow similar to decompress_fast (even with inline(always)) +/// Try to read `amount` number of bits from `in_iter` and call the function `f` with the bits as an +/// an argument after reading, returning the result of that function, or `Action::End` if there are +/// not enough bytes left. +#[inline] +#[allow(clippy::while_immutable_condition)] +fn read_bits<F>( + l: &mut LocalVars, + amount: u32, + in_iter: &mut slice::Iter<u8>, + flags: u32, + f: F, +) -> Action +where + F: FnOnce(&mut LocalVars, BitBuffer) -> Action, +{ + // Clippy gives a false positive warning here due to the closure. + // Read enough bytes from the input iterator to cover the number of bits we want. + while l.num_bits < amount { + match read_byte(in_iter, flags, |byte| { + l.bit_buf |= BitBuffer::from(byte) << l.num_bits; + l.num_bits += 8; + Action::None + }) { + Action::None => (), + // If there are not enough bytes in the input iterator, return and signal that we need + // more. + action => return action, + } + } + + let bits = l.bit_buf & ((1 << amount) - 1); + l.bit_buf >>= amount; + l.num_bits -= amount; + f(l, bits) +} + +#[inline] +fn pad_to_bytes<F>(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>, flags: u32, f: F) -> Action +where + F: FnOnce(&mut LocalVars) -> Action, +{ + let num_bits = l.num_bits & 7; + read_bits(l, num_bits, in_iter, flags, |l, _| f(l)) +} + +#[inline] +fn end_of_input(flags: u32) -> Action { + Action::End(if flags & TINFL_FLAG_HAS_MORE_INPUT != 0 { + TINFLStatus::NeedsMoreInput + } else { + TINFLStatus::FailedCannotMakeProgress + }) +} + +#[inline] +fn undo_bytes(l: &mut LocalVars, max: u32) -> u32 { + let res = cmp::min(l.num_bits >> 3, max); + l.num_bits -= res << 3; + res +} + +fn start_static_table(r: &mut DecompressorOxide) { + r.table_sizes[LITLEN_TABLE] = 288; + r.table_sizes[DIST_TABLE] = 32; + memset(&mut r.tables[LITLEN_TABLE].code_size[0..144], 8); + memset(&mut r.tables[LITLEN_TABLE].code_size[144..256], 9); + memset(&mut r.tables[LITLEN_TABLE].code_size[256..280], 7); + memset(&mut r.tables[LITLEN_TABLE].code_size[280..288], 8); + memset(&mut r.tables[DIST_TABLE].code_size[0..32], 5); +} + +fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Action { + loop { + let table = &mut r.tables[r.block_type as usize]; + let table_size = r.table_sizes[r.block_type as usize] as usize; + let mut total_symbols = [0u32; 16]; + let mut next_code = [0u32; 17]; + memset(&mut table.look_up[..], 0); + memset(&mut table.tree[..], 0); + + for &code_size in &table.code_size[..table_size] { + total_symbols[code_size as usize] += 1; + } + + let mut used_symbols = 0; + let mut total = 0; + for i in 1..16 { + used_symbols += total_symbols[i]; + total += total_symbols[i]; + total <<= 1; + next_code[i + 1] = total; + } + + if total != 65_536 && used_symbols > 1 { + return Action::Jump(BadTotalSymbols); + } + + let mut tree_next = -1; + for symbol_index in 0..table_size { + let mut rev_code = 0; + let code_size = table.code_size[symbol_index]; + if code_size == 0 { + continue; + } + + let mut cur_code = next_code[code_size as usize]; + next_code[code_size as usize] += 1; + + for _ in 0..code_size { + rev_code = (rev_code << 1) | (cur_code & 1); + cur_code >>= 1; + } + + if code_size <= FAST_LOOKUP_BITS { + let k = (i16::from(code_size) << 9) | symbol_index as i16; + while rev_code < FAST_LOOKUP_SIZE { + table.look_up[rev_code as usize] = k; + rev_code += 1 << code_size; + } + continue; + } + + let mut tree_cur = table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize]; + if tree_cur == 0 { + table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize] = tree_next as i16; + tree_cur = tree_next; + tree_next -= 2; + } + + rev_code >>= FAST_LOOKUP_BITS - 1; + for _ in FAST_LOOKUP_BITS + 1..code_size { + rev_code >>= 1; + tree_cur -= (rev_code & 1) as i16; + if table.tree[(-tree_cur - 1) as usize] == 0 { + table.tree[(-tree_cur - 1) as usize] = tree_next as i16; + tree_cur = tree_next; + tree_next -= 2; + } else { + tree_cur = table.tree[(-tree_cur - 1) as usize]; + } + } + + rev_code >>= 1; + tree_cur -= (rev_code & 1) as i16; + table.tree[(-tree_cur - 1) as usize] = symbol_index as i16; + } + + if r.block_type == 2 { + l.counter = 0; + return Action::Jump(ReadLitlenDistTablesCodeSize); + } + + if r.block_type == 0 { + break; + } + r.block_type -= 1; + } + + l.counter = 0; + Action::Jump(DecodeLitlen) +} + +// A helper macro for generating the state machine. +// +// As Rust doesn't have fallthrough on matches, we have to return to the match statement +// and jump for each state change. (Which would ideally be optimized away, but often isn't.) +macro_rules! generate_state { + ($state: ident, $state_machine: tt, $f: expr) => { + loop { + match $f { + Action::None => continue, + Action::Jump(new_state) => { + $state = new_state; + continue $state_machine; + }, + Action::End(result) => break $state_machine result, + } + } + }; +} + +#[derive(Copy, Clone)] +struct LocalVars { + pub bit_buf: BitBuffer, + pub num_bits: u32, + pub dist: u32, + pub counter: u32, + pub num_extra: u32, +} + +#[inline] +fn transfer( + out_slice: &mut [u8], + mut source_pos: usize, + mut out_pos: usize, + match_len: usize, + out_buf_size_mask: usize, +) { + for _ in 0..match_len >> 2 { + out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; + out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask]; + source_pos += 4; + out_pos += 4; + } + + match match_len & 3 { + 0 => (), + 1 => out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask], + 2 => { + out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + } + 3 => { + out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; + } + _ => unreachable!(), + } +} + +/// Presumes that there is at least match_len bytes in output left. +#[inline] +fn apply_match( + out_slice: &mut [u8], + out_pos: usize, + dist: usize, + match_len: usize, + out_buf_size_mask: usize, +) { + debug_assert!(out_pos + match_len <= out_slice.len()); + + let source_pos = out_pos.wrapping_sub(dist) & out_buf_size_mask; + + if match_len == 3 { + // Fast path for match len 3. + out_slice[out_pos] = out_slice[source_pos]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; + return; + } + + if cfg!(not(any(target_arch = "x86", target_arch = "x86_64"))) { + // We are not on x86 so copy manually. + transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask); + return; + } + + if source_pos >= out_pos && (source_pos - out_pos) < match_len { + transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask); + } else if match_len <= dist && source_pos + match_len < out_slice.len() { + // Destination and source segments does not intersect and source does not wrap. + if source_pos < out_pos { + let (from_slice, to_slice) = out_slice.split_at_mut(out_pos); + to_slice[..match_len].copy_from_slice(&from_slice[source_pos..source_pos + match_len]); + } else { + let (to_slice, from_slice) = out_slice.split_at_mut(source_pos); + to_slice[out_pos..out_pos + match_len].copy_from_slice(&from_slice[..match_len]); + } + } else { + transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask); + } +} + +/// Fast inner decompression loop which is run while there is at least +/// 259 bytes left in the output buffer, and at least 6 bytes left in the input buffer +/// (The maximum one match would need + 1). +/// +/// This was inspired by a similar optimization in zlib, which uses this info to do +/// faster unchecked copies of multiple bytes at a time. +/// Currently we don't do this here, but this function does avoid having to jump through the +/// big match loop on each state change(as rust does not have fallthrough or gotos at the moment), +/// and already improves decompression speed a fair bit. +fn decompress_fast( + r: &mut DecompressorOxide, + in_iter: &mut slice::Iter<u8>, + out_buf: &mut OutputBuffer, + flags: u32, + local_vars: &mut LocalVars, + out_buf_size_mask: usize, +) -> (TINFLStatus, State) { + // Make a local copy of the most used variables, to avoid having to update and read from values + // in a random memory location and to encourage more register use. + let mut l = *local_vars; + let mut state; + + let status: TINFLStatus = 'o: loop { + state = State::DecodeLitlen; + loop { + // This function assumes that there is at least 259 bytes left in the output buffer, + // and that there is at least 14 bytes left in the input buffer. 14 input bytes: + // 15 (prev lit) + 15 (length) + 5 (length extra) + 15 (dist) + // + 29 + 32 (left in bit buf, including last 13 dist extra) = 111 bits < 14 bytes + // We need the one extra byte as we may write one length and one full match + // before checking again. + if out_buf.bytes_left() < 259 || in_iter.len() < 14 { + state = State::DecodeLitlen; + break 'o TINFLStatus::Done; + } + + fill_bit_buffer(&mut l, in_iter); + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + l.counter = symbol as u32; + l.bit_buf >>= code_len; + l.num_bits -= code_len; + + if (l.counter & 256) != 0 { + // The symbol is not a literal. + break; + } else { + // If we have a 32-bit buffer we need to read another two bytes now + // to have enough bits to keep going. + if cfg!(not(target_pointer_width = "64")) { + fill_bit_buffer(&mut l, in_iter); + } + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + l.bit_buf >>= code_len; + l.num_bits -= code_len; + // The previous symbol was a literal, so write it directly and check + // the next one. + out_buf.write_byte(l.counter as u8); + if (symbol & 256) != 0 { + l.counter = symbol as u32; + // The symbol is a length value. + break; + } else { + // The symbol is a literal, so write it directly and continue. + out_buf.write_byte(symbol as u8); + } + } else { + state.begin(InvalidCodeLen); + break 'o TINFLStatus::Failed; + } + } + } else { + state.begin(InvalidCodeLen); + break 'o TINFLStatus::Failed; + } + } + + // Mask the top bits since they may contain length info. + l.counter &= 511; + if l.counter == 256 { + // We hit the end of block symbol. + state.begin(BlockDone); + break 'o TINFLStatus::Done; + } else if l.counter > 285 { + // Invalid code. + // We already verified earlier that the code is > 256. + state.begin(InvalidLitlen); + break 'o TINFLStatus::Failed; + } else { + // The symbol was a length code. + // # Optimization + // Mask the value to avoid bounds checks + // We could use get_unchecked later if can statically verify that + // this will never go out of bounds. + l.num_extra = u32::from(LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + // Length and distance codes have a number of extra bits depending on + // the base, which together with the base gives us the exact value. + + fill_bit_buffer(&mut l, in_iter); + if l.num_extra != 0 { + let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1); + l.bit_buf >>= l.num_extra; + l.num_bits -= l.num_extra; + l.counter += extra_bits as u32; + } + + // We found a length code, so a distance code should follow. + + if cfg!(not(target_pointer_width = "64")) { + fill_bit_buffer(&mut l, in_iter); + } + + if let Some((mut symbol, code_len)) = r.tables[DIST_TABLE].lookup(l.bit_buf) { + symbol &= 511; + l.bit_buf >>= code_len; + l.num_bits -= code_len; + if symbol > 29 { + state.begin(InvalidDist); + break 'o TINFLStatus::Failed; + } + + l.num_extra = u32::from(DIST_EXTRA[symbol as usize]); + l.dist = u32::from(DIST_BASE[symbol as usize]); + } else { + state.begin(InvalidCodeLen); + break 'o TINFLStatus::Failed; + } + + if l.num_extra != 0 { + fill_bit_buffer(&mut l, in_iter); + let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1); + l.bit_buf >>= l.num_extra; + l.num_bits -= l.num_extra; + l.dist += extra_bits as u32; + } + + let position = out_buf.position(); + if l.dist as usize > out_buf.position() + && (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0) + { + // We encountered a distance that refers a position before + // the start of the decoded data, so we can't continue. + state.begin(DistanceOutOfBounds); + break TINFLStatus::Failed; + } + + apply_match( + out_buf.get_mut(), + position, + l.dist as usize, + l.counter as usize, + out_buf_size_mask, + ); + + out_buf.set_position(position + l.counter as usize); + } + }; + + *local_vars = l; + (status, state) +} + +/// Main decompression function. Keeps decompressing data from `in_buf` until the `in_buf` is +/// empty, `out` is full, the end of the deflate stream is hit, or there is an error in the +/// deflate stream. +/// +/// # Arguments +/// +/// `r` is a [`DecompressorOxide`] struct with the state of this stream. +/// +/// `in_buf` is a reference to the compressed data that is to be decompressed. The decompressor will +/// start at the first byte of this buffer. +/// +/// `out` is a reference to the buffer that will store the decompressed data, and that +/// stores previously decompressed data if any. +/// +/// * The offset given by `out_pos` indicates where in the output buffer slice writing should start. +/// * If [`TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF`] is not set, the output buffer is used in a +/// wrapping manner, and it's size is required to be a power of 2. +/// * The decompression function normally needs access to 32KiB of the previously decompressed data +///(or to the beginning of the decompressed data if less than 32KiB has been decompressed.) +/// - If this data is not available, decompression may fail. +/// - Some deflate compressors allow specifying a window size which limits match distances to +/// less than this, or alternatively an RLE mode where matches will only refer to the previous byte +/// and thus allows a smaller output buffer. The window size can be specified in the zlib +/// header structure, however, the header data should not be relied on to be correct. +/// +/// `flags` indicates settings and status to the decompression function. +/// * The [`TINFL_FLAG_HAS_MORE_INPUT`] has to be specified if more compressed data is to be provided +/// in a subsequent call to this function. +/// * See the the [`inflate_flags`] module for details on other flags. +/// +/// # Returns +/// +/// Returns a tuple containing the status of the compressor, the number of input bytes read, and the +/// number of bytes output to `out`. +/// +/// This function shouldn't panic pending any bugs. +pub fn decompress( + r: &mut DecompressorOxide, + in_buf: &[u8], + out: &mut [u8], + out_pos: usize, + flags: u32, +) -> (TINFLStatus, usize, usize) { + let out_buf_size_mask = if flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0 { + usize::max_value() + } else { + // In the case of zero len, any attempt to write would produce HasMoreOutput, + // so to gracefully process the case of there really being no output, + // set the mask to all zeros. + out.len().saturating_sub(1) + }; + + // Ensure the output buffer's size is a power of 2, unless the output buffer + // is large enough to hold the entire output file (in which case it doesn't + // matter). + // Also make sure that the output buffer position is not past the end of the output buffer. + if (out_buf_size_mask.wrapping_add(1) & out_buf_size_mask) != 0 || out_pos > out.len() { + return (TINFLStatus::BadParam, 0, 0); + } + + let mut in_iter = in_buf.iter(); + + let mut state = r.state; + + let mut out_buf = OutputBuffer::from_slice_and_pos(out, out_pos); + + // Make a local copy of the important variables here so we can work with them on the stack. + let mut l = LocalVars { + bit_buf: r.bit_buf, + num_bits: r.num_bits, + dist: r.dist, + counter: r.counter, + num_extra: r.num_extra, + }; + + let mut status = 'state_machine: loop { + match state { + Start => generate_state!(state, 'state_machine, { + l.bit_buf = 0; + l.num_bits = 0; + l.dist = 0; + l.counter = 0; + l.num_extra = 0; + r.z_header0 = 0; + r.z_header1 = 0; + r.z_adler32 = 1; + r.check_adler32 = 1; + if flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 { + Action::Jump(State::ReadZlibCmf) + } else { + Action::Jump(State::ReadBlockHeader) + } + }), + + ReadZlibCmf => generate_state!(state, 'state_machine, { + read_byte(&mut in_iter, flags, |cmf| { + r.z_header0 = u32::from(cmf); + Action::Jump(State::ReadZlibFlg) + }) + }), + + ReadZlibFlg => generate_state!(state, 'state_machine, { + read_byte(&mut in_iter, flags, |flg| { + r.z_header1 = u32::from(flg); + validate_zlib_header(r.z_header0, r.z_header1, flags, out_buf_size_mask) + }) + }), + + // Read the block header and jump to the relevant section depending on the block type. + ReadBlockHeader => generate_state!(state, 'state_machine, { + read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| { + r.finish = (bits & 1) as u32; + r.block_type = (bits >> 1) as u32 & 3; + match r.block_type { + 0 => Action::Jump(BlockTypeNoCompression), + 1 => { + start_static_table(r); + init_tree(r, l) + }, + 2 => { + l.counter = 0; + Action::Jump(ReadTableSizes) + }, + 3 => Action::Jump(BlockTypeUnexpected), + _ => unreachable!() + } + }) + }), + + // Raw/Stored/uncompressed block. + BlockTypeNoCompression => generate_state!(state, 'state_machine, { + pad_to_bytes(&mut l, &mut in_iter, flags, |l| { + l.counter = 0; + Action::Jump(RawHeader) + }) + }), + + // Check that the raw block header is correct. + RawHeader => generate_state!(state, 'state_machine, { + if l.counter < 4 { + // Read block length and block length check. + if l.num_bits != 0 { + read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| { + r.raw_header[l.counter as usize] = bits as u8; + l.counter += 1; + Action::None + }) + } else { + read_byte(&mut in_iter, flags, |byte| { + r.raw_header[l.counter as usize] = byte; + l.counter += 1; + Action::None + }) + } + } else { + // Check if the length value of a raw block is correct. + // The 2 first (2-byte) words in a raw header are the length and the + // ones complement of the length. + let length = u16::from(r.raw_header[0]) | (u16::from(r.raw_header[1]) << 8); + let check = u16::from(r.raw_header[2]) | (u16::from(r.raw_header[3]) << 8); + let valid = length == !check; + l.counter = length.into(); + + if !valid { + Action::Jump(BadRawLength) + } else if l.counter == 0 { + // Empty raw block. Sometimes used for synchronization. + Action::Jump(BlockDone) + } else if l.num_bits != 0 { + // There is some data in the bit buffer, so we need to write that first. + Action::Jump(RawReadFirstByte) + } else { + // The bit buffer is empty, so memcpy the rest of the uncompressed data from + // the block. + Action::Jump(RawMemcpy1) + } + } + }), + + // Read the byte from the bit buffer. + RawReadFirstByte => generate_state!(state, 'state_machine, { + read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| { + l.dist = bits as u32; + Action::Jump(RawStoreFirstByte) + }) + }), + + // Write the byte we just read to the output buffer. + RawStoreFirstByte => generate_state!(state, 'state_machine, { + if out_buf.bytes_left() == 0 { + Action::End(TINFLStatus::HasMoreOutput) + } else { + out_buf.write_byte(l.dist as u8); + l.counter -= 1; + if l.counter == 0 || l.num_bits == 0 { + Action::Jump(RawMemcpy1) + } else { + // There is still some data left in the bit buffer that needs to be output. + // TODO: Changed this to jump to `RawReadfirstbyte` rather than + // `RawStoreFirstByte` as that seemed to be the correct path, but this + // needs testing. + Action::Jump(RawReadFirstByte) + } + } + }), + + RawMemcpy1 => generate_state!(state, 'state_machine, { + if l.counter == 0 { + Action::Jump(BlockDone) + } else if out_buf.bytes_left() == 0 { + Action::End(TINFLStatus::HasMoreOutput) + } else { + Action::Jump(RawMemcpy2) + } + }), + + RawMemcpy2 => generate_state!(state, 'state_machine, { + if in_iter.len() > 0 { + // Copy as many raw bytes as possible from the input to the output using memcpy. + // Raw block lengths are limited to 64 * 1024, so casting through usize and u32 + // is not an issue. + let space_left = out_buf.bytes_left(); + let bytes_to_copy = cmp::min(cmp::min( + space_left, + in_iter.len()), + l.counter as usize + ); + + out_buf.write_slice(&in_iter.as_slice()[..bytes_to_copy]); + + (&mut in_iter).nth(bytes_to_copy - 1); + l.counter -= bytes_to_copy as u32; + Action::Jump(RawMemcpy1) + } else { + end_of_input(flags) + } + }), + + // Read how many huffman codes/symbols are used for each table. + ReadTableSizes => generate_state!(state, 'state_machine, { + if l.counter < 3 { + let num_bits = [5, 5, 4][l.counter as usize]; + read_bits(&mut l, num_bits, &mut in_iter, flags, |l, bits| { + r.table_sizes[l.counter as usize] = + bits as u32 + u32::from(MIN_TABLE_SIZES[l.counter as usize]); + l.counter += 1; + Action::None + }) + } else { + memset(&mut r.tables[HUFFLEN_TABLE].code_size[..], 0); + l.counter = 0; + Action::Jump(ReadHufflenTableCodeSize) + } + }), + + // Read the 3-bit lengths of the huffman codes describing the huffman code lengths used + // to decode the lengths of the main tables. + ReadHufflenTableCodeSize => generate_state!(state, 'state_machine, { + if l.counter < r.table_sizes[HUFFLEN_TABLE] { + read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| { + // These lengths are not stored in a normal ascending order, but rather one + // specified by the deflate specification intended to put the most used + // values at the front as trailing zero lengths do not have to be stored. + r.tables[HUFFLEN_TABLE] + .code_size[HUFFMAN_LENGTH_ORDER[l.counter as usize] as usize] = + bits as u8; + l.counter += 1; + Action::None + }) + } else { + r.table_sizes[HUFFLEN_TABLE] = 19; + init_tree(r, &mut l) + } + }), + + ReadLitlenDistTablesCodeSize => generate_state!(state, 'state_machine, { + if l.counter < r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE] { + decode_huffman_code( + r, &mut l, HUFFLEN_TABLE, + flags, &mut in_iter, |r, l, symbol| { + l.dist = symbol as u32; + if l.dist < 16 { + r.len_codes[l.counter as usize] = l.dist as u8; + l.counter += 1; + Action::None + } else if l.dist == 16 && l.counter == 0 { + Action::Jump(BadCodeSizeDistPrevLookup) + } else { + l.num_extra = [2, 3, 7][l.dist as usize - 16]; + Action::Jump(ReadExtraBitsCodeSize) + } + } + ) + } else if l.counter != r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE] { + Action::Jump(BadCodeSizeSum) + } else { + r.tables[LITLEN_TABLE].code_size[..r.table_sizes[LITLEN_TABLE] as usize] + .copy_from_slice(&r.len_codes[..r.table_sizes[LITLEN_TABLE] as usize]); + + let dist_table_start = r.table_sizes[LITLEN_TABLE] as usize; + let dist_table_end = (r.table_sizes[LITLEN_TABLE] + + r.table_sizes[DIST_TABLE]) as usize; + r.tables[DIST_TABLE].code_size[..r.table_sizes[DIST_TABLE] as usize] + .copy_from_slice(&r.len_codes[dist_table_start..dist_table_end]); + + r.block_type -= 1; + init_tree(r, &mut l) + } + }), + + ReadExtraBitsCodeSize => generate_state!(state, 'state_machine, { + let num_extra = l.num_extra; + read_bits(&mut l, num_extra, &mut in_iter, flags, |l, mut extra_bits| { + // Mask to avoid a bounds check. + extra_bits += [3, 3, 11][(l.dist as usize - 16) & 3]; + let val = if l.dist == 16 { + r.len_codes[l.counter as usize - 1] + } else { + 0 + }; + + memset( + &mut r.len_codes[ + l.counter as usize..l.counter as usize + extra_bits as usize + ], + val, + ); + l.counter += extra_bits as u32; + Action::Jump(ReadLitlenDistTablesCodeSize) + }) + }), + + DecodeLitlen => generate_state!(state, 'state_machine, { + if in_iter.len() < 4 || out_buf.bytes_left() < 2 { + // See if we can decode a literal with the data we have left. + // Jumps to next state (WriteSymbol) if successful. + decode_huffman_code( + r, + &mut l, + LITLEN_TABLE, + flags, + &mut in_iter, + |_r, l, symbol| { + l.counter = symbol as u32; + Action::Jump(WriteSymbol) + }, + ) + } else if + // If there is enough space, use the fast inner decompression + // function. + out_buf.bytes_left() >= 259 && + in_iter.len() >= 14 + { + let (status, new_state) = decompress_fast( + r, + &mut in_iter, + &mut out_buf, + flags, + &mut l, + out_buf_size_mask, + ); + + state = new_state; + if status == TINFLStatus::Done { + Action::Jump(new_state) + } else { + Action::End(status) + } + } else { + fill_bit_buffer(&mut l, &mut in_iter); + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + + l.counter = symbol as u32; + l.bit_buf >>= code_len; + l.num_bits -= code_len; + + if (l.counter & 256) != 0 { + // The symbol is not a literal. + Action::Jump(HuffDecodeOuterLoop1) + } else { + // If we have a 32-bit buffer we need to read another two bytes now + // to have enough bits to keep going. + if cfg!(not(target_pointer_width = "64")) { + fill_bit_buffer(&mut l, &mut in_iter); + } + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + + l.bit_buf >>= code_len; + l.num_bits -= code_len; + // The previous symbol was a literal, so write it directly and check + // the next one. + out_buf.write_byte(l.counter as u8); + if (symbol & 256) != 0 { + l.counter = symbol as u32; + // The symbol is a length value. + Action::Jump(HuffDecodeOuterLoop1) + } else { + // The symbol is a literal, so write it directly and continue. + out_buf.write_byte(symbol as u8); + Action::None + } + } else { + Action::Jump(InvalidCodeLen) + } + } + } else { + Action::Jump(InvalidCodeLen) + } + } + }), + + WriteSymbol => generate_state!(state, 'state_machine, { + if l.counter >= 256 { + Action::Jump(HuffDecodeOuterLoop1) + } else if out_buf.bytes_left() > 0 { + out_buf.write_byte(l.counter as u8); + Action::Jump(DecodeLitlen) + } else { + Action::End(TINFLStatus::HasMoreOutput) + } + }), + + HuffDecodeOuterLoop1 => generate_state!(state, 'state_machine, { + // Mask the top bits since they may contain length info. + l.counter &= 511; + + if l.counter + == 256 { + // We hit the end of block symbol. + Action::Jump(BlockDone) + } else if l.counter > 285 { + // Invalid code. + // We already verified earlier that the code is > 256. + Action::Jump(InvalidLitlen) + } else { + // # Optimization + // Mask the value to avoid bounds checks + // We could use get_unchecked later if can statically verify that + // this will never go out of bounds. + l.num_extra = + u32::from(LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + // Length and distance codes have a number of extra bits depending on + // the base, which together with the base gives us the exact value. + if l.num_extra != 0 { + Action::Jump(ReadExtraBitsLitlen) + } else { + Action::Jump(DecodeDistance) + } + } + }), + + ReadExtraBitsLitlen => generate_state!(state, 'state_machine, { + let num_extra = l.num_extra; + read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| { + l.counter += extra_bits as u32; + Action::Jump(DecodeDistance) + }) + }), + + DecodeDistance => generate_state!(state, 'state_machine, { + // Try to read a huffman code from the input buffer and look up what + // length code the decoded symbol refers to. + decode_huffman_code(r, &mut l, DIST_TABLE, flags, &mut in_iter, |_r, l, symbol| { + if symbol > 29 { + // Invalid distance code. + return Action::Jump(InvalidDist) + } + // # Optimization + // Mask the value to avoid bounds checks + // We could use get_unchecked later if can statically verify that + // this will never go out of bounds. + l.num_extra = u32::from(DIST_EXTRA[symbol as usize & BASE_EXTRA_MASK]); + l.dist = u32::from(DIST_BASE[symbol as usize & BASE_EXTRA_MASK]); + if l.num_extra != 0 { + // ReadEXTRA_BITS_DISTACNE + Action::Jump(ReadExtraBitsDistance) + } else { + Action::Jump(HuffDecodeOuterLoop2) + } + }) + }), + + ReadExtraBitsDistance => generate_state!(state, 'state_machine, { + let num_extra = l.num_extra; + read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| { + l.dist += extra_bits as u32; + Action::Jump(HuffDecodeOuterLoop2) + }) + }), + + HuffDecodeOuterLoop2 => generate_state!(state, 'state_machine, { + if l.dist as usize > out_buf.position() && + (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0) + { + // We encountered a distance that refers a position before + // the start of the decoded data, so we can't continue. + Action::Jump(DistanceOutOfBounds) + } else { + let out_pos = out_buf.position(); + let source_pos = out_buf.position() + .wrapping_sub(l.dist as usize) & out_buf_size_mask; + + let out_len = out_buf.get_ref().len() as usize; + let match_end_pos = out_buf.position() + l.counter as usize; + + if match_end_pos > out_len || + // miniz doesn't do this check here. Not sure how it makes sure + // that this case doesn't happen. + (source_pos >= out_pos && (source_pos - out_pos) < l.counter as usize) + { + // Not enough space for all of the data in the output buffer, + // so copy what we have space for. + if l.counter == 0 { + Action::Jump(DecodeLitlen) + } else { + Action::Jump(WriteLenBytesToEnd) + } + } else { + apply_match( + out_buf.get_mut(), + out_pos, + l.dist as usize, + l.counter as usize, + out_buf_size_mask + ); + out_buf.set_position(out_pos + l.counter as usize); + Action::Jump(DecodeLitlen) + } + } + }), + + WriteLenBytesToEnd => generate_state!(state, 'state_machine, { + if out_buf.bytes_left() > 0 { + let out_pos = out_buf.position(); + let source_pos = out_buf.position() + .wrapping_sub(l.dist as usize) & out_buf_size_mask; + + + let len = cmp::min(out_buf.bytes_left(), l.counter as usize); + + transfer(out_buf.get_mut(), source_pos, out_pos, len, out_buf_size_mask); + + out_buf.set_position(out_pos + len); + l.counter -= len as u32; + if l.counter == 0 { + Action::Jump(DecodeLitlen) + } else { + Action::None + } + } else { + Action::End(TINFLStatus::HasMoreOutput) + } + }), + + BlockDone => generate_state!(state, 'state_machine, { + // End once we've read the last block. + if r.finish != 0 { + pad_to_bytes(&mut l, &mut in_iter, flags, |_| Action::None); + + let in_consumed = in_buf.len() - in_iter.len(); + let undo = undo_bytes(&mut l, in_consumed as u32) as usize; + in_iter = in_buf[in_consumed - undo..].iter(); + + l.bit_buf &= ((1 as BitBuffer) << l.num_bits) - 1; + debug_assert_eq!(l.num_bits, 0); + + if flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 { + l.counter = 0; + Action::Jump(ReadAdler32) + } else { + Action::Jump(DoneForever) + } + } else { + Action::Jump(ReadBlockHeader) + } + }), + + ReadAdler32 => generate_state!(state, 'state_machine, { + if l.counter < 4 { + if l.num_bits != 0 { + read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| { + r.z_adler32 <<= 8; + r.z_adler32 |= bits as u32; + l.counter += 1; + Action::None + }) + } else { + read_byte(&mut in_iter, flags, |byte| { + r.z_adler32 <<= 8; + r.z_adler32 |= u32::from(byte); + l.counter += 1; + Action::None + }) + } + } else { + Action::Jump(DoneForever) + } + }), + + // We are done. + DoneForever => break TINFLStatus::Done, + + // Anything else indicates failure. + // BadZlibHeader | BadRawLength | BlockTypeUnexpected | DistanceOutOfBounds | + // BadTotalSymbols | BadCodeSizeDistPrevLookup | BadCodeSizeSum | InvalidLitlen | + // InvalidDist | InvalidCodeLen + _ => break TINFLStatus::Failed, + }; + }; + + let in_undo = if status != TINFLStatus::NeedsMoreInput + && status != TINFLStatus::FailedCannotMakeProgress + { + undo_bytes(&mut l, (in_buf.len() - in_iter.len()) as u32) as usize + } else { + 0 + }; + + // Make sure HasMoreOutput overrides NeedsMoreInput if the output buffer is full. + // (Unless the missing input is the adler32 value in which case we don't need to write anything.) + // TODO: May want to see if we can do this in a better way. + if status == TINFLStatus::NeedsMoreInput + && out_buf.bytes_left() == 0 + && state != State::ReadAdler32 + { + status = TINFLStatus::HasMoreOutput + } + + r.state = state; + r.bit_buf = l.bit_buf; + r.num_bits = l.num_bits; + r.dist = l.dist; + r.counter = l.counter; + r.num_extra = l.num_extra; + + r.bit_buf &= ((1 as BitBuffer) << r.num_bits) - 1; + + // If this is a zlib stream, and update the adler32 checksum with the decompressed bytes if + // requested. + let need_adler = if (flags & TINFL_FLAG_IGNORE_ADLER32) == 0 { + flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32) != 0 + } else { + // If TINFL_FLAG_IGNORE_ADLER32 is enabled, ignore the checksum. + false + }; + if need_adler && status as i32 >= 0 { + let out_buf_pos = out_buf.position(); + r.check_adler32 = update_adler32(r.check_adler32, &out_buf.get_ref()[out_pos..out_buf_pos]); + + // disabled so that random input from fuzzer would not be rejected early, + // before it has a chance to reach interesting parts of code + if !cfg!(fuzzing) { + // Once we are done, check if the checksum matches with the one provided in the zlib header. + if status == TINFLStatus::Done + && flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 + && r.check_adler32 != r.z_adler32 + { + status = TINFLStatus::Adler32Mismatch; + } + } + } + + ( + status, + in_buf.len() - in_iter.len() - in_undo, + out_buf.position() - out_pos, + ) +} + +#[cfg(test)] +mod test { + use super::*; + + //TODO: Fix these. + + fn tinfl_decompress_oxide<'i>( + r: &mut DecompressorOxide, + input_buffer: &'i [u8], + output_buffer: &mut [u8], + flags: u32, + ) -> (TINFLStatus, &'i [u8], usize) { + let (status, in_pos, out_pos) = decompress(r, input_buffer, output_buffer, 0, flags); + (status, &input_buffer[in_pos..], out_pos) + } + + #[test] + fn decompress_zlib() { + let encoded = [ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19, + ]; + let flags = TINFL_FLAG_COMPUTE_ADLER32 | TINFL_FLAG_PARSE_ZLIB_HEADER; + + let mut b = DecompressorOxide::new(); + const LEN: usize = 32; + let mut b_buf = vec![0; LEN]; + + // This should fail with the out buffer being to small. + let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], b_buf.as_mut_slice(), flags); + + assert_eq!(b_status.0, TINFLStatus::Failed); + + let flags = flags | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + b = DecompressorOxide::new(); + + // With TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF set this should no longer fail. + let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], b_buf.as_mut_slice(), flags); + + assert_eq!(b_buf[..b_status.2], b"Hello, zlib!"[..]); + assert_eq!(b_status.0, TINFLStatus::Done); + } + + #[test] + fn raw_block() { + const LEN: usize = 64; + + let text = b"Hello, zlib!"; + let encoded = { + let len = text.len(); + let notlen = !len; + let mut encoded = vec![ + 1, + len as u8, + (len >> 8) as u8, + notlen as u8, + (notlen >> 8) as u8, + ]; + encoded.extend_from_slice(&text[..]); + encoded + }; + + //let flags = TINFL_FLAG_COMPUTE_ADLER32 | TINFL_FLAG_PARSE_ZLIB_HEADER | + let flags = TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + let mut b = DecompressorOxide::new(); + + let mut b_buf = vec![0; LEN]; + + let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], b_buf.as_mut_slice(), flags); + assert_eq!(b_buf[..b_status.2], text[..]); + assert_eq!(b_status.0, TINFLStatus::Done); + } + + fn masked_lookup(table: &HuffmanTable, bit_buf: BitBuffer) -> (i32, u32) { + let ret = table.lookup(bit_buf).unwrap(); + (ret.0 & 511, ret.1) + } + + #[test] + fn fixed_table_lookup() { + let mut d = DecompressorOxide::new(); + d.block_type = 1; + start_static_table(&mut d); + let mut l = LocalVars { + bit_buf: d.bit_buf, + num_bits: d.num_bits, + dist: d.dist, + counter: d.counter, + num_extra: d.num_extra, + }; + init_tree(&mut d, &mut l); + let llt = &d.tables[LITLEN_TABLE]; + let dt = &d.tables[DIST_TABLE]; + assert_eq!(masked_lookup(llt, 0b00001100), (0, 8)); + assert_eq!(masked_lookup(llt, 0b00011110), (72, 8)); + assert_eq!(masked_lookup(llt, 0b01011110), (74, 8)); + assert_eq!(masked_lookup(llt, 0b11111101), (143, 8)); + assert_eq!(masked_lookup(llt, 0b000010011), (144, 9)); + assert_eq!(masked_lookup(llt, 0b111111111), (255, 9)); + assert_eq!(masked_lookup(llt, 0b00000000), (256, 7)); + assert_eq!(masked_lookup(llt, 0b1110100), (279, 7)); + assert_eq!(masked_lookup(llt, 0b00000011), (280, 8)); + assert_eq!(masked_lookup(llt, 0b11100011), (287, 8)); + + assert_eq!(masked_lookup(dt, 0), (0, 5)); + assert_eq!(masked_lookup(dt, 20), (5, 5)); + } + + fn check_result(input: &[u8], expected_status: TINFLStatus, expected_state: State, zlib: bool) { + let mut r = DecompressorOxide::default(); + let mut output_buf = vec![0; 1024 * 32]; + let flags = if zlib { + inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER + } else { + 0 + } | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF + | TINFL_FLAG_HAS_MORE_INPUT; + let (d_status, _in_bytes, _out_bytes) = + decompress(&mut r, input, &mut output_buf, 0, flags); + assert_eq!(expected_status, d_status); + assert_eq!(expected_state, r.state); + } + + #[test] + fn bogus_input() { + use self::check_result as cr; + const F: TINFLStatus = TINFLStatus::Failed; + const OK: TINFLStatus = TINFLStatus::Done; + // Bad CM. + cr(&[0x77, 0x85], F, State::BadZlibHeader, true); + // Bad window size (but check is correct). + cr(&[0x88, 0x98], F, State::BadZlibHeader, true); + // Bad check bits. + cr(&[0x78, 0x98], F, State::BadZlibHeader, true); + + // Too many code lengths. (From inflate library issues) + cr( + b"M\xff\xffM*\xad\xad\xad\xad\xad\xad\xad\xcd\xcd\xcdM", + F, + State::BadTotalSymbols, + false, + ); + // Bad CLEN (also from inflate library issues) + cr( + b"\xdd\xff\xff*M\x94ffffffffff", + F, + State::BadTotalSymbols, + false, + ); + + // Port of inflate coverage tests from zlib-ng + // https://github.com/Dead2/zlib-ng/blob/develop/test/infcover.c + let c = |a, b, c| cr(a, b, c, false); + + // Invalid uncompressed/raw block length. + c(&[0, 0, 0, 0, 0], F, State::BadRawLength); + // Ok empty uncompressed block. + c(&[3, 0], OK, State::DoneForever); + // Invalid block type. + c(&[6], F, State::BlockTypeUnexpected); + // Ok uncompressed block. + c(&[1, 1, 0, 0xfe, 0xff, 0], OK, State::DoneForever); + // Too many litlens, we handle this later than zlib, so this test won't + // give the same result. + // c(&[0xfc, 0, 0], F, State::BadTotalSymbols); + // Invalid set of code lengths - TODO Check if this is the correct error for this. + c(&[4, 0, 0xfe, 0xff], F, State::BadTotalSymbols); + // Invalid repeat in list of code lengths. + // (Try to repeat a non-existant code.) + c(&[4, 0, 0x24, 0x49, 0], F, State::BadCodeSizeDistPrevLookup); + // Missing end of block code (should we have a separate error for this?) - fails on futher input + // c(&[4, 0, 0x24, 0xe9, 0xff, 0x6d], F, State::BadTotalSymbols); + // Invalid set of literals/lengths + c( + &[ + 4, 0x80, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x71, 0xff, 0xff, 0x93, 0x11, 0, + ], + F, + State::BadTotalSymbols, + ); + // Invalid set of distances _ needsmoreinput + // c(&[4, 0x80, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0f, 0xb4, 0xff, 0xff, 0xc3, 0x84], F, State::BadTotalSymbols); + // Invalid distance code + c(&[2, 0x7e, 0xff, 0xff], F, State::InvalidDist); + + // Distance refers to position before the start + c( + &[0x0c, 0xc0, 0x81, 0, 0, 0, 0, 0, 0x90, 0xff, 0x6b, 0x4, 0], + F, + State::DistanceOutOfBounds, + ); + + // Trailer + // Bad gzip trailer checksum GZip header not handled by miniz_oxide + //cr(&[0x1f, 0x8b, 0x08 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0x03, 0, 0, 0, 0, 0x01], F, State::BadCRC, false) + // Bad gzip trailer length + //cr(&[0x1f, 0x8b, 0x08 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0x03, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], F, State::BadCRC, false) + } + + #[test] + fn empty_output_buffer_non_wrapping() { + let encoded = [ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19, + ]; + let flags = TINFL_FLAG_COMPUTE_ADLER32 + | TINFL_FLAG_PARSE_ZLIB_HEADER + | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + let mut r = DecompressorOxide::new(); + let mut output_buf = vec![]; + // Check that we handle an empty buffer properly and not panicking. + // https://github.com/Frommi/miniz_oxide/issues/23 + let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags); + assert_eq!(res, (TINFLStatus::HasMoreOutput, 4, 0)); + } + + #[test] + fn empty_output_buffer_wrapping() { + let encoded = [ + 0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00, + ]; + let flags = TINFL_FLAG_COMPUTE_ADLER32; + let mut r = DecompressorOxide::new(); + let mut output_buf = vec![]; + // Check that we handle an empty buffer properly and not panicking. + // https://github.com/Frommi/miniz_oxide/issues/23 + let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags); + assert_eq!(res, (TINFLStatus::HasMoreOutput, 2, 0)); + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/mod.rs b/third_party/rust/miniz_oxide/src/inflate/mod.rs new file mode 100644 index 0000000000..bb19e379cc --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/mod.rs @@ -0,0 +1,337 @@ +//! This module contains functionality for decompression. + +#[cfg(feature = "with-alloc")] +use crate::alloc::{boxed::Box, vec, vec::Vec}; +use ::core::usize; +#[cfg(all(feature = "std", feature = "with-alloc"))] +use std::error::Error; + +pub mod core; +mod output_buffer; +pub mod stream; +use self::core::*; + +const TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS: i32 = -4; +const TINFL_STATUS_BAD_PARAM: i32 = -3; +const TINFL_STATUS_ADLER32_MISMATCH: i32 = -2; +const TINFL_STATUS_FAILED: i32 = -1; +const TINFL_STATUS_DONE: i32 = 0; +const TINFL_STATUS_NEEDS_MORE_INPUT: i32 = 1; +const TINFL_STATUS_HAS_MORE_OUTPUT: i32 = 2; + +/// Return status codes. +#[repr(i8)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum TINFLStatus { + /// More input data was expected, but the caller indicated that there was no more data, so the + /// input stream is likely truncated. + /// + /// This can't happen if you have provided the + /// [`TINFL_FLAG_HAS_MORE_INPUT`][core::inflate_flags::TINFL_FLAG_HAS_MORE_INPUT] flag to the + /// decompression. By setting that flag, you indicate more input exists but is not provided, + /// and so reaching the end of the input data without finding the end of the compressed stream + /// would instead return a [`NeedsMoreInput`][Self::NeedsMoreInput] status. + FailedCannotMakeProgress = TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS as i8, + + /// The output buffer is an invalid size; consider the `flags` parameter. + BadParam = TINFL_STATUS_BAD_PARAM as i8, + + /// The decompression went fine, but the adler32 checksum did not match the one + /// provided in the header. + Adler32Mismatch = TINFL_STATUS_ADLER32_MISMATCH as i8, + + /// Failed to decompress due to invalid data. + Failed = TINFL_STATUS_FAILED as i8, + + /// Finished decompression without issues. + /// + /// This indicates the end of the compressed stream has been reached. + Done = TINFL_STATUS_DONE as i8, + + /// The decompressor needs more input data to continue decompressing. + /// + /// This occurs when there's no more consumable input, but the end of the stream hasn't been + /// reached, and you have supplied the + /// [`TINFL_FLAG_HAS_MORE_INPUT`][core::inflate_flags::TINFL_FLAG_HAS_MORE_INPUT] flag to the + /// decompressor. Had you not supplied that flag (which would mean you were asserting that you + /// believed all the data was available) you would have gotten a + /// [`FailedCannotMakeProcess`][Self::FailedCannotMakeProgress] instead. + NeedsMoreInput = TINFL_STATUS_NEEDS_MORE_INPUT as i8, + + /// There is still pending data that didn't fit in the output buffer. + HasMoreOutput = TINFL_STATUS_HAS_MORE_OUTPUT as i8, +} + +impl TINFLStatus { + pub fn from_i32(value: i32) -> Option<TINFLStatus> { + use self::TINFLStatus::*; + match value { + TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS => Some(FailedCannotMakeProgress), + TINFL_STATUS_BAD_PARAM => Some(BadParam), + TINFL_STATUS_ADLER32_MISMATCH => Some(Adler32Mismatch), + TINFL_STATUS_FAILED => Some(Failed), + TINFL_STATUS_DONE => Some(Done), + TINFL_STATUS_NEEDS_MORE_INPUT => Some(NeedsMoreInput), + TINFL_STATUS_HAS_MORE_OUTPUT => Some(HasMoreOutput), + _ => None, + } + } +} + +/// Struct return when decompress_to_vec functions fail. +#[cfg(feature = "with-alloc")] +#[derive(Debug)] +pub struct DecompressError { + /// Decompressor status on failure. See [TINFLStatus] for details. + pub status: TINFLStatus, + /// The currently decompressed data if any. + pub output: Vec<u8>, +} + +#[cfg(feature = "with-alloc")] +impl alloc::fmt::Display for DecompressError { + fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + f.write_str(match self.status { + TINFLStatus::FailedCannotMakeProgress => "Truncated input stream", + TINFLStatus::BadParam => "Invalid output buffer size", + TINFLStatus::Adler32Mismatch => "Adler32 checksum mismatch", + TINFLStatus::Failed => "Invalid input data", + TINFLStatus::Done => unreachable!(), + TINFLStatus::NeedsMoreInput => "Truncated input stream", + TINFLStatus::HasMoreOutput => "Output size exceeded the specified limit", + }) + } +} + +/// Implement Error trait only if std feature is requested as it requires std. +#[cfg(all(feature = "std", feature = "with-alloc"))] +impl Error for DecompressError {} + +#[cfg(feature = "with-alloc")] +fn decompress_error(status: TINFLStatus, output: Vec<u8>) -> Result<Vec<u8>, DecompressError> { + Err(DecompressError { status, output }) +} + +/// Decompress the deflate-encoded data in `input` to a vector. +/// +/// NOTE: This function will not bound the output, so if the output is large enough it can result in an out of memory error. +/// It is therefore suggested to not use this for anything other than test programs, use the functions with a specified limit, or +/// ideally streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] containing the status and so far decompressed data if any on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec(input: &[u8]) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner(input, 0, usize::max_value()) +} + +/// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector. +/// +/// NOTE: This function will not bound the output, so if the output is large enough it can result in an out of memory error. +/// It is therefore suggested to not use this for anything other than test programs, use the functions with a specified limit, or +/// ideally streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] containing the status and so far decompressed data if any on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec_zlib(input: &[u8]) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner( + input, + inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER, + usize::max_value(), + ) +} + +/// Decompress the deflate-encoded data in `input` to a vector. +/// +/// The vector is grown to at most `max_size` bytes; if the data does not fit in that size, +/// the error [struct][DecompressError] will contain the status [`TINFLStatus::HasMoreOutput`] and the data that was decompressed on failure. +/// +/// As this function tries to decompress everything in one go, it's not ideal for general use outside of tests or where the output size is expected to be small. +/// It is suggested to use streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec_with_limit( + input: &[u8], + max_size: usize, +) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner(input, 0, max_size) +} + +/// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector. +/// The vector is grown to at most `max_size` bytes; if the data does not fit in that size, +/// the error [struct][DecompressError] will contain the status [`TINFLStatus::HasMoreOutput`] and the data that was decompressed on failure. +/// +/// As this function tries to decompress everything in one go, it's not ideal for general use outside of tests or where the output size is expected to be small. +/// It is suggested to use streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec_zlib_with_limit( + input: &[u8], + max_size: usize, +) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner(input, inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER, max_size) +} + +/// Backend of various to-[`Vec`] decompressions. +/// +/// Returns [`Vec`] of decompressed data on success and the [error struct][DecompressError] with details on failure. +#[cfg(feature = "with-alloc")] +fn decompress_to_vec_inner( + input: &[u8], + flags: u32, + max_output_size: usize, +) -> Result<Vec<u8>, DecompressError> { + let flags = flags | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + let mut ret: Vec<u8> = vec![0; input.len().saturating_mul(2).min(max_output_size)]; + + let mut decomp = Box::<DecompressorOxide>::default(); + + let mut in_pos = 0; + let mut out_pos = 0; + loop { + // Wrap the whole output slice so we know we have enough of the + // decompressed data for matches. + let (status, in_consumed, out_consumed) = + decompress(&mut decomp, &input[in_pos..], &mut ret, out_pos, flags); + in_pos += in_consumed; + out_pos += out_consumed; + + match status { + TINFLStatus::Done => { + ret.truncate(out_pos); + return Ok(ret); + } + + TINFLStatus::HasMoreOutput => { + // if the buffer has already reached the size limit, return an error + if ret.len() >= max_output_size { + return decompress_error(TINFLStatus::HasMoreOutput, ret); + } + // calculate the new length, capped at `max_output_size` + let new_len = ret.len().saturating_mul(2).min(max_output_size); + ret.resize(new_len, 0); + } + + _ => return decompress_error(status, ret), + } + } +} + +/// Decompress one or more source slices from an iterator into the output slice. +/// +/// * On success, returns the number of bytes that were written. +/// * On failure, returns the failure status code. +/// +/// This will fail if the output buffer is not large enough, but in that case +/// the output buffer will still contain the partial decompression. +/// +/// * `out` the output buffer. +/// * `it` the iterator of input slices. +/// * `zlib_header` if the first slice out of the iterator is expected to have a +/// Zlib header. Otherwise the slices are assumed to be the deflate data only. +/// * `ignore_adler32` if the adler32 checksum should be calculated or not. +pub fn decompress_slice_iter_to_slice<'out, 'inp>( + out: &'out mut [u8], + it: impl Iterator<Item = &'inp [u8]>, + zlib_header: bool, + ignore_adler32: bool, +) -> Result<usize, TINFLStatus> { + use self::core::inflate_flags::*; + + let mut it = it.peekable(); + let r = &mut DecompressorOxide::new(); + let mut out_pos = 0; + while let Some(in_buf) = it.next() { + let has_more = it.peek().is_some(); + let flags = { + let mut f = TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + if zlib_header { + f |= TINFL_FLAG_PARSE_ZLIB_HEADER; + } + if ignore_adler32 { + f |= TINFL_FLAG_IGNORE_ADLER32; + } + if has_more { + f |= TINFL_FLAG_HAS_MORE_INPUT; + } + f + }; + let (status, _input_read, bytes_written) = decompress(r, in_buf, out, out_pos, flags); + out_pos += bytes_written; + match status { + TINFLStatus::NeedsMoreInput => continue, + TINFLStatus::Done => return Ok(out_pos), + e => return Err(e), + } + } + // If we ran out of source slices without getting a `Done` from the + // decompression we can call it a failure. + Err(TINFLStatus::FailedCannotMakeProgress) +} + +#[cfg(test)] +mod test { + use super::{ + decompress_slice_iter_to_slice, decompress_to_vec_zlib, decompress_to_vec_zlib_with_limit, + DecompressError, TINFLStatus, + }; + const ENCODED: [u8; 20] = [ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19, + ]; + + #[test] + fn decompress_vec() { + let res = decompress_to_vec_zlib(&ENCODED[..]).unwrap(); + assert_eq!(res.as_slice(), &b"Hello, zlib!"[..]); + } + + #[test] + fn decompress_vec_with_high_limit() { + let res = decompress_to_vec_zlib_with_limit(&ENCODED[..], 100_000).unwrap(); + assert_eq!(res.as_slice(), &b"Hello, zlib!"[..]); + } + + #[test] + fn fail_to_decompress_with_limit() { + let res = decompress_to_vec_zlib_with_limit(&ENCODED[..], 8); + match res { + Err(DecompressError { + status: TINFLStatus::HasMoreOutput, + .. + }) => (), // expected result + _ => panic!("Decompression output size limit was not enforced"), + } + } + + #[test] + fn test_decompress_slice_iter_to_slice() { + // one slice + let mut out = [0_u8; 12_usize]; + let r = + decompress_slice_iter_to_slice(&mut out, Some(&ENCODED[..]).into_iter(), true, false); + assert_eq!(r, Ok(12)); + assert_eq!(&out[..12], &b"Hello, zlib!"[..]); + + // some chunks at a time + for chunk_size in 1..13 { + // Note: because of https://github.com/Frommi/miniz_oxide/issues/110 our + // out buffer needs to have +1 byte available when the chunk size cuts + // the adler32 data off from the last actual data. + let mut out = [0_u8; 12_usize + 1]; + let r = + decompress_slice_iter_to_slice(&mut out, ENCODED.chunks(chunk_size), true, false); + assert_eq!(r, Ok(12)); + assert_eq!(&out[..12], &b"Hello, zlib!"[..]); + } + + // output buffer too small + let mut out = [0_u8; 3_usize]; + let r = decompress_slice_iter_to_slice(&mut out, ENCODED.chunks(7), true, false); + assert!(r.is_err()); + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/output_buffer.rs b/third_party/rust/miniz_oxide/src/inflate/output_buffer.rs new file mode 100644 index 0000000000..5218a807d3 --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/output_buffer.rs @@ -0,0 +1,60 @@ +/// A wrapper for the output slice used when decompressing. +/// +/// Using this rather than `Cursor` lets us implement the writing methods directly on +/// the buffer and lets us use a usize rather than u64 for the position which helps with +/// performance on 32-bit systems. +pub struct OutputBuffer<'a> { + slice: &'a mut [u8], + position: usize, +} + +impl<'a> OutputBuffer<'a> { + #[inline] + pub fn from_slice_and_pos(slice: &'a mut [u8], position: usize) -> OutputBuffer<'a> { + OutputBuffer { slice, position } + } + + #[inline] + pub const fn position(&self) -> usize { + self.position + } + + #[inline] + pub fn set_position(&mut self, position: usize) { + self.position = position; + } + + /// Write a byte to the current position and increment + /// + /// Assumes that there is space. + #[inline] + pub fn write_byte(&mut self, byte: u8) { + self.slice[self.position] = byte; + self.position += 1; + } + + /// Write a slice to the current position and increment + /// + /// Assumes that there is space. + #[inline] + pub fn write_slice(&mut self, data: &[u8]) { + let len = data.len(); + self.slice[self.position..self.position + len].copy_from_slice(data); + self.position += data.len(); + } + + #[inline] + pub const fn bytes_left(&self) -> usize { + self.slice.len() - self.position + } + + #[inline] + pub const fn get_ref(&self) -> &[u8] { + self.slice + } + + #[inline] + pub fn get_mut(&mut self) -> &mut [u8] { + self.slice + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/stream.rs b/third_party/rust/miniz_oxide/src/inflate/stream.rs new file mode 100644 index 0000000000..ee681b67bb --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/stream.rs @@ -0,0 +1,418 @@ +//! Extra streaming decompression functionality. +//! +//! As of now this is mainly intended for use to build a higher-level wrapper. +#[cfg(feature = "with-alloc")] +use crate::alloc::boxed::Box; +use core::{cmp, mem}; + +use crate::inflate::core::{decompress, inflate_flags, DecompressorOxide, TINFL_LZ_DICT_SIZE}; +use crate::inflate::TINFLStatus; +use crate::{DataFormat, MZError, MZFlush, MZResult, MZStatus, StreamResult}; + +/// Tag that determines reset policy of [InflateState](struct.InflateState.html) +pub trait ResetPolicy { + /// Performs reset + fn reset(&self, state: &mut InflateState); +} + +/// Resets state, without performing expensive ops (e.g. zeroing buffer) +/// +/// Note that not zeroing buffer can lead to security issues when dealing with untrusted input. +pub struct MinReset; + +impl ResetPolicy for MinReset { + fn reset(&self, state: &mut InflateState) { + state.decompressor().init(); + state.dict_ofs = 0; + state.dict_avail = 0; + state.first_call = true; + state.has_flushed = false; + state.last_status = TINFLStatus::NeedsMoreInput; + } +} + +/// Resets state and zero memory, continuing to use the same data format. +pub struct ZeroReset; + +impl ResetPolicy for ZeroReset { + #[inline] + fn reset(&self, state: &mut InflateState) { + MinReset.reset(state); + state.dict = [0; TINFL_LZ_DICT_SIZE]; + } +} + +/// Full reset of the state, including zeroing memory. +/// +/// Requires to provide new data format. +pub struct FullReset(pub DataFormat); + +impl ResetPolicy for FullReset { + #[inline] + fn reset(&self, state: &mut InflateState) { + ZeroReset.reset(state); + state.data_format = self.0; + } +} + +/// A struct that compbines a decompressor with extra data for streaming decompression. +/// +pub struct InflateState { + /// Inner decompressor struct + decomp: DecompressorOxide, + + /// Buffer of input bytes for matches. + /// TODO: Could probably do this a bit cleaner with some + /// Cursor-like class. + /// We may also look into whether we need to keep a buffer here, or just one in the + /// decompressor struct. + dict: [u8; TINFL_LZ_DICT_SIZE], + /// Where in the buffer are we currently at? + dict_ofs: usize, + /// How many bytes of data to be flushed is there currently in the buffer? + dict_avail: usize, + + first_call: bool, + has_flushed: bool, + + /// Whether the input data is wrapped in a zlib header and checksum. + /// TODO: This should be stored in the decompressor. + data_format: DataFormat, + last_status: TINFLStatus, +} + +impl Default for InflateState { + fn default() -> Self { + InflateState { + decomp: DecompressorOxide::default(), + dict: [0; TINFL_LZ_DICT_SIZE], + dict_ofs: 0, + dict_avail: 0, + first_call: true, + has_flushed: false, + data_format: DataFormat::Raw, + last_status: TINFLStatus::NeedsMoreInput, + } + } +} +impl InflateState { + /// Create a new state. + /// + /// Note that this struct is quite large due to internal buffers, and as such storing it on + /// the stack is not recommended. + /// + /// # Parameters + /// `data_format`: Determines whether the compressed data is assumed to wrapped with zlib + /// metadata. + pub fn new(data_format: DataFormat) -> InflateState { + InflateState { + data_format, + ..Default::default() + } + } + + /// Create a new state on the heap. + /// + /// # Parameters + /// `data_format`: Determines whether the compressed data is assumed to wrapped with zlib + /// metadata. + #[cfg(feature = "with-alloc")] + pub fn new_boxed(data_format: DataFormat) -> Box<InflateState> { + let mut b: Box<InflateState> = Box::default(); + b.data_format = data_format; + b + } + + /// Access the innner decompressor. + pub fn decompressor(&mut self) -> &mut DecompressorOxide { + &mut self.decomp + } + + /// Return the status of the last call to `inflate` with this `InflateState`. + pub const fn last_status(&self) -> TINFLStatus { + self.last_status + } + + /// Create a new state using miniz/zlib style window bits parameter. + /// + /// The decompressor does not support different window sizes. As such, + /// any positive (>0) value will set the zlib header flag, while a negative one + /// will not. + #[cfg(feature = "with-alloc")] + pub fn new_boxed_with_window_bits(window_bits: i32) -> Box<InflateState> { + let mut b: Box<InflateState> = Box::default(); + b.data_format = DataFormat::from_window_bits(window_bits); + b + } + + #[inline] + /// Reset the decompressor without re-allocating memory, using the given + /// data format. + pub fn reset(&mut self, data_format: DataFormat) { + self.reset_as(FullReset(data_format)); + } + + #[inline] + /// Resets the state according to specified policy. + pub fn reset_as<T: ResetPolicy>(&mut self, policy: T) { + policy.reset(self) + } +} + +/// Try to decompress from `input` to `output` with the given [`InflateState`] +/// +/// # `flush` +/// +/// Generally, the various [`MZFlush`] flags have meaning only on the compression side. They can be +/// supplied here, but the only one that has any semantic meaning is [`MZFlush::Finish`], which is a +/// signal that the stream is expected to finish, and failing to do so is an error. It isn't +/// necessary to specify it when the stream ends; you'll still get returned a +/// [`MZStatus::StreamEnd`] anyway. Other values either have no effect or cause errors. It's +/// likely that you'll almost always just want to use [`MZFlush::None`]. +/// +/// # Errors +/// +/// Returns [`MZError::Buf`] if the size of the `output` slice is empty or no progress was made due +/// to lack of expected input data, or if called with [`MZFlush::Finish`] and input wasn't all +/// consumed. +/// +/// Returns [`MZError::Data`] if this or a a previous call failed with an error return from +/// [`TINFLStatus`]; probably indicates corrupted data. +/// +/// Returns [`MZError::Stream`] when called with [`MZFlush::Full`] (meaningless on +/// decompression), or when called without [`MZFlush::Finish`] after an earlier call with +/// [`MZFlush::Finish`] has been made. +pub fn inflate( + state: &mut InflateState, + input: &[u8], + output: &mut [u8], + flush: MZFlush, +) -> StreamResult { + let mut bytes_consumed = 0; + let mut bytes_written = 0; + let mut next_in = input; + let mut next_out = output; + + if flush == MZFlush::Full { + return StreamResult::error(MZError::Stream); + } + + let mut decomp_flags = if state.data_format == DataFormat::Zlib { + inflate_flags::TINFL_FLAG_COMPUTE_ADLER32 + } else { + inflate_flags::TINFL_FLAG_IGNORE_ADLER32 + }; + + if (state.data_format == DataFormat::Zlib) + | (state.data_format == DataFormat::ZLibIgnoreChecksum) + { + decomp_flags |= inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER; + } + + let first_call = state.first_call; + state.first_call = false; + if (state.last_status as i32) < 0 { + return StreamResult::error(MZError::Data); + } + + if state.has_flushed && (flush != MZFlush::Finish) { + return StreamResult::error(MZError::Stream); + } + state.has_flushed |= flush == MZFlush::Finish; + + if (flush == MZFlush::Finish) && first_call { + decomp_flags |= inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + let status = decompress(&mut state.decomp, next_in, next_out, 0, decomp_flags); + let in_bytes = status.1; + let out_bytes = status.2; + let status = status.0; + + state.last_status = status; + + bytes_consumed += in_bytes; + bytes_written += out_bytes; + + let ret_status = { + if (status as i32) < 0 { + Err(MZError::Data) + } else if status != TINFLStatus::Done { + state.last_status = TINFLStatus::Failed; + Err(MZError::Buf) + } else { + Ok(MZStatus::StreamEnd) + } + }; + return StreamResult { + bytes_consumed, + bytes_written, + status: ret_status, + }; + } + + if flush != MZFlush::Finish { + decomp_flags |= inflate_flags::TINFL_FLAG_HAS_MORE_INPUT; + } + + if state.dict_avail != 0 { + bytes_written += push_dict_out(state, &mut next_out); + return StreamResult { + bytes_consumed, + bytes_written, + status: Ok( + if (state.last_status == TINFLStatus::Done) && (state.dict_avail == 0) { + MZStatus::StreamEnd + } else { + MZStatus::Ok + }, + ), + }; + } + + let status = inflate_loop( + state, + &mut next_in, + &mut next_out, + &mut bytes_consumed, + &mut bytes_written, + decomp_flags, + flush, + ); + StreamResult { + bytes_consumed, + bytes_written, + status, + } +} + +fn inflate_loop( + state: &mut InflateState, + next_in: &mut &[u8], + next_out: &mut &mut [u8], + total_in: &mut usize, + total_out: &mut usize, + decomp_flags: u32, + flush: MZFlush, +) -> MZResult { + let orig_in_len = next_in.len(); + loop { + let status = decompress( + &mut state.decomp, + *next_in, + &mut state.dict, + state.dict_ofs, + decomp_flags, + ); + + let in_bytes = status.1; + let out_bytes = status.2; + let status = status.0; + + state.last_status = status; + + *next_in = &next_in[in_bytes..]; + *total_in += in_bytes; + + state.dict_avail = out_bytes; + *total_out += push_dict_out(state, next_out); + + // The stream was corrupted, and decompression failed. + if (status as i32) < 0 { + return Err(MZError::Data); + } + + // The decompressor has flushed all it's data and is waiting for more input, but + // there was no more input provided. + if (status == TINFLStatus::NeedsMoreInput) && orig_in_len == 0 { + return Err(MZError::Buf); + } + + if flush == MZFlush::Finish { + if status == TINFLStatus::Done { + // There is not enough space in the output buffer to flush the remaining + // decompressed data in the internal buffer. + return if state.dict_avail != 0 { + Err(MZError::Buf) + } else { + Ok(MZStatus::StreamEnd) + }; + // No more space in the output buffer, but we're not done. + } else if next_out.is_empty() { + return Err(MZError::Buf); + } + } else { + // We're not expected to finish, so it's fine if we can't flush everything yet. + let empty_buf = next_in.is_empty() || next_out.is_empty(); + if (status == TINFLStatus::Done) || empty_buf || (state.dict_avail != 0) { + return if (status == TINFLStatus::Done) && (state.dict_avail == 0) { + // No more data left, we're done. + Ok(MZStatus::StreamEnd) + } else { + // Ok for now, still waiting for more input data or output space. + Ok(MZStatus::Ok) + }; + } + } + } +} + +fn push_dict_out(state: &mut InflateState, next_out: &mut &mut [u8]) -> usize { + let n = cmp::min(state.dict_avail as usize, next_out.len()); + (next_out[..n]).copy_from_slice(&state.dict[state.dict_ofs..state.dict_ofs + n]); + *next_out = &mut mem::take(next_out)[n..]; + state.dict_avail -= n; + state.dict_ofs = (state.dict_ofs + (n)) & (TINFL_LZ_DICT_SIZE - 1); + n +} + +#[cfg(test)] +mod test { + use super::{inflate, InflateState}; + use crate::{DataFormat, MZFlush, MZStatus}; + use alloc::vec; + + #[test] + fn test_state() { + let encoded = [ + 120u8, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, + 19, + ]; + let mut out = vec![0; 50]; + let mut state = InflateState::new_boxed(DataFormat::Zlib); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + + state.reset_as(super::ZeroReset); + out.iter_mut().map(|x| *x = 0).count(); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + + state.reset_as(super::MinReset); + out.iter_mut().map(|x| *x = 0).count(); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + assert_eq!(state.decompressor().adler32(), Some(459605011)); + + // Test state when not computing adler. + state = InflateState::new_boxed(DataFormat::ZLibIgnoreChecksum); + out.iter_mut().map(|x| *x = 0).count(); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + // Not computed, so should be Some(1) + assert_eq!(state.decompressor().adler32(), Some(1)); + // Should still have the checksum read from the header file. + assert_eq!(state.decompressor().adler32_header(), Some(459605011)) + } +} diff --git a/third_party/rust/miniz_oxide/src/lib.rs b/third_party/rust/miniz_oxide/src/lib.rs new file mode 100644 index 0000000000..fd64932b0b --- /dev/null +++ b/third_party/rust/miniz_oxide/src/lib.rs @@ -0,0 +1,210 @@ +//! A pure rust replacement for the [miniz](https://github.com/richgel999/miniz) +//! DEFLATE/zlib encoder/decoder. +//! The plan for this crate is to be used as a back-end for the +//! [flate2](https://github.com/alexcrichton/flate2-rs) crate and eventually remove the +//! need to depend on a C library. +//! +//! # Usage +//! ## Simple compression/decompression: +//! ``` rust +//! +//! use miniz_oxide::inflate::decompress_to_vec; +//! use miniz_oxide::deflate::compress_to_vec; +//! +//! fn roundtrip(data: &[u8]) { +//! let compressed = compress_to_vec(data, 6); +//! let decompressed = decompress_to_vec(compressed.as_slice()).expect("Failed to decompress!"); +//! # let _ = decompressed; +//! } +//! +//! # roundtrip(b"Test_data test data lalalal blabla"); +//! +//! ``` + +#![forbid(unsafe_code)] +#![cfg_attr(not(feature = "std"), no_std)] + +#[cfg(feature = "with-alloc")] +extern crate alloc; + +#[cfg(feature = "with-alloc")] +pub mod deflate; +pub mod inflate; +mod shared; + +pub use crate::shared::update_adler32 as mz_adler32_oxide; +pub use crate::shared::{MZ_ADLER32_INIT, MZ_DEFAULT_WINDOW_BITS}; + +/// A list of flush types. +/// +/// See <http://www.bolet.org/~pornin/deflate-flush.html> for more in-depth info. +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum MZFlush { + /// Don't force any flushing. + /// Used when more input data is expected. + None = 0, + /// Zlib partial flush. + /// Currently treated as [`Sync`]. + Partial = 1, + /// Finish compressing the currently buffered data, and output an empty raw block. + /// Has no use in decompression. + Sync = 2, + /// Same as [`Sync`], but resets the compression dictionary so that further compressed + /// data does not depend on data compressed before the flush. + /// + /// Has no use in decompression, and is an error to supply in that case. + Full = 3, + /// Attempt to flush the remaining data and end the stream. + Finish = 4, + /// Not implemented. + Block = 5, +} + +impl MZFlush { + /// Create an MZFlush value from an integer value. + /// + /// Returns `MZError::Param` on invalid values. + pub fn new(flush: i32) -> Result<Self, MZError> { + match flush { + 0 => Ok(MZFlush::None), + 1 | 2 => Ok(MZFlush::Sync), + 3 => Ok(MZFlush::Full), + 4 => Ok(MZFlush::Finish), + _ => Err(MZError::Param), + } + } +} + +/// A list of miniz successful status codes. +/// +/// These are emitted as the [`Ok`] side of a [`MZResult`] in the [`StreamResult`] returned from +/// [`deflate::stream::deflate()`] or [`inflate::stream::inflate()`]. +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum MZStatus { + /// Operation succeeded. + /// + /// Some data was decompressed or compressed; see the byte counters in the [`StreamResult`] for + /// details. + Ok = 0, + + /// Operation succeeded and end of deflate stream was found. + /// + /// X-ref [`TINFLStatus::Done`][inflate::TINFLStatus::Done] or + /// [`TDEFLStatus::Done`][deflate::core::TDEFLStatus::Done] for `inflate` or `deflate` + /// respectively. + StreamEnd = 1, + + /// Unused + NeedDict = 2, +} + +/// A list of miniz failed status codes. +/// +/// These are emitted as the [`Err`] side of a [`MZResult`] in the [`StreamResult`] returned from +/// [`deflate::stream::deflate()`] or [`inflate::stream::inflate()`]. +#[repr(i32)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum MZError { + /// Unused + ErrNo = -1, + + /// General stream error. + /// + /// See [`inflate::stream::inflate()`] docs for details of how it can occur there. + /// + /// See [`deflate::stream::deflate()`] docs for how it can in principle occur there, though it's + /// believed impossible in practice. + Stream = -2, + + /// Error in inflation; see [`inflate::stream::inflate()`] for details. + /// + /// Not returned from [`deflate::stream::deflate()`]. + Data = -3, + + /// Unused + Mem = -4, + + /// Buffer-related error. + /// + /// See the docs of [`deflate::stream::deflate()`] or [`inflate::stream::inflate()`] for details + /// of when it would trigger in the one you're using. + Buf = -5, + + /// Unused + Version = -6, + + /// Bad parameters. + /// + /// This can be returned from [`deflate::stream::deflate()`] in the case of bad parameters. See + /// [`TDEFLStatus::BadParam`][deflate::core::TDEFLStatus::BadParam]. + Param = -10_000, +} + +/// How compressed data is wrapped. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum DataFormat { + /// Wrapped using the [zlib](http://www.zlib.org/rfc-zlib.html) format. + Zlib, + /// Zlib wrapped but ignore and don't compute the adler32 checksum. + /// Currently only used for inflate, behaves the same as Zlib for compression. + ZLibIgnoreChecksum, + /// Raw DEFLATE. + Raw, +} + +impl DataFormat { + pub fn from_window_bits(window_bits: i32) -> DataFormat { + if window_bits > 0 { + DataFormat::Zlib + } else { + DataFormat::Raw + } + } + + pub fn to_window_bits(self) -> i32 { + match self { + DataFormat::Zlib | DataFormat::ZLibIgnoreChecksum => shared::MZ_DEFAULT_WINDOW_BITS, + DataFormat::Raw => -shared::MZ_DEFAULT_WINDOW_BITS, + } + } +} + +/// `Result` alias for all miniz status codes both successful and failed. +pub type MZResult = Result<MZStatus, MZError>; + +/// A structure containg the result of a call to the inflate or deflate streaming functions. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct StreamResult { + /// The number of bytes consumed from the input slice. + pub bytes_consumed: usize, + /// The number of bytes written to the output slice. + pub bytes_written: usize, + /// The return status of the call. + pub status: MZResult, +} + +impl StreamResult { + #[inline] + pub const fn error(error: MZError) -> StreamResult { + StreamResult { + bytes_consumed: 0, + bytes_written: 0, + status: Err(error), + } + } +} + +impl core::convert::From<StreamResult> for MZResult { + fn from(res: StreamResult) -> Self { + res.status + } +} + +impl core::convert::From<&StreamResult> for MZResult { + fn from(res: &StreamResult) -> Self { + res.status + } +} diff --git a/third_party/rust/miniz_oxide/src/shared.rs b/third_party/rust/miniz_oxide/src/shared.rs new file mode 100644 index 0000000000..8b81fb112b --- /dev/null +++ b/third_party/rust/miniz_oxide/src/shared.rs @@ -0,0 +1,25 @@ +#[doc(hidden)] +pub const MZ_ADLER32_INIT: u32 = 1; + +#[doc(hidden)] +pub const MZ_DEFAULT_WINDOW_BITS: i32 = 15; + +pub const HUFFMAN_LENGTH_ORDER: [u8; 19] = [ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, +]; + +#[doc(hidden)] +#[cfg(not(feature = "simd"))] +pub fn update_adler32(adler: u32, data: &[u8]) -> u32 { + let mut hash = adler::Adler32::from_checksum(adler); + hash.write_slice(data); + hash.checksum() +} + +#[doc(hidden)] +#[cfg(feature = "simd")] +pub fn update_adler32(adler: u32, data: &[u8]) -> u32 { + let mut hash = simd_adler32::Adler32::from_checksum(adler); + hash.write(data); + hash.finish() +} |