diff options
Diffstat (limited to 'third_party/rust/miniz_oxide/src/inflate')
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/core.rs | 1932 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/mod.rs | 337 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/output_buffer.rs | 60 | ||||
-rw-r--r-- | third_party/rust/miniz_oxide/src/inflate/stream.rs | 418 |
4 files changed, 2747 insertions, 0 deletions
diff --git a/third_party/rust/miniz_oxide/src/inflate/core.rs b/third_party/rust/miniz_oxide/src/inflate/core.rs new file mode 100644 index 0000000000..630e5e6fdb --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/core.rs @@ -0,0 +1,1932 @@ +//! Streaming decompression functionality. + +use super::*; +use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER}; + +use ::core::convert::TryInto; +use ::core::{cmp, slice}; + +use self::output_buffer::OutputBuffer; + +pub const TINFL_LZ_DICT_SIZE: usize = 32_768; + +/// A struct containing huffman code lengths and the huffman code tree used by the decompressor. +struct HuffmanTable { + /// Length of the code at each index. + pub code_size: [u8; MAX_HUFF_SYMBOLS_0], + /// Fast lookup table for shorter huffman codes. + /// + /// See `HuffmanTable::fast_lookup`. + pub look_up: [i16; FAST_LOOKUP_SIZE as usize], + /// Full huffman tree. + /// + /// Positive values are edge nodes/symbols, negative values are + /// parent nodes/references to other nodes. + pub tree: [i16; MAX_HUFF_TREE_SIZE], +} + +impl HuffmanTable { + const fn new() -> HuffmanTable { + HuffmanTable { + code_size: [0; MAX_HUFF_SYMBOLS_0], + look_up: [0; FAST_LOOKUP_SIZE as usize], + tree: [0; MAX_HUFF_TREE_SIZE], + } + } + + /// Look for a symbol in the fast lookup table. + /// The symbol is stored in the lower 9 bits, the length in the next 6. + /// If the returned value is negative, the code wasn't found in the + /// fast lookup table and the full tree has to be traversed to find the code. + #[inline] + fn fast_lookup(&self, bit_buf: BitBuffer) -> i16 { + self.look_up[(bit_buf & BitBuffer::from(FAST_LOOKUP_SIZE - 1)) as usize] + } + + /// Get the symbol and the code length from the huffman tree. + #[inline] + fn tree_lookup(&self, fast_symbol: i32, bit_buf: BitBuffer, mut code_len: u32) -> (i32, u32) { + let mut symbol = fast_symbol; + // We step through the tree until we encounter a positive value, which indicates a + // symbol. + loop { + // symbol here indicates the position of the left (0) node, if the next bit is 1 + // we add 1 to the lookup position to get the right node. + symbol = i32::from(self.tree[(!symbol + ((bit_buf >> code_len) & 1) as i32) as usize]); + code_len += 1; + if symbol >= 0 { + break; + } + } + (symbol, code_len) + } + + #[inline] + /// Look up a symbol and code length from the bits in the provided bit buffer. + /// + /// Returns Some(symbol, length) on success, + /// None if the length is 0. + /// + /// It's possible we could avoid checking for 0 if we can guarantee a sane table. + /// TODO: Check if a smaller type for code_len helps performance. + fn lookup(&self, bit_buf: BitBuffer) -> Option<(i32, u32)> { + let symbol = self.fast_lookup(bit_buf).into(); + if symbol >= 0 { + if (symbol >> 9) as u32 != 0 { + Some((symbol, (symbol >> 9) as u32)) + } else { + // Zero-length code. + None + } + } else { + // We didn't get a symbol from the fast lookup table, so check the tree instead. + Some(self.tree_lookup(symbol, bit_buf, FAST_LOOKUP_BITS.into())) + } + } +} + +/// The number of huffman tables used. +const MAX_HUFF_TABLES: usize = 3; +/// The length of the first (literal/length) huffman table. +const MAX_HUFF_SYMBOLS_0: usize = 288; +/// The length of the second (distance) huffman table. +const MAX_HUFF_SYMBOLS_1: usize = 32; +/// The length of the last (huffman code length) huffman table. +const _MAX_HUFF_SYMBOLS_2: usize = 19; +/// The maximum length of a code that can be looked up in the fast lookup table. +const FAST_LOOKUP_BITS: u8 = 10; +/// The size of the fast lookup table. +const FAST_LOOKUP_SIZE: u32 = 1 << FAST_LOOKUP_BITS; +const MAX_HUFF_TREE_SIZE: usize = MAX_HUFF_SYMBOLS_0 * 2; +const LITLEN_TABLE: usize = 0; +const DIST_TABLE: usize = 1; +const HUFFLEN_TABLE: usize = 2; + +/// Flags to [`decompress()`] to control how inflation works. +/// +/// These define bits for a bitmask argument. +pub mod inflate_flags { + /// Should we try to parse a zlib header? + /// + /// If unset, the function will expect an RFC1951 deflate stream. If set, it will expect a + /// RFC1950 zlib wrapper around the deflate stream. + pub const TINFL_FLAG_PARSE_ZLIB_HEADER: u32 = 1; + + /// There will be more input that hasn't been given to the decompressor yet. + /// + /// This is useful when you want to decompress what you have so far, + /// even if you know there is probably more input that hasn't gotten here yet (_e.g._, over a + /// network connection). When [`decompress()`][super::decompress] reaches the end of the input + /// without finding the end of the compressed stream, it will return + /// [`TINFLStatus::NeedsMoreInput`][super::TINFLStatus::NeedsMoreInput] if this is set, + /// indicating that you should get more data before calling again. If not set, it will return + /// [`TINFLStatus::FailedCannotMakeProgress`][super::TINFLStatus::FailedCannotMakeProgress] + /// suggesting the stream is corrupt, since you claimed it was all there. + pub const TINFL_FLAG_HAS_MORE_INPUT: u32 = 2; + + /// The output buffer should not wrap around. + pub const TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: u32 = 4; + + /// Calculate the adler32 checksum of the output data even if we're not inflating a zlib stream. + /// + /// If [`TINFL_FLAG_IGNORE_ADLER32`] is specified, it will override this. + /// + /// NOTE: Enabling/disabling this between calls to decompress will result in an incorect + /// checksum. + pub const TINFL_FLAG_COMPUTE_ADLER32: u32 = 8; + + /// Ignore adler32 checksum even if we are inflating a zlib stream. + /// + /// Overrides [`TINFL_FLAG_COMPUTE_ADLER32`] if both are enabled. + /// + /// NOTE: This flag does not exist in miniz as it does not support this and is a + /// custom addition for miniz_oxide. + /// + /// NOTE: Should not be changed from enabled to disabled after decompression has started, + /// this will result in checksum failure (outside the unlikely event where the checksum happens + /// to match anyway). + pub const TINFL_FLAG_IGNORE_ADLER32: u32 = 64; +} + +use self::inflate_flags::*; + +const MIN_TABLE_SIZES: [u16; 3] = [257, 1, 4]; + +#[cfg(target_pointer_width = "64")] +type BitBuffer = u64; + +#[cfg(not(target_pointer_width = "64"))] +type BitBuffer = u32; + +/// Main decompression struct. +/// +pub struct DecompressorOxide { + /// Current state of the decompressor. + state: core::State, + /// Number of bits in the bit buffer. + num_bits: u32, + /// Zlib CMF + z_header0: u32, + /// Zlib FLG + z_header1: u32, + /// Adler32 checksum from the zlib header. + z_adler32: u32, + /// 1 if the current block is the last block, 0 otherwise. + finish: u32, + /// The type of the current block. + block_type: u32, + /// 1 if the adler32 value should be checked. + check_adler32: u32, + /// Last match distance. + dist: u32, + /// Variable used for match length, symbols, and a number of other things. + counter: u32, + /// Number of extra bits for the last length or distance code. + num_extra: u32, + /// Number of entries in each huffman table. + table_sizes: [u32; MAX_HUFF_TABLES], + /// Buffer of input data. + bit_buf: BitBuffer, + /// Huffman tables. + tables: [HuffmanTable; MAX_HUFF_TABLES], + /// Raw block header. + raw_header: [u8; 4], + /// Huffman length codes. + len_codes: [u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137], +} + +impl DecompressorOxide { + /// Create a new tinfl_decompressor with all fields set to 0. + pub fn new() -> DecompressorOxide { + DecompressorOxide::default() + } + + /// Set the current state to `Start`. + #[inline] + pub fn init(&mut self) { + // The rest of the data is reset or overwritten when used. + self.state = core::State::Start; + } + + /// Returns the adler32 checksum of the currently decompressed data. + /// Note: Will return Some(1) if decompressing zlib but ignoring adler32. + #[inline] + pub fn adler32(&self) -> Option<u32> { + if self.state != State::Start && !self.state.is_failure() && self.z_header0 != 0 { + Some(self.check_adler32) + } else { + None + } + } + + /// Returns the adler32 that was read from the zlib header if it exists. + #[inline] + pub fn adler32_header(&self) -> Option<u32> { + if self.state != State::Start && self.state != State::BadZlibHeader && self.z_header0 != 0 { + Some(self.z_adler32) + } else { + None + } + } +} + +impl Default for DecompressorOxide { + /// Create a new tinfl_decompressor with all fields set to 0. + #[inline(always)] + fn default() -> Self { + DecompressorOxide { + state: core::State::Start, + num_bits: 0, + z_header0: 0, + z_header1: 0, + z_adler32: 0, + finish: 0, + block_type: 0, + check_adler32: 0, + dist: 0, + counter: 0, + num_extra: 0, + table_sizes: [0; MAX_HUFF_TABLES], + bit_buf: 0, + // TODO:(oyvindln) Check that copies here are optimized out in release mode. + tables: [ + HuffmanTable::new(), + HuffmanTable::new(), + HuffmanTable::new(), + ], + raw_header: [0; 4], + len_codes: [0; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137], + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +enum State { + Start = 0, + ReadZlibCmf, + ReadZlibFlg, + ReadBlockHeader, + BlockTypeNoCompression, + RawHeader, + RawMemcpy1, + RawMemcpy2, + ReadTableSizes, + ReadHufflenTableCodeSize, + ReadLitlenDistTablesCodeSize, + ReadExtraBitsCodeSize, + DecodeLitlen, + WriteSymbol, + ReadExtraBitsLitlen, + DecodeDistance, + ReadExtraBitsDistance, + RawReadFirstByte, + RawStoreFirstByte, + WriteLenBytesToEnd, + BlockDone, + HuffDecodeOuterLoop1, + HuffDecodeOuterLoop2, + ReadAdler32, + + DoneForever, + + // Failure states. + BlockTypeUnexpected, + BadCodeSizeSum, + BadTotalSymbols, + BadZlibHeader, + DistanceOutOfBounds, + BadRawLength, + BadCodeSizeDistPrevLookup, + InvalidLitlen, + InvalidDist, + InvalidCodeLen, +} + +impl State { + fn is_failure(self) -> bool { + match self { + BlockTypeUnexpected => true, + BadCodeSizeSum => true, + BadTotalSymbols => true, + BadZlibHeader => true, + DistanceOutOfBounds => true, + BadRawLength => true, + BadCodeSizeDistPrevLookup => true, + InvalidLitlen => true, + InvalidDist => true, + _ => false, + } + } + + #[inline] + fn begin(&mut self, new_state: State) { + *self = new_state; + } +} + +use self::State::*; + +// Not sure why miniz uses 32-bit values for these, maybe alignment/cache again? +// # Optimization +// We add a extra value at the end and make the tables 32 elements long +// so we can use a mask to avoid bounds checks. +// The invalid values are set to something high enough to avoid underflowing +// the match length. +/// Base length for each length code. +/// +/// The base is used together with the value of the extra bits to decode the actual +/// length/distance values in a match. +#[rustfmt::skip] +const LENGTH_BASE: [u16; 32] = [ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 512, 512, 512 +]; + +/// Number of extra bits for each length code. +#[rustfmt::skip] +const LENGTH_EXTRA: [u8; 32] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0 +]; + +/// Base length for each distance code. +#[rustfmt::skip] +const DIST_BASE: [u16; 32] = [ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, + 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, + 2049, 3073, 4097, 6145, 8193, 12_289, 16_385, 24_577, 32_768, 32_768 +]; + +/// Number of extra bits for each distance code. +#[rustfmt::skip] +const DIST_EXTRA: [u8; 32] = [ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 13, 13 +]; + +/// The mask used when indexing the base/extra arrays. +const BASE_EXTRA_MASK: usize = 32 - 1; + +/// Sets the value of all the elements of the slice to `val`. +#[inline] +fn memset<T: Copy>(slice: &mut [T], val: T) { + for x in slice { + *x = val + } +} + +/// Read an le u16 value from the slice iterator. +/// +/// # Panics +/// Panics if there are less than two bytes left. +#[inline] +fn read_u16_le(iter: &mut slice::Iter<u8>) -> u16 { + let ret = { + let two_bytes = iter.as_ref()[..2].try_into().unwrap(); + u16::from_le_bytes(two_bytes) + }; + iter.nth(1); + ret +} + +/// Read an le u32 value from the slice iterator. +/// +/// # Panics +/// Panics if there are less than four bytes left. +#[inline(always)] +#[cfg(target_pointer_width = "64")] +fn read_u32_le(iter: &mut slice::Iter<u8>) -> u32 { + let ret = { + let four_bytes: [u8; 4] = iter.as_ref()[..4].try_into().unwrap(); + u32::from_le_bytes(four_bytes) + }; + iter.nth(3); + ret +} + +/// Ensure that there is data in the bit buffer. +/// +/// On 64-bit platform, we use a 64-bit value so this will +/// result in there being at least 32 bits in the bit buffer. +/// This function assumes that there is at least 4 bytes left in the input buffer. +#[inline(always)] +#[cfg(target_pointer_width = "64")] +fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>) { + // Read four bytes into the buffer at once. + if l.num_bits < 30 { + l.bit_buf |= BitBuffer::from(read_u32_le(in_iter)) << l.num_bits; + l.num_bits += 32; + } +} + +/// Same as previous, but for non-64-bit platforms. +/// Ensures at least 16 bits are present, requires at least 2 bytes in the in buffer. +#[inline(always)] +#[cfg(not(target_pointer_width = "64"))] +fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>) { + // If the buffer is 32-bit wide, read 2 bytes instead. + if l.num_bits < 15 { + l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits; + l.num_bits += 16; + } +} + +/// Check that the zlib header is correct and that there is enough space in the buffer +/// for the window size specified in the header. +/// +/// See https://tools.ietf.org/html/rfc1950 +#[inline] +fn validate_zlib_header(cmf: u32, flg: u32, flags: u32, mask: usize) -> Action { + let mut failed = + // cmf + flg should be divisible by 31. + (((cmf * 256) + flg) % 31 != 0) || + // If this flag is set, a dictionary was used for this zlib compressed data. + // This is currently not supported by miniz or miniz-oxide + ((flg & 0b0010_0000) != 0) || + // Compression method. Only 8(DEFLATE) is defined by the standard. + ((cmf & 15) != 8); + + let window_size = 1 << ((cmf >> 4) + 8); + if (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) == 0 { + // Bail if the buffer is wrapping and the window size is larger than the buffer. + failed |= (mask + 1) < window_size; + } + + // Zlib doesn't allow window sizes above 32 * 1024. + failed |= window_size > 32_768; + + if failed { + Action::Jump(BadZlibHeader) + } else { + Action::Jump(ReadBlockHeader) + } +} + +enum Action { + None, + Jump(State), + End(TINFLStatus), +} + +/// Try to decode the next huffman code, and puts it in the counter field of the decompressor +/// if successful. +/// +/// # Returns +/// The specified action returned from `f` on success, +/// `Action::End` if there are not enough data left to decode a symbol. +fn decode_huffman_code<F>( + r: &mut DecompressorOxide, + l: &mut LocalVars, + table: usize, + flags: u32, + in_iter: &mut slice::Iter<u8>, + f: F, +) -> Action +where + F: FnOnce(&mut DecompressorOxide, &mut LocalVars, i32) -> Action, +{ + // As the huffman codes can be up to 15 bits long we need at least 15 bits + // ready in the bit buffer to start decoding the next huffman code. + if l.num_bits < 15 { + // First, make sure there is enough data in the bit buffer to decode a huffman code. + if in_iter.len() < 2 { + // If there is less than 2 bytes left in the input buffer, we try to look up + // the huffman code with what's available, and return if that doesn't succeed. + // Original explanation in miniz: + // /* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes + // * remaining in the input buffer falls below 2. */ + // /* It reads just enough bytes from the input stream that are needed to decode + // * the next Huffman code (and absolutely no more). It works by trying to fully + // * decode a */ + // /* Huffman code by using whatever bits are currently present in the bit buffer. + // * If this fails, it reads another byte, and tries again until it succeeds or + // * until the */ + // /* bit buffer contains >=15 bits (deflate's max. Huffman code size). */ + loop { + let mut temp = i32::from(r.tables[table].fast_lookup(l.bit_buf)); + + if temp >= 0 { + let code_len = (temp >> 9) as u32; + if (code_len != 0) && (l.num_bits >= code_len) { + break; + } + } else if l.num_bits > FAST_LOOKUP_BITS.into() { + let mut code_len = u32::from(FAST_LOOKUP_BITS); + loop { + temp = i32::from( + r.tables[table].tree + [(!temp + ((l.bit_buf >> code_len) & 1) as i32) as usize], + ); + code_len += 1; + if temp >= 0 || l.num_bits < code_len + 1 { + break; + } + } + if temp >= 0 { + break; + } + } + + // TODO: miniz jumps straight to here after getting here again after failing to read + // a byte. + // Doing that lets miniz avoid re-doing the lookup that that was done in the + // previous call. + let mut byte = 0; + if let a @ Action::End(_) = read_byte(in_iter, flags, |b| { + byte = b; + Action::None + }) { + return a; + }; + + // Do this outside closure for now to avoid borrowing r. + l.bit_buf |= BitBuffer::from(byte) << l.num_bits; + l.num_bits += 8; + + if l.num_bits >= 15 { + break; + } + } + } else { + // There is enough data in the input buffer, so read the next two bytes + // and add them to the bit buffer. + // Unwrapping here is fine since we just checked that there are at least two + // bytes left. + l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits; + l.num_bits += 16; + } + } + + // We now have at least 15 bits in the input buffer. + let mut symbol = i32::from(r.tables[table].fast_lookup(l.bit_buf)); + let code_len; + // If the symbol was found in the fast lookup table. + if symbol >= 0 { + // Get the length value from the top bits. + // As we shift down the sign bit, converting to an unsigned value + // shouldn't overflow. + code_len = (symbol >> 9) as u32; + // Mask out the length value. + symbol &= 511; + } else { + let res = r.tables[table].tree_lookup(symbol, l.bit_buf, u32::from(FAST_LOOKUP_BITS)); + symbol = res.0; + code_len = res.1 as u32; + }; + + if code_len == 0 { + return Action::Jump(InvalidCodeLen); + } + + l.bit_buf >>= code_len as u32; + l.num_bits -= code_len; + f(r, l, symbol) +} + +/// Try to read one byte from `in_iter` and call `f` with the read byte as an argument, +/// returning the result. +/// If reading fails, `Action::End is returned` +#[inline] +fn read_byte<F>(in_iter: &mut slice::Iter<u8>, flags: u32, f: F) -> Action +where + F: FnOnce(u8) -> Action, +{ + match in_iter.next() { + None => end_of_input(flags), + Some(&byte) => f(byte), + } +} + +// TODO: `l: &mut LocalVars` may be slow similar to decompress_fast (even with inline(always)) +/// Try to read `amount` number of bits from `in_iter` and call the function `f` with the bits as an +/// an argument after reading, returning the result of that function, or `Action::End` if there are +/// not enough bytes left. +#[inline] +#[allow(clippy::while_immutable_condition)] +fn read_bits<F>( + l: &mut LocalVars, + amount: u32, + in_iter: &mut slice::Iter<u8>, + flags: u32, + f: F, +) -> Action +where + F: FnOnce(&mut LocalVars, BitBuffer) -> Action, +{ + // Clippy gives a false positive warning here due to the closure. + // Read enough bytes from the input iterator to cover the number of bits we want. + while l.num_bits < amount { + match read_byte(in_iter, flags, |byte| { + l.bit_buf |= BitBuffer::from(byte) << l.num_bits; + l.num_bits += 8; + Action::None + }) { + Action::None => (), + // If there are not enough bytes in the input iterator, return and signal that we need + // more. + action => return action, + } + } + + let bits = l.bit_buf & ((1 << amount) - 1); + l.bit_buf >>= amount; + l.num_bits -= amount; + f(l, bits) +} + +#[inline] +fn pad_to_bytes<F>(l: &mut LocalVars, in_iter: &mut slice::Iter<u8>, flags: u32, f: F) -> Action +where + F: FnOnce(&mut LocalVars) -> Action, +{ + let num_bits = l.num_bits & 7; + read_bits(l, num_bits, in_iter, flags, |l, _| f(l)) +} + +#[inline] +fn end_of_input(flags: u32) -> Action { + Action::End(if flags & TINFL_FLAG_HAS_MORE_INPUT != 0 { + TINFLStatus::NeedsMoreInput + } else { + TINFLStatus::FailedCannotMakeProgress + }) +} + +#[inline] +fn undo_bytes(l: &mut LocalVars, max: u32) -> u32 { + let res = cmp::min(l.num_bits >> 3, max); + l.num_bits -= res << 3; + res +} + +fn start_static_table(r: &mut DecompressorOxide) { + r.table_sizes[LITLEN_TABLE] = 288; + r.table_sizes[DIST_TABLE] = 32; + memset(&mut r.tables[LITLEN_TABLE].code_size[0..144], 8); + memset(&mut r.tables[LITLEN_TABLE].code_size[144..256], 9); + memset(&mut r.tables[LITLEN_TABLE].code_size[256..280], 7); + memset(&mut r.tables[LITLEN_TABLE].code_size[280..288], 8); + memset(&mut r.tables[DIST_TABLE].code_size[0..32], 5); +} + +fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Action { + loop { + let table = &mut r.tables[r.block_type as usize]; + let table_size = r.table_sizes[r.block_type as usize] as usize; + let mut total_symbols = [0u32; 16]; + let mut next_code = [0u32; 17]; + memset(&mut table.look_up[..], 0); + memset(&mut table.tree[..], 0); + + for &code_size in &table.code_size[..table_size] { + total_symbols[code_size as usize] += 1; + } + + let mut used_symbols = 0; + let mut total = 0; + for i in 1..16 { + used_symbols += total_symbols[i]; + total += total_symbols[i]; + total <<= 1; + next_code[i + 1] = total; + } + + if total != 65_536 && used_symbols > 1 { + return Action::Jump(BadTotalSymbols); + } + + let mut tree_next = -1; + for symbol_index in 0..table_size { + let mut rev_code = 0; + let code_size = table.code_size[symbol_index]; + if code_size == 0 { + continue; + } + + let mut cur_code = next_code[code_size as usize]; + next_code[code_size as usize] += 1; + + for _ in 0..code_size { + rev_code = (rev_code << 1) | (cur_code & 1); + cur_code >>= 1; + } + + if code_size <= FAST_LOOKUP_BITS { + let k = (i16::from(code_size) << 9) | symbol_index as i16; + while rev_code < FAST_LOOKUP_SIZE { + table.look_up[rev_code as usize] = k; + rev_code += 1 << code_size; + } + continue; + } + + let mut tree_cur = table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize]; + if tree_cur == 0 { + table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize] = tree_next as i16; + tree_cur = tree_next; + tree_next -= 2; + } + + rev_code >>= FAST_LOOKUP_BITS - 1; + for _ in FAST_LOOKUP_BITS + 1..code_size { + rev_code >>= 1; + tree_cur -= (rev_code & 1) as i16; + if table.tree[(-tree_cur - 1) as usize] == 0 { + table.tree[(-tree_cur - 1) as usize] = tree_next as i16; + tree_cur = tree_next; + tree_next -= 2; + } else { + tree_cur = table.tree[(-tree_cur - 1) as usize]; + } + } + + rev_code >>= 1; + tree_cur -= (rev_code & 1) as i16; + table.tree[(-tree_cur - 1) as usize] = symbol_index as i16; + } + + if r.block_type == 2 { + l.counter = 0; + return Action::Jump(ReadLitlenDistTablesCodeSize); + } + + if r.block_type == 0 { + break; + } + r.block_type -= 1; + } + + l.counter = 0; + Action::Jump(DecodeLitlen) +} + +// A helper macro for generating the state machine. +// +// As Rust doesn't have fallthrough on matches, we have to return to the match statement +// and jump for each state change. (Which would ideally be optimized away, but often isn't.) +macro_rules! generate_state { + ($state: ident, $state_machine: tt, $f: expr) => { + loop { + match $f { + Action::None => continue, + Action::Jump(new_state) => { + $state = new_state; + continue $state_machine; + }, + Action::End(result) => break $state_machine result, + } + } + }; +} + +#[derive(Copy, Clone)] +struct LocalVars { + pub bit_buf: BitBuffer, + pub num_bits: u32, + pub dist: u32, + pub counter: u32, + pub num_extra: u32, +} + +#[inline] +fn transfer( + out_slice: &mut [u8], + mut source_pos: usize, + mut out_pos: usize, + match_len: usize, + out_buf_size_mask: usize, +) { + for _ in 0..match_len >> 2 { + out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; + out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask]; + source_pos += 4; + out_pos += 4; + } + + match match_len & 3 { + 0 => (), + 1 => out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask], + 2 => { + out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + } + 3 => { + out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; + } + _ => unreachable!(), + } +} + +/// Presumes that there is at least match_len bytes in output left. +#[inline] +fn apply_match( + out_slice: &mut [u8], + out_pos: usize, + dist: usize, + match_len: usize, + out_buf_size_mask: usize, +) { + debug_assert!(out_pos + match_len <= out_slice.len()); + + let source_pos = out_pos.wrapping_sub(dist) & out_buf_size_mask; + + if match_len == 3 { + // Fast path for match len 3. + out_slice[out_pos] = out_slice[source_pos]; + out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; + out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; + return; + } + + if cfg!(not(any(target_arch = "x86", target_arch = "x86_64"))) { + // We are not on x86 so copy manually. + transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask); + return; + } + + if source_pos >= out_pos && (source_pos - out_pos) < match_len { + transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask); + } else if match_len <= dist && source_pos + match_len < out_slice.len() { + // Destination and source segments does not intersect and source does not wrap. + if source_pos < out_pos { + let (from_slice, to_slice) = out_slice.split_at_mut(out_pos); + to_slice[..match_len].copy_from_slice(&from_slice[source_pos..source_pos + match_len]); + } else { + let (to_slice, from_slice) = out_slice.split_at_mut(source_pos); + to_slice[out_pos..out_pos + match_len].copy_from_slice(&from_slice[..match_len]); + } + } else { + transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask); + } +} + +/// Fast inner decompression loop which is run while there is at least +/// 259 bytes left in the output buffer, and at least 6 bytes left in the input buffer +/// (The maximum one match would need + 1). +/// +/// This was inspired by a similar optimization in zlib, which uses this info to do +/// faster unchecked copies of multiple bytes at a time. +/// Currently we don't do this here, but this function does avoid having to jump through the +/// big match loop on each state change(as rust does not have fallthrough or gotos at the moment), +/// and already improves decompression speed a fair bit. +fn decompress_fast( + r: &mut DecompressorOxide, + in_iter: &mut slice::Iter<u8>, + out_buf: &mut OutputBuffer, + flags: u32, + local_vars: &mut LocalVars, + out_buf_size_mask: usize, +) -> (TINFLStatus, State) { + // Make a local copy of the most used variables, to avoid having to update and read from values + // in a random memory location and to encourage more register use. + let mut l = *local_vars; + let mut state; + + let status: TINFLStatus = 'o: loop { + state = State::DecodeLitlen; + loop { + // This function assumes that there is at least 259 bytes left in the output buffer, + // and that there is at least 14 bytes left in the input buffer. 14 input bytes: + // 15 (prev lit) + 15 (length) + 5 (length extra) + 15 (dist) + // + 29 + 32 (left in bit buf, including last 13 dist extra) = 111 bits < 14 bytes + // We need the one extra byte as we may write one length and one full match + // before checking again. + if out_buf.bytes_left() < 259 || in_iter.len() < 14 { + state = State::DecodeLitlen; + break 'o TINFLStatus::Done; + } + + fill_bit_buffer(&mut l, in_iter); + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + l.counter = symbol as u32; + l.bit_buf >>= code_len; + l.num_bits -= code_len; + + if (l.counter & 256) != 0 { + // The symbol is not a literal. + break; + } else { + // If we have a 32-bit buffer we need to read another two bytes now + // to have enough bits to keep going. + if cfg!(not(target_pointer_width = "64")) { + fill_bit_buffer(&mut l, in_iter); + } + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + l.bit_buf >>= code_len; + l.num_bits -= code_len; + // The previous symbol was a literal, so write it directly and check + // the next one. + out_buf.write_byte(l.counter as u8); + if (symbol & 256) != 0 { + l.counter = symbol as u32; + // The symbol is a length value. + break; + } else { + // The symbol is a literal, so write it directly and continue. + out_buf.write_byte(symbol as u8); + } + } else { + state.begin(InvalidCodeLen); + break 'o TINFLStatus::Failed; + } + } + } else { + state.begin(InvalidCodeLen); + break 'o TINFLStatus::Failed; + } + } + + // Mask the top bits since they may contain length info. + l.counter &= 511; + if l.counter == 256 { + // We hit the end of block symbol. + state.begin(BlockDone); + break 'o TINFLStatus::Done; + } else if l.counter > 285 { + // Invalid code. + // We already verified earlier that the code is > 256. + state.begin(InvalidLitlen); + break 'o TINFLStatus::Failed; + } else { + // The symbol was a length code. + // # Optimization + // Mask the value to avoid bounds checks + // We could use get_unchecked later if can statically verify that + // this will never go out of bounds. + l.num_extra = u32::from(LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + // Length and distance codes have a number of extra bits depending on + // the base, which together with the base gives us the exact value. + + fill_bit_buffer(&mut l, in_iter); + if l.num_extra != 0 { + let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1); + l.bit_buf >>= l.num_extra; + l.num_bits -= l.num_extra; + l.counter += extra_bits as u32; + } + + // We found a length code, so a distance code should follow. + + if cfg!(not(target_pointer_width = "64")) { + fill_bit_buffer(&mut l, in_iter); + } + + if let Some((mut symbol, code_len)) = r.tables[DIST_TABLE].lookup(l.bit_buf) { + symbol &= 511; + l.bit_buf >>= code_len; + l.num_bits -= code_len; + if symbol > 29 { + state.begin(InvalidDist); + break 'o TINFLStatus::Failed; + } + + l.num_extra = u32::from(DIST_EXTRA[symbol as usize]); + l.dist = u32::from(DIST_BASE[symbol as usize]); + } else { + state.begin(InvalidCodeLen); + break 'o TINFLStatus::Failed; + } + + if l.num_extra != 0 { + fill_bit_buffer(&mut l, in_iter); + let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1); + l.bit_buf >>= l.num_extra; + l.num_bits -= l.num_extra; + l.dist += extra_bits as u32; + } + + let position = out_buf.position(); + if l.dist as usize > out_buf.position() + && (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0) + { + // We encountered a distance that refers a position before + // the start of the decoded data, so we can't continue. + state.begin(DistanceOutOfBounds); + break TINFLStatus::Failed; + } + + apply_match( + out_buf.get_mut(), + position, + l.dist as usize, + l.counter as usize, + out_buf_size_mask, + ); + + out_buf.set_position(position + l.counter as usize); + } + }; + + *local_vars = l; + (status, state) +} + +/// Main decompression function. Keeps decompressing data from `in_buf` until the `in_buf` is +/// empty, `out` is full, the end of the deflate stream is hit, or there is an error in the +/// deflate stream. +/// +/// # Arguments +/// +/// `r` is a [`DecompressorOxide`] struct with the state of this stream. +/// +/// `in_buf` is a reference to the compressed data that is to be decompressed. The decompressor will +/// start at the first byte of this buffer. +/// +/// `out` is a reference to the buffer that will store the decompressed data, and that +/// stores previously decompressed data if any. +/// +/// * The offset given by `out_pos` indicates where in the output buffer slice writing should start. +/// * If [`TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF`] is not set, the output buffer is used in a +/// wrapping manner, and it's size is required to be a power of 2. +/// * The decompression function normally needs access to 32KiB of the previously decompressed data +///(or to the beginning of the decompressed data if less than 32KiB has been decompressed.) +/// - If this data is not available, decompression may fail. +/// - Some deflate compressors allow specifying a window size which limits match distances to +/// less than this, or alternatively an RLE mode where matches will only refer to the previous byte +/// and thus allows a smaller output buffer. The window size can be specified in the zlib +/// header structure, however, the header data should not be relied on to be correct. +/// +/// `flags` indicates settings and status to the decompression function. +/// * The [`TINFL_FLAG_HAS_MORE_INPUT`] has to be specified if more compressed data is to be provided +/// in a subsequent call to this function. +/// * See the the [`inflate_flags`] module for details on other flags. +/// +/// # Returns +/// +/// Returns a tuple containing the status of the compressor, the number of input bytes read, and the +/// number of bytes output to `out`. +/// +/// This function shouldn't panic pending any bugs. +pub fn decompress( + r: &mut DecompressorOxide, + in_buf: &[u8], + out: &mut [u8], + out_pos: usize, + flags: u32, +) -> (TINFLStatus, usize, usize) { + let out_buf_size_mask = if flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0 { + usize::max_value() + } else { + // In the case of zero len, any attempt to write would produce HasMoreOutput, + // so to gracefully process the case of there really being no output, + // set the mask to all zeros. + out.len().saturating_sub(1) + }; + + // Ensure the output buffer's size is a power of 2, unless the output buffer + // is large enough to hold the entire output file (in which case it doesn't + // matter). + // Also make sure that the output buffer position is not past the end of the output buffer. + if (out_buf_size_mask.wrapping_add(1) & out_buf_size_mask) != 0 || out_pos > out.len() { + return (TINFLStatus::BadParam, 0, 0); + } + + let mut in_iter = in_buf.iter(); + + let mut state = r.state; + + let mut out_buf = OutputBuffer::from_slice_and_pos(out, out_pos); + + // Make a local copy of the important variables here so we can work with them on the stack. + let mut l = LocalVars { + bit_buf: r.bit_buf, + num_bits: r.num_bits, + dist: r.dist, + counter: r.counter, + num_extra: r.num_extra, + }; + + let mut status = 'state_machine: loop { + match state { + Start => generate_state!(state, 'state_machine, { + l.bit_buf = 0; + l.num_bits = 0; + l.dist = 0; + l.counter = 0; + l.num_extra = 0; + r.z_header0 = 0; + r.z_header1 = 0; + r.z_adler32 = 1; + r.check_adler32 = 1; + if flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 { + Action::Jump(State::ReadZlibCmf) + } else { + Action::Jump(State::ReadBlockHeader) + } + }), + + ReadZlibCmf => generate_state!(state, 'state_machine, { + read_byte(&mut in_iter, flags, |cmf| { + r.z_header0 = u32::from(cmf); + Action::Jump(State::ReadZlibFlg) + }) + }), + + ReadZlibFlg => generate_state!(state, 'state_machine, { + read_byte(&mut in_iter, flags, |flg| { + r.z_header1 = u32::from(flg); + validate_zlib_header(r.z_header0, r.z_header1, flags, out_buf_size_mask) + }) + }), + + // Read the block header and jump to the relevant section depending on the block type. + ReadBlockHeader => generate_state!(state, 'state_machine, { + read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| { + r.finish = (bits & 1) as u32; + r.block_type = (bits >> 1) as u32 & 3; + match r.block_type { + 0 => Action::Jump(BlockTypeNoCompression), + 1 => { + start_static_table(r); + init_tree(r, l) + }, + 2 => { + l.counter = 0; + Action::Jump(ReadTableSizes) + }, + 3 => Action::Jump(BlockTypeUnexpected), + _ => unreachable!() + } + }) + }), + + // Raw/Stored/uncompressed block. + BlockTypeNoCompression => generate_state!(state, 'state_machine, { + pad_to_bytes(&mut l, &mut in_iter, flags, |l| { + l.counter = 0; + Action::Jump(RawHeader) + }) + }), + + // Check that the raw block header is correct. + RawHeader => generate_state!(state, 'state_machine, { + if l.counter < 4 { + // Read block length and block length check. + if l.num_bits != 0 { + read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| { + r.raw_header[l.counter as usize] = bits as u8; + l.counter += 1; + Action::None + }) + } else { + read_byte(&mut in_iter, flags, |byte| { + r.raw_header[l.counter as usize] = byte; + l.counter += 1; + Action::None + }) + } + } else { + // Check if the length value of a raw block is correct. + // The 2 first (2-byte) words in a raw header are the length and the + // ones complement of the length. + let length = u16::from(r.raw_header[0]) | (u16::from(r.raw_header[1]) << 8); + let check = u16::from(r.raw_header[2]) | (u16::from(r.raw_header[3]) << 8); + let valid = length == !check; + l.counter = length.into(); + + if !valid { + Action::Jump(BadRawLength) + } else if l.counter == 0 { + // Empty raw block. Sometimes used for synchronization. + Action::Jump(BlockDone) + } else if l.num_bits != 0 { + // There is some data in the bit buffer, so we need to write that first. + Action::Jump(RawReadFirstByte) + } else { + // The bit buffer is empty, so memcpy the rest of the uncompressed data from + // the block. + Action::Jump(RawMemcpy1) + } + } + }), + + // Read the byte from the bit buffer. + RawReadFirstByte => generate_state!(state, 'state_machine, { + read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| { + l.dist = bits as u32; + Action::Jump(RawStoreFirstByte) + }) + }), + + // Write the byte we just read to the output buffer. + RawStoreFirstByte => generate_state!(state, 'state_machine, { + if out_buf.bytes_left() == 0 { + Action::End(TINFLStatus::HasMoreOutput) + } else { + out_buf.write_byte(l.dist as u8); + l.counter -= 1; + if l.counter == 0 || l.num_bits == 0 { + Action::Jump(RawMemcpy1) + } else { + // There is still some data left in the bit buffer that needs to be output. + // TODO: Changed this to jump to `RawReadfirstbyte` rather than + // `RawStoreFirstByte` as that seemed to be the correct path, but this + // needs testing. + Action::Jump(RawReadFirstByte) + } + } + }), + + RawMemcpy1 => generate_state!(state, 'state_machine, { + if l.counter == 0 { + Action::Jump(BlockDone) + } else if out_buf.bytes_left() == 0 { + Action::End(TINFLStatus::HasMoreOutput) + } else { + Action::Jump(RawMemcpy2) + } + }), + + RawMemcpy2 => generate_state!(state, 'state_machine, { + if in_iter.len() > 0 { + // Copy as many raw bytes as possible from the input to the output using memcpy. + // Raw block lengths are limited to 64 * 1024, so casting through usize and u32 + // is not an issue. + let space_left = out_buf.bytes_left(); + let bytes_to_copy = cmp::min(cmp::min( + space_left, + in_iter.len()), + l.counter as usize + ); + + out_buf.write_slice(&in_iter.as_slice()[..bytes_to_copy]); + + (&mut in_iter).nth(bytes_to_copy - 1); + l.counter -= bytes_to_copy as u32; + Action::Jump(RawMemcpy1) + } else { + end_of_input(flags) + } + }), + + // Read how many huffman codes/symbols are used for each table. + ReadTableSizes => generate_state!(state, 'state_machine, { + if l.counter < 3 { + let num_bits = [5, 5, 4][l.counter as usize]; + read_bits(&mut l, num_bits, &mut in_iter, flags, |l, bits| { + r.table_sizes[l.counter as usize] = + bits as u32 + u32::from(MIN_TABLE_SIZES[l.counter as usize]); + l.counter += 1; + Action::None + }) + } else { + memset(&mut r.tables[HUFFLEN_TABLE].code_size[..], 0); + l.counter = 0; + Action::Jump(ReadHufflenTableCodeSize) + } + }), + + // Read the 3-bit lengths of the huffman codes describing the huffman code lengths used + // to decode the lengths of the main tables. + ReadHufflenTableCodeSize => generate_state!(state, 'state_machine, { + if l.counter < r.table_sizes[HUFFLEN_TABLE] { + read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| { + // These lengths are not stored in a normal ascending order, but rather one + // specified by the deflate specification intended to put the most used + // values at the front as trailing zero lengths do not have to be stored. + r.tables[HUFFLEN_TABLE] + .code_size[HUFFMAN_LENGTH_ORDER[l.counter as usize] as usize] = + bits as u8; + l.counter += 1; + Action::None + }) + } else { + r.table_sizes[HUFFLEN_TABLE] = 19; + init_tree(r, &mut l) + } + }), + + ReadLitlenDistTablesCodeSize => generate_state!(state, 'state_machine, { + if l.counter < r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE] { + decode_huffman_code( + r, &mut l, HUFFLEN_TABLE, + flags, &mut in_iter, |r, l, symbol| { + l.dist = symbol as u32; + if l.dist < 16 { + r.len_codes[l.counter as usize] = l.dist as u8; + l.counter += 1; + Action::None + } else if l.dist == 16 && l.counter == 0 { + Action::Jump(BadCodeSizeDistPrevLookup) + } else { + l.num_extra = [2, 3, 7][l.dist as usize - 16]; + Action::Jump(ReadExtraBitsCodeSize) + } + } + ) + } else if l.counter != r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE] { + Action::Jump(BadCodeSizeSum) + } else { + r.tables[LITLEN_TABLE].code_size[..r.table_sizes[LITLEN_TABLE] as usize] + .copy_from_slice(&r.len_codes[..r.table_sizes[LITLEN_TABLE] as usize]); + + let dist_table_start = r.table_sizes[LITLEN_TABLE] as usize; + let dist_table_end = (r.table_sizes[LITLEN_TABLE] + + r.table_sizes[DIST_TABLE]) as usize; + r.tables[DIST_TABLE].code_size[..r.table_sizes[DIST_TABLE] as usize] + .copy_from_slice(&r.len_codes[dist_table_start..dist_table_end]); + + r.block_type -= 1; + init_tree(r, &mut l) + } + }), + + ReadExtraBitsCodeSize => generate_state!(state, 'state_machine, { + let num_extra = l.num_extra; + read_bits(&mut l, num_extra, &mut in_iter, flags, |l, mut extra_bits| { + // Mask to avoid a bounds check. + extra_bits += [3, 3, 11][(l.dist as usize - 16) & 3]; + let val = if l.dist == 16 { + r.len_codes[l.counter as usize - 1] + } else { + 0 + }; + + memset( + &mut r.len_codes[ + l.counter as usize..l.counter as usize + extra_bits as usize + ], + val, + ); + l.counter += extra_bits as u32; + Action::Jump(ReadLitlenDistTablesCodeSize) + }) + }), + + DecodeLitlen => generate_state!(state, 'state_machine, { + if in_iter.len() < 4 || out_buf.bytes_left() < 2 { + // See if we can decode a literal with the data we have left. + // Jumps to next state (WriteSymbol) if successful. + decode_huffman_code( + r, + &mut l, + LITLEN_TABLE, + flags, + &mut in_iter, + |_r, l, symbol| { + l.counter = symbol as u32; + Action::Jump(WriteSymbol) + }, + ) + } else if + // If there is enough space, use the fast inner decompression + // function. + out_buf.bytes_left() >= 259 && + in_iter.len() >= 14 + { + let (status, new_state) = decompress_fast( + r, + &mut in_iter, + &mut out_buf, + flags, + &mut l, + out_buf_size_mask, + ); + + state = new_state; + if status == TINFLStatus::Done { + Action::Jump(new_state) + } else { + Action::End(status) + } + } else { + fill_bit_buffer(&mut l, &mut in_iter); + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + + l.counter = symbol as u32; + l.bit_buf >>= code_len; + l.num_bits -= code_len; + + if (l.counter & 256) != 0 { + // The symbol is not a literal. + Action::Jump(HuffDecodeOuterLoop1) + } else { + // If we have a 32-bit buffer we need to read another two bytes now + // to have enough bits to keep going. + if cfg!(not(target_pointer_width = "64")) { + fill_bit_buffer(&mut l, &mut in_iter); + } + + if let Some((symbol, code_len)) = r.tables[LITLEN_TABLE].lookup(l.bit_buf) { + + l.bit_buf >>= code_len; + l.num_bits -= code_len; + // The previous symbol was a literal, so write it directly and check + // the next one. + out_buf.write_byte(l.counter as u8); + if (symbol & 256) != 0 { + l.counter = symbol as u32; + // The symbol is a length value. + Action::Jump(HuffDecodeOuterLoop1) + } else { + // The symbol is a literal, so write it directly and continue. + out_buf.write_byte(symbol as u8); + Action::None + } + } else { + Action::Jump(InvalidCodeLen) + } + } + } else { + Action::Jump(InvalidCodeLen) + } + } + }), + + WriteSymbol => generate_state!(state, 'state_machine, { + if l.counter >= 256 { + Action::Jump(HuffDecodeOuterLoop1) + } else if out_buf.bytes_left() > 0 { + out_buf.write_byte(l.counter as u8); + Action::Jump(DecodeLitlen) + } else { + Action::End(TINFLStatus::HasMoreOutput) + } + }), + + HuffDecodeOuterLoop1 => generate_state!(state, 'state_machine, { + // Mask the top bits since they may contain length info. + l.counter &= 511; + + if l.counter + == 256 { + // We hit the end of block symbol. + Action::Jump(BlockDone) + } else if l.counter > 285 { + // Invalid code. + // We already verified earlier that the code is > 256. + Action::Jump(InvalidLitlen) + } else { + // # Optimization + // Mask the value to avoid bounds checks + // We could use get_unchecked later if can statically verify that + // this will never go out of bounds. + l.num_extra = + u32::from(LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]); + // Length and distance codes have a number of extra bits depending on + // the base, which together with the base gives us the exact value. + if l.num_extra != 0 { + Action::Jump(ReadExtraBitsLitlen) + } else { + Action::Jump(DecodeDistance) + } + } + }), + + ReadExtraBitsLitlen => generate_state!(state, 'state_machine, { + let num_extra = l.num_extra; + read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| { + l.counter += extra_bits as u32; + Action::Jump(DecodeDistance) + }) + }), + + DecodeDistance => generate_state!(state, 'state_machine, { + // Try to read a huffman code from the input buffer and look up what + // length code the decoded symbol refers to. + decode_huffman_code(r, &mut l, DIST_TABLE, flags, &mut in_iter, |_r, l, symbol| { + if symbol > 29 { + // Invalid distance code. + return Action::Jump(InvalidDist) + } + // # Optimization + // Mask the value to avoid bounds checks + // We could use get_unchecked later if can statically verify that + // this will never go out of bounds. + l.num_extra = u32::from(DIST_EXTRA[symbol as usize & BASE_EXTRA_MASK]); + l.dist = u32::from(DIST_BASE[symbol as usize & BASE_EXTRA_MASK]); + if l.num_extra != 0 { + // ReadEXTRA_BITS_DISTACNE + Action::Jump(ReadExtraBitsDistance) + } else { + Action::Jump(HuffDecodeOuterLoop2) + } + }) + }), + + ReadExtraBitsDistance => generate_state!(state, 'state_machine, { + let num_extra = l.num_extra; + read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| { + l.dist += extra_bits as u32; + Action::Jump(HuffDecodeOuterLoop2) + }) + }), + + HuffDecodeOuterLoop2 => generate_state!(state, 'state_machine, { + if l.dist as usize > out_buf.position() && + (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0) + { + // We encountered a distance that refers a position before + // the start of the decoded data, so we can't continue. + Action::Jump(DistanceOutOfBounds) + } else { + let out_pos = out_buf.position(); + let source_pos = out_buf.position() + .wrapping_sub(l.dist as usize) & out_buf_size_mask; + + let out_len = out_buf.get_ref().len() as usize; + let match_end_pos = out_buf.position() + l.counter as usize; + + if match_end_pos > out_len || + // miniz doesn't do this check here. Not sure how it makes sure + // that this case doesn't happen. + (source_pos >= out_pos && (source_pos - out_pos) < l.counter as usize) + { + // Not enough space for all of the data in the output buffer, + // so copy what we have space for. + if l.counter == 0 { + Action::Jump(DecodeLitlen) + } else { + Action::Jump(WriteLenBytesToEnd) + } + } else { + apply_match( + out_buf.get_mut(), + out_pos, + l.dist as usize, + l.counter as usize, + out_buf_size_mask + ); + out_buf.set_position(out_pos + l.counter as usize); + Action::Jump(DecodeLitlen) + } + } + }), + + WriteLenBytesToEnd => generate_state!(state, 'state_machine, { + if out_buf.bytes_left() > 0 { + let out_pos = out_buf.position(); + let source_pos = out_buf.position() + .wrapping_sub(l.dist as usize) & out_buf_size_mask; + + + let len = cmp::min(out_buf.bytes_left(), l.counter as usize); + + transfer(out_buf.get_mut(), source_pos, out_pos, len, out_buf_size_mask); + + out_buf.set_position(out_pos + len); + l.counter -= len as u32; + if l.counter == 0 { + Action::Jump(DecodeLitlen) + } else { + Action::None + } + } else { + Action::End(TINFLStatus::HasMoreOutput) + } + }), + + BlockDone => generate_state!(state, 'state_machine, { + // End once we've read the last block. + if r.finish != 0 { + pad_to_bytes(&mut l, &mut in_iter, flags, |_| Action::None); + + let in_consumed = in_buf.len() - in_iter.len(); + let undo = undo_bytes(&mut l, in_consumed as u32) as usize; + in_iter = in_buf[in_consumed - undo..].iter(); + + l.bit_buf &= ((1 as BitBuffer) << l.num_bits) - 1; + debug_assert_eq!(l.num_bits, 0); + + if flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 { + l.counter = 0; + Action::Jump(ReadAdler32) + } else { + Action::Jump(DoneForever) + } + } else { + Action::Jump(ReadBlockHeader) + } + }), + + ReadAdler32 => generate_state!(state, 'state_machine, { + if l.counter < 4 { + if l.num_bits != 0 { + read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| { + r.z_adler32 <<= 8; + r.z_adler32 |= bits as u32; + l.counter += 1; + Action::None + }) + } else { + read_byte(&mut in_iter, flags, |byte| { + r.z_adler32 <<= 8; + r.z_adler32 |= u32::from(byte); + l.counter += 1; + Action::None + }) + } + } else { + Action::Jump(DoneForever) + } + }), + + // We are done. + DoneForever => break TINFLStatus::Done, + + // Anything else indicates failure. + // BadZlibHeader | BadRawLength | BlockTypeUnexpected | DistanceOutOfBounds | + // BadTotalSymbols | BadCodeSizeDistPrevLookup | BadCodeSizeSum | InvalidLitlen | + // InvalidDist | InvalidCodeLen + _ => break TINFLStatus::Failed, + }; + }; + + let in_undo = if status != TINFLStatus::NeedsMoreInput + && status != TINFLStatus::FailedCannotMakeProgress + { + undo_bytes(&mut l, (in_buf.len() - in_iter.len()) as u32) as usize + } else { + 0 + }; + + // Make sure HasMoreOutput overrides NeedsMoreInput if the output buffer is full. + // (Unless the missing input is the adler32 value in which case we don't need to write anything.) + // TODO: May want to see if we can do this in a better way. + if status == TINFLStatus::NeedsMoreInput + && out_buf.bytes_left() == 0 + && state != State::ReadAdler32 + { + status = TINFLStatus::HasMoreOutput + } + + r.state = state; + r.bit_buf = l.bit_buf; + r.num_bits = l.num_bits; + r.dist = l.dist; + r.counter = l.counter; + r.num_extra = l.num_extra; + + r.bit_buf &= ((1 as BitBuffer) << r.num_bits) - 1; + + // If this is a zlib stream, and update the adler32 checksum with the decompressed bytes if + // requested. + let need_adler = if (flags & TINFL_FLAG_IGNORE_ADLER32) == 0 { + flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32) != 0 + } else { + // If TINFL_FLAG_IGNORE_ADLER32 is enabled, ignore the checksum. + false + }; + if need_adler && status as i32 >= 0 { + let out_buf_pos = out_buf.position(); + r.check_adler32 = update_adler32(r.check_adler32, &out_buf.get_ref()[out_pos..out_buf_pos]); + + // disabled so that random input from fuzzer would not be rejected early, + // before it has a chance to reach interesting parts of code + if !cfg!(fuzzing) { + // Once we are done, check if the checksum matches with the one provided in the zlib header. + if status == TINFLStatus::Done + && flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 + && r.check_adler32 != r.z_adler32 + { + status = TINFLStatus::Adler32Mismatch; + } + } + } + + ( + status, + in_buf.len() - in_iter.len() - in_undo, + out_buf.position() - out_pos, + ) +} + +#[cfg(test)] +mod test { + use super::*; + + //TODO: Fix these. + + fn tinfl_decompress_oxide<'i>( + r: &mut DecompressorOxide, + input_buffer: &'i [u8], + output_buffer: &mut [u8], + flags: u32, + ) -> (TINFLStatus, &'i [u8], usize) { + let (status, in_pos, out_pos) = decompress(r, input_buffer, output_buffer, 0, flags); + (status, &input_buffer[in_pos..], out_pos) + } + + #[test] + fn decompress_zlib() { + let encoded = [ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19, + ]; + let flags = TINFL_FLAG_COMPUTE_ADLER32 | TINFL_FLAG_PARSE_ZLIB_HEADER; + + let mut b = DecompressorOxide::new(); + const LEN: usize = 32; + let mut b_buf = vec![0; LEN]; + + // This should fail with the out buffer being to small. + let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], b_buf.as_mut_slice(), flags); + + assert_eq!(b_status.0, TINFLStatus::Failed); + + let flags = flags | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + b = DecompressorOxide::new(); + + // With TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF set this should no longer fail. + let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], b_buf.as_mut_slice(), flags); + + assert_eq!(b_buf[..b_status.2], b"Hello, zlib!"[..]); + assert_eq!(b_status.0, TINFLStatus::Done); + } + + #[test] + fn raw_block() { + const LEN: usize = 64; + + let text = b"Hello, zlib!"; + let encoded = { + let len = text.len(); + let notlen = !len; + let mut encoded = vec![ + 1, + len as u8, + (len >> 8) as u8, + notlen as u8, + (notlen >> 8) as u8, + ]; + encoded.extend_from_slice(&text[..]); + encoded + }; + + //let flags = TINFL_FLAG_COMPUTE_ADLER32 | TINFL_FLAG_PARSE_ZLIB_HEADER | + let flags = TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + let mut b = DecompressorOxide::new(); + + let mut b_buf = vec![0; LEN]; + + let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], b_buf.as_mut_slice(), flags); + assert_eq!(b_buf[..b_status.2], text[..]); + assert_eq!(b_status.0, TINFLStatus::Done); + } + + fn masked_lookup(table: &HuffmanTable, bit_buf: BitBuffer) -> (i32, u32) { + let ret = table.lookup(bit_buf).unwrap(); + (ret.0 & 511, ret.1) + } + + #[test] + fn fixed_table_lookup() { + let mut d = DecompressorOxide::new(); + d.block_type = 1; + start_static_table(&mut d); + let mut l = LocalVars { + bit_buf: d.bit_buf, + num_bits: d.num_bits, + dist: d.dist, + counter: d.counter, + num_extra: d.num_extra, + }; + init_tree(&mut d, &mut l); + let llt = &d.tables[LITLEN_TABLE]; + let dt = &d.tables[DIST_TABLE]; + assert_eq!(masked_lookup(llt, 0b00001100), (0, 8)); + assert_eq!(masked_lookup(llt, 0b00011110), (72, 8)); + assert_eq!(masked_lookup(llt, 0b01011110), (74, 8)); + assert_eq!(masked_lookup(llt, 0b11111101), (143, 8)); + assert_eq!(masked_lookup(llt, 0b000010011), (144, 9)); + assert_eq!(masked_lookup(llt, 0b111111111), (255, 9)); + assert_eq!(masked_lookup(llt, 0b00000000), (256, 7)); + assert_eq!(masked_lookup(llt, 0b1110100), (279, 7)); + assert_eq!(masked_lookup(llt, 0b00000011), (280, 8)); + assert_eq!(masked_lookup(llt, 0b11100011), (287, 8)); + + assert_eq!(masked_lookup(dt, 0), (0, 5)); + assert_eq!(masked_lookup(dt, 20), (5, 5)); + } + + fn check_result(input: &[u8], expected_status: TINFLStatus, expected_state: State, zlib: bool) { + let mut r = DecompressorOxide::default(); + let mut output_buf = vec![0; 1024 * 32]; + let flags = if zlib { + inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER + } else { + 0 + } | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF + | TINFL_FLAG_HAS_MORE_INPUT; + let (d_status, _in_bytes, _out_bytes) = + decompress(&mut r, input, &mut output_buf, 0, flags); + assert_eq!(expected_status, d_status); + assert_eq!(expected_state, r.state); + } + + #[test] + fn bogus_input() { + use self::check_result as cr; + const F: TINFLStatus = TINFLStatus::Failed; + const OK: TINFLStatus = TINFLStatus::Done; + // Bad CM. + cr(&[0x77, 0x85], F, State::BadZlibHeader, true); + // Bad window size (but check is correct). + cr(&[0x88, 0x98], F, State::BadZlibHeader, true); + // Bad check bits. + cr(&[0x78, 0x98], F, State::BadZlibHeader, true); + + // Too many code lengths. (From inflate library issues) + cr( + b"M\xff\xffM*\xad\xad\xad\xad\xad\xad\xad\xcd\xcd\xcdM", + F, + State::BadTotalSymbols, + false, + ); + // Bad CLEN (also from inflate library issues) + cr( + b"\xdd\xff\xff*M\x94ffffffffff", + F, + State::BadTotalSymbols, + false, + ); + + // Port of inflate coverage tests from zlib-ng + // https://github.com/Dead2/zlib-ng/blob/develop/test/infcover.c + let c = |a, b, c| cr(a, b, c, false); + + // Invalid uncompressed/raw block length. + c(&[0, 0, 0, 0, 0], F, State::BadRawLength); + // Ok empty uncompressed block. + c(&[3, 0], OK, State::DoneForever); + // Invalid block type. + c(&[6], F, State::BlockTypeUnexpected); + // Ok uncompressed block. + c(&[1, 1, 0, 0xfe, 0xff, 0], OK, State::DoneForever); + // Too many litlens, we handle this later than zlib, so this test won't + // give the same result. + // c(&[0xfc, 0, 0], F, State::BadTotalSymbols); + // Invalid set of code lengths - TODO Check if this is the correct error for this. + c(&[4, 0, 0xfe, 0xff], F, State::BadTotalSymbols); + // Invalid repeat in list of code lengths. + // (Try to repeat a non-existant code.) + c(&[4, 0, 0x24, 0x49, 0], F, State::BadCodeSizeDistPrevLookup); + // Missing end of block code (should we have a separate error for this?) - fails on futher input + // c(&[4, 0, 0x24, 0xe9, 0xff, 0x6d], F, State::BadTotalSymbols); + // Invalid set of literals/lengths + c( + &[ + 4, 0x80, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x71, 0xff, 0xff, 0x93, 0x11, 0, + ], + F, + State::BadTotalSymbols, + ); + // Invalid set of distances _ needsmoreinput + // c(&[4, 0x80, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0f, 0xb4, 0xff, 0xff, 0xc3, 0x84], F, State::BadTotalSymbols); + // Invalid distance code + c(&[2, 0x7e, 0xff, 0xff], F, State::InvalidDist); + + // Distance refers to position before the start + c( + &[0x0c, 0xc0, 0x81, 0, 0, 0, 0, 0, 0x90, 0xff, 0x6b, 0x4, 0], + F, + State::DistanceOutOfBounds, + ); + + // Trailer + // Bad gzip trailer checksum GZip header not handled by miniz_oxide + //cr(&[0x1f, 0x8b, 0x08 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0x03, 0, 0, 0, 0, 0x01], F, State::BadCRC, false) + // Bad gzip trailer length + //cr(&[0x1f, 0x8b, 0x08 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0x03, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], F, State::BadCRC, false) + } + + #[test] + fn empty_output_buffer_non_wrapping() { + let encoded = [ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19, + ]; + let flags = TINFL_FLAG_COMPUTE_ADLER32 + | TINFL_FLAG_PARSE_ZLIB_HEADER + | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + let mut r = DecompressorOxide::new(); + let mut output_buf = vec![]; + // Check that we handle an empty buffer properly and not panicking. + // https://github.com/Frommi/miniz_oxide/issues/23 + let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags); + assert_eq!(res, (TINFLStatus::HasMoreOutput, 4, 0)); + } + + #[test] + fn empty_output_buffer_wrapping() { + let encoded = [ + 0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00, + ]; + let flags = TINFL_FLAG_COMPUTE_ADLER32; + let mut r = DecompressorOxide::new(); + let mut output_buf = vec![]; + // Check that we handle an empty buffer properly and not panicking. + // https://github.com/Frommi/miniz_oxide/issues/23 + let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags); + assert_eq!(res, (TINFLStatus::HasMoreOutput, 2, 0)); + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/mod.rs b/third_party/rust/miniz_oxide/src/inflate/mod.rs new file mode 100644 index 0000000000..bb19e379cc --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/mod.rs @@ -0,0 +1,337 @@ +//! This module contains functionality for decompression. + +#[cfg(feature = "with-alloc")] +use crate::alloc::{boxed::Box, vec, vec::Vec}; +use ::core::usize; +#[cfg(all(feature = "std", feature = "with-alloc"))] +use std::error::Error; + +pub mod core; +mod output_buffer; +pub mod stream; +use self::core::*; + +const TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS: i32 = -4; +const TINFL_STATUS_BAD_PARAM: i32 = -3; +const TINFL_STATUS_ADLER32_MISMATCH: i32 = -2; +const TINFL_STATUS_FAILED: i32 = -1; +const TINFL_STATUS_DONE: i32 = 0; +const TINFL_STATUS_NEEDS_MORE_INPUT: i32 = 1; +const TINFL_STATUS_HAS_MORE_OUTPUT: i32 = 2; + +/// Return status codes. +#[repr(i8)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum TINFLStatus { + /// More input data was expected, but the caller indicated that there was no more data, so the + /// input stream is likely truncated. + /// + /// This can't happen if you have provided the + /// [`TINFL_FLAG_HAS_MORE_INPUT`][core::inflate_flags::TINFL_FLAG_HAS_MORE_INPUT] flag to the + /// decompression. By setting that flag, you indicate more input exists but is not provided, + /// and so reaching the end of the input data without finding the end of the compressed stream + /// would instead return a [`NeedsMoreInput`][Self::NeedsMoreInput] status. + FailedCannotMakeProgress = TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS as i8, + + /// The output buffer is an invalid size; consider the `flags` parameter. + BadParam = TINFL_STATUS_BAD_PARAM as i8, + + /// The decompression went fine, but the adler32 checksum did not match the one + /// provided in the header. + Adler32Mismatch = TINFL_STATUS_ADLER32_MISMATCH as i8, + + /// Failed to decompress due to invalid data. + Failed = TINFL_STATUS_FAILED as i8, + + /// Finished decompression without issues. + /// + /// This indicates the end of the compressed stream has been reached. + Done = TINFL_STATUS_DONE as i8, + + /// The decompressor needs more input data to continue decompressing. + /// + /// This occurs when there's no more consumable input, but the end of the stream hasn't been + /// reached, and you have supplied the + /// [`TINFL_FLAG_HAS_MORE_INPUT`][core::inflate_flags::TINFL_FLAG_HAS_MORE_INPUT] flag to the + /// decompressor. Had you not supplied that flag (which would mean you were asserting that you + /// believed all the data was available) you would have gotten a + /// [`FailedCannotMakeProcess`][Self::FailedCannotMakeProgress] instead. + NeedsMoreInput = TINFL_STATUS_NEEDS_MORE_INPUT as i8, + + /// There is still pending data that didn't fit in the output buffer. + HasMoreOutput = TINFL_STATUS_HAS_MORE_OUTPUT as i8, +} + +impl TINFLStatus { + pub fn from_i32(value: i32) -> Option<TINFLStatus> { + use self::TINFLStatus::*; + match value { + TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS => Some(FailedCannotMakeProgress), + TINFL_STATUS_BAD_PARAM => Some(BadParam), + TINFL_STATUS_ADLER32_MISMATCH => Some(Adler32Mismatch), + TINFL_STATUS_FAILED => Some(Failed), + TINFL_STATUS_DONE => Some(Done), + TINFL_STATUS_NEEDS_MORE_INPUT => Some(NeedsMoreInput), + TINFL_STATUS_HAS_MORE_OUTPUT => Some(HasMoreOutput), + _ => None, + } + } +} + +/// Struct return when decompress_to_vec functions fail. +#[cfg(feature = "with-alloc")] +#[derive(Debug)] +pub struct DecompressError { + /// Decompressor status on failure. See [TINFLStatus] for details. + pub status: TINFLStatus, + /// The currently decompressed data if any. + pub output: Vec<u8>, +} + +#[cfg(feature = "with-alloc")] +impl alloc::fmt::Display for DecompressError { + fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { + f.write_str(match self.status { + TINFLStatus::FailedCannotMakeProgress => "Truncated input stream", + TINFLStatus::BadParam => "Invalid output buffer size", + TINFLStatus::Adler32Mismatch => "Adler32 checksum mismatch", + TINFLStatus::Failed => "Invalid input data", + TINFLStatus::Done => unreachable!(), + TINFLStatus::NeedsMoreInput => "Truncated input stream", + TINFLStatus::HasMoreOutput => "Output size exceeded the specified limit", + }) + } +} + +/// Implement Error trait only if std feature is requested as it requires std. +#[cfg(all(feature = "std", feature = "with-alloc"))] +impl Error for DecompressError {} + +#[cfg(feature = "with-alloc")] +fn decompress_error(status: TINFLStatus, output: Vec<u8>) -> Result<Vec<u8>, DecompressError> { + Err(DecompressError { status, output }) +} + +/// Decompress the deflate-encoded data in `input` to a vector. +/// +/// NOTE: This function will not bound the output, so if the output is large enough it can result in an out of memory error. +/// It is therefore suggested to not use this for anything other than test programs, use the functions with a specified limit, or +/// ideally streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] containing the status and so far decompressed data if any on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec(input: &[u8]) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner(input, 0, usize::max_value()) +} + +/// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector. +/// +/// NOTE: This function will not bound the output, so if the output is large enough it can result in an out of memory error. +/// It is therefore suggested to not use this for anything other than test programs, use the functions with a specified limit, or +/// ideally streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] containing the status and so far decompressed data if any on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec_zlib(input: &[u8]) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner( + input, + inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER, + usize::max_value(), + ) +} + +/// Decompress the deflate-encoded data in `input` to a vector. +/// +/// The vector is grown to at most `max_size` bytes; if the data does not fit in that size, +/// the error [struct][DecompressError] will contain the status [`TINFLStatus::HasMoreOutput`] and the data that was decompressed on failure. +/// +/// As this function tries to decompress everything in one go, it's not ideal for general use outside of tests or where the output size is expected to be small. +/// It is suggested to use streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec_with_limit( + input: &[u8], + max_size: usize, +) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner(input, 0, max_size) +} + +/// Decompress the deflate-encoded data (with a zlib wrapper) in `input` to a vector. +/// The vector is grown to at most `max_size` bytes; if the data does not fit in that size, +/// the error [struct][DecompressError] will contain the status [`TINFLStatus::HasMoreOutput`] and the data that was decompressed on failure. +/// +/// As this function tries to decompress everything in one go, it's not ideal for general use outside of tests or where the output size is expected to be small. +/// It is suggested to use streaming decompression via the [flate2](https://github.com/alexcrichton/flate2-rs) library instead. +/// +/// Returns a [`Result`] containing the [`Vec`] of decompressed data on success, and a [struct][DecompressError] on failure. +#[inline] +#[cfg(feature = "with-alloc")] +pub fn decompress_to_vec_zlib_with_limit( + input: &[u8], + max_size: usize, +) -> Result<Vec<u8>, DecompressError> { + decompress_to_vec_inner(input, inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER, max_size) +} + +/// Backend of various to-[`Vec`] decompressions. +/// +/// Returns [`Vec`] of decompressed data on success and the [error struct][DecompressError] with details on failure. +#[cfg(feature = "with-alloc")] +fn decompress_to_vec_inner( + input: &[u8], + flags: u32, + max_output_size: usize, +) -> Result<Vec<u8>, DecompressError> { + let flags = flags | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + let mut ret: Vec<u8> = vec![0; input.len().saturating_mul(2).min(max_output_size)]; + + let mut decomp = Box::<DecompressorOxide>::default(); + + let mut in_pos = 0; + let mut out_pos = 0; + loop { + // Wrap the whole output slice so we know we have enough of the + // decompressed data for matches. + let (status, in_consumed, out_consumed) = + decompress(&mut decomp, &input[in_pos..], &mut ret, out_pos, flags); + in_pos += in_consumed; + out_pos += out_consumed; + + match status { + TINFLStatus::Done => { + ret.truncate(out_pos); + return Ok(ret); + } + + TINFLStatus::HasMoreOutput => { + // if the buffer has already reached the size limit, return an error + if ret.len() >= max_output_size { + return decompress_error(TINFLStatus::HasMoreOutput, ret); + } + // calculate the new length, capped at `max_output_size` + let new_len = ret.len().saturating_mul(2).min(max_output_size); + ret.resize(new_len, 0); + } + + _ => return decompress_error(status, ret), + } + } +} + +/// Decompress one or more source slices from an iterator into the output slice. +/// +/// * On success, returns the number of bytes that were written. +/// * On failure, returns the failure status code. +/// +/// This will fail if the output buffer is not large enough, but in that case +/// the output buffer will still contain the partial decompression. +/// +/// * `out` the output buffer. +/// * `it` the iterator of input slices. +/// * `zlib_header` if the first slice out of the iterator is expected to have a +/// Zlib header. Otherwise the slices are assumed to be the deflate data only. +/// * `ignore_adler32` if the adler32 checksum should be calculated or not. +pub fn decompress_slice_iter_to_slice<'out, 'inp>( + out: &'out mut [u8], + it: impl Iterator<Item = &'inp [u8]>, + zlib_header: bool, + ignore_adler32: bool, +) -> Result<usize, TINFLStatus> { + use self::core::inflate_flags::*; + + let mut it = it.peekable(); + let r = &mut DecompressorOxide::new(); + let mut out_pos = 0; + while let Some(in_buf) = it.next() { + let has_more = it.peek().is_some(); + let flags = { + let mut f = TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + if zlib_header { + f |= TINFL_FLAG_PARSE_ZLIB_HEADER; + } + if ignore_adler32 { + f |= TINFL_FLAG_IGNORE_ADLER32; + } + if has_more { + f |= TINFL_FLAG_HAS_MORE_INPUT; + } + f + }; + let (status, _input_read, bytes_written) = decompress(r, in_buf, out, out_pos, flags); + out_pos += bytes_written; + match status { + TINFLStatus::NeedsMoreInput => continue, + TINFLStatus::Done => return Ok(out_pos), + e => return Err(e), + } + } + // If we ran out of source slices without getting a `Done` from the + // decompression we can call it a failure. + Err(TINFLStatus::FailedCannotMakeProgress) +} + +#[cfg(test)] +mod test { + use super::{ + decompress_slice_iter_to_slice, decompress_to_vec_zlib, decompress_to_vec_zlib_with_limit, + DecompressError, TINFLStatus, + }; + const ENCODED: [u8; 20] = [ + 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19, + ]; + + #[test] + fn decompress_vec() { + let res = decompress_to_vec_zlib(&ENCODED[..]).unwrap(); + assert_eq!(res.as_slice(), &b"Hello, zlib!"[..]); + } + + #[test] + fn decompress_vec_with_high_limit() { + let res = decompress_to_vec_zlib_with_limit(&ENCODED[..], 100_000).unwrap(); + assert_eq!(res.as_slice(), &b"Hello, zlib!"[..]); + } + + #[test] + fn fail_to_decompress_with_limit() { + let res = decompress_to_vec_zlib_with_limit(&ENCODED[..], 8); + match res { + Err(DecompressError { + status: TINFLStatus::HasMoreOutput, + .. + }) => (), // expected result + _ => panic!("Decompression output size limit was not enforced"), + } + } + + #[test] + fn test_decompress_slice_iter_to_slice() { + // one slice + let mut out = [0_u8; 12_usize]; + let r = + decompress_slice_iter_to_slice(&mut out, Some(&ENCODED[..]).into_iter(), true, false); + assert_eq!(r, Ok(12)); + assert_eq!(&out[..12], &b"Hello, zlib!"[..]); + + // some chunks at a time + for chunk_size in 1..13 { + // Note: because of https://github.com/Frommi/miniz_oxide/issues/110 our + // out buffer needs to have +1 byte available when the chunk size cuts + // the adler32 data off from the last actual data. + let mut out = [0_u8; 12_usize + 1]; + let r = + decompress_slice_iter_to_slice(&mut out, ENCODED.chunks(chunk_size), true, false); + assert_eq!(r, Ok(12)); + assert_eq!(&out[..12], &b"Hello, zlib!"[..]); + } + + // output buffer too small + let mut out = [0_u8; 3_usize]; + let r = decompress_slice_iter_to_slice(&mut out, ENCODED.chunks(7), true, false); + assert!(r.is_err()); + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/output_buffer.rs b/third_party/rust/miniz_oxide/src/inflate/output_buffer.rs new file mode 100644 index 0000000000..5218a807d3 --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/output_buffer.rs @@ -0,0 +1,60 @@ +/// A wrapper for the output slice used when decompressing. +/// +/// Using this rather than `Cursor` lets us implement the writing methods directly on +/// the buffer and lets us use a usize rather than u64 for the position which helps with +/// performance on 32-bit systems. +pub struct OutputBuffer<'a> { + slice: &'a mut [u8], + position: usize, +} + +impl<'a> OutputBuffer<'a> { + #[inline] + pub fn from_slice_and_pos(slice: &'a mut [u8], position: usize) -> OutputBuffer<'a> { + OutputBuffer { slice, position } + } + + #[inline] + pub const fn position(&self) -> usize { + self.position + } + + #[inline] + pub fn set_position(&mut self, position: usize) { + self.position = position; + } + + /// Write a byte to the current position and increment + /// + /// Assumes that there is space. + #[inline] + pub fn write_byte(&mut self, byte: u8) { + self.slice[self.position] = byte; + self.position += 1; + } + + /// Write a slice to the current position and increment + /// + /// Assumes that there is space. + #[inline] + pub fn write_slice(&mut self, data: &[u8]) { + let len = data.len(); + self.slice[self.position..self.position + len].copy_from_slice(data); + self.position += data.len(); + } + + #[inline] + pub const fn bytes_left(&self) -> usize { + self.slice.len() - self.position + } + + #[inline] + pub const fn get_ref(&self) -> &[u8] { + self.slice + } + + #[inline] + pub fn get_mut(&mut self) -> &mut [u8] { + self.slice + } +} diff --git a/third_party/rust/miniz_oxide/src/inflate/stream.rs b/third_party/rust/miniz_oxide/src/inflate/stream.rs new file mode 100644 index 0000000000..ee681b67bb --- /dev/null +++ b/third_party/rust/miniz_oxide/src/inflate/stream.rs @@ -0,0 +1,418 @@ +//! Extra streaming decompression functionality. +//! +//! As of now this is mainly intended for use to build a higher-level wrapper. +#[cfg(feature = "with-alloc")] +use crate::alloc::boxed::Box; +use core::{cmp, mem}; + +use crate::inflate::core::{decompress, inflate_flags, DecompressorOxide, TINFL_LZ_DICT_SIZE}; +use crate::inflate::TINFLStatus; +use crate::{DataFormat, MZError, MZFlush, MZResult, MZStatus, StreamResult}; + +/// Tag that determines reset policy of [InflateState](struct.InflateState.html) +pub trait ResetPolicy { + /// Performs reset + fn reset(&self, state: &mut InflateState); +} + +/// Resets state, without performing expensive ops (e.g. zeroing buffer) +/// +/// Note that not zeroing buffer can lead to security issues when dealing with untrusted input. +pub struct MinReset; + +impl ResetPolicy for MinReset { + fn reset(&self, state: &mut InflateState) { + state.decompressor().init(); + state.dict_ofs = 0; + state.dict_avail = 0; + state.first_call = true; + state.has_flushed = false; + state.last_status = TINFLStatus::NeedsMoreInput; + } +} + +/// Resets state and zero memory, continuing to use the same data format. +pub struct ZeroReset; + +impl ResetPolicy for ZeroReset { + #[inline] + fn reset(&self, state: &mut InflateState) { + MinReset.reset(state); + state.dict = [0; TINFL_LZ_DICT_SIZE]; + } +} + +/// Full reset of the state, including zeroing memory. +/// +/// Requires to provide new data format. +pub struct FullReset(pub DataFormat); + +impl ResetPolicy for FullReset { + #[inline] + fn reset(&self, state: &mut InflateState) { + ZeroReset.reset(state); + state.data_format = self.0; + } +} + +/// A struct that compbines a decompressor with extra data for streaming decompression. +/// +pub struct InflateState { + /// Inner decompressor struct + decomp: DecompressorOxide, + + /// Buffer of input bytes for matches. + /// TODO: Could probably do this a bit cleaner with some + /// Cursor-like class. + /// We may also look into whether we need to keep a buffer here, or just one in the + /// decompressor struct. + dict: [u8; TINFL_LZ_DICT_SIZE], + /// Where in the buffer are we currently at? + dict_ofs: usize, + /// How many bytes of data to be flushed is there currently in the buffer? + dict_avail: usize, + + first_call: bool, + has_flushed: bool, + + /// Whether the input data is wrapped in a zlib header and checksum. + /// TODO: This should be stored in the decompressor. + data_format: DataFormat, + last_status: TINFLStatus, +} + +impl Default for InflateState { + fn default() -> Self { + InflateState { + decomp: DecompressorOxide::default(), + dict: [0; TINFL_LZ_DICT_SIZE], + dict_ofs: 0, + dict_avail: 0, + first_call: true, + has_flushed: false, + data_format: DataFormat::Raw, + last_status: TINFLStatus::NeedsMoreInput, + } + } +} +impl InflateState { + /// Create a new state. + /// + /// Note that this struct is quite large due to internal buffers, and as such storing it on + /// the stack is not recommended. + /// + /// # Parameters + /// `data_format`: Determines whether the compressed data is assumed to wrapped with zlib + /// metadata. + pub fn new(data_format: DataFormat) -> InflateState { + InflateState { + data_format, + ..Default::default() + } + } + + /// Create a new state on the heap. + /// + /// # Parameters + /// `data_format`: Determines whether the compressed data is assumed to wrapped with zlib + /// metadata. + #[cfg(feature = "with-alloc")] + pub fn new_boxed(data_format: DataFormat) -> Box<InflateState> { + let mut b: Box<InflateState> = Box::default(); + b.data_format = data_format; + b + } + + /// Access the innner decompressor. + pub fn decompressor(&mut self) -> &mut DecompressorOxide { + &mut self.decomp + } + + /// Return the status of the last call to `inflate` with this `InflateState`. + pub const fn last_status(&self) -> TINFLStatus { + self.last_status + } + + /// Create a new state using miniz/zlib style window bits parameter. + /// + /// The decompressor does not support different window sizes. As such, + /// any positive (>0) value will set the zlib header flag, while a negative one + /// will not. + #[cfg(feature = "with-alloc")] + pub fn new_boxed_with_window_bits(window_bits: i32) -> Box<InflateState> { + let mut b: Box<InflateState> = Box::default(); + b.data_format = DataFormat::from_window_bits(window_bits); + b + } + + #[inline] + /// Reset the decompressor without re-allocating memory, using the given + /// data format. + pub fn reset(&mut self, data_format: DataFormat) { + self.reset_as(FullReset(data_format)); + } + + #[inline] + /// Resets the state according to specified policy. + pub fn reset_as<T: ResetPolicy>(&mut self, policy: T) { + policy.reset(self) + } +} + +/// Try to decompress from `input` to `output` with the given [`InflateState`] +/// +/// # `flush` +/// +/// Generally, the various [`MZFlush`] flags have meaning only on the compression side. They can be +/// supplied here, but the only one that has any semantic meaning is [`MZFlush::Finish`], which is a +/// signal that the stream is expected to finish, and failing to do so is an error. It isn't +/// necessary to specify it when the stream ends; you'll still get returned a +/// [`MZStatus::StreamEnd`] anyway. Other values either have no effect or cause errors. It's +/// likely that you'll almost always just want to use [`MZFlush::None`]. +/// +/// # Errors +/// +/// Returns [`MZError::Buf`] if the size of the `output` slice is empty or no progress was made due +/// to lack of expected input data, or if called with [`MZFlush::Finish`] and input wasn't all +/// consumed. +/// +/// Returns [`MZError::Data`] if this or a a previous call failed with an error return from +/// [`TINFLStatus`]; probably indicates corrupted data. +/// +/// Returns [`MZError::Stream`] when called with [`MZFlush::Full`] (meaningless on +/// decompression), or when called without [`MZFlush::Finish`] after an earlier call with +/// [`MZFlush::Finish`] has been made. +pub fn inflate( + state: &mut InflateState, + input: &[u8], + output: &mut [u8], + flush: MZFlush, +) -> StreamResult { + let mut bytes_consumed = 0; + let mut bytes_written = 0; + let mut next_in = input; + let mut next_out = output; + + if flush == MZFlush::Full { + return StreamResult::error(MZError::Stream); + } + + let mut decomp_flags = if state.data_format == DataFormat::Zlib { + inflate_flags::TINFL_FLAG_COMPUTE_ADLER32 + } else { + inflate_flags::TINFL_FLAG_IGNORE_ADLER32 + }; + + if (state.data_format == DataFormat::Zlib) + | (state.data_format == DataFormat::ZLibIgnoreChecksum) + { + decomp_flags |= inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER; + } + + let first_call = state.first_call; + state.first_call = false; + if (state.last_status as i32) < 0 { + return StreamResult::error(MZError::Data); + } + + if state.has_flushed && (flush != MZFlush::Finish) { + return StreamResult::error(MZError::Stream); + } + state.has_flushed |= flush == MZFlush::Finish; + + if (flush == MZFlush::Finish) && first_call { + decomp_flags |= inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + + let status = decompress(&mut state.decomp, next_in, next_out, 0, decomp_flags); + let in_bytes = status.1; + let out_bytes = status.2; + let status = status.0; + + state.last_status = status; + + bytes_consumed += in_bytes; + bytes_written += out_bytes; + + let ret_status = { + if (status as i32) < 0 { + Err(MZError::Data) + } else if status != TINFLStatus::Done { + state.last_status = TINFLStatus::Failed; + Err(MZError::Buf) + } else { + Ok(MZStatus::StreamEnd) + } + }; + return StreamResult { + bytes_consumed, + bytes_written, + status: ret_status, + }; + } + + if flush != MZFlush::Finish { + decomp_flags |= inflate_flags::TINFL_FLAG_HAS_MORE_INPUT; + } + + if state.dict_avail != 0 { + bytes_written += push_dict_out(state, &mut next_out); + return StreamResult { + bytes_consumed, + bytes_written, + status: Ok( + if (state.last_status == TINFLStatus::Done) && (state.dict_avail == 0) { + MZStatus::StreamEnd + } else { + MZStatus::Ok + }, + ), + }; + } + + let status = inflate_loop( + state, + &mut next_in, + &mut next_out, + &mut bytes_consumed, + &mut bytes_written, + decomp_flags, + flush, + ); + StreamResult { + bytes_consumed, + bytes_written, + status, + } +} + +fn inflate_loop( + state: &mut InflateState, + next_in: &mut &[u8], + next_out: &mut &mut [u8], + total_in: &mut usize, + total_out: &mut usize, + decomp_flags: u32, + flush: MZFlush, +) -> MZResult { + let orig_in_len = next_in.len(); + loop { + let status = decompress( + &mut state.decomp, + *next_in, + &mut state.dict, + state.dict_ofs, + decomp_flags, + ); + + let in_bytes = status.1; + let out_bytes = status.2; + let status = status.0; + + state.last_status = status; + + *next_in = &next_in[in_bytes..]; + *total_in += in_bytes; + + state.dict_avail = out_bytes; + *total_out += push_dict_out(state, next_out); + + // The stream was corrupted, and decompression failed. + if (status as i32) < 0 { + return Err(MZError::Data); + } + + // The decompressor has flushed all it's data and is waiting for more input, but + // there was no more input provided. + if (status == TINFLStatus::NeedsMoreInput) && orig_in_len == 0 { + return Err(MZError::Buf); + } + + if flush == MZFlush::Finish { + if status == TINFLStatus::Done { + // There is not enough space in the output buffer to flush the remaining + // decompressed data in the internal buffer. + return if state.dict_avail != 0 { + Err(MZError::Buf) + } else { + Ok(MZStatus::StreamEnd) + }; + // No more space in the output buffer, but we're not done. + } else if next_out.is_empty() { + return Err(MZError::Buf); + } + } else { + // We're not expected to finish, so it's fine if we can't flush everything yet. + let empty_buf = next_in.is_empty() || next_out.is_empty(); + if (status == TINFLStatus::Done) || empty_buf || (state.dict_avail != 0) { + return if (status == TINFLStatus::Done) && (state.dict_avail == 0) { + // No more data left, we're done. + Ok(MZStatus::StreamEnd) + } else { + // Ok for now, still waiting for more input data or output space. + Ok(MZStatus::Ok) + }; + } + } + } +} + +fn push_dict_out(state: &mut InflateState, next_out: &mut &mut [u8]) -> usize { + let n = cmp::min(state.dict_avail as usize, next_out.len()); + (next_out[..n]).copy_from_slice(&state.dict[state.dict_ofs..state.dict_ofs + n]); + *next_out = &mut mem::take(next_out)[n..]; + state.dict_avail -= n; + state.dict_ofs = (state.dict_ofs + (n)) & (TINFL_LZ_DICT_SIZE - 1); + n +} + +#[cfg(test)] +mod test { + use super::{inflate, InflateState}; + use crate::{DataFormat, MZFlush, MZStatus}; + use alloc::vec; + + #[test] + fn test_state() { + let encoded = [ + 120u8, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, + 19, + ]; + let mut out = vec![0; 50]; + let mut state = InflateState::new_boxed(DataFormat::Zlib); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + + state.reset_as(super::ZeroReset); + out.iter_mut().map(|x| *x = 0).count(); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + + state.reset_as(super::MinReset); + out.iter_mut().map(|x| *x = 0).count(); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + assert_eq!(state.decompressor().adler32(), Some(459605011)); + + // Test state when not computing adler. + state = InflateState::new_boxed(DataFormat::ZLibIgnoreChecksum); + out.iter_mut().map(|x| *x = 0).count(); + let res = inflate(&mut state, &encoded, &mut out, MZFlush::Finish); + let status = res.status.expect("Failed to decompress!"); + assert_eq!(status, MZStatus::StreamEnd); + assert_eq!(out[..res.bytes_written as usize], b"Hello, zlib!"[..]); + assert_eq!(res.bytes_consumed, encoded.len()); + // Not computed, so should be Some(1) + assert_eq!(state.decompressor().adler32(), Some(1)); + // Should still have the checksum read from the header file. + assert_eq!(state.decompressor().adler32_header(), Some(459605011)) + } +} |