diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 17:39:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 17:39:49 +0000 |
commit | a0aa2307322cd47bbf416810ac0292925e03be87 (patch) | |
tree | 37076262a026c4b48c8a0e84f44ff9187556ca35 /rust/vendor/lzma-rs | |
parent | Initial commit. (diff) | |
download | suricata-a0aa2307322cd47bbf416810ac0292925e03be87.tar.xz suricata-a0aa2307322cd47bbf416810ac0292925e03be87.zip |
Adding upstream version 1:7.0.3. (upstream/1%7.0.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'rust/vendor/lzma-rs')
27 files changed, 3838 insertions, 0 deletions
diff --git a/rust/vendor/lzma-rs/.cargo-checksum.json b/rust/vendor/lzma-rs/.cargo-checksum.json new file mode 100644 index 0000000..1fb82aa --- /dev/null +++ b/rust/vendor/lzma-rs/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"fa7c2c68d94f47a8bc484bd6732cd823887876a526706cee07afa47930001e92","Cargo.toml":"b8ff60ec00c24deb0667b098ab3e3a6fd3a2796b0cdc3db33aef55b3cdcfef99","LICENSE":"ad9f07faa5f6f671df93523293cf8c1ce24f14886fa101013c21536c7890c59b","README.md":"11eaa4b5ec2069779e84435e237a172e5afed84446a4f7b08003bb8cbe3a3136","src/decode/lzbuffer.rs":"dad942bac27d9be51263e49e826a364c20d975c2634e4415fce800bcd8fda1fc","src/decode/lzma.rs":"6b04ba5606f1f60124cbb40778e3ddf5a1b85b1b208f5a60839181a2dba9effd","src/decode/lzma2.rs":"a1b3552167631bdff6d46990218ad358852907ae2de7ab70f21ec62fce806d94","src/decode/mod.rs":"342a1f0465270ceb0dcc0ba5132ce93833cbfab9b4e4c86acbdfa4a74be6ce9e","src/decode/options.rs":"36302877616cf978741b8c72aa6e80e2d99c6dedf7ee5d2e9e208422d57283c5","src/decode/rangecoder.rs":"10c1c635ef507f6136923d37bb09ac2513d4621a8e8304d9c771cc8ec20856f8","src/decode/stream.rs":"5f270757ddbfa80dc55dd1a93496e6576a88b36536bd0cbbc8406522a314e69b","src/decode/util.rs":"2c15bf1dea97605067fe56d6b528888c789a245ee5b9122b495f2b21d96d6a73","src/decode/xz.rs":"e82ea066516f257f6c3cfdc26509472954e70ba0ac33dbe9d8519f664763d668","src/encode/dumbencoder.rs":"ccc3866c2b47e9a6ffb10357394d0116fd35637b35de6aa9070183fe65e62d7a","src/encode/lzma2.rs":"5f2b5adf05a7c9cb44e49aefad8598774dab7dfec61bcaec4e92c372705187ad","src/encode/mod.rs":"a28bd02c8a14416fea301dff1614e77c21a729e7794f2b90ab44c4228c68ef61","src/encode/options.rs":"f4f88d4b0141041dc7814184cafcf8c7c28468fc7863d96bbb769ffc56c0abb6","src/encode/rangecoder.rs":"a22e1bd2d82995e905aee4feb0f203ab803717a2661a03c5a8d5e00d3697eec9","src/encode/util.rs":"7b824fded80c437ae2ff1040578d570615a06b5fad8255f122fef9788326da53","src/encode/xz.rs":"fc199a3952d2bb02d98272916aea4ba5fde77364f893773b9a89b1c0bae9d7d4","src/error.rs":"228c
ed7366cbcfad5531753e30777569e940b4026b8765991c5e6a668c9e0f60","src/lib.rs":"fef04d2910c4fd8b0799a80ce8fed0e497f4498702217f1aca6a2ffbd1e73f82","src/macros.rs":"57223ee9a2240b883824a42eac300b1d91eeb36001d0cb857bb949499ee87711","src/xz/footer.rs":"3ae9f1e888f43aba9ba4d683976ac3cdc18bdd1bf6e745e29bb5a471dead37fc","src/xz/header.rs":"c4db40ba9a65c98fb68cbfabdd0dc9a7c395f8b22dc4b4d58b520011653a2d18","src/xz/mod.rs":"6beb33c72083be8399ab51da02af477f6bb9eaee54ac2aa8a50bfb385ae4d1e5"},"package":"aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1"}
\ No newline at end of file diff --git a/rust/vendor/lzma-rs/CHANGELOG.md b/rust/vendor/lzma-rs/CHANGELOG.md new file mode 100644 index 0000000..969d055 --- /dev/null +++ b/rust/vendor/lzma-rs/CHANGELOG.md @@ -0,0 +1,51 @@ +## 0.2.0 + +- Update minimum supported Rust version: 1.32.0 -> 1.40.0. +- Update dependencies: + - `log`: ^0.4.8 -> ^0.4.14 + - `env_logger`: 0.7.1 -> ^0.8.3 +- [Breaking change] Rename acronyms to be lowercase, following clippy::upper-case-acronyms. +- [Breaking change] Add a memory limit option (https://github.com/gendx/lzma-rs/pull/50). +- Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). +- Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). +- Add a streaming mode for LZMA decompression, gated by the `stream` feature. +- Add more fuzzing targets, including comparison with the `xz2` crate. +- Various improvements: benchmarks, fix lint warnings. +- Migrate from Travis-CI to GitHub Actions. + +## 0.1.3 + +- Minimum supported Rust version: 1.32.0. +- Update dependencies: + - `log`: ^0.4.0 -> ^0.4.8 + - `env_logger`: 0.6.0 -> ^0.7.1 +- Gate logging behind an opt-in feature. This improves decoding performance by + ~25% (https://github.com/gendx/lzma-rs/pull/31). +- Lazily allocate the circular buffer (https://github.com/gendx/lzma-rs/pull/22). + This improves memory usage (especially for WebAssembly targets) at the expense + of a ~5% performance regression (https://github.com/gendx/lzma-rs/issues/27). +- Return an error instead of panicking on unsupported SHA-256 checksum for XZ + decoding (https://github.com/gendx/lzma-rs/pull/40). +- Add Clippy to CI. +- Document public APIs. +- Deny missing docs, missing Debug implementations and build warnings. +- Forbid unsafe code. +- Remove extern statements that are unnecessary on the 2018 edition. + +## 0.1.2 + +- Fix bug in the range coder (https://github.com/gendx/lzma-rs/issues/15). 
+- Add support for specifying the unpacked size outside of the header + (https://github.com/gendx/lzma-rs/pull/17). +- Migrate to Rust 2018 edition. +- Add benchmarks. +- Fix some Clippy warnings. + +## 0.1.1 + +- Upgrade `env_logger` dependency. +- Refactoring to use `std::io::Take`, operator `?`. + +## 0.1.0 + +- Initial release. diff --git a/rust/vendor/lzma-rs/Cargo.toml b/rust/vendor/lzma-rs/Cargo.toml new file mode 100644 index 0000000..e66b746 --- /dev/null +++ b/rust/vendor/lzma-rs/Cargo.toml @@ -0,0 +1,43 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "lzma-rs" +version = "0.2.0" +authors = ["Guillaume Endignoux <ggendx@gmail.com>"] +exclude = ["tests/*", "benches/*", "fuzz/*", ".github/*", "Cargo.lock"] +description = "A codec for LZMA, LZMA2 and XZ written in pure Rust" +readme = "README.md" +keywords = ["lzma", "compression", "decompression"] +categories = ["compression"] +license = "MIT" +repository = "https://github.com/gendx/lzma-rs" +[dependencies.byteorder] +version = "^1.0.0" + +[dependencies.crc] +version = "^1.0.0" + +[dependencies.env_logger] +version = "^0.8.3" +optional = true + +[dependencies.log] +version = "^0.4.14" +optional = true +[dev-dependencies.rust-lzma] +version = "0.5" + +[features] +enable_logging = ["env_logger", "log"] +stream = [] diff --git a/rust/vendor/lzma-rs/LICENSE b/rust/vendor/lzma-rs/LICENSE new file mode 100644 index 0000000..ccf054e --- /dev/null +++ b/rust/vendor/lzma-rs/LICENSE @@ -0,0 +1,21 
@@ +MIT License + +Copyright (c) 2017 - 2018 Guillaume Endignoux + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/rust/vendor/lzma-rs/README.md b/rust/vendor/lzma-rs/README.md new file mode 100644 index 0000000..2c26908 --- /dev/null +++ b/rust/vendor/lzma-rs/README.md @@ -0,0 +1,38 @@ +# lzma-rs + +[![Crate](https://img.shields.io/crates/v/lzma-rs.svg)](https://crates.io/crates/lzma-rs) +[![Documentation](https://docs.rs/lzma-rs/badge.svg)](https://docs.rs/lzma-rs) +[![Safety Dance](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/) +![Build Status](https://github.com/gendx/lzma-rs/workflows/Build%20and%20run%20tests/badge.svg) +[![Minimum rust 1.40](https://img.shields.io/badge/rust-1.40%2B-orange.svg)](https://github.com/rust-lang/rust/blob/master/RELEASES.md#version-1400-2019-12-19) + +This project is a decoder for LZMA and its variants written in pure Rust, with focus on clarity. 
+It already supports LZMA, LZMA2 and a subset of the `.xz` file format. + +## Usage + +Decompress a `.xz` file. + +```rust +let filename = "foo.xz"; +let mut f = std::io::BufReader::new(std::fs::File::open(filename).unwrap()); +// "decomp" can be anything that implements "std::io::Write" +let mut decomp: Vec<u8> = Vec::new(); +lzma_rs::xz_decompress(&mut f, &mut decomp).unwrap(); +// Decompressed content is now in "decomp" +``` + +## Encoder + +For now, there is also a dumb encoder that only uses byte literals, with many hard-coded constants for code simplicity. +Better encoders are welcome! + +## Contributing + +Pull-requests are welcome, to improve the decoder, add better encoders, or more tests. +Ultimately, this project should also implement .xz and .7z files. + +## License + +MIT + diff --git a/rust/vendor/lzma-rs/src/decode/lzbuffer.rs b/rust/vendor/lzma-rs/src/decode/lzbuffer.rs new file mode 100644 index 0000000..9409424 --- /dev/null +++ b/rust/vendor/lzma-rs/src/decode/lzbuffer.rs @@ -0,0 +1,314 @@ +use crate::error; +use std::io; + +pub trait LzBuffer<W> +where + W: io::Write, +{ + fn len(&self) -> usize; + // Retrieve the last byte or return a default + fn last_or(&self, lit: u8) -> u8; + // Retrieve the n-th last byte + fn last_n(&self, dist: usize) -> error::Result<u8>; + // Append a literal + fn append_literal(&mut self, lit: u8) -> error::Result<()>; + // Fetch an LZ sequence (length, distance) from inside the buffer + fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()>; + // Get a reference to the output sink + fn get_output(&self) -> &W; + // Get a mutable reference to the output sink + fn get_output_mut(&mut self) -> &mut W; + // Consumes this buffer and flushes any data + fn finish(self) -> io::Result<W>; + // Consumes this buffer without flushing any data + fn into_output(self) -> W; +} + +// An accumulating buffer for LZ sequences +pub struct LzAccumBuffer<W> +where + W: io::Write, +{ + stream: W, // Output sink + buf: 
Vec<u8>, // Buffer + memlimit: usize, // Buffer memory limit + len: usize, // Total number of bytes sent through the buffer +} + +impl<W> LzAccumBuffer<W> +where + W: io::Write, +{ + pub fn from_stream(stream: W) -> Self { + Self::from_stream_with_memlimit(stream, std::usize::MAX) + } + + pub fn from_stream_with_memlimit(stream: W, memlimit: usize) -> Self { + Self { + stream, + buf: Vec::new(), + memlimit, + len: 0, + } + } + + // Append bytes + pub fn append_bytes(&mut self, buf: &[u8]) { + self.buf.extend_from_slice(buf); + self.len += buf.len(); + } + + // Reset the internal dictionary + pub fn reset(&mut self) -> io::Result<()> { + self.stream.write_all(self.buf.as_slice())?; + self.buf.clear(); + self.len = 0; + Ok(()) + } +} + +impl<W> LzBuffer<W> for LzAccumBuffer<W> +where + W: io::Write, +{ + fn len(&self) -> usize { + self.len + } + + // Retrieve the last byte or return a default + fn last_or(&self, lit: u8) -> u8 { + let buf_len = self.buf.len(); + if buf_len == 0 { + lit + } else { + self.buf[buf_len - 1] + } + } + + // Retrieve the n-th last byte + fn last_n(&self, dist: usize) -> error::Result<u8> { + let buf_len = self.buf.len(); + if dist > buf_len { + return Err(error::Error::LzmaError(format!( + "Match distance {} is beyond output size {}", + dist, buf_len + ))); + } + + Ok(self.buf[buf_len - dist]) + } + + // Append a literal + fn append_literal(&mut self, lit: u8) -> error::Result<()> { + let new_len = self.len + 1; + + if new_len > self.memlimit { + Err(error::Error::LzmaError(format!( + "exceeded memory limit of {}", + self.memlimit + ))) + } else { + self.buf.push(lit); + self.len = new_len; + Ok(()) + } + } + + // Fetch an LZ sequence (length, distance) from inside the buffer + fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { + lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); + let buf_len = self.buf.len(); + if dist > buf_len { + return Err(error::Error::LzmaError(format!( + "LZ distance {} is beyond output size 
{}", + dist, buf_len + ))); + } + + let mut offset = buf_len - dist; + for _ in 0..len { + let x = self.buf[offset]; + self.buf.push(x); + offset += 1; + } + self.len += len; + Ok(()) + } + + // Get a reference to the output sink + fn get_output(&self) -> &W { + &self.stream + } + + // Get a mutable reference to the output sink + fn get_output_mut(&mut self) -> &mut W { + &mut self.stream + } + + // Consumes this buffer and flushes any data + fn finish(mut self) -> io::Result<W> { + self.stream.write_all(self.buf.as_slice())?; + self.stream.flush()?; + Ok(self.stream) + } + + // Consumes this buffer without flushing any data + fn into_output(self) -> W { + self.stream + } +} + +// A circular buffer for LZ sequences +pub struct LzCircularBuffer<W> +where + W: io::Write, +{ + stream: W, // Output sink + buf: Vec<u8>, // Circular buffer + dict_size: usize, // Length of the buffer + memlimit: usize, // Buffer memory limit + cursor: usize, // Current position + len: usize, // Total number of bytes sent through the buffer +} + +impl<W> LzCircularBuffer<W> +where + W: io::Write, +{ + pub fn from_stream_with_memlimit(stream: W, dict_size: usize, memlimit: usize) -> Self { + lzma_info!("Dict size in LZ buffer: {}", dict_size); + Self { + stream, + buf: Vec::new(), + dict_size, + memlimit, + cursor: 0, + len: 0, + } + } + + fn get(&self, index: usize) -> u8 { + *self.buf.get(index).unwrap_or(&0) + } + + fn set(&mut self, index: usize, value: u8) -> error::Result<()> { + let new_len = index + 1; + + if self.buf.len() < new_len { + if new_len <= self.memlimit { + self.buf.resize(new_len, 0); + } else { + return Err(error::Error::LzmaError(format!( + "exceeded memory limit of {}", + self.memlimit + ))); + } + } + self.buf[index] = value; + Ok(()) + } +} + +impl<W> LzBuffer<W> for LzCircularBuffer<W> +where + W: io::Write, +{ + fn len(&self) -> usize { + self.len + } + + // Retrieve the last byte or return a default + fn last_or(&self, lit: u8) -> u8 { + if self.len == 0 { + lit 
+ } else { + self.get((self.dict_size + self.cursor - 1) % self.dict_size) + } + } + + // Retrieve the n-th last byte + fn last_n(&self, dist: usize) -> error::Result<u8> { + if dist > self.dict_size { + return Err(error::Error::LzmaError(format!( + "Match distance {} is beyond dictionary size {}", + dist, self.dict_size + ))); + } + if dist > self.len { + return Err(error::Error::LzmaError(format!( + "Match distance {} is beyond output size {}", + dist, self.len + ))); + } + + let offset = (self.dict_size + self.cursor - dist) % self.dict_size; + Ok(self.get(offset)) + } + + // Append a literal + fn append_literal(&mut self, lit: u8) -> error::Result<()> { + self.set(self.cursor, lit)?; + self.cursor += 1; + self.len += 1; + + // Flush the circular buffer to the output + if self.cursor == self.dict_size { + self.stream.write_all(self.buf.as_slice())?; + self.cursor = 0; + } + + Ok(()) + } + + // Fetch an LZ sequence (length, distance) from inside the buffer + fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { + lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); + if dist > self.dict_size { + return Err(error::Error::LzmaError(format!( + "LZ distance {} is beyond dictionary size {}", + dist, self.dict_size + ))); + } + if dist > self.len { + return Err(error::Error::LzmaError(format!( + "LZ distance {} is beyond output size {}", + dist, self.len + ))); + } + + let mut offset = (self.dict_size + self.cursor - dist) % self.dict_size; + for _ in 0..len { + let x = self.get(offset); + self.append_literal(x)?; + offset += 1; + if offset == self.dict_size { + offset = 0 + } + } + Ok(()) + } + + // Get a reference to the output sink + fn get_output(&self) -> &W { + &self.stream + } + + // Get a mutable reference to the output sink + fn get_output_mut(&mut self) -> &mut W { + &mut self.stream + } + + // Consumes this buffer and flushes any data + fn finish(mut self) -> io::Result<W> { + if self.cursor > 0 { + 
self.stream.write_all(&self.buf[0..self.cursor])?; + self.stream.flush()?; + } + Ok(self.stream) + } + + // Consumes this buffer without flushing any data + fn into_output(self) -> W { + self.stream + } +} diff --git a/rust/vendor/lzma-rs/src/decode/lzma.rs b/rust/vendor/lzma-rs/src/decode/lzma.rs new file mode 100644 index 0000000..313bdbc --- /dev/null +++ b/rust/vendor/lzma-rs/src/decode/lzma.rs @@ -0,0 +1,599 @@ +use crate::decode::lzbuffer; +use crate::decode::rangecoder; +use crate::error; +use byteorder::{LittleEndian, ReadBytesExt}; +use std::io; +use std::marker::PhantomData; + +use crate::decompress::Options; +use crate::decompress::UnpackedSize; + +/// Maximum input data that can be processed in one iteration. +/// Libhtp uses the following equation to define the maximum number of bits +/// for the worst case scenario: +/// log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160 +const MAX_REQUIRED_INPUT: usize = 20; + +/// Processing mode for decompression. +/// +/// Tells the decompressor if we should expect more data after parsing the +/// current input. +#[derive(Debug, PartialEq)] +enum ProcessingMode { + /// Streaming mode. Process the input bytes but assume there will be more + /// chunks of input data to receive in future calls to `process_mode()`. + Partial, + /// Synchronous mode. Process the input bytes and confirm end of stream has been reached. + /// Use this mode if you are processing a fixed buffer of compressed data, or after + /// using `Mode::Partial` to check for the end of stream. + Finish, +} + +/// Result of the next iteration of processing. +/// +/// Indicates whether processing should continue or is finished. 
+#[derive(Debug, PartialEq)] +enum ProcessingStatus { + Continue, + Finished, +} + +pub struct LzmaParams { + // most lc significant bits of previous byte are part of the literal context + lc: u32, // 0..8 + lp: u32, // 0..4 + // context for literal/match is plaintext offset modulo 2^pb + pb: u32, // 0..4 + dict_size: u32, + unpacked_size: Option<u64>, +} + +impl LzmaParams { + pub fn read_header<R>(input: &mut R, options: &Options) -> error::Result<LzmaParams> + where + R: io::BufRead, + { + // Properties + let props = input.read_u8().map_err(error::Error::HeaderTooShort)?; + + let mut pb = props as u32; + if pb >= 225 { + return Err(error::Error::LzmaError(format!( + "LZMA header invalid properties: {} must be < 225", + pb + ))); + } + + let lc: u32 = pb % 9; + pb /= 9; + let lp: u32 = pb % 5; + pb /= 5; + + lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); + + // Dictionary + let dict_size_provided = input + .read_u32::<LittleEndian>() + .map_err(error::Error::HeaderTooShort)?; + let dict_size = if dict_size_provided < 0x1000 { + 0x1000 + } else { + dict_size_provided + }; + + lzma_info!("Dict size: {}", dict_size); + + // Unpacked size + let unpacked_size: Option<u64> = match options.unpacked_size { + UnpackedSize::ReadFromHeader => { + let unpacked_size_provided = input + .read_u64::<LittleEndian>() + .map_err(error::Error::HeaderTooShort)?; + let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; + if marker_mandatory { + None + } else { + Some(unpacked_size_provided) + } + } + UnpackedSize::ReadHeaderButUseProvided(x) => { + input + .read_u64::<LittleEndian>() + .map_err(error::Error::HeaderTooShort)?; + x + } + UnpackedSize::UseProvided(x) => x, + }; + + lzma_info!("Unpacked size: {:?}", unpacked_size); + + let params = LzmaParams { + lc, + lp, + pb, + dict_size, + unpacked_size, + }; + + Ok(params) + } +} + +pub struct DecoderState<W, LZB> +where + W: io::Write, + LZB: lzbuffer::LzBuffer<W>, +{ + _phantom: 
PhantomData<W>, + // Buffer input data here if we need more for decompression. Up to + // MAX_REQUIRED_INPUT bytes can be consumed during one iteration. + partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>, + pub output: LZB, + // most lc significant bits of previous byte are part of the literal context + pub lc: u32, // 0..8 + pub lp: u32, // 0..4 + // context for literal/match is plaintext offset modulo 2^pb + pub pb: u32, // 0..4 + unpacked_size: Option<u64>, + literal_probs: Vec<Vec<u16>>, + pos_slot_decoder: Vec<rangecoder::BitTree>, + align_decoder: rangecoder::BitTree, + pos_decoders: [u16; 115], + is_match: [u16; 192], // true = LZ, false = literal + is_rep: [u16; 12], + is_rep_g0: [u16; 12], + is_rep_g1: [u16; 12], + is_rep_g2: [u16; 12], + is_rep_0long: [u16; 192], + state: usize, + rep: [usize; 4], + len_decoder: rangecoder::LenDecoder, + rep_len_decoder: rangecoder::LenDecoder, +} + +// Initialize decoder with accumulating buffer +pub fn new_accum<W>( + output: lzbuffer::LzAccumBuffer<W>, + lc: u32, + lp: u32, + pb: u32, + unpacked_size: Option<u64>, +) -> DecoderState<W, lzbuffer::LzAccumBuffer<W>> +where + W: io::Write, +{ + DecoderState { + _phantom: PhantomData, + partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]), + output, + lc, + lp, + pb, + unpacked_size, + literal_probs: vec![vec![0x400; 0x300]; 1 << (lc + lp)], + pos_slot_decoder: vec![rangecoder::BitTree::new(6); 4], + align_decoder: rangecoder::BitTree::new(4), + pos_decoders: [0x400; 115], + is_match: [0x400; 192], + is_rep: [0x400; 12], + is_rep_g0: [0x400; 12], + is_rep_g1: [0x400; 12], + is_rep_g2: [0x400; 12], + is_rep_0long: [0x400; 192], + state: 0, + rep: [0; 4], + len_decoder: rangecoder::LenDecoder::new(), + rep_len_decoder: rangecoder::LenDecoder::new(), + } +} + +// Initialize decoder with circular buffer +pub fn new_circular<W>( + output: W, + params: LzmaParams, +) -> error::Result<DecoderState<W, lzbuffer::LzCircularBuffer<W>>> +where + W: io::Write, +{ 
+ new_circular_with_memlimit(output, params, std::usize::MAX) +} + +// Initialize decoder with circular buffer +pub fn new_circular_with_memlimit<W>( + output: W, + params: LzmaParams, + memlimit: usize, +) -> error::Result<DecoderState<W, lzbuffer::LzCircularBuffer<W>>> +where + W: io::Write, +{ + // Decoder + let decoder = DecoderState { + _phantom: PhantomData, + output: lzbuffer::LzCircularBuffer::from_stream_with_memlimit( + output, + params.dict_size as usize, + memlimit, + ), + partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]), + lc: params.lc, + lp: params.lp, + pb: params.pb, + unpacked_size: params.unpacked_size, + literal_probs: vec![vec![0x400; 0x300]; 1 << (params.lc + params.lp)], + pos_slot_decoder: vec![rangecoder::BitTree::new(6); 4], + align_decoder: rangecoder::BitTree::new(4), + pos_decoders: [0x400; 115], + is_match: [0x400; 192], + is_rep: [0x400; 12], + is_rep_g0: [0x400; 12], + is_rep_g1: [0x400; 12], + is_rep_g2: [0x400; 12], + is_rep_0long: [0x400; 192], + state: 0, + rep: [0; 4], + len_decoder: rangecoder::LenDecoder::new(), + rep_len_decoder: rangecoder::LenDecoder::new(), + }; + + Ok(decoder) +} + +impl<W, LZB> DecoderState<W, LZB> +where + W: io::Write, + LZB: lzbuffer::LzBuffer<W>, +{ + pub fn reset_state(&mut self, lc: u32, lp: u32, pb: u32) { + self.lc = lc; + self.lp = lp; + self.pb = pb; + self.literal_probs = vec![vec![0x400; 0x300]; 1 << (lc + lp)]; + self.pos_slot_decoder = vec![rangecoder::BitTree::new(6); 4]; + self.align_decoder = rangecoder::BitTree::new(4); + self.pos_decoders = [0x400; 115]; + self.is_match = [0x400; 192]; + self.is_rep = [0x400; 12]; + self.is_rep_g0 = [0x400; 12]; + self.is_rep_g1 = [0x400; 12]; + self.is_rep_g2 = [0x400; 12]; + self.is_rep_0long = [0x400; 192]; + self.state = 0; + self.rep = [0; 4]; + self.len_decoder = rangecoder::LenDecoder::new(); + self.rep_len_decoder = rangecoder::LenDecoder::new(); + } + + pub fn set_unpacked_size(&mut self, unpacked_size: Option<u64>) { + 
self.unpacked_size = unpacked_size; + } + + pub fn process<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + ) -> error::Result<()> { + self.process_mode(rangecoder, ProcessingMode::Finish) + } + + #[cfg(feature = "stream")] + pub fn process_stream<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + ) -> error::Result<()> { + self.process_mode(rangecoder, ProcessingMode::Partial) + } + + /// Process the next iteration of the loop. + /// + /// If the update flag is true, the decoder's state will be updated. + /// + /// Returns `ProcessingStatus` to determine whether one should continue + /// processing the loop. + fn process_next_inner<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + update: bool, + ) -> error::Result<ProcessingStatus> { + let pos_state = self.output.len() & ((1 << self.pb) - 1); + + // Literal + if !rangecoder.decode_bit( + // TODO: assumes pb = 2 ?? + &mut self.is_match[(self.state << 4) + pos_state], + update, + )? { + let byte: u8 = self.decode_literal(rangecoder, update)?; + + if update { + lzma_debug!("Literal: {}", byte); + self.output.append_literal(byte)?; + + self.state = if self.state < 4 { + 0 + } else if self.state < 10 { + self.state - 3 + } else { + self.state - 6 + }; + } + return Ok(ProcessingStatus::Continue); + } + + // LZ + let mut len: usize; + // Distance is repeated from LRU + if rangecoder.decode_bit(&mut self.is_rep[self.state], update)? { + // dist = rep[0] + if !rangecoder.decode_bit(&mut self.is_rep_g0[self.state], update)? { + // len = 1 + if !rangecoder.decode_bit( + &mut self.is_rep_0long[(self.state << 4) + pos_state], + update, + )? 
{ + // update state (short rep) + if update { + self.state = if self.state < 7 { 9 } else { 11 }; + let dist = self.rep[0] + 1; + self.output.append_lz(1, dist)?; + } + return Ok(ProcessingStatus::Continue); + } + // dist = rep[i] + } else { + let idx: usize; + if !rangecoder.decode_bit(&mut self.is_rep_g1[self.state], update)? { + idx = 1; + } else if !rangecoder.decode_bit(&mut self.is_rep_g2[self.state], update)? { + idx = 2; + } else { + idx = 3; + } + if update { + // Update LRU + let dist = self.rep[idx]; + for i in (0..idx).rev() { + self.rep[i + 1] = self.rep[i]; + } + self.rep[0] = dist + } + } + + len = self.rep_len_decoder.decode(rangecoder, pos_state, update)?; + + if update { + // update state (rep) + self.state = if self.state < 7 { 8 } else { 11 }; + } + // New distance + } else { + if update { + // Update LRU + self.rep[3] = self.rep[2]; + self.rep[2] = self.rep[1]; + self.rep[1] = self.rep[0]; + } + + len = self.len_decoder.decode(rangecoder, pos_state, update)?; + + if update { + // update state (match) + self.state = if self.state < 7 { 7 } else { 10 }; + } + + let rep_0 = self.decode_distance(rangecoder, len, update)?; + + if update { + self.rep[0] = rep_0; + if self.rep[0] == 0xFFFF_FFFF { + if rangecoder.is_finished_ok()? { + return Ok(ProcessingStatus::Finished); + } + return Err(error::Error::LzmaError(String::from( + "Found end-of-stream marker but more bytes are available", + ))); + } + } + } + + if update { + len += 2; + + let dist = self.rep[0] + 1; + self.output.append_lz(len, dist)?; + } + + Ok(ProcessingStatus::Continue) + } + + fn process_next<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + ) -> error::Result<ProcessingStatus> { + self.process_next_inner(rangecoder, true) + } + + /// Try to process the next iteration of the loop. + /// + /// This will check to see if there is enough data to consume and advance the + /// decompressor. 
Needed in streaming mode to avoid corrupting the state while + /// processing incomplete chunks of data. + fn try_process_next(&mut self, buf: &[u8], range: u32, code: u32) -> error::Result<()> { + let mut temp = std::io::Cursor::new(buf); + let mut rangecoder = rangecoder::RangeDecoder::from_parts(&mut temp, range, code); + let _ = self.process_next_inner(&mut rangecoder, false)?; + Ok(()) + } + + /// Utility function to read data into the partial input buffer. + fn read_partial_input_buf<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + ) -> error::Result<()> { + // Fill as much of the tmp buffer as possible + let start = self.partial_input_buf.position() as usize; + let bytes_read = + rangecoder.read_into(&mut self.partial_input_buf.get_mut()[start..])? as u64; + self.partial_input_buf + .set_position(self.partial_input_buf.position() + bytes_read); + Ok(()) + } + + fn process_mode<'a, R: io::BufRead>( + &mut self, + mut rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + mode: ProcessingMode, + ) -> error::Result<()> { + loop { + if let Some(unpacked_size) = self.unpacked_size { + if self.output.len() as u64 >= unpacked_size { + break; + } + } else if match mode { + ProcessingMode::Partial => { + rangecoder.is_eof()? && self.partial_input_buf.position() as usize == 0 + } + ProcessingMode::Finish => { + rangecoder.is_finished_ok()? 
&& self.partial_input_buf.position() as usize == 0 + } + } { + break; + } + + if self.partial_input_buf.position() as usize > 0 { + self.read_partial_input_buf(rangecoder)?; + let tmp = *self.partial_input_buf.get_ref(); + + // Check if we need more data to advance the decompressor + if mode == ProcessingMode::Partial + && (self.partial_input_buf.position() as usize) < MAX_REQUIRED_INPUT + && self + .try_process_next( + &tmp[..self.partial_input_buf.position() as usize], + rangecoder.range, + rangecoder.code, + ) + .is_err() + { + return Ok(()); + } + + // Run the decompressor on the tmp buffer + let mut tmp_reader = + io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]); + let mut tmp_rangecoder = rangecoder::RangeDecoder::from_parts( + &mut tmp_reader, + rangecoder.range, + rangecoder.code, + ); + let res = self.process_next(&mut tmp_rangecoder)?; + + // Update the actual rangecoder + rangecoder.set(tmp_rangecoder.range, tmp_rangecoder.code); + + // Update tmp buffer + let end = self.partial_input_buf.position(); + let new_len = end - tmp_reader.position(); + self.partial_input_buf.get_mut()[..new_len as usize] + .copy_from_slice(&tmp[tmp_reader.position() as usize..end as usize]); + self.partial_input_buf.set_position(new_len); + + if res == ProcessingStatus::Finished { + break; + }; + } else { + let buf: &[u8] = rangecoder.stream.fill_buf()?; + if mode == ProcessingMode::Partial + && buf.len() < MAX_REQUIRED_INPUT + && self + .try_process_next(buf, rangecoder.range, rangecoder.code) + .is_err() + { + return self.read_partial_input_buf(rangecoder); + } + + if self.process_next(&mut rangecoder)? 
== ProcessingStatus::Finished { + break; + }; + } + } + + if let Some(len) = self.unpacked_size { + if mode == ProcessingMode::Finish && len != self.output.len() as u64 { + return Err(error::Error::LzmaError(format!( + "Expected unpacked size of {} but decompressed to {}", + len, + self.output.len() + ))); + } + } + + Ok(()) + } + + fn decode_literal<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + update: bool, + ) -> error::Result<u8> { + let def_prev_byte = 0u8; + let prev_byte = self.output.last_or(def_prev_byte) as usize; + + let mut result: usize = 1; + let lit_state = + ((self.output.len() & ((1 << self.lp) - 1)) << self.lc) + (prev_byte >> (8 - self.lc)); + let probs = &mut self.literal_probs[lit_state]; + + if self.state >= 7 { + let mut match_byte = self.output.last_n(self.rep[0] + 1)? as usize; + + while result < 0x100 { + let match_bit = (match_byte >> 7) & 1; + match_byte <<= 1; + let bit = rangecoder + .decode_bit(&mut probs[((1 + match_bit) << 8) + result], update)? + as usize; + result = (result << 1) ^ bit; + if match_bit != bit { + break; + } + } + } + + while result < 0x100 { + result = (result << 1) ^ (rangecoder.decode_bit(&mut probs[result], update)? as usize); + } + + Ok((result - 0x100) as u8) + } + + fn decode_distance<'a, R: io::BufRead>( + &mut self, + rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + length: usize, + update: bool, + ) -> error::Result<usize> { + let len_state = if length > 3 { 3 } else { length }; + + let pos_slot = self.pos_slot_decoder[len_state].parse(rangecoder, update)? as usize; + if pos_slot < 4 { + return Ok(pos_slot); + } + + let num_direct_bits = (pos_slot >> 1) - 1; + let mut result = (2 ^ (pos_slot & 1)) << num_direct_bits; + + if pos_slot < 14 { + result += rangecoder.parse_reverse_bit_tree( + num_direct_bits, + &mut self.pos_decoders, + result - pos_slot, + update, + )? as usize; + } else { + result += (rangecoder.get(num_direct_bits - 4)? 
as usize) << 4; + result += self.align_decoder.parse_reverse(rangecoder, update)? as usize; + } + + Ok(result) + } +} diff --git a/rust/vendor/lzma-rs/src/decode/lzma2.rs b/rust/vendor/lzma-rs/src/decode/lzma2.rs new file mode 100644 index 0000000..f359511 --- /dev/null +++ b/rust/vendor/lzma-rs/src/decode/lzma2.rs @@ -0,0 +1,191 @@ +use crate::decode::lzbuffer; +use crate::decode::lzbuffer::LzBuffer; +use crate::decode::lzma; +use crate::decode::rangecoder; +use crate::error; +use byteorder::{BigEndian, ReadBytesExt}; +use std::io; +use std::io::Read; + +pub fn decode_stream<R, W>(input: &mut R, output: &mut W) -> error::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + let accum = lzbuffer::LzAccumBuffer::from_stream(output); + let mut decoder = lzma::new_accum(accum, 0, 0, 0, None); + + loop { + let status = input + .read_u8() + .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected new status: {}", e)))?; + + lzma_info!("LZMA2 status: {}", status); + + if status == 0 { + lzma_info!("LZMA2 end of input"); + break; + } else if status == 1 { + // uncompressed reset dict + parse_uncompressed(&mut decoder, input, true)?; + } else if status == 2 { + // uncompressed no reset + parse_uncompressed(&mut decoder, input, false)?; + } else { + parse_lzma(&mut decoder, input, status)?; + } + } + + decoder.output.finish()?; + Ok(()) +} + +fn parse_lzma<R, W>( + decoder: &mut lzma::DecoderState<W, lzbuffer::LzAccumBuffer<W>>, + input: &mut R, + status: u8, +) -> error::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + if status & 0x80 == 0 { + return Err(error::Error::LzmaError(format!( + "LZMA2 invalid status {}, must be 0, 1, 2 or >= 128", + status + ))); + } + + let reset_dict: bool; + let reset_state: bool; + let reset_props: bool; + match (status >> 5) & 0x3 { + 0 => { + reset_dict = false; + reset_state = false; + reset_props = false; + } + 1 => { + reset_dict = false; + reset_state = true; + reset_props = false; + } + 2 => { + reset_dict = false; + 
reset_state = true; + reset_props = true; + } + 3 => { + reset_dict = true; + reset_state = true; + reset_props = true; + } + _ => unreachable!(), + } + + let unpacked_size = input + .read_u16::<BigEndian>() + .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?; + let unpacked_size = ((((status & 0x1F) as u64) << 16) | (unpacked_size as u64)) + 1; + + let packed_size = input + .read_u16::<BigEndian>() + .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected packed size: {}", e)))?; + let packed_size = (packed_size as u64) + 1; + + lzma_info!( + "LZMA2 compressed block {{ unpacked_size: {}, packed_size: {}, reset_dict: {}, reset_state: {}, reset_props: {} }}", + unpacked_size, + packed_size, + reset_dict, + reset_state, + reset_props + ); + + if reset_dict { + decoder.output.reset()?; + } + + if reset_state { + let lc: u32; + let lp: u32; + let mut pb: u32; + + if reset_props { + let props = input.read_u8().map_err(|e| { + error::Error::LzmaError(format!("LZMA2 expected new properties: {}", e)) + })?; + + pb = props as u32; + if pb >= 225 { + return Err(error::Error::LzmaError(format!( + "LZMA2 invalid properties: {} must be < 225", + pb + ))); + } + + lc = pb % 9; + pb /= 9; + lp = pb % 5; + pb /= 5; + + if lc + lp > 4 { + return Err(error::Error::LzmaError(format!( + "LZMA2 invalid properties: lc + lp ({} + {}) must be <= 4", + lc, lp + ))); + } + + lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); + } else { + lc = decoder.lc; + lp = decoder.lp; + pb = decoder.pb; + } + + decoder.reset_state(lc, lp, pb); + } + + decoder.set_unpacked_size(Some(unpacked_size + decoder.output.len() as u64)); + + let mut taken = input.take(packed_size); + let mut rangecoder = rangecoder::RangeDecoder::new(&mut taken) + .map_err(|e| error::Error::LzmaError(format!("LZMA input too short: {}", e)))?; + decoder.process(&mut rangecoder) +} + +fn parse_uncompressed<R, W>( + decoder: &mut lzma::DecoderState<W, 
lzbuffer::LzAccumBuffer<W>>, + input: &mut R, + reset_dict: bool, +) -> error::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + let unpacked_size = input + .read_u16::<BigEndian>() + .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?; + let unpacked_size = (unpacked_size as usize) + 1; + + lzma_info!( + "LZMA2 uncompressed block {{ unpacked_size: {}, reset_dict: {} }}", + unpacked_size, + reset_dict + ); + + if reset_dict { + decoder.output.reset()?; + } + + let mut buf = vec![0; unpacked_size]; + input.read_exact(buf.as_mut_slice()).map_err(|e| { + error::Error::LzmaError(format!( + "LZMA2 expected {} uncompressed bytes: {}", + unpacked_size, e + )) + })?; + decoder.output.append_bytes(buf.as_slice()); + + Ok(()) +} diff --git a/rust/vendor/lzma-rs/src/decode/mod.rs b/rust/vendor/lzma-rs/src/decode/mod.rs new file mode 100644 index 0000000..2a7b0b8 --- /dev/null +++ b/rust/vendor/lzma-rs/src/decode/mod.rs @@ -0,0 +1,12 @@ +//! Decoding logic. + +pub mod lzbuffer; +pub mod lzma; +pub mod lzma2; +pub mod options; +pub mod rangecoder; +pub mod util; +pub mod xz; + +#[cfg(feature = "stream")] +pub mod stream; diff --git a/rust/vendor/lzma-rs/src/decode/options.rs b/rust/vendor/lzma-rs/src/decode/options.rs new file mode 100644 index 0000000..cea2b58 --- /dev/null +++ b/rust/vendor/lzma-rs/src/decode/options.rs @@ -0,0 +1,62 @@ +/// Options to tweak decompression behavior. +#[derive(Clone, Copy, Debug, PartialEq, Default)] +pub struct Options { + /// Defines whether the unpacked size should be read from the header or provided. + /// + /// The default is + /// [`UnpackedSize::ReadFromHeader`](enum.UnpackedSize.html#variant.ReadFromHeader). + pub unpacked_size: UnpackedSize, + /// Defines whether the dictionary's dynamic size should be limited during decompression. + /// + /// The default is unlimited. + pub memlimit: Option<usize>, + /// Determines whether to bypass end of stream validation. 
/// How the unpacked (decompressed) size of the data is determined.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum UnpackedSize {
    /// The header contains the 8 bytes that specify the unpacked size.
    /// If they are `0xFFFF_FFFF_FFFF_FFFF`, an end-of-payload marker is
    /// expected in the file; otherwise they are read as a little-endian
    /// encoded u64.
    ReadFromHeader,
    /// The header contains the 8 unpacked-size bytes; they are read but
    /// ignored, and the provided value is used instead.
    /// A provided value of `None` means an end-of-payload marker is expected
    /// in the file.
    /// Note that this is a non-standard way of reading LZMA data,
    /// but is used by certain libraries such as
    /// [OpenCTM](http://openctm.sourceforge.net/).
    ReadHeaderButUseProvided(Option<u64>),
    /// The header does *not* contain the 8 bytes typically used for the
    /// unpacked size; the provided value is used.
    /// A provided value of `None` means an end-of-payload marker is expected
    /// in the file.
    UseProvided(Option<u64>),
}

impl Default for UnpackedSize {
    /// Returns [`UnpackedSize::ReadFromHeader`].
    fn default() -> Self {
        Self::ReadFromHeader
    }
}
as u32); + + lzma_debug!("+ {{ range: {:08x}, code: {:08x} }}", self.range, self.code); + } + Ok(()) + } + + #[inline] + fn get_bit(&mut self) -> error::Result<bool> { + self.range >>= 1; + + let bit = self.code >= self.range; + if bit { + self.code -= self.range + } + + self.normalize()?; + Ok(bit) + } + + pub fn get(&mut self, count: usize) -> error::Result<u32> { + let mut result = 0u32; + for _ in 0..count { + result = (result << 1) ^ (self.get_bit()? as u32) + } + Ok(result) + } + + #[inline] + pub fn decode_bit(&mut self, prob: &mut u16, update: bool) -> io::Result<bool> { + let bound: u32 = (self.range >> 11) * (*prob as u32); + + lzma_trace!( + " bound: {:08x}, prob: {:04x}, bit: {}", + bound, + prob, + (self.code > bound) as u8 + ); + if self.code < bound { + if update { + *prob += (0x800_u16 - *prob) >> 5; + } + self.range = bound; + + self.normalize()?; + Ok(false) + } else { + if update { + *prob -= *prob >> 5; + } + self.code -= bound; + self.range -= bound; + + self.normalize()?; + Ok(true) + } + } + + fn parse_bit_tree( + &mut self, + num_bits: usize, + probs: &mut [u16], + update: bool, + ) -> io::Result<u32> { + let mut tmp: u32 = 1; + for _ in 0..num_bits { + let bit = self.decode_bit(&mut probs[tmp as usize], update)?; + tmp = (tmp << 1) ^ (bit as u32); + } + Ok(tmp - (1 << num_bits)) + } + + pub fn parse_reverse_bit_tree( + &mut self, + num_bits: usize, + probs: &mut [u16], + offset: usize, + update: bool, + ) -> io::Result<u32> { + let mut result = 0u32; + let mut tmp: usize = 1; + for i in 0..num_bits { + let bit = self.decode_bit(&mut probs[offset + tmp], update)?; + tmp = (tmp << 1) ^ (bit as usize); + result ^= (bit as u32) << i; + } + Ok(result) + } +} + +// TODO: parametrize by constant and use [u16; 1 << num_bits] as soon as Rust supports this +#[derive(Clone)] +pub struct BitTree { + num_bits: usize, + probs: Vec<u16>, +} + +impl BitTree { + pub fn new(num_bits: usize) -> Self { + BitTree { + num_bits, + probs: vec![0x400; 1 << 
num_bits], + } + } + + pub fn parse<R: io::BufRead>( + &mut self, + rangecoder: &mut RangeDecoder<R>, + update: bool, + ) -> io::Result<u32> { + rangecoder.parse_bit_tree(self.num_bits, self.probs.as_mut_slice(), update) + } + + pub fn parse_reverse<R: io::BufRead>( + &mut self, + rangecoder: &mut RangeDecoder<R>, + update: bool, + ) -> io::Result<u32> { + rangecoder.parse_reverse_bit_tree(self.num_bits, self.probs.as_mut_slice(), 0, update) + } +} + +pub struct LenDecoder { + choice: u16, + choice2: u16, + low_coder: Vec<BitTree>, + mid_coder: Vec<BitTree>, + high_coder: BitTree, +} + +impl LenDecoder { + pub fn new() -> Self { + LenDecoder { + choice: 0x400, + choice2: 0x400, + low_coder: vec![BitTree::new(3); 16], + mid_coder: vec![BitTree::new(3); 16], + high_coder: BitTree::new(8), + } + } + + pub fn decode<R: io::BufRead>( + &mut self, + rangecoder: &mut RangeDecoder<R>, + pos_state: usize, + update: bool, + ) -> io::Result<usize> { + if !rangecoder.decode_bit(&mut self.choice, update)? { + Ok(self.low_coder[pos_state].parse(rangecoder, update)? as usize) + } else if !rangecoder.decode_bit(&mut self.choice2, update)? { + Ok(self.mid_coder[pos_state].parse(rangecoder, update)? as usize + 8) + } else { + Ok(self.high_coder.parse(rangecoder, update)? as usize + 16) + } + } +} diff --git a/rust/vendor/lzma-rs/src/decode/stream.rs b/rust/vendor/lzma-rs/src/decode/stream.rs new file mode 100644 index 0000000..1fe7b67 --- /dev/null +++ b/rust/vendor/lzma-rs/src/decode/stream.rs @@ -0,0 +1,497 @@ +use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer}; +use crate::decode::lzma::{new_circular, new_circular_with_memlimit, DecoderState, LzmaParams}; +use crate::decode::rangecoder::RangeDecoder; +use crate::decompress::Options; +use crate::error::Error; +use std::fmt::Debug; +use std::io::{self, BufRead, Cursor, Read, Write}; + +/// Minimum header length to be read. 
+/// - props: u8 (1 byte) +/// - dict_size: u32 (4 bytes) +const MIN_HEADER_LEN: usize = 5; + +/// Max header length to be read. +/// - unpacked_size: u64 (8 bytes) +const MAX_HEADER_LEN: usize = MIN_HEADER_LEN + 8; + +/// Required bytes after the header. +/// - ignore: u8 (1 byte) +/// - code: u32 (4 bytes) +const START_BYTES: usize = 5; + +/// Maximum number of bytes to buffer while reading the header. +const MAX_TMP_LEN: usize = MAX_HEADER_LEN + START_BYTES; + +/// Internal state of this streaming decoder. This is needed because we have to +/// initialize the stream before processing any data. +#[derive(Debug)] +enum State<W> +where + W: Write, +{ + /// Stream is initialized but header values have not yet been read. + Header(W), + /// Header values have been read and the stream is ready to process more data. + Data(RunState<W>), +} + +/// Structures needed while decoding data. +struct RunState<W> +where + W: Write, +{ + decoder: DecoderState<W, LzCircularBuffer<W>>, + range: u32, + code: u32, +} + +impl<W> Debug for RunState<W> +where + W: Write, +{ + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("RunState") + .field("range", &self.range) + .field("code", &self.code) + .finish() + } +} + +/// Lzma decompressor that can process multiple chunks of data using the +/// `io::Write` interface. +pub struct Stream<W> +where + W: Write, +{ + /// Temporary buffer to hold data while the header is being read. + tmp: Cursor<[u8; MAX_TMP_LEN]>, + /// Whether the stream is initialized and ready to process data. + /// An `Option` is used to avoid interior mutability when updating the state. + state: Option<State<W>>, + /// Options given when a stream is created. + options: Options, +} + +impl<W> Stream<W> +where + W: Write, +{ + /// Initialize the stream. This will consume the `output` which is the sink + /// implementing `io::Write` that will receive decompressed bytes. 
+ pub fn new(output: W) -> Self { + Self::new_with_options(&Options::default(), output) + } + + /// Initialize the stream with the given `options`. This will consume the + /// `output` which is the sink implementing `io::Write` that will + /// receive decompressed bytes. + pub fn new_with_options(options: &Options, output: W) -> Self { + Self { + tmp: Cursor::new([0; MAX_TMP_LEN]), + state: Some(State::Header(output)), + options: *options, + } + } + + /// Get a reference to the output sink + pub fn get_output(&self) -> Option<&W> { + self.state.as_ref().map(|state| match state { + State::Header(output) => &output, + State::Data(state) => state.decoder.output.get_output(), + }) + } + + /// Get a mutable reference to the output sink + pub fn get_output_mut(&mut self) -> Option<&mut W> { + self.state.as_mut().map(|state| match state { + State::Header(output) => output, + State::Data(state) => state.decoder.output.get_output_mut(), + }) + } + + /// Consumes the stream and returns the output sink. This also makes sure + /// we have properly reached the end of the stream. 
+ pub fn finish(mut self) -> crate::error::Result<W> { + if let Some(state) = self.state.take() { + match state { + State::Header(output) => { + if self.tmp.position() > 0 { + Err(Error::LzmaError("failed to read header".to_string())) + } else { + Ok(output) + } + } + State::Data(mut state) => { + if !self.options.allow_incomplete { + // Process one last time with empty input to force end of + // stream checks + let mut stream = + Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); + let mut range_decoder = + RangeDecoder::from_parts(&mut stream, state.range, state.code); + state.decoder.process(&mut range_decoder)?; + } + let output = state.decoder.output.finish()?; + Ok(output) + } + } + } else { + // this will occur if a call to `write()` fails + Err(Error::LzmaError( + "can't finish stream because of previous write error".to_string(), + )) + } + } + + /// Attempts to read the header and transition into a running state. + /// + /// This function will consume the state, returning the next state on both + /// error and success. + fn read_header<R: BufRead>( + output: W, + mut input: &mut R, + options: &Options, + ) -> crate::error::Result<State<W>> { + match LzmaParams::read_header(&mut input, options) { + Ok(params) => { + let decoder = if let Some(memlimit) = options.memlimit { + new_circular_with_memlimit(output, params, memlimit) + } else { + new_circular(output, params) + }?; + + // The RangeDecoder is only kept temporarily as we are processing + // chunks of data. + if let Ok(rangecoder) = RangeDecoder::new(&mut input) { + Ok(State::Data(RunState { + decoder, + range: rangecoder.range, + code: rangecoder.code, + })) + } else { + // Failed to create a RangeDecoder because we need more data, + // try again later. + Ok(State::Header(decoder.output.into_output())) + } + } + // Failed to read_header() because we need more data, try again later. + Err(Error::HeaderTooShort(_)) => Ok(State::Header(output)), + // Fatal error. Don't retry. 
+ Err(e) => Err(e), + } + } + + /// Process compressed data + fn read_data<R: BufRead>(mut state: RunState<W>, mut input: &mut R) -> io::Result<RunState<W>> { + // Construct our RangeDecoder from the previous range and code + // values. + let mut rangecoder = RangeDecoder::from_parts(&mut input, state.range, state.code); + + // Try to process all bytes of data. + state + .decoder + .process_stream(&mut rangecoder) + .map_err(|e| -> io::Error { e.into() })?; + + Ok(RunState { + decoder: state.decoder, + range: rangecoder.range, + code: rangecoder.code, + }) + } +} + +impl<W> Debug for Stream<W> +where + W: Write + Debug, +{ + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("Stream") + .field("tmp", &self.tmp.position()) + .field("state", &self.state) + .field("options", &self.options) + .finish() + } +} + +impl<W> Write for Stream<W> +where + W: Write, +{ + fn write(&mut self, data: &[u8]) -> io::Result<usize> { + let mut input = Cursor::new(data); + + if let Some(state) = self.state.take() { + let state = match state { + // Read the header values and transition into a running state. 
+ State::Header(state) => { + let res = if self.tmp.position() > 0 { + // attempt to fill the tmp buffer + let position = self.tmp.position(); + let bytes_read = + input.read(&mut self.tmp.get_mut()[position as usize..])?; + let bytes_read = if bytes_read < std::u64::MAX as usize { + bytes_read as u64 + } else { + return Err(io::Error::new( + io::ErrorKind::Other, + "Failed to convert integer to u64.", + )); + }; + self.tmp.set_position(position + bytes_read); + + // attempt to read the header from our tmp buffer + let (position, res) = { + let mut tmp_input = + Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); + let res = Stream::read_header(state, &mut tmp_input, &self.options); + (tmp_input.position(), res) + }; + + // discard all bytes up to position if reading the header + // was successful + if let Ok(State::Data(_)) = &res { + let tmp = *self.tmp.get_ref(); + let end = self.tmp.position(); + let new_len = end - position; + (&mut self.tmp.get_mut()[0..new_len as usize]) + .copy_from_slice(&tmp[position as usize..end as usize]); + self.tmp.set_position(new_len); + } + res + } else { + Stream::read_header(state, &mut input, &self.options) + }; + + match res { + // occurs when not enough input bytes were provided to + // read the entire header + Ok(State::Header(val)) => { + if self.tmp.position() == 0 { + // reset the cursor because we may have partial reads + input.set_position(0); + let bytes_read = input.read(&mut self.tmp.get_mut()[..])?; + let bytes_read = if bytes_read < std::u64::MAX as usize { + bytes_read as u64 + } else { + return Err(io::Error::new( + io::ErrorKind::Other, + "Failed to convert integer to u64.", + )); + }; + self.tmp.set_position(bytes_read); + } + State::Header(val) + } + + // occurs when the header was successfully read and we + // move on to the next state + Ok(State::Data(val)) => State::Data(val), + + // occurs when the output was consumed due to a + // non-recoverable error + Err(e) => { + return Err(match e { + 
Error::IoError(e) | Error::HeaderTooShort(e) => e, + Error::LzmaError(e) | Error::XzError(e) => { + io::Error::new(io::ErrorKind::Other, e) + } + }); + } + } + } + + // Process another chunk of data. + State::Data(state) => { + let state = if self.tmp.position() > 0 { + let mut tmp_input = + Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); + let res = Stream::read_data(state, &mut tmp_input)?; + self.tmp.set_position(0); + res + } else { + state + }; + State::Data(Stream::read_data(state, &mut input)?) + } + }; + self.state.replace(state); + } + Ok(input.position() as usize) + } + + /// Flushes the output sink. The internal buffer isn't flushed to avoid + /// corrupting the internal state. Instead, call `finish()` to finalize the + /// stream and flush all remaining internal data. + fn flush(&mut self) -> io::Result<()> { + if let Some(ref mut state) = self.state { + match state { + State::Header(_) => Ok(()), + State::Data(state) => state.decoder.output.get_output_mut().flush(), + } + } else { + Ok(()) + } + } +} + +impl From<Error> for io::Error { + fn from(error: Error) -> io::Error { + io::Error::new(io::ErrorKind::Other, format!("{:?}", error)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + /// Test an empty stream + #[test] + fn test_stream_noop() { + let stream = Stream::new(Vec::new()); + assert!(stream.get_output().unwrap().is_empty()); + + let output = stream.finish().unwrap(); + assert!(output.is_empty()); + } + + /// Test writing an empty slice + #[test] + fn test_stream_zero() { + let mut stream = Stream::new(Vec::new()); + + stream.write_all(&[]).unwrap(); + stream.write_all(&[]).unwrap(); + + let output = stream.finish().unwrap(); + + assert!(output.is_empty()); + } + + /// Test a bad header value + #[test] + #[should_panic(expected = "LZMA header invalid properties: 255 must be < 225")] + fn test_bad_header() { + let input = [255u8; 32]; + + let mut stream = Stream::new(Vec::new()); + + 
stream.write_all(&input[..]).unwrap(); + + let output = stream.finish().unwrap(); + + assert!(output.is_empty()); + } + + /// Test processing only partial data + #[test] + fn test_stream_incomplete() { + let input = b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\ + \xfb\xff\xff\xc0\x00\x00\x00"; + // Process until this index is reached. + let mut end = 1u64; + + // Test when we fail to provide the minimum number of bytes required to + // read the header. Header size is 13 bytes but we also read the first 5 + // bytes of data. + while end < (MAX_HEADER_LEN + START_BYTES) as u64 { + let mut stream = Stream::new(Vec::new()); + stream.write_all(&input[..end as usize]).unwrap(); + assert_eq!(stream.tmp.position(), end); + + let err = stream.finish().unwrap_err(); + assert!( + err.to_string().contains("failed to read header"), + "error was: {}", + err + ); + + end += 1; + } + + // Test when we fail to provide enough bytes to terminate the stream. A + // properly terminated stream will have a code value of 0. 
+ while end < input.len() as u64 { + let mut stream = Stream::new(Vec::new()); + stream.write_all(&input[..end as usize]).unwrap(); + + // Header bytes will be buffered until there are enough to read + if end < (MAX_HEADER_LEN + START_BYTES) as u64 { + assert_eq!(stream.tmp.position(), end); + } + + let err = stream.finish().unwrap_err(); + assert!(err.to_string().contains("failed to fill whole buffer")); + + end += 1; + } + } + + /// Test processing all chunk sizes + #[test] + fn test_stream_chunked() { + let small_input = include_bytes!("../../tests/files/small.txt"); + + let mut reader = io::Cursor::new(&small_input[..]); + let mut small_input_compressed = Vec::new(); + crate::lzma_compress(&mut reader, &mut small_input_compressed).unwrap(); + + let input : Vec<(&[u8], &[u8])> = vec![ + (b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\xfb\xff\xff\xc0\x00\x00\x00", b""), + (&small_input_compressed[..], small_input)]; + for (input, expected) in input { + for chunk in 1..input.len() { + let mut consumed = 0; + let mut stream = Stream::new(Vec::new()); + while consumed < input.len() { + let end = std::cmp::min(consumed + chunk, input.len()); + stream.write_all(&input[consumed..end]).unwrap(); + consumed = end; + } + let output = stream.finish().unwrap(); + assert_eq!(expected, &output[..]); + } + } + } + + #[test] + fn test_stream_corrupted() { + let mut stream = Stream::new(Vec::new()); + let err = stream + .write_all(b"corrupted bytes here corrupted bytes here") + .unwrap_err(); + assert!(err.to_string().contains("beyond output size")); + let err = stream.finish().unwrap_err(); + assert!(err + .to_string() + .contains("can\'t finish stream because of previous write error")); + } + + #[test] + fn test_allow_incomplete() { + let input = include_bytes!("../../tests/files/small.txt"); + + let mut reader = io::Cursor::new(&input[..]); + let mut compressed = Vec::new(); + crate::lzma_compress(&mut reader, &mut compressed).unwrap(); + let compressed = 
/// Consumes `input` for as long as it yields only zero bytes.
///
/// Returns `Ok(true)` when end of input is reached having seen nothing but
/// zero bytes (an empty input qualifies). Returns `Ok(false)` as soon as a
/// buffered chunk contains a non-zero byte; that chunk is left unconsumed.
///
/// # Errors
///
/// Propagates any error returned by `fill_buf`.
pub fn flush_zero_padding<R: io::BufRead>(input: &mut R) -> io::Result<bool> {
    loop {
        let chunk = input.fill_buf()?;

        if chunk.is_empty() {
            return Ok(true);
        }
        if chunk.iter().any(|&byte| byte != 0u8) {
            return Ok(false);
        }

        let consumed = chunk.len();
        input.consume(consumed);
    }
}
// A BufRead adapter that keeps a running total of the bytes taken from the
// wrapped reader, whether through `read` or through `consume`.
pub struct CountBufRead<'a, R>
where
    R: 'a + io::BufRead,
{
    inner: &'a mut R, // wrapped reader
    total: usize,     // bytes read or consumed so far
}

impl<'a, R> CountBufRead<'a, R>
where
    R: io::BufRead,
{
    // Wraps `read`, starting the byte count at zero.
    pub fn new(read: &'a mut R) -> Self {
        Self {
            inner: read,
            total: 0,
        }
    }

    // Number of bytes read or consumed through this adapter so far.
    pub fn count(&self) -> usize {
        self.total
    }
}

impl<'a, R> io::Read for CountBufRead<'a, R>
where
    R: io::BufRead,
{
    // Reads from the wrapped reader and adds the number of bytes obtained to
    // the count; errors are passed through without touching the count.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.inner.read(buf).map(|obtained| {
            self.total += obtained;
            obtained
        })
    }
}

impl<'a, R> io::BufRead for CountBufRead<'a, R>
where
    R: io::BufRead,
{
    // Peeking at the buffer does not change the count.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    // Consumed bytes are counted as read.
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt);
        self.total += amt;
    }
}
+ +use crate::decode::lzma2; +use crate::decode::util; +use crate::error; +use crate::xz::{footer, header, CheckMethod, StreamFlags}; +use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; +use crc::{crc32, crc64, Hasher32}; +use std::hash::Hasher; +use std::io; +use std::io::Read; + +#[derive(Debug)] +struct Record { + unpadded_size: u64, + unpacked_size: u64, +} + +pub fn decode_stream<R, W>(input: &mut R, output: &mut W) -> error::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + let header = header::StreamHeader::parse(input)?; + + let mut records: Vec<Record> = vec![]; + let index_size = loop { + let mut count_input = util::CountBufRead::new(input); + let header_size = count_input.read_u8()?; + lzma_info!("XZ block header_size byte: 0x{:02x}", header_size); + + if header_size == 0 { + lzma_info!("XZ records: {:?}", records); + check_index(&mut count_input, &records)?; + let index_size = count_input.count(); + break index_size; + } + + read_block( + &mut count_input, + output, + header.stream_flags.check_method, + &mut records, + header_size, + )?; + }; + + let crc32 = input.read_u32::<LittleEndian>()?; + let mut digest = crc32::Digest::new(crc32::IEEE); + + { + let mut digested = util::HasherRead::new(input, &mut digest); + + let backward_size = digested.read_u32::<LittleEndian>()?; + if index_size as u32 != (backward_size + 1) << 2 { + return Err(error::Error::XzError(format!( + "Invalid index size: expected {} but got {}", + (backward_size + 1) << 2, + index_size + ))); + } + + let stream_flags = { + let field = digested.read_u16::<BigEndian>()?; + StreamFlags::parse(field)? 
+ }; + + if header.stream_flags != stream_flags { + return Err(error::Error::XzError(format!( + "Flags in header ({:?}) does not match footer ({:?})", + header.stream_flags, stream_flags + ))); + } + } + + let digest_crc32 = digest.sum32(); + if crc32 != digest_crc32 { + return Err(error::Error::XzError(format!( + "Invalid footer CRC32: expected 0x{:08x} but got 0x{:08x}", + crc32, digest_crc32 + ))); + } + + if !util::read_tag(input, footer::XZ_MAGIC_FOOTER)? { + return Err(error::Error::XzError(format!( + "Invalid footer magic, expected {:?}", + footer::XZ_MAGIC_FOOTER + ))); + } + + if !util::is_eof(input)? { + return Err(error::Error::XzError( + "Unexpected data after last XZ block".to_string(), + )); + } + Ok(()) +} + +fn check_index<'a, R>( + count_input: &mut util::CountBufRead<'a, R>, + records: &[Record], +) -> error::Result<()> +where + R: io::BufRead, +{ + let mut digest = crc32::Digest::new(crc32::IEEE); + let index_tag = 0u8; + digest.write_u8(index_tag); + + { + let mut digested = util::HasherRead::new(count_input, &mut digest); + + let num_records = get_multibyte(&mut digested)?; + if num_records != records.len() as u64 { + return Err(error::Error::XzError(format!( + "Expected {} records but got {} records", + num_records, + records.len() + ))); + } + + for (i, record) in records.iter().enumerate() { + lzma_info!("XZ index checking record {}: {:?}", i, record); + + let unpadded_size = get_multibyte(&mut digested)?; + if unpadded_size != record.unpadded_size as u64 { + return Err(error::Error::XzError(format!( + "Invalid index for record {}: unpadded size ({}) does not match index ({})", + i, record.unpadded_size, unpadded_size + ))); + } + + let unpacked_size = get_multibyte(&mut digested)?; + if unpacked_size != record.unpacked_size as u64 { + return Err(error::Error::XzError(format!( + "Invalid index for record {}: unpacked size ({}) does not match index ({})", + i, record.unpacked_size, unpacked_size + ))); + } + } + } + + // TODO: create padding 
parser function + let count = count_input.count(); + let padding_size = ((count ^ 0x03) + 1) & 0x03; + lzma_info!( + "XZ index: {} byte(s) read, {} byte(s) of padding", + count, + padding_size + ); + + { + let mut digested = util::HasherRead::new(count_input, &mut digest); + for _ in 0..padding_size { + let byte = digested.read_u8()?; + if byte != 0 { + return Err(error::Error::XzError( + "Invalid index padding, must be null bytes".to_string(), + )); + } + } + } + + let digest_crc32 = digest.sum32(); + lzma_info!("XZ index checking digest 0x{:08x}", digest_crc32); + + let crc32 = count_input.read_u32::<LittleEndian>()?; + if crc32 != digest_crc32 { + return Err(error::Error::XzError(format!( + "Invalid index CRC32: expected 0x{:08x} but got 0x{:08x}", + crc32, digest_crc32 + ))); + } + + Ok(()) +} + +#[derive(Debug)] +enum FilterId { + Lzma2, +} + +fn get_filter_id(id: u64) -> error::Result<FilterId> { + match id { + 0x21 => Ok(FilterId::Lzma2), + _ => Err(error::Error::XzError(format!("Unknown filter id {}", id))), + } +} + +struct Filter { + filter_id: FilterId, + props: Vec<u8>, +} + +struct BlockHeader { + filters: Vec<Filter>, + packed_size: Option<u64>, + unpacked_size: Option<u64>, +} + +fn read_block<'a, R, W>( + count_input: &mut util::CountBufRead<'a, R>, + output: &mut W, + check_method: CheckMethod, + records: &mut Vec<Record>, + header_size: u8, +) -> error::Result<bool> +where + R: io::BufRead, + W: io::Write, +{ + let mut digest = crc32::Digest::new(crc32::IEEE); + digest.write_u8(header_size); + let header_size = ((header_size as u64) << 2) - 1; + + let block_header = { + let mut taken = count_input.take(header_size); + let mut digested = io::BufReader::new(util::HasherRead::new(&mut taken, &mut digest)); + read_block_header(&mut digested, header_size)? 
+ }; + + let crc32 = count_input.read_u32::<LittleEndian>()?; + let digest_crc32 = digest.sum32(); + if crc32 != digest_crc32 { + return Err(error::Error::XzError(format!( + "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", + crc32, digest_crc32 + ))); + } + + let mut tmpbuf: Vec<u8> = Vec::new(); + let filters = block_header.filters; + for (i, filter) in filters.iter().enumerate() { + if i == 0 { + // TODO: use SubBufRead on input if packed_size is known? + let packed_size = decode_filter(count_input, &mut tmpbuf, filter)?; + if let Some(expected_packed_size) = block_header.packed_size { + if (packed_size as u64) != expected_packed_size { + return Err(error::Error::XzError(format!( + "Invalid compressed size: expected {} but got {}", + expected_packed_size, packed_size + ))); + } + } + } else { + let mut newbuf: Vec<u8> = Vec::new(); + decode_filter( + &mut io::BufReader::new(tmpbuf.as_slice()), + &mut newbuf, + filter, + )?; + // TODO: does this move or copy? + tmpbuf = newbuf; + } + } + + let unpacked_size = tmpbuf.len(); + lzma_info!("XZ block decompressed to {} byte(s)", tmpbuf.len()); + + if let Some(expected_unpacked_size) = block_header.unpacked_size { + if (unpacked_size as u64) != expected_unpacked_size { + return Err(error::Error::XzError(format!( + "Invalid decompressed size: expected {} but got {}", + expected_unpacked_size, unpacked_size + ))); + } + } + + let count = count_input.count(); + let padding_size = ((count ^ 0x03) + 1) & 0x03; + lzma_info!( + "XZ block: {} byte(s) read, {} byte(s) of padding, check method {:?}", + count, + padding_size, + check_method + ); + for _ in 0..padding_size { + let byte = count_input.read_u8()?; + if byte != 0 { + return Err(error::Error::XzError( + "Invalid block padding, must be null bytes".to_string(), + )); + } + } + validate_block_check(count_input, tmpbuf.as_slice(), check_method)?; + + output.write_all(tmpbuf.as_slice())?; + records.push(Record { + unpadded_size: (count_input.count() - 
padding_size) as u64, + unpacked_size: unpacked_size as u64, + }); + + let finished = false; + Ok(finished) +} + +/// Verify block checksum against the "Block Check" field. +/// +/// See spec section 3.4 for details. +fn validate_block_check<R>( + input: &mut R, + buf: &[u8], + check_method: CheckMethod, +) -> error::Result<()> +where + R: io::BufRead, +{ + match check_method { + CheckMethod::None => (), + CheckMethod::Crc32 => { + let crc32 = input.read_u32::<LittleEndian>()?; + let digest_crc32 = crc32::checksum_ieee(buf); + if crc32 != digest_crc32 { + return Err(error::Error::XzError(format!( + "Invalid block CRC32, expected 0x{:08x} but got 0x{:08x}", + crc32, digest_crc32 + ))); + } + } + CheckMethod::Crc64 => { + let crc64 = input.read_u64::<LittleEndian>()?; + let digest_crc64 = crc64::checksum_ecma(buf); + if crc64 != digest_crc64 { + return Err(error::Error::XzError(format!( + "Invalid block CRC64, expected 0x{:016x} but got 0x{:016x}", + crc64, digest_crc64 + ))); + } + } + // TODO + CheckMethod::Sha256 => { + return Err(error::Error::XzError( + "Unsupported SHA-256 checksum (not yet implemented)".to_string(), + )); + } + } + Ok(()) +} + +fn decode_filter<R, W>(input: &mut R, output: &mut W, filter: &Filter) -> error::Result<usize> +where + R: io::BufRead, + W: io::Write, +{ + let mut count_input = util::CountBufRead::new(input); + match filter.filter_id { + FilterId::Lzma2 => { + if filter.props.len() != 1 { + return Err(error::Error::XzError(format!( + "Invalid properties for filter {:?}", + filter.filter_id + ))); + } + // TODO: properties?? 
+ lzma2::decode_stream(&mut count_input, output)?; + Ok(count_input.count()) + } + } +} + +fn read_block_header<R>(input: &mut R, header_size: u64) -> error::Result<BlockHeader> +where + R: io::BufRead, +{ + let flags = input.read_u8()?; + let num_filters = (flags & 0x03) + 1; + let reserved = flags & 0x3C; + let has_packed_size = flags & 0x40 != 0; + let has_unpacked_size = flags & 0x80 != 0; + + lzma_info!( + "XZ block header: {{ header_size: {}, flags: {}, num_filters: {}, has_packed_size: {}, has_unpacked_size: {} }}", + header_size, + flags, + num_filters, + has_packed_size, + has_unpacked_size + ); + + if reserved != 0 { + return Err(error::Error::XzError(format!( + "Invalid block flags {}, reserved bits (mask 0x3C) must be zero", + flags + ))); + } + + let packed_size = if has_packed_size { + Some(get_multibyte(input)?) + } else { + None + }; + + let unpacked_size = if has_unpacked_size { + Some(get_multibyte(input)?) + } else { + None + }; + + lzma_info!( + "XZ block header: {{ packed_size: {:?}, unpacked_size: {:?} }}", + packed_size, + unpacked_size + ); + + let mut filters: Vec<Filter> = vec![]; + for _ in 0..num_filters { + let filter_id = get_filter_id(get_multibyte(input)?)?; + let size_of_properties = get_multibyte(input)?; + + lzma_info!( + "XZ filter: {{ filter_id: {:?}, size_of_properties: {} }}", + filter_id, + size_of_properties + ); + + // Early abort to avoid allocating a large vector + if size_of_properties > header_size { + return Err(error::Error::XzError(format!( + "Size of filter properties exceeds block header size ({} > {})", + size_of_properties, header_size + ))); + } + + let mut buf = vec![0; size_of_properties as usize]; + input.read_exact(buf.as_mut_slice()).map_err(|e| { + error::Error::XzError(format!( + "Could not read filter properties of size {}: {}", + size_of_properties, e + )) + })?; + + lzma_info!("XZ filter properties: {:?}", buf); + + filters.push(Filter { + filter_id, + props: buf, + }) + } + + if 
!util::flush_zero_padding(input)? { + return Err(error::Error::XzError( + "Invalid block header padding, must be null bytes".to_string(), + )); + } + + Ok(BlockHeader { + filters, + packed_size, + unpacked_size, + }) +} + +pub fn get_multibyte<R>(input: &mut R) -> error::Result<u64> +where + R: io::Read, +{ + let mut result = 0; + for i in 0..9 { + let byte = input.read_u8()?; + result ^= ((byte & 0x7F) as u64) << (i * 7); + if (byte & 0x80) == 0 { + return Ok(result); + } + } + + Err(error::Error::XzError( + "Invalid multi-byte encoding".to_string(), + )) +} diff --git a/rust/vendor/lzma-rs/src/encode/dumbencoder.rs b/rust/vendor/lzma-rs/src/encode/dumbencoder.rs new file mode 100644 index 0000000..f1574c5 --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/dumbencoder.rs @@ -0,0 +1,140 @@ +use crate::compress::{Options, UnpackedSize}; +use crate::encode::rangecoder; +use byteorder::{LittleEndian, WriteBytesExt}; +use std::io; + +pub struct Encoder<'a, W> +where + W: 'a + io::Write, +{ + rangecoder: rangecoder::RangeEncoder<'a, W>, + literal_probs: [[u16; 0x300]; 8], + is_match: [u16; 4], // true = LZ, false = literal + unpacked_size: UnpackedSize, +} + +const LC: u32 = 3; +const LP: u32 = 0; +const PB: u32 = 2; + +impl<'a, W> Encoder<'a, W> +where + W: io::Write, +{ + pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result<Self> { + let dict_size = 0x0080_0000; + + // Properties + let props = (LC + 9 * (LP + 5 * PB)) as u8; + lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", LC, LP, PB); + stream.write_u8(props)?; + + // Dictionary + lzma_info!("Dict size: {}", dict_size); + stream.write_u32::<LittleEndian>(dict_size)?; + + // Unpacked size + match &options.unpacked_size { + UnpackedSize::WriteToHeader(unpacked_size) => { + let value: u64 = match unpacked_size { + None => { + lzma_info!("Unpacked size: unknown"); + 0xFFFF_FFFF_FFFF_FFFF + } + Some(x) => { + lzma_info!("Unpacked size: {}", x); + *x + } + }; + 
stream.write_u64::<LittleEndian>(value)?; + } + UnpackedSize::SkipWritingToHeader => {} + }; + + let encoder = Encoder { + rangecoder: rangecoder::RangeEncoder::new(stream), + literal_probs: [[0x400; 0x300]; 8], + is_match: [0x400; 4], + unpacked_size: options.unpacked_size, + }; + + Ok(encoder) + } + + pub fn process<R>(mut self, input: R) -> io::Result<()> + where + R: io::Read, + { + let mut prev_byte = 0u8; + let mut input_len = 0; + + for (out_len, byte_result) in input.bytes().enumerate() { + let byte = byte_result?; + let pos_state = out_len & 3; + input_len = out_len; + + // Literal + self.rangecoder + .encode_bit(&mut self.is_match[pos_state], false)?; + + self.encode_literal(byte, prev_byte)?; + prev_byte = byte; + } + + self.finish(input_len + 1) + } + + fn finish(&mut self, input_len: usize) -> io::Result<()> { + match self.unpacked_size { + UnpackedSize::SkipWritingToHeader | UnpackedSize::WriteToHeader(Some(_)) => {} + UnpackedSize::WriteToHeader(None) => { + // Write end-of-stream marker + let pos_state = input_len & 3; + + // Match + self.rangecoder + .encode_bit(&mut self.is_match[pos_state], true)?; + // New distance + self.rangecoder.encode_bit(&mut 0x400, false)?; + + // Dummy len, as small as possible (len = 0) + for _ in 0..4 { + self.rangecoder.encode_bit(&mut 0x400, false)?; + } + + // Distance marker = 0xFFFFFFFF + // pos_slot = 63 + for _ in 0..6 { + self.rangecoder.encode_bit(&mut 0x400, true)?; + } + // num_direct_bits = 30 + // result = 3 << 30 = C000_0000 + // + 3FFF_FFF0 (26 bits) + // + F ( 4 bits) + for _ in 0..30 { + self.rangecoder.encode_bit(&mut 0x400, true)?; + } + // = FFFF_FFFF + } + } + + // Flush range coder + self.rangecoder.finish() + } + + fn encode_literal(&mut self, byte: u8, prev_byte: u8) -> io::Result<()> { + let prev_byte = prev_byte as usize; + + let mut result: usize = 1; + let lit_state = prev_byte >> 5; + let probs = &mut self.literal_probs[lit_state]; + + for i in 0..8 { + let bit = ((byte >> (7 - i)) & 1) != 
0; + self.rangecoder.encode_bit(&mut probs[result], bit)?; + result = (result << 1) ^ (bit as usize); + } + + Ok(()) + } +} diff --git a/rust/vendor/lzma-rs/src/encode/lzma2.rs b/rust/vendor/lzma-rs/src/encode/lzma2.rs new file mode 100644 index 0000000..ead0726 --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/lzma2.rs @@ -0,0 +1,26 @@ +use byteorder::{BigEndian, WriteBytesExt}; +use std::io; + +pub fn encode_stream<R, W>(input: &mut R, output: &mut W) -> io::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + let mut buf = vec![0u8; 0x10000]; + loop { + let n = input.read(&mut buf)?; + if n == 0 { + // status = EOF + output.write_u8(0)?; + break; + } + + // status = uncompressed reset dict + output.write_u8(1)?; + // unpacked size + output.write_u16::<BigEndian>((n - 1) as u16)?; + // contents + output.write_all(&buf[..n])?; + } + Ok(()) +} diff --git a/rust/vendor/lzma-rs/src/encode/mod.rs b/rust/vendor/lzma-rs/src/encode/mod.rs new file mode 100644 index 0000000..98a0e84 --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/mod.rs @@ -0,0 +1,8 @@ +//! Encoding logic. + +pub mod dumbencoder; +pub mod lzma2; +pub mod options; +mod rangecoder; +mod util; +pub mod xz; diff --git a/rust/vendor/lzma-rs/src/encode/options.rs b/rust/vendor/lzma-rs/src/encode/options.rs new file mode 100644 index 0000000..cf2d305 --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/options.rs @@ -0,0 +1,30 @@ +/// Options for the `lzma_compress` function +#[derive(Clone, Copy, Debug, Default)] +pub struct Options { + /// Defines whether the unpacked size should be written to the header. + /// The default is + /// [`UnpackedSize::WriteToHeader(None)`](enum.encode.UnpackedSize.html#variant.WriteValueToHeader) + pub unpacked_size: UnpackedSize, +} + +/// Alternatives for handling unpacked size +#[derive(Clone, Copy, Debug)] +pub enum UnpackedSize { + /// If the value is `Some(u64)`, write the provided u64 value to the header. 
+ /// There is currently no check in place that verifies that this is the actual number of bytes + /// provided by the input stream. + /// If the value is `None`, write the special `0xFFFF_FFFF_FFFF_FFFF` code to the header, + /// indicating that the unpacked size is unknown. + WriteToHeader(Option<u64>), + /// Do not write anything to the header. The unpacked size needs to be stored elsewhere and + /// provided when reading the file. Note that this is a non-standard way of writing LZMA data, + /// but is used by certain libraries such as + /// [OpenCTM](http://openctm.sourceforge.net/). + SkipWritingToHeader, +} + +impl Default for UnpackedSize { + fn default() -> UnpackedSize { + UnpackedSize::WriteToHeader(None) + } +} diff --git a/rust/vendor/lzma-rs/src/encode/rangecoder.rs b/rust/vendor/lzma-rs/src/encode/rangecoder.rs new file mode 100644 index 0000000..da5385d --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/rangecoder.rs @@ -0,0 +1,377 @@ +use byteorder::WriteBytesExt; +use std::io; + +pub struct RangeEncoder<'a, W> +where + W: 'a + io::Write, +{ + stream: &'a mut W, + range: u32, + low: u64, + cache: u8, + cachesz: u32, +} + +impl<'a, W> RangeEncoder<'a, W> +where + W: io::Write, +{ + #[allow(clippy::let_and_return)] + pub fn new(stream: &'a mut W) -> Self { + let enc = Self { + stream, + range: 0xFFFF_FFFF, + low: 0, + cache: 0, + cachesz: 1, + }; + lzma_debug!("0 {{ range: {:08x}, low: {:010x} }}", enc.range, enc.low); + enc + } + + fn write_low(&mut self) -> io::Result<()> { + if self.low < 0xFF00_0000 || self.low > 0xFFFF_FFFF { + let mut tmp = self.cache; + loop { + let byte = tmp.wrapping_add((self.low >> 32) as u8); + self.stream.write_u8(byte)?; + lzma_debug!("> byte: {:02x}", byte); + tmp = 0xFF; + self.cachesz -= 1; + if self.cachesz == 0 { + break; + } + } + self.cache = (self.low >> 24) as u8; + } + + self.cachesz += 1; + self.low = (self.low << 8) & 0xFFFF_FFFF; + Ok(()) + } + + pub fn finish(&mut self) -> io::Result<()> { + for _ in 0..5 { 
+ self.write_low()?; + + lzma_debug!("$ {{ range: {:08x}, low: {:010x} }}", self.range, self.low); + } + Ok(()) + } + + fn normalize(&mut self) -> io::Result<()> { + while self.range < 0x0100_0000 { + lzma_debug!( + "+ {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", + self.range, + self.low, + self.cache, + self.cachesz + ); + self.range <<= 8; + self.write_low()?; + lzma_debug!( + "* {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", + self.range, + self.low, + self.cache, + self.cachesz + ); + } + lzma_trace!(" {{ range: {:08x}, low: {:010x} }}", self.range, self.low); + Ok(()) + } + + pub fn encode_bit(&mut self, prob: &mut u16, bit: bool) -> io::Result<()> { + let bound: u32 = (self.range >> 11) * (*prob as u32); + lzma_trace!( + " bound: {:08x}, prob: {:04x}, bit: {}", + bound, + prob, + bit as u8 + ); + + if bit { + *prob -= *prob >> 5; + self.low += bound as u64; + self.range -= bound; + } else { + *prob += (0x800_u16 - *prob) >> 5; + self.range = bound; + } + + self.normalize() + } + + #[cfg(test)] + fn encode_bit_tree( + &mut self, + num_bits: usize, + probs: &mut [u16], + value: u32, + ) -> io::Result<()> { + debug_assert!(value.leading_zeros() as usize + num_bits >= 32); + let mut tmp: usize = 1; + for i in 0..num_bits { + let bit = ((value >> (num_bits - i - 1)) & 1) != 0; + self.encode_bit(&mut probs[tmp], bit)?; + tmp = (tmp << 1) ^ (bit as usize); + } + Ok(()) + } + + #[cfg(test)] + pub fn encode_reverse_bit_tree( + &mut self, + num_bits: usize, + probs: &mut [u16], + offset: usize, + mut value: u32, + ) -> io::Result<()> { + debug_assert!(value.leading_zeros() as usize + num_bits >= 32); + let mut tmp: usize = 1; + for _ in 0..num_bits { + let bit = (value & 1) != 0; + value >>= 1; + self.encode_bit(&mut probs[offset + tmp], bit)?; + tmp = (tmp << 1) ^ (bit as usize); + } + Ok(()) + } +} + +// TODO: parametrize by constant and use [u16; 1 << num_bits] as soon as Rust supports this +#[cfg(test)] +#[derive(Clone)] +pub struct BitTree { + 
num_bits: usize, + probs: Vec<u16>, +} + +#[cfg(test)] +impl BitTree { + pub fn new(num_bits: usize) -> Self { + BitTree { + num_bits, + probs: vec![0x400; 1 << num_bits], + } + } + + pub fn encode<W: io::Write>( + &mut self, + rangecoder: &mut RangeEncoder<W>, + value: u32, + ) -> io::Result<()> { + rangecoder.encode_bit_tree(self.num_bits, self.probs.as_mut_slice(), value) + } + + pub fn encode_reverse<W: io::Write>( + &mut self, + rangecoder: &mut RangeEncoder<W>, + value: u32, + ) -> io::Result<()> { + rangecoder.encode_reverse_bit_tree(self.num_bits, self.probs.as_mut_slice(), 0, value) + } +} + +#[cfg(test)] +pub struct LenEncoder { + choice: u16, + choice2: u16, + low_coder: Vec<BitTree>, + mid_coder: Vec<BitTree>, + high_coder: BitTree, +} + +#[cfg(test)] +impl LenEncoder { + pub fn new() -> Self { + LenEncoder { + choice: 0x400, + choice2: 0x400, + low_coder: vec![BitTree::new(3); 16], + mid_coder: vec![BitTree::new(3); 16], + high_coder: BitTree::new(8), + } + } + + pub fn encode<W: io::Write>( + &mut self, + rangecoder: &mut RangeEncoder<W>, + pos_state: usize, + value: u32, + ) -> io::Result<()> { + let is_low: bool = value < 8; + rangecoder.encode_bit(&mut self.choice, !is_low)?; + if is_low { + return self.low_coder[pos_state].encode(rangecoder, value); + } + + let is_middle: bool = value < 16; + rangecoder.encode_bit(&mut self.choice2, !is_middle)?; + if is_middle { + return self.mid_coder[pos_state].encode(rangecoder, value - 8); + } + + self.high_coder.encode(rangecoder, value - 16) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::decode::rangecoder::{LenDecoder, RangeDecoder}; + use crate::{decode, encode}; + use std::io::BufReader; + + fn encode_decode(prob_init: u16, bits: &[bool]) { + let mut buf: Vec<u8> = Vec::new(); + + let mut encoder = RangeEncoder::new(&mut buf); + let mut prob = prob_init; + for &b in bits { + encoder.encode_bit(&mut prob, b).unwrap(); + } + encoder.finish().unwrap(); + + let mut bufread = 
BufReader::new(buf.as_slice()); + let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); + let mut prob = prob_init; + for &b in bits { + assert_eq!(decoder.decode_bit(&mut prob, true).unwrap(), b); + } + assert!(decoder.is_finished_ok().unwrap()); + } + + #[test] + fn test_encode_decode_zeros() { + encode_decode(0x400, &[false; 10000]); + } + + #[test] + fn test_encode_decode_ones() { + encode_decode(0x400, &[true; 10000]); + } + + fn encode_decode_bittree(num_bits: usize, values: &[u32]) { + let mut buf: Vec<u8> = Vec::new(); + + let mut encoder = RangeEncoder::new(&mut buf); + let mut tree = encode::rangecoder::BitTree::new(num_bits); + for &v in values { + tree.encode(&mut encoder, v).unwrap(); + } + encoder.finish().unwrap(); + + let mut bufread = BufReader::new(buf.as_slice()); + let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); + let mut tree = decode::rangecoder::BitTree::new(num_bits); + for &v in values { + assert_eq!(tree.parse(&mut decoder, true).unwrap(), v); + } + assert!(decoder.is_finished_ok().unwrap()); + } + + #[test] + fn test_encode_decode_bittree_zeros() { + for num_bits in 0..16 { + encode_decode_bittree(num_bits, &[0; 10000]); + } + } + + #[test] + fn test_encode_decode_bittree_ones() { + for num_bits in 0..16 { + encode_decode_bittree(num_bits, &[(1 << num_bits) - 1; 10000]); + } + } + + #[test] + fn test_encode_decode_bittree_all() { + for num_bits in 0..16 { + let max = 1 << num_bits; + let values: Vec<u32> = (0..max).collect(); + encode_decode_bittree(num_bits, &values); + } + } + + fn encode_decode_reverse_bittree(num_bits: usize, values: &[u32]) { + let mut buf: Vec<u8> = Vec::new(); + + let mut encoder = RangeEncoder::new(&mut buf); + let mut tree = encode::rangecoder::BitTree::new(num_bits); + for &v in values { + tree.encode_reverse(&mut encoder, v).unwrap(); + } + encoder.finish().unwrap(); + + let mut bufread = BufReader::new(buf.as_slice()); + let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); + let mut 
tree = decode::rangecoder::BitTree::new(num_bits); + for &v in values { + assert_eq!(tree.parse_reverse(&mut decoder, true).unwrap(), v); + } + assert!(decoder.is_finished_ok().unwrap()); + } + + #[test] + fn test_encode_decode_reverse_bittree_zeros() { + for num_bits in 0..16 { + encode_decode_reverse_bittree(num_bits, &[0; 10000]); + } + } + + #[test] + fn test_encode_decode_reverse_bittree_ones() { + for num_bits in 0..16 { + encode_decode_reverse_bittree(num_bits, &[(1 << num_bits) - 1; 10000]); + } + } + + #[test] + fn test_encode_decode_reverse_bittree_all() { + for num_bits in 0..16 { + let max = 1 << num_bits; + let values: Vec<u32> = (0..max).collect(); + encode_decode_reverse_bittree(num_bits, &values); + } + } + + fn encode_decode_length(pos_state: usize, values: &[u32]) { + let mut buf: Vec<u8> = Vec::new(); + + let mut encoder = RangeEncoder::new(&mut buf); + let mut len_encoder = LenEncoder::new(); + for &v in values { + len_encoder.encode(&mut encoder, pos_state, v).unwrap(); + } + encoder.finish().unwrap(); + + let mut bufread = BufReader::new(buf.as_slice()); + let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); + let mut len_decoder = LenDecoder::new(); + for &v in values { + assert_eq!( + len_decoder.decode(&mut decoder, pos_state, true).unwrap(), + v as usize + ); + } + assert!(decoder.is_finished_ok().unwrap()); + } + + #[test] + fn test_encode_decode_length_zeros() { + for pos_state in 0..16 { + encode_decode_length(pos_state, &[0; 10000]); + } + } + + #[test] + fn test_encode_decode_length_all() { + for pos_state in 0..16 { + let max = (1 << 8) + 16; + let values: Vec<u32> = (0..max).collect(); + encode_decode_length(pos_state, &values); + } + } +} diff --git a/rust/vendor/lzma-rs/src/encode/util.rs b/rust/vendor/lzma-rs/src/encode/util.rs new file mode 100644 index 0000000..e231f60 --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/util.rs @@ -0,0 +1,75 @@ +use std::hash; +use std::io; + +// A Write computing a digest on the bytes 
written. +pub struct HasherWrite<'a, W, H> +where + W: 'a + io::Write, + H: 'a + hash::Hasher, +{ + write: &'a mut W, // underlying writer + hasher: &'a mut H, // hasher +} + +impl<'a, W, H> HasherWrite<'a, W, H> +where + W: io::Write, + H: hash::Hasher, +{ + pub fn new(write: &'a mut W, hasher: &'a mut H) -> Self { + Self { write, hasher } + } +} + +impl<'a, W, H> io::Write for HasherWrite<'a, W, H> +where + W: io::Write, + H: hash::Hasher, +{ + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + let result = self.write.write(buf)?; + self.hasher.write(&buf[..result]); + Ok(result) + } + + fn flush(&mut self) -> io::Result<()> { + self.write.flush() + } +} + +// A Write counting the bytes written. +pub struct CountWrite<'a, W> +where + W: 'a + io::Write, +{ + write: &'a mut W, // underlying writer + count: usize, // number of bytes written +} + +impl<'a, W> CountWrite<'a, W> +where + W: io::Write, +{ + pub fn new(write: &'a mut W) -> Self { + Self { write, count: 0 } + } + + pub fn count(&self) -> usize { + self.count + } +} + +impl<'a, W> io::Write for CountWrite<'a, W> +where + W: io::Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + let result = self.write.write(buf)?; + self.count += result; + Ok(result) + } + + fn flush(&mut self) -> io::Result<()> { + self.write.flush() + } +} diff --git a/rust/vendor/lzma-rs/src/encode/xz.rs b/rust/vendor/lzma-rs/src/encode/xz.rs new file mode 100644 index 0000000..15eca80 --- /dev/null +++ b/rust/vendor/lzma-rs/src/encode/xz.rs @@ -0,0 +1,163 @@ +use crate::decode; +use crate::encode::lzma2; +use crate::encode::util; +use crate::xz::{footer, header, CheckMethod, StreamFlags}; +use byteorder::{LittleEndian, WriteBytesExt}; +use crc::{crc32, Hasher32}; +use std::io; +use std::io::Write; + +pub fn encode_stream<R, W>(input: &mut R, output: &mut W) -> io::Result<()> +where + R: io::BufRead, + W: io::Write, +{ + let stream_flags = StreamFlags { + check_method: CheckMethod::None, + }; + + // Header + 
write_header(output, stream_flags)?; + + // Block + let (unpadded_size, unpacked_size) = write_block(input, output)?; + + // Index + let index_size = write_index(output, unpadded_size, unpacked_size)?; + + // Footer + write_footer(output, stream_flags, index_size) +} + +fn write_header<W>(output: &mut W, stream_flags: StreamFlags) -> io::Result<()> +where + W: io::Write, +{ + output.write_all(header::XZ_MAGIC)?; + let mut digest = crc32::Digest::new(crc32::IEEE); + { + let mut digested = util::HasherWrite::new(output, &mut digest); + stream_flags.serialize(&mut digested)?; + } + let crc32 = digest.sum32(); + output.write_u32::<LittleEndian>(crc32)?; + Ok(()) +} + +fn write_footer<W>(output: &mut W, stream_flags: StreamFlags, index_size: usize) -> io::Result<()> +where + W: io::Write, +{ + let mut digest = crc32::Digest::new(crc32::IEEE); + let mut footer_buf: Vec<u8> = Vec::new(); + { + let mut digested = util::HasherWrite::new(&mut footer_buf, &mut digest); + + let backward_size = (index_size >> 2) - 1; + digested.write_u32::<LittleEndian>(backward_size as u32)?; + stream_flags.serialize(&mut digested)?; + } + let crc32 = digest.sum32(); + output.write_u32::<LittleEndian>(crc32)?; + output.write_all(footer_buf.as_slice())?; + + output.write_all(footer::XZ_MAGIC_FOOTER)?; + Ok(()) +} + +fn write_block<R, W>(input: &mut R, output: &mut W) -> io::Result<(usize, usize)> +where + R: io::BufRead, + W: io::Write, +{ + let (unpadded_size, unpacked_size) = { + let mut count_output = util::CountWrite::new(output); + + // Block header + let mut digest = crc32::Digest::new(crc32::IEEE); + { + let mut digested = util::HasherWrite::new(&mut count_output, &mut digest); + let header_size = 8; + digested.write_u8((header_size >> 2) as u8)?; + let flags = 0x00; // 1 filter, no (un)packed size provided + digested.write_u8(flags)?; + let filter_id = 0x21; // LZMA2 + digested.write_u8(filter_id)?; + let size_of_properties = 1; + digested.write_u8(size_of_properties)?; + let properties 
= 22; // TODO + digested.write_u8(properties)?; + let padding = [0, 0, 0]; + digested.write_all(&padding)?; + } + let crc32 = digest.sum32(); + count_output.write_u32::<LittleEndian>(crc32)?; + + // Block + let mut count_input = decode::util::CountBufRead::new(input); + lzma2::encode_stream(&mut count_input, &mut count_output)?; + (count_output.count(), count_input.count()) + }; + lzma_info!( + "Unpadded size = {}, unpacked_size = {}", + unpadded_size, + unpacked_size + ); + + let padding_size = ((unpadded_size ^ 0x03) + 1) & 0x03; + let padding = vec![0; padding_size]; + output.write_all(padding.as_slice())?; + // Checksum = None (cf. above) + + Ok((unpadded_size, unpacked_size)) +} + +fn write_index<W>(output: &mut W, unpadded_size: usize, unpacked_size: usize) -> io::Result<usize> +where + W: io::Write, +{ + let mut count_output = util::CountWrite::new(output); + + let mut digest = crc32::Digest::new(crc32::IEEE); + { + let mut digested = util::HasherWrite::new(&mut count_output, &mut digest); + digested.write_u8(0)?; // No more block + let num_records = 1; + write_multibyte(&mut digested, num_records)?; + + write_multibyte(&mut digested, unpadded_size as u64)?; + write_multibyte(&mut digested, unpacked_size as u64)?; + } + + // Padding + let count = count_output.count(); + let padding_size = ((count ^ 0x03) + 1) & 0x03; + { + let mut digested = util::HasherWrite::new(&mut count_output, &mut digest); + let padding = vec![0; padding_size]; + digested.write_all(padding.as_slice())?; + } + + let crc32 = digest.sum32(); + count_output.write_u32::<LittleEndian>(crc32)?; + + Ok(count_output.count()) +} + +fn write_multibyte<W>(output: &mut W, mut value: u64) -> io::Result<()> +where + W: io::Write, +{ + loop { + let byte = (value & 0x7F) as u8; + value >>= 7; + if value == 0 { + output.write_u8(byte)?; + break; + } else { + output.write_u8(0x80 | byte)?; + } + } + + Ok(()) +} diff --git a/rust/vendor/lzma-rs/src/error.rs b/rust/vendor/lzma-rs/src/error.rs new file 
mode 100644 index 0000000..be5bfcd --- /dev/null +++ b/rust/vendor/lzma-rs/src/error.rs @@ -0,0 +1,72 @@ +//! Error handling. + +use std::fmt::Display; +use std::io; +use std::result; + +/// Library errors. +#[derive(Debug)] +pub enum Error { + /// I/O error. + IoError(io::Error), + /// Not enough bytes to complete header + HeaderTooShort(io::Error), + /// LZMA error. + LzmaError(String), + /// XZ error. + XzError(String), +} + +/// Library result alias. +pub type Result<T> = result::Result<T, Error>; + +impl From<io::Error> for Error { + fn from(e: io::Error) -> Error { + Error::IoError(e) + } +} + +impl Display for Error { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IoError(e) => write!(fmt, "io error: {}", e), + Error::HeaderTooShort(e) => write!(fmt, "header too short: {}", e), + Error::LzmaError(e) => write!(fmt, "lzma error: {}", e), + Error::XzError(e) => write!(fmt, "xz error: {}", e), + } + } +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::IoError(e) | Error::HeaderTooShort(e) => Some(e), + Error::LzmaError(_) | Error::XzError(_) => None, + } + } +} + +#[cfg(test)] +mod test { + use super::Error; + + #[test] + fn test_display() { + assert_eq!( + Error::IoError(std::io::Error::new( + std::io::ErrorKind::Other, + "this is an error" + )) + .to_string(), + "io error: this is an error" + ); + assert_eq!( + Error::LzmaError("this is an error".to_string()).to_string(), + "lzma error: this is an error" + ); + assert_eq!( + Error::XzError("this is an error".to_string()).to_string(), + "xz error: this is an error" + ); + } +} diff --git a/rust/vendor/lzma-rs/src/lib.rs b/rust/vendor/lzma-rs/src/lib.rs new file mode 100644 index 0000000..763e8d8 --- /dev/null +++ b/rust/vendor/lzma-rs/src/lib.rs @@ -0,0 +1,103 @@ +//! Pure-Rust codecs for LZMA, LZMA2, and XZ. 
+ +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +#![forbid(unsafe_code)] + +#[macro_use] +mod macros; + +mod decode; +mod encode; +pub mod error; +mod xz; + +use crate::decode::lzbuffer::LzBuffer; +use std::io; + +/// Compression helpers. +pub mod compress { + pub use crate::encode::options::*; +} + +/// Decompression helpers. +pub mod decompress { + pub use crate::decode::options::*; + #[cfg(feature = "stream")] + pub use crate::decode::stream::Stream; +} + +/// Decompress LZMA data with default [`Options`](decompress/struct.Options.html). +pub fn lzma_decompress<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, +) -> error::Result<()> { + lzma_decompress_with_options(input, output, &decompress::Options::default()) +} + +/// Decompress LZMA data with the provided options. +pub fn lzma_decompress_with_options<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, + options: &decompress::Options, +) -> error::Result<()> { + let params = decode::lzma::LzmaParams::read_header(input, options)?; + let mut decoder = if let Some(memlimit) = options.memlimit { + decode::lzma::new_circular_with_memlimit(output, params, memlimit)? + } else { + decode::lzma::new_circular(output, params)? + }; + + let mut rangecoder = decode::rangecoder::RangeDecoder::new(input) + .map_err(|e| error::Error::LzmaError(format!("LZMA stream too short: {}", e)))?; + decoder.process(&mut rangecoder)?; + decoder.output.finish()?; + Ok(()) +} + +/// Compresses data with LZMA and default [`Options`](compress/struct.Options.html). +pub fn lzma_compress<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, +) -> io::Result<()> { + lzma_compress_with_options(input, output, &compress::Options::default()) +} + +/// Compress LZMA data with the provided options. 
+pub fn lzma_compress_with_options<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, + options: &compress::Options, +) -> io::Result<()> { + let encoder = encode::dumbencoder::Encoder::from_stream(output, options)?; + encoder.process(input) +} + +/// Decompress LZMA2 data with default [`Options`](decompress/struct.Options.html). +pub fn lzma2_decompress<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, +) -> error::Result<()> { + decode::lzma2::decode_stream(input, output) +} + +/// Compress data with LZMA2 and default [`Options`](compress/struct.Options.html). +pub fn lzma2_compress<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, +) -> io::Result<()> { + encode::lzma2::encode_stream(input, output) +} + +/// Decompress XZ data with default [`Options`](decompress/struct.Options.html). +pub fn xz_decompress<R: io::BufRead, W: io::Write>( + input: &mut R, + output: &mut W, +) -> error::Result<()> { + decode::xz::decode_stream(input, output) +} + +/// Compress data with XZ and default [`Options`](compress/struct.Options.html). +pub fn xz_compress<R: io::BufRead, W: io::Write>(input: &mut R, output: &mut W) -> io::Result<()> { + encode::xz::encode_stream(input, output) +} diff --git a/rust/vendor/lzma-rs/src/macros.rs b/rust/vendor/lzma-rs/src/macros.rs new file mode 100644 index 0000000..82f1977 --- /dev/null +++ b/rust/vendor/lzma-rs/src/macros.rs @@ -0,0 +1,41 @@ +/// Log trace message (feature: enabled). +#[cfg(feature = "enable_logging")] +macro_rules! lzma_trace { + ($($arg:tt)+) => { + log::trace!($($arg)+); + } +} + +/// Log debug message (feature: enabled). +#[cfg(feature = "enable_logging")] +macro_rules! lzma_debug { + ($($arg:tt)+) => { + log::debug!($($arg)+); + } +} + +/// Log info message (feature: enabled). +#[cfg(feature = "enable_logging")] +macro_rules! lzma_info { + ($($arg:tt)+) => { + log::info!($($arg)+); + } +} + +/// Log trace message (feature: disabled). 
+#[cfg(not(feature = "enable_logging"))] +macro_rules! lzma_trace { + ($($arg:tt)+) => {}; +} + +/// Log debug message (feature: disabled). +#[cfg(not(feature = "enable_logging"))] +macro_rules! lzma_debug { + ($($arg:tt)+) => {}; +} + +/// Log info message (feature: disabled). +#[cfg(not(feature = "enable_logging"))] +macro_rules! lzma_info { + ($($arg:tt)+) => {}; +} diff --git a/rust/vendor/lzma-rs/src/xz/footer.rs b/rust/vendor/lzma-rs/src/xz/footer.rs new file mode 100644 index 0000000..73d74eb --- /dev/null +++ b/rust/vendor/lzma-rs/src/xz/footer.rs @@ -0,0 +1,4 @@ +//! XZ footer. + +/// File format trailing terminator, see sect. 2.1.2.4. +pub(crate) const XZ_MAGIC_FOOTER: &[u8] = &[0x59, 0x5A]; diff --git a/rust/vendor/lzma-rs/src/xz/header.rs b/rust/vendor/lzma-rs/src/xz/header.rs new file mode 100644 index 0000000..0266414 --- /dev/null +++ b/rust/vendor/lzma-rs/src/xz/header.rs @@ -0,0 +1,52 @@ +//! XZ header. + +use crate::decode::util; +use crate::error; +use crate::xz::StreamFlags; +use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; +use crc::crc32::{self, Hasher32}; + +/// File format magic header signature, see sect. 2.1.1.1. +pub(crate) const XZ_MAGIC: &[u8] = &[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]; + +/// Stream Header, see sect. 2.1.1. +#[derive(Clone, Copy, Debug)] +pub(crate) struct StreamHeader { + pub(crate) stream_flags: StreamFlags, +} + +impl StreamHeader { + /// Parse a Stream Header from a buffered reader. + pub(crate) fn parse<BR>(input: &mut BR) -> error::Result<Self> + where + BR: std::io::BufRead, + { + if !util::read_tag(input, XZ_MAGIC)? 
{ + return Err(error::Error::XzError(format!( + "Invalid XZ magic, expected {:?}", + XZ_MAGIC + ))); + } + + let (flags, digested) = { + let mut digest = crc32::Digest::new(crc32::IEEE); + let mut digest_rd = util::HasherRead::new(input, &mut digest); + let value = digest_rd.read_u16::<BigEndian>()?; + (value, digest.sum32()) + }; + + let crc32 = input.read_u32::<LittleEndian>()?; + if crc32 != digested { + return Err(error::Error::XzError(format!( + "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", + crc32, digested + ))); + } + + let stream_flags = StreamFlags::parse(flags)?; + let header = Self { stream_flags }; + + lzma_info!("XZ check method: {:?}", header.stream_flags.check_method); + Ok(header) + } +} diff --git a/rust/vendor/lzma-rs/src/xz/mod.rs b/rust/vendor/lzma-rs/src/xz/mod.rs new file mode 100644 index 0000000..d68ed6c --- /dev/null +++ b/rust/vendor/lzma-rs/src/xz/mod.rs @@ -0,0 +1,117 @@ +//! Logic for handling `.xz` file format. +//! +//! Format specifications are at [https://tukaani.org/xz/xz-file-format.txt](spec). +//! +//! [spec]: https://tukaani.org/xz/xz-file-format.txt + +use crate::error; +use std::io; + +pub(crate) mod footer; +pub(crate) mod header; + +/// Stream flags, see sect. 2.1.1.2. +/// +/// This does not store the leading null byte, which is currently unused. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct StreamFlags { + pub(crate) check_method: CheckMethod, +} + +impl StreamFlags { + /// Parse Stream Flags from a 16bits value. + pub(crate) fn parse(input: u16) -> error::Result<Self> { + let flags_bytes = input.to_be_bytes(); + + if flags_bytes[0] != 0x00 { + return Err(error::Error::XzError(format!( + "Invalid null byte in Stream Flags: {:x}", + flags_bytes[0] + ))); + } + + let flags = Self { + check_method: CheckMethod::try_from(flags_bytes[1])?, + }; + Ok(flags) + } + + /// Serialize Stream Flags into a writer. 
+ pub(crate) fn serialize<W>(self, writer: &mut W) -> io::Result<usize> + where + W: io::Write, + { + // First byte is currently unused and hard-coded to null. + writer + .write(&[0x00, self.check_method as u8]) + .map_err(Into::into) + } +} + +/// Stream check type, see sect. 2.1.1.2. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[repr(u8)] +pub enum CheckMethod { + None = 0x00, + Crc32 = 0x01, + Crc64 = 0x04, + Sha256 = 0x0A, +} + +impl CheckMethod { + /// Parse Check ID (second byte in Stream Flags). + pub fn try_from(id: u8) -> error::Result<CheckMethod> { + match id { + 0x00 => Ok(CheckMethod::None), + 0x01 => Ok(CheckMethod::Crc32), + 0x04 => Ok(CheckMethod::Crc64), + 0x0A => Ok(CheckMethod::Sha256), + _ => Err(error::Error::XzError(format!( + "Invalid check method {:x}, expected one of [0x00, 0x01, 0x04, 0x0A]", + id + ))), + } + } +} + +impl From<CheckMethod> for u8 { + fn from(method: CheckMethod) -> u8 { + method as u8 + } +} + +#[cfg(test)] +mod test { + use super::*; + use byteorder::{BigEndian, ReadBytesExt}; + use std::io::{Seek, SeekFrom}; + + #[test] + fn test_checkmethod_roundtrip() { + let mut count_valid = 0; + for input in 0..std::u8::MAX { + if let Ok(check) = CheckMethod::try_from(input) { + let output: u8 = check.into(); + assert_eq!(input, output); + count_valid += 1; + } + } + assert_eq!(count_valid, 4); + } + + #[test] + fn test_streamflags_roundtrip() { + let input = StreamFlags { + check_method: CheckMethod::Crc32, + }; + + let mut cursor = std::io::Cursor::new(vec![0u8; 2]); + let len = input.serialize(&mut cursor).unwrap(); + assert_eq!(len, 2); + + cursor.seek(SeekFrom::Start(0)).unwrap(); + let field = cursor.read_u16::<BigEndian>().unwrap(); + let output = StreamFlags::parse(field).unwrap(); + assert_eq!(input, output); + } +} |