author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-18 02:49:50 +0000
commit    9835e2ae736235810b4ea1c162ca5e65c547e770 (patch)
tree      3fcebf40ed70e581d776a8a4c65923e8ec20e026 /vendor/ruzstd/src/decoding
parent    Releasing progress-linux version 1.70.0+dfsg2-1~progress7.99u1. (diff)
download  rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.tar.xz
          rustc-9835e2ae736235810b4ea1c162ca5e65c547e770.zip
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ruzstd/src/decoding')
-rw-r--r-- vendor/ruzstd/src/decoding/bit_reader.rs               | 107
-rw-r--r-- vendor/ruzstd/src/decoding/bit_reader_reverse.rs       | 258
-rw-r--r-- vendor/ruzstd/src/decoding/block_decoder.rs            | 378
-rw-r--r-- vendor/ruzstd/src/decoding/decodebuffer.rs             | 423
-rw-r--r-- vendor/ruzstd/src/decoding/dictionary.rs               |  93
-rw-r--r-- vendor/ruzstd/src/decoding/literals_section_decoder.rs | 180
-rw-r--r-- vendor/ruzstd/src/decoding/mod.rs                      |  11
-rw-r--r-- vendor/ruzstd/src/decoding/ringbuffer.rs               | 632
-rw-r--r-- vendor/ruzstd/src/decoding/scratch.rs                  | 128
-rw-r--r-- vendor/ruzstd/src/decoding/sequence_execution.rs       | 123
-rw-r--r-- vendor/ruzstd/src/decoding/sequence_section_decoder.rs | 495
11 files changed, 2828 insertions, 0 deletions
diff --git a/vendor/ruzstd/src/decoding/bit_reader.rs b/vendor/ruzstd/src/decoding/bit_reader.rs
new file mode 100644
index 000000000..e07ac45c4
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/bit_reader.rs
@@ -0,0 +1,107 @@
+pub struct BitReader<'s> {
+ idx: usize, //index counts bits already read
+ source: &'s [u8],
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum GetBitsError {
+ #[error("Cant serve this request. The reader is limited to {limit} bits, requested {num_requested_bits} bits")]
+ TooManyBits {
+ num_requested_bits: usize,
+ limit: u8,
+ },
+ #[error("Can't read {requested} bits, only have {remaining} bits left")]
+ NotEnoughRemainingBits { requested: usize, remaining: usize },
+}
+
+impl<'s> BitReader<'s> {
+ pub fn new(source: &'s [u8]) -> BitReader<'_> {
+ BitReader { idx: 0, source }
+ }
+
+ pub fn bits_left(&self) -> usize {
+ self.source.len() * 8 - self.idx
+ }
+
+ pub fn bits_read(&self) -> usize {
+ self.idx
+ }
+
+ pub fn return_bits(&mut self, n: usize) {
+ if n > self.idx {
+ panic!("Cant return this many bits");
+ }
+ self.idx -= n;
+ }
+
+ pub fn get_bits(&mut self, n: usize) -> Result<u64, GetBitsError> {
+ if n > 64 {
+ return Err(GetBitsError::TooManyBits {
+ num_requested_bits: n,
+ limit: 64,
+ });
+ }
+ if self.bits_left() < n {
+ return Err(GetBitsError::NotEnoughRemainingBits {
+ requested: n,
+ remaining: self.bits_left(),
+ });
+ }
+
+ let old_idx = self.idx;
+
+ let bits_left_in_current_byte = 8 - (self.idx % 8);
+ let bits_not_needed_in_current_byte = 8 - bits_left_in_current_byte;
+
+ //collect bits from the byte currently pointed to
+ let mut value = u64::from(self.source[self.idx / 8] >> bits_not_needed_in_current_byte);
+
+ if bits_left_in_current_byte >= n {
+ //no need for fancy stuff
+
+ //just mask all but the needed n bits
+ value &= (1 << n) - 1;
+ self.idx += n;
+ } else {
+ self.idx += bits_left_in_current_byte;
+
+ //n spans over multiple bytes
+ let full_bytes_needed = (n - bits_left_in_current_byte) / 8;
+ let bits_in_last_byte_needed = n - bits_left_in_current_byte - full_bytes_needed * 8;
+
+ assert!(
+ bits_left_in_current_byte + full_bytes_needed * 8 + bits_in_last_byte_needed == n
+ );
+
+ let mut bit_shift = bits_left_in_current_byte; //this many bits are already set in value
+
+ assert!(self.idx % 8 == 0);
+
+ //collect full bytes
+ for _ in 0..full_bytes_needed {
+ value |= u64::from(self.source[self.idx / 8]) << bit_shift;
+ self.idx += 8;
+ bit_shift += 8;
+ }
+
+ assert!(n - bit_shift == bits_in_last_byte_needed);
+
+ if bits_in_last_byte_needed > 0 {
+ let val_last_byte =
+ u64::from(self.source[self.idx / 8]) & ((1 << bits_in_last_byte_needed) - 1);
+ value |= val_last_byte << bit_shift;
+ self.idx += bits_in_last_byte_needed;
+ }
+ }
+
+ assert!(self.idx == old_idx + n);
+
+ Ok(value)
+ }
+
+ pub fn reset(&mut self, new_source: &'s [u8]) {
+ self.idx = 0;
+ self.source = new_source;
+ }
+}
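Editor's note: the reader consumes bits least-significant-first within each byte, spilling into the next byte as needed. A minimal standalone sketch of the addressing scheme get_bits implements (not part of the vendored diff; the helper name get_bits_lsb_first is ours for illustration):

fn get_bits_lsb_first(source: &[u8], start_bit: usize, n: usize) -> u64 {
    assert!(n <= 64 && start_bit + n <= source.len() * 8);
    let mut value = 0u64;
    for i in 0..n {
        // bit i of the stream is bit (i % 8) of byte (i / 8)
        let bit_idx = start_bit + i;
        let bit = (source[bit_idx / 8] >> (bit_idx % 8)) & 1;
        value |= u64::from(bit) << i;
    }
    value
}

fn main() {
    let src = [0b1010_0110u8, 0b0000_0001];
    assert_eq!(get_bits_lsb_first(&src, 0, 3), 0b110); // low three bits of byte 0
    assert_eq!(get_bits_lsb_first(&src, 3, 6), 0b110100); // spans the byte boundary
}

BitReader::get_bits produces the same values, just a byte at a time instead of a bit at a time.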
diff --git a/vendor/ruzstd/src/decoding/bit_reader_reverse.rs b/vendor/ruzstd/src/decoding/bit_reader_reverse.rs
new file mode 100644
index 000000000..5bc5a2a74
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/bit_reader_reverse.rs
@@ -0,0 +1,258 @@
+pub use super::bit_reader::GetBitsError;
+use byteorder::ByteOrder;
+use byteorder::LittleEndian;
+
+pub struct BitReaderReversed<'s> {
+ idx: isize, //index counts bits already read
+ source: &'s [u8],
+
+ bit_container: u64,
+ bits_in_container: u8,
+}
+
+impl<'s> BitReaderReversed<'s> {
+ pub fn bits_remaining(&self) -> isize {
+ self.idx + self.bits_in_container as isize
+ }
+
+ pub fn new(source: &'s [u8]) -> BitReaderReversed<'_> {
+ BitReaderReversed {
+ idx: source.len() as isize * 8,
+ source,
+ bit_container: 0,
+ bits_in_container: 0,
+ }
+ }
+
+ /// We refill the container in full bytes, shifting the still unread portion to the left, and filling the lower bits with new data
+ #[inline(always)]
+ fn refill_container(&mut self) {
+ let byte_idx = self.byte_idx() as usize;
+
+ let retain_bytes = (self.bits_in_container + 7) / 8;
+ let want_to_read_bits = 64 - (retain_bytes * 8);
+
+ // if there are >= 8 byte left to read we go a fast path:
+ // The slice is looking something like this |U..UCCCCCCCCR..R| Where U are some unread bytes, C are the bytes in the container, and R are already read bytes
+ // What we do is, we shift the container by a few bytes to the left by just reading a u64 from the correct position, rereading the portion we did not yet return from the conainer.
+ // Technically this would still work for positions lower than 8 but this guarantees that enough bytes are in the source and generally makes for less edge cases
+ if byte_idx >= 8 {
+ self.refill_fast(byte_idx, retain_bytes, want_to_read_bits)
+ } else {
+ // In the slow path we just read however many bytes we can
+ self.refill_slow(byte_idx, want_to_read_bits)
+ }
+ }
+
+ #[inline(always)]
+ fn refill_fast(&mut self, byte_idx: usize, retain_bytes: u8, want_to_read_bits: u8) {
+ let load_from_byte_idx = byte_idx - 7 + retain_bytes as usize;
+ let refill = LittleEndian::read_u64(&self.source[load_from_byte_idx..]);
+ self.bit_container = refill;
+ self.bits_in_container += want_to_read_bits;
+ self.idx -= want_to_read_bits as isize;
+ }
+
+ #[cold]
+ fn refill_slow(&mut self, byte_idx: usize, want_to_read_bits: u8) {
+ let can_read_bits = isize::min(want_to_read_bits as isize, self.idx);
+ let can_read_bytes = can_read_bits / 8;
+
+ match can_read_bytes {
+ 8 => {
+ self.bit_container = LittleEndian::read_u64(&self.source[byte_idx - 7..]);
+ self.bits_in_container += 64;
+ self.idx -= 64;
+ }
+ 6..=7 => {
+ self.bit_container <<= 48;
+ self.bits_in_container += 48;
+ self.bit_container |= LittleEndian::read_u48(&self.source[byte_idx - 5..]);
+ self.idx -= 48;
+ }
+ 4..=5 => {
+ self.bit_container <<= 32;
+ self.bits_in_container += 32;
+ self.bit_container |=
+ u64::from(LittleEndian::read_u32(&self.source[byte_idx - 3..]));
+ self.idx -= 32;
+ }
+ 2..=3 => {
+ self.bit_container <<= 16;
+ self.bits_in_container += 16;
+ self.bit_container |=
+ u64::from(LittleEndian::read_u16(&self.source[byte_idx - 1..]));
+ self.idx -= 16;
+ }
+ 1 => {
+ self.bit_container <<= 8;
+ self.bits_in_container += 8;
+ self.bit_container |= u64::from(self.source[byte_idx]);
+ self.idx -= 8;
+ }
+ _ => {
+ panic!("This cannot be reached");
+ }
+ }
+ }
+
+ /// Next byte that should be read into the container
+ /// Negative values mean that the source buffer has been read into the container completely.
+ fn byte_idx(&self) -> isize {
+ (self.idx - 1) / 8
+ }
+
+ #[inline(always)]
+ pub fn get_bits(&mut self, n: u8) -> Result<u64, GetBitsError> {
+ if n == 0 {
+ return Ok(0);
+ }
+ if self.bits_in_container >= n {
+ return Ok(self.get_bits_unchecked(n));
+ }
+
+ self.get_bits_cold(n)
+ }
+
+ #[cold]
+ fn get_bits_cold(&mut self, n: u8) -> Result<u64, GetBitsError> {
+ if n > 56 {
+ return Err(GetBitsError::TooManyBits {
+ num_requested_bits: usize::from(n),
+ limit: 56,
+ });
+ }
+
+ let signed_n = n as isize;
+
+ if self.bits_remaining() <= 0 {
+ self.idx -= signed_n;
+ return Ok(0);
+ }
+
+ if self.bits_remaining() < signed_n {
+ let emulated_read_shift = signed_n - self.bits_remaining();
+ let v = self.get_bits(self.bits_remaining() as u8)?;
+ debug_assert!(self.idx == 0);
+ let value = v << emulated_read_shift;
+ self.idx -= emulated_read_shift;
+ return Ok(value);
+ }
+
+ while (self.bits_in_container < n) && self.idx > 0 {
+ self.refill_container();
+ }
+
+ debug_assert!(self.bits_in_container >= n);
+
+ //if we reach this point there are enough bits in the container
+
+ Ok(self.get_bits_unchecked(n))
+ }
+
+ #[inline(always)]
+ pub fn get_bits_triple(
+ &mut self,
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ ) -> Result<(u64, u64, u64), GetBitsError> {
+ let sum = n1 as usize + n2 as usize + n3 as usize;
+ if sum == 0 {
+ return Ok((0, 0, 0));
+ }
+ if sum > 56 {
+ // try to get the values separately
+ return Ok((self.get_bits(n1)?, self.get_bits(n2)?, self.get_bits(n3)?));
+ }
+ let sum = sum as u8;
+
+ if self.bits_in_container >= sum {
+ let v1 = if n1 == 0 {
+ 0
+ } else {
+ self.get_bits_unchecked(n1)
+ };
+ let v2 = if n2 == 0 {
+ 0
+ } else {
+ self.get_bits_unchecked(n2)
+ };
+ let v3 = if n3 == 0 {
+ 0
+ } else {
+ self.get_bits_unchecked(n3)
+ };
+
+ return Ok((v1, v2, v3));
+ }
+
+ self.get_bits_triple_cold(n1, n2, n3, sum)
+ }
+
+ #[cold]
+ fn get_bits_triple_cold(
+ &mut self,
+ n1: u8,
+ n2: u8,
+ n3: u8,
+ sum: u8,
+ ) -> Result<(u64, u64, u64), GetBitsError> {
+ let sum_signed = sum as isize;
+
+ if self.bits_remaining() <= 0 {
+ self.idx -= sum_signed;
+ return Ok((0, 0, 0));
+ }
+
+ if self.bits_remaining() < sum_signed {
+ return Ok((self.get_bits(n1)?, self.get_bits(n2)?, self.get_bits(n3)?));
+ }
+
+ while (self.bits_in_container < sum) && self.idx > 0 {
+ self.refill_container();
+ }
+
+ debug_assert!(self.bits_in_container >= sum);
+
+ //if we reach this point there are enough bits in the container
+
+ let v1 = if n1 == 0 {
+ 0
+ } else {
+ self.get_bits_unchecked(n1)
+ };
+ let v2 = if n2 == 0 {
+ 0
+ } else {
+ self.get_bits_unchecked(n2)
+ };
+ let v3 = if n3 == 0 {
+ 0
+ } else {
+ self.get_bits_unchecked(n3)
+ };
+
+ Ok((v1, v2, v3))
+ }
+
+ #[inline(always)]
+ fn get_bits_unchecked(&mut self, n: u8) -> u64 {
+ let shift_by = self.bits_in_container - n;
+ let mask = (1u64 << n) - 1u64;
+
+ let value = self.bit_container >> shift_by;
+ self.bits_in_container -= n;
+ let value_masked = value & mask;
+ debug_assert!(value_masked < (1 << n));
+
+ value_masked
+ }
+
+ pub fn reset(&mut self, new_source: &'s [u8]) {
+ self.idx = new_source.len() as isize * 8;
+ self.source = new_source;
+ self.bit_container = 0;
+ self.bits_in_container = 0;
+ }
+}
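Editor's note: this reader walks the stream back to front, which is how zstd's FSE and huffman bitstreams are laid out. A standalone bit-at-a-time model of what get_bits returns (not part of the vendored diff; the helper name and the naive loop are ours):

fn get_bits_reversed(source: &[u8], bits_already_read: usize, n: usize) -> u64 {
    let total_bits = source.len() * 8;
    let mut value = 0u64;
    for i in 0..n {
        // walk from the very last bit of the buffer downwards
        let bit_idx = total_bits - 1 - (bits_already_read + i);
        let bit = (source[bit_idx / 8] >> (bit_idx % 8)) & 1;
        value = (value << 1) | u64::from(bit);
    }
    value
}

fn main() {
    let src = [0b0000_0000u8, 0b1011_0001];
    assert_eq!(get_bits_reversed(&src, 0, 4), 0b1011); // top nibble of the last byte
    assert_eq!(get_bits_reversed(&src, 4, 4), 0b0001); // next nibble down
}

The container/refill machinery above exists purely so this can happen up to 8 bytes at a time instead of bit by bit.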
diff --git a/vendor/ruzstd/src/decoding/block_decoder.rs b/vendor/ruzstd/src/decoding/block_decoder.rs
new file mode 100644
index 000000000..11a4c28c1
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/block_decoder.rs
@@ -0,0 +1,378 @@
+use super::super::blocks::block::BlockHeader;
+use super::super::blocks::block::BlockType;
+use super::super::blocks::literals_section::LiteralsSection;
+use super::super::blocks::literals_section::LiteralsSectionType;
+use super::super::blocks::sequence_section::SequencesHeader;
+use super::literals_section_decoder::{decode_literals, DecompressLiteralsError};
+use super::sequence_execution::ExecuteSequencesError;
+use super::sequence_section_decoder::decode_sequences;
+use super::sequence_section_decoder::DecodeSequenceError;
+use crate::blocks::literals_section::LiteralsSectionParseError;
+use crate::blocks::sequence_section::SequencesHeaderParseError;
+use crate::decoding::scratch::DecoderScratch;
+use crate::decoding::sequence_execution::execute_sequences;
+use std::io::{self, Read};
+
+pub struct BlockDecoder {
+ header_buffer: [u8; 3],
+ internal_state: DecoderState,
+}
+
+enum DecoderState {
+ ReadyToDecodeNextHeader,
+ ReadyToDecodeNextBody,
+ #[allow(dead_code)]
+ Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockHeaderReadError {
+ #[error("Error while reading the block header")]
+ ReadError(#[from] io::Error),
+ #[error("Reserved block occured. This is considered corruption by the documentation")]
+ FoundReservedBlock,
+ #[error("Error getting block type: {0}")]
+ BlockTypeError(#[from] BlockTypeError),
+ #[error("Error getting block content size: {0}")]
+ BlockSizeError(#[from] BlockSizeError),
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockTypeError {
+ #[error(
+ "Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though"
+ )]
+ InvalidBlocktypeNumber { num: u8 },
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockSizeError {
+ #[error("Blocksize was bigger than the absolute maximum {ABSOLUTE_MAXIMUM_BLOCK_SIZE} (128kb). Is: {size}")]
+ BlockSizeTooLarge { size: u32 },
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecompressBlockError {
+ #[error("Error while reading the block content: {0}")]
+ BlockContentReadError(#[from] io::Error),
+ #[error("Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left")]
+ MalformedSectionHeader {
+ expected_len: usize,
+ remaining_bytes: usize,
+ },
+ #[error(transparent)]
+ DecompressLiteralsError(#[from] DecompressLiteralsError),
+ #[error(transparent)]
+ LiteralsSectionParseError(#[from] LiteralsSectionParseError),
+ #[error(transparent)]
+ SequencesHeaderParseError(#[from] SequencesHeaderParseError),
+ #[error(transparent)]
+ DecodeSequenceError(#[from] DecodeSequenceError),
+ #[error(transparent)]
+ ExecuteSequencesError(#[from] ExecuteSequencesError),
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecodeBlockContentError {
+ #[error("Can't decode next block if failed along the way. Results will be nonsense")]
+ DecoderStateIsFailed,
+ #[error("Cant decode next block body, while expecting to decode the header of the previous block. Results will be nonsense")]
+ ExpectedHeaderOfPreviousBlock,
+ #[error("Error while reading bytes for {step}: {source}")]
+ ReadError {
+ step: BlockType,
+ #[source]
+ source: io::Error,
+ },
+ #[error(transparent)]
+ DecompressBlockError(#[from] DecompressBlockError),
+}
+
+pub fn new() -> BlockDecoder {
+ BlockDecoder {
+ internal_state: DecoderState::ReadyToDecodeNextHeader,
+ header_buffer: [0u8; 3],
+ }
+}
+
+const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
+
+impl BlockDecoder {
+ pub fn decode_block_content(
+ &mut self,
+ header: &BlockHeader,
+ workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
+ mut source: impl Read,
+ ) -> Result<u64, DecodeBlockContentError> {
+ match self.internal_state {
+ DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
+ DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
+ DecoderState::ReadyToDecodeNextHeader => {
+ return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
+ }
+ }
+
+ let block_type = header.block_type;
+ match block_type {
+ BlockType::RLE => {
+ const BATCH_SIZE: usize = 512;
+ let mut buf = [0u8; BATCH_SIZE];
+ let full_reads = header.decompressed_size / BATCH_SIZE as u32;
+ let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
+
+ source.read_exact(&mut buf[0..1]).map_err(|err| {
+ DecodeBlockContentError::ReadError {
+ step: block_type,
+ source: err,
+ }
+ })?;
+ self.internal_state = DecoderState::ReadyToDecodeNextHeader;
+
+ for i in 1..BATCH_SIZE {
+ buf[i] = buf[0];
+ }
+
+ for _ in 0..full_reads {
+ workspace.buffer.push(&buf[..]);
+ }
+ let smaller = &mut buf[..single_read_size as usize];
+ workspace.buffer.push(smaller);
+
+ Ok(1)
+ }
+ BlockType::Raw => {
+ const BATCH_SIZE: usize = 128 * 1024;
+ let mut buf = [0u8; BATCH_SIZE];
+ let full_reads = header.decompressed_size / BATCH_SIZE as u32;
+ let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
+
+ for _ in 0..full_reads {
+ source.read_exact(&mut buf[..]).map_err(|err| {
+ DecodeBlockContentError::ReadError {
+ step: block_type,
+ source: err,
+ }
+ })?;
+ workspace.buffer.push(&buf[..]);
+ }
+
+ let smaller = &mut buf[..single_read_size as usize];
+ source
+ .read_exact(smaller)
+ .map_err(|err| DecodeBlockContentError::ReadError {
+ step: block_type,
+ source: err,
+ })?;
+ workspace.buffer.push(smaller);
+
+ self.internal_state = DecoderState::ReadyToDecodeNextHeader;
+ Ok(u64::from(header.decompressed_size))
+ }
+
+ BlockType::Reserved => {
+ panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
+ }
+
+ BlockType::Compressed => {
+ self.decompress_block(header, workspace, source)?;
+
+ self.internal_state = DecoderState::ReadyToDecodeNextHeader;
+ Ok(u64::from(header.content_size))
+ }
+ }
+ }
+
+ fn decompress_block(
+ &mut self,
+ header: &BlockHeader,
+ workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
+ mut source: impl Read,
+ ) -> Result<(), DecompressBlockError> {
+ workspace
+ .block_content_buffer
+ .resize(header.content_size as usize, 0);
+
+ source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
+ let raw = workspace.block_content_buffer.as_slice();
+
+ let mut section = LiteralsSection::new();
+ let bytes_in_literals_header = section.parse_from_header(raw)?;
+ let raw = &raw[bytes_in_literals_header as usize..];
+ if crate::VERBOSE {
+ println!(
+ "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
+ section.ls_type, section.regenerated_size, section.compressed_size
+ );
+ }
+
+ let upper_limit_for_literals = match section.compressed_size {
+ Some(x) => x as usize,
+ None => match section.ls_type {
+ LiteralsSectionType::RLE => 1,
+ LiteralsSectionType::Raw => section.regenerated_size as usize,
+ _ => panic!("Bug in this library"),
+ },
+ };
+
+ if raw.len() < upper_limit_for_literals {
+ return Err(DecompressBlockError::MalformedSectionHeader {
+ expected_len: upper_limit_for_literals,
+ remaining_bytes: raw.len(),
+ });
+ }
+
+ let raw_literals = &raw[..upper_limit_for_literals];
+ if crate::VERBOSE {
+ println!("Slice for literals: {}", raw_literals.len());
+ }
+
+ workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyway. Just be defensive here
+ let bytes_used_in_literals_section = decode_literals(
+ &section,
+ &mut workspace.huf,
+ raw_literals,
+ &mut workspace.literals_buffer,
+ )?;
+ assert!(
+ section.regenerated_size == workspace.literals_buffer.len() as u32,
+ "Wrong number of literals: {}, Should have been: {}",
+ workspace.literals_buffer.len(),
+ section.regenerated_size
+ );
+ assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);
+
+ let raw = &raw[upper_limit_for_literals..];
+ if crate::VERBOSE {
+ println!("Slice for sequences with headers: {}", raw.len());
+ }
+
+ let mut seq_section = SequencesHeader::new();
+ let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
+ let raw = &raw[bytes_in_sequence_header as usize..];
+ if crate::VERBOSE {
+ println!(
+ "Found sequencessection with sequences: {} and size: {}",
+ seq_section.num_sequences,
+ raw.len()
+ );
+ }
+
+ assert!(
+ u32::from(bytes_in_literals_header)
+ + bytes_used_in_literals_section
+ + u32::from(bytes_in_sequence_header)
+ + raw.len() as u32
+ == header.content_size
+ );
+ if crate::VERBOSE {
+ println!("Slice for sequences: {}", raw.len());
+ }
+
+ if seq_section.num_sequences != 0 {
+ decode_sequences(
+ &seq_section,
+ raw,
+ &mut workspace.fse,
+ &mut workspace.sequences,
+ )?;
+ if crate::VERBOSE {
+ println!("Executing sequences");
+ }
+ execute_sequences(workspace)?;
+ } else {
+ workspace.buffer.push(&workspace.literals_buffer);
+ workspace.sequences.clear();
+ }
+
+ Ok(())
+ }
+
+ pub fn read_block_header(
+ &mut self,
+ mut r: impl Read,
+ ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
+ //match self.internal_state {
+ // DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
+ // DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
+ // DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
+ //}
+
+ r.read_exact(&mut self.header_buffer[0..3])?;
+
+ let btype = self.block_type()?;
+ if let BlockType::Reserved = btype {
+ return Err(BlockHeaderReadError::FoundReservedBlock);
+ }
+
+ let block_size = self.block_content_size()?;
+ let decompressed_size = match btype {
+ BlockType::Raw => block_size,
+ BlockType::RLE => block_size,
+ BlockType::Reserved => 0, //should be catched above, this is an error state
+ BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
+ };
+ let content_size = match btype {
+ BlockType::Raw => block_size,
+ BlockType::Compressed => block_size,
+ BlockType::RLE => 1,
+ BlockType::Reserved => 0, //should be catched above, this is an error state
+ };
+
+ let last_block = self.is_last();
+
+ self.reset_buffer();
+ self.internal_state = DecoderState::ReadyToDecodeNextBody;
+
+ //just return 3. Block headers always take 3 bytes
+ Ok((
+ BlockHeader {
+ last_block,
+ block_type: btype,
+ decompressed_size,
+ content_size,
+ },
+ 3,
+ ))
+ }
+
+ fn reset_buffer(&mut self) {
+ self.header_buffer[0] = 0;
+ self.header_buffer[1] = 0;
+ self.header_buffer[2] = 0;
+ }
+
+ fn is_last(&self) -> bool {
+ self.header_buffer[0] & 0x1 == 1
+ }
+
+ fn block_type(&self) -> Result<BlockType, BlockTypeError> {
+ let t = (self.header_buffer[0] >> 1) & 0x3;
+ match t {
+ 0 => Ok(BlockType::Raw),
+ 1 => Ok(BlockType::RLE),
+ 2 => Ok(BlockType::Compressed),
+ 3 => Ok(BlockType::Reserved),
+ other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
+ }
+ }
+
+ fn block_content_size(&self) -> Result<u32, BlockSizeError> {
+ let val = self.block_content_size_unchecked();
+ if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
+ Err(BlockSizeError::BlockSizeTooLarge { size: val })
+ } else {
+ Ok(val)
+ }
+ }
+
+ fn block_content_size_unchecked(&self) -> u32 {
+ u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
+ | (u32::from(self.header_buffer[1]) << 5)
+ | (u32::from(self.header_buffer[2]) << 13)
+ }
+}
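Editor's note on the header layout that read_block_header and block_content_size_unchecked decode: the 3 bytes form a little-endian 24-bit value holding last_block (bit 0), block type (bits 1-2) and block size (bits 3-23). A self-contained sketch (not part of the vendored diff; the function name is ours):

fn parse_block_header(h: [u8; 3]) -> (bool, u8, u32) {
    let last_block = h[0] & 0x1 == 1;
    let block_type = (h[0] >> 1) & 0x3;
    // 5 size bits from byte 0, 8 from byte 1, 8 from byte 2
    let size = u32::from(h[0] >> 3) | (u32::from(h[1]) << 5) | (u32::from(h[2]) << 13);
    (last_block, block_type, size)
}

fn main() {
    // last_block = 1, type = 2 (Compressed), size = 0x1234
    let v: u32 = (0x1234 << 3) | (2 << 1) | 1;
    let h = [(v & 0xFF) as u8, ((v >> 8) & 0xFF) as u8, ((v >> 16) & 0xFF) as u8];
    assert_eq!(parse_block_header(h), (true, 2, 0x1234));
}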
diff --git a/vendor/ruzstd/src/decoding/decodebuffer.rs b/vendor/ruzstd/src/decoding/decodebuffer.rs
new file mode 100644
index 000000000..0dea7015e
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/decodebuffer.rs
@@ -0,0 +1,423 @@
+use std::hash::Hasher;
+use std::io;
+
+use twox_hash::XxHash64;
+
+use super::ringbuffer::RingBuffer;
+
+pub struct Decodebuffer {
+ buffer: RingBuffer,
+ pub dict_content: Vec<u8>,
+
+ pub window_size: usize,
+ total_output_counter: u64,
+ pub hash: XxHash64,
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecodebufferError {
+ #[error("Need {need} bytes from the dictionary but it is only {got} bytes long")]
+ NotEnoughBytesInDictionary { got: usize, need: usize },
+ #[error("offset: {offset} bigger than buffer: {buf_len}")]
+ OffsetTooBig { offset: usize, buf_len: usize },
+}
+
+impl io::Read for Decodebuffer {
+ fn read(&mut self, target: &mut [u8]) -> io::Result<usize> {
+ let max_amount = self.can_drain_to_window_size().unwrap_or(0);
+ let amount = max_amount.min(target.len());
+
+ let mut written = 0;
+ self.drain_to(amount, |buf| {
+ target[written..][..buf.len()].copy_from_slice(buf);
+ written += buf.len();
+ (buf.len(), Ok(()))
+ })?;
+ Ok(amount)
+ }
+}
+
+impl Decodebuffer {
+ pub fn new(window_size: usize) -> Decodebuffer {
+ Decodebuffer {
+ buffer: RingBuffer::new(),
+ dict_content: Vec::new(),
+ window_size,
+ total_output_counter: 0,
+ hash: XxHash64::with_seed(0),
+ }
+ }
+
+ pub fn reset(&mut self, window_size: usize) {
+ self.window_size = window_size;
+ self.buffer.clear();
+ self.buffer.reserve(self.window_size);
+ self.dict_content.clear();
+ self.total_output_counter = 0;
+ self.hash = XxHash64::with_seed(0);
+ }
+
+ pub fn len(&self) -> usize {
+ self.buffer.len()
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.buffer.is_empty()
+ }
+
+ pub fn push(&mut self, data: &[u8]) {
+ self.buffer.extend(data);
+ self.total_output_counter += data.len() as u64;
+ }
+
+ pub fn repeat(&mut self, offset: usize, match_length: usize) -> Result<(), DecodebufferError> {
+ if offset > self.buffer.len() {
+ if self.total_output_counter <= self.window_size as u64 {
+ // at least part of that repeat is from the dictionary content
+ let bytes_from_dict = offset - self.buffer.len();
+
+ if bytes_from_dict > self.dict_content.len() {
+ return Err(DecodebufferError::NotEnoughBytesInDictionary {
+ got: self.dict_content.len(),
+ need: bytes_from_dict,
+ });
+ }
+
+ if bytes_from_dict < match_length {
+ let dict_slice =
+ &self.dict_content[self.dict_content.len() - bytes_from_dict..];
+ self.buffer.extend(dict_slice);
+
+ self.total_output_counter += bytes_from_dict as u64;
+ return self.repeat(self.buffer.len(), match_length - bytes_from_dict);
+ } else {
+ let low = self.dict_content.len() - bytes_from_dict;
+ let high = low + match_length;
+ let dict_slice = &self.dict_content[low..high];
+ self.buffer.extend(dict_slice);
+ }
+ } else {
+ return Err(DecodebufferError::OffsetTooBig {
+ offset,
+ buf_len: self.buffer.len(),
+ });
+ }
+ } else {
+ let buf_len = self.buffer.len();
+ let start_idx = buf_len - offset;
+ let end_idx = start_idx + match_length;
+
+ self.buffer.reserve(match_length);
+ if end_idx > buf_len {
+ // We need to copy in chunks.
+ // Each chunk is at most offset bytes long; the last one can be smaller
+ let mut start_idx = start_idx;
+ let mut copied_counter_left = match_length;
+ // TODO this can be optimized further I think.
+ // Each time we copy a chunk we have a repetition of length 'offset', so we could copy offset * iteration many bytes from start_idx
+ while copied_counter_left > 0 {
+ let chunksize = usize::min(offset, copied_counter_left);
+
+ // SAFETY:
+ // we know that start_idx <= buf_len and start_idx + offset == buf_len and we reserverd match_length space
+ unsafe {
+ self.buffer
+ .extend_from_within_unchecked(start_idx, chunksize)
+ };
+ copied_counter_left -= chunksize;
+ start_idx += chunksize;
+ }
+ } else {
+ // can just copy parts of the existing buffer
+ // SAFETY:
+ // we know that start_idx and end_idx <= buf_len and we reserved match_length space
+ unsafe {
+ self.buffer
+ .extend_from_within_unchecked(start_idx, match_length)
+ };
+ }
+
+ self.total_output_counter += match_length as u64;
+ }
+
+ Ok(())
+ }
+
+ // Check if and how many bytes can currently be drained from the buffer
+ pub fn can_drain_to_window_size(&self) -> Option<usize> {
+ if self.buffer.len() > self.window_size {
+ Some(self.buffer.len() - self.window_size)
+ } else {
+ None
+ }
+ }
+
+ //How many bytes can be drained if the window_size does not have to be maintained
+ pub fn can_drain(&self) -> usize {
+ self.buffer.len()
+ }
+
+ //drain as much as possible while retaining enough so that decoding is still possible with the required window_size
+ //Ideally, call this only if can_drain_to_window_size reports a 'high' number of bytes, to reduce allocations
+ pub fn drain_to_window_size(&mut self) -> Option<Vec<u8>> {
+ //TODO investigate if it is possible to return the std::vec::Drain iterator directly without collecting here
+ match self.can_drain_to_window_size() {
+ None => None,
+ Some(can_drain) => {
+ let mut vec = Vec::with_capacity(can_drain);
+ self.drain_to(can_drain, |buf| {
+ vec.extend_from_slice(buf);
+ (buf.len(), Ok(()))
+ })
+ .ok()?;
+ Some(vec)
+ }
+ }
+ }
+
+ pub fn drain_to_window_size_writer(&mut self, mut sink: impl io::Write) -> io::Result<usize> {
+ match self.can_drain_to_window_size() {
+ None => Ok(0),
+ Some(can_drain) => {
+ self.drain_to(can_drain, |buf| write_all_bytes(&mut sink, buf))?;
+ Ok(can_drain)
+ }
+ }
+ }
+
+ //drain the buffer completely
+ pub fn drain(&mut self) -> Vec<u8> {
+ let (slice1, slice2) = self.buffer.as_slices();
+ self.hash.write(slice1);
+ self.hash.write(slice2);
+
+ let mut vec = Vec::with_capacity(slice1.len() + slice2.len());
+ vec.extend_from_slice(slice1);
+ vec.extend_from_slice(slice2);
+ self.buffer.clear();
+ vec
+ }
+
+ pub fn drain_to_writer(&mut self, mut sink: impl io::Write) -> io::Result<usize> {
+ let len = self.buffer.len();
+ self.drain_to(len, |buf| write_all_bytes(&mut sink, buf))?;
+
+ Ok(len)
+ }
+
+ pub fn read_all(&mut self, target: &mut [u8]) -> io::Result<usize> {
+ let amount = self.buffer.len().min(target.len());
+
+ let mut written = 0;
+ self.drain_to(amount, |buf| {
+ target[written..][..buf.len()].copy_from_slice(buf);
+ written += buf.len();
+ (buf.len(), Ok(()))
+ })?;
+ Ok(amount)
+ }
+
+ /// Semantics of write_bytes:
+ /// Should dump as many of the provided bytes as possible to whatever sink until no bytes are left or an error is encountered
+ /// Return how many bytes have actually been dumped to the sink.
+ fn drain_to(
+ &mut self,
+ amount: usize,
+ mut write_bytes: impl FnMut(&[u8]) -> (usize, io::Result<()>),
+ ) -> io::Result<()> {
+ if amount == 0 {
+ return Ok(());
+ }
+
+ struct DrainGuard<'a> {
+ buffer: &'a mut RingBuffer,
+ amount: usize,
+ }
+
+ impl<'a> Drop for DrainGuard<'a> {
+ fn drop(&mut self) {
+ if self.amount != 0 {
+ self.buffer.drop_first_n(self.amount);
+ }
+ }
+ }
+
+ let mut drain_guard = DrainGuard {
+ buffer: &mut self.buffer,
+ amount: 0,
+ };
+
+ let (slice1, slice2) = drain_guard.buffer.as_slices();
+ let n1 = slice1.len().min(amount);
+ let n2 = slice2.len().min(amount - n1);
+
+ if n1 != 0 {
+ let (written1, res1) = write_bytes(&slice1[..n1]);
+ self.hash.write(&slice1[..written1]);
+ drain_guard.amount += written1;
+
+ // Apparently this is what clippy thinks is the best way of expressing this
+ res1?;
+
+ // Only if the first call to write_bytes was not a partial write we can continue with slice2
+ // Partial writes SHOULD never happen without res1 being an error, but let's just protect against it anyway.
+ if written1 == n1 && n2 != 0 {
+ let (written2, res2) = write_bytes(&slice2[..n2]);
+ self.hash.write(&slice2[..written2]);
+ drain_guard.amount += written2;
+
+ // Apparently this is what clippy thinks is the best way of expressing this
+ res2?;
+ }
+ }
+
+ // Make sure we don't accidentally drop `DrainGuard` earlier.
+ drop(drain_guard);
+
+ Ok(())
+ }
+}
+
+/// Like Write::write_all but returns partial write length even on error
+fn write_all_bytes(mut sink: impl io::Write, buf: &[u8]) -> (usize, io::Result<()>) {
+ let mut written = 0;
+ while written < buf.len() {
+ match sink.write(&buf[written..]) {
+ Ok(w) => written += w,
+ Err(e) => return (written, Err(e)),
+ }
+ }
+ (written, Ok(()))
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Decodebuffer;
+ use std::io::Write;
+
+ #[test]
+ fn short_writer() {
+ struct ShortWriter {
+ buf: Vec<u8>,
+ write_len: usize,
+ }
+
+ impl Write for ShortWriter {
+ fn write(&mut self, buf: &[u8]) -> std::result::Result<usize, std::io::Error> {
+ if buf.len() > self.write_len {
+ self.buf.extend_from_slice(&buf[..self.write_len]);
+ Ok(self.write_len)
+ } else {
+ self.buf.extend_from_slice(buf);
+ Ok(buf.len())
+ }
+ }
+
+ fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
+ Ok(())
+ }
+ }
+
+ let mut short_writer = ShortWriter {
+ buf: vec![],
+ write_len: 10,
+ };
+
+ let mut decode_buf = Decodebuffer::new(100);
+ decode_buf.push(b"0123456789");
+ decode_buf.repeat(10, 90).unwrap();
+ let repeats = 1000;
+ for _ in 0..repeats {
+ assert_eq!(decode_buf.len(), 100);
+ decode_buf.repeat(10, 50).unwrap();
+ assert_eq!(decode_buf.len(), 150);
+ decode_buf
+ .drain_to_window_size_writer(&mut short_writer)
+ .unwrap();
+ assert_eq!(decode_buf.len(), 100);
+ }
+
+ assert_eq!(short_writer.buf.len(), repeats * 50);
+ decode_buf.drain_to_writer(&mut short_writer).unwrap();
+ assert_eq!(short_writer.buf.len(), repeats * 50 + 100);
+ }
+
+ #[test]
+ fn wouldblock_writer() {
+ struct WouldblockWriter {
+ buf: Vec<u8>,
+ last_blocked: usize,
+ block_every: usize,
+ }
+
+ impl Write for WouldblockWriter {
+ fn write(&mut self, buf: &[u8]) -> std::result::Result<usize, std::io::Error> {
+ if self.last_blocked < self.block_every {
+ self.buf.extend_from_slice(buf);
+ self.last_blocked += 1;
+ Ok(buf.len())
+ } else {
+ self.last_blocked = 0;
+ Err(std::io::Error::from(std::io::ErrorKind::WouldBlock))
+ }
+ }
+
+ fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
+ Ok(())
+ }
+ }
+
+ let mut short_writer = WouldblockWriter {
+ buf: vec![],
+ last_blocked: 0,
+ block_every: 5,
+ };
+
+ let mut decode_buf = Decodebuffer::new(100);
+ decode_buf.push(b"0123456789");
+ decode_buf.repeat(10, 90).unwrap();
+ let repeats = 1000;
+ for _ in 0..repeats {
+ assert_eq!(decode_buf.len(), 100);
+ decode_buf.repeat(10, 50).unwrap();
+ assert_eq!(decode_buf.len(), 150);
+ loop {
+ match decode_buf.drain_to_window_size_writer(&mut short_writer) {
+ Ok(written) => {
+ if written == 0 {
+ break;
+ }
+ }
+ Err(e) => {
+ if e.kind() == std::io::ErrorKind::WouldBlock {
+ continue;
+ } else {
+ panic!("Unexpected error {:?}", e);
+ }
+ }
+ }
+ }
+ assert_eq!(decode_buf.len(), 100);
+ }
+
+ assert_eq!(short_writer.buf.len(), repeats * 50);
+ loop {
+ match decode_buf.drain_to_writer(&mut short_writer) {
+ Ok(written) => {
+ if written == 0 {
+ break;
+ }
+ }
+ Err(e) => {
+ if e.kind() == std::io::ErrorKind::WouldBlock {
+ continue;
+ } else {
+ panic!("Unexpected error {:?}", e);
+ }
+ }
+ }
+ }
+ assert_eq!(short_writer.buf.len(), repeats * 50 + 100);
+ }
+}
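Editor's note: the interesting case in Decodebuffer::repeat is offset < match_length, where the match overlaps the bytes it is producing. A plain-Vec model of that behaviour (not part of the vendored diff; a deliberately naive sketch without the chunked unsafe copies used above):

fn repeat(buf: &mut Vec<u8>, offset: usize, match_length: usize) {
    assert!(offset > 0 && offset <= buf.len());
    let start = buf.len() - offset;
    for i in 0..match_length {
        // may read a byte pushed earlier in this same loop
        let b = buf[start + i];
        buf.push(b);
    }
}

fn main() {
    let mut buf = b"abc".to_vec();
    repeat(&mut buf, 2, 5); // repeat the last two bytes, five bytes' worth
    assert_eq!(buf, b"abcbcbcb");
}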
diff --git a/vendor/ruzstd/src/decoding/dictionary.rs b/vendor/ruzstd/src/decoding/dictionary.rs
new file mode 100644
index 000000000..51fbcdf0b
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/dictionary.rs
@@ -0,0 +1,93 @@
+use std::convert::TryInto;
+
+use crate::decoding::scratch::FSEScratch;
+use crate::decoding::scratch::HuffmanScratch;
+use crate::fse::FSETableError;
+use crate::huff0::HuffmanTableError;
+
+pub struct Dictionary {
+ pub id: u32,
+ pub fse: FSEScratch,
+ pub huf: HuffmanScratch,
+ pub dict_content: Vec<u8>,
+ pub offset_hist: [u32; 3],
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DictionaryDecodeError {
+ #[error(
+ "Bad magic_num at start of the dictionary; Got: {got:#04X?}, Expected: {MAGIC_NUM:#04x?}"
+ )]
+ BadMagicNum { got: [u8; 4] },
+ #[error(transparent)]
+ FSETableError(#[from] FSETableError),
+ #[error(transparent)]
+ HuffmanTableError(#[from] HuffmanTableError),
+}
+
+pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];
+
+impl Dictionary {
+ /// Parses the dictionary and sets the tables.
+ /// Returns the dict_id for checking against the frame's dict_id
+ pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> {
+ let mut new_dict = Dictionary {
+ id: 0,
+ fse: FSEScratch::new(),
+ huf: HuffmanScratch::new(),
+ dict_content: Vec::new(),
+ offset_hist: [2, 4, 8],
+ };
+
+ let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away");
+ if magic_num != MAGIC_NUM {
+ return Err(DictionaryDecodeError::BadMagicNum { got: magic_num });
+ }
+
+ let dict_id = raw[4..8].try_into().expect("optimized away");
+ let dict_id = u32::from_le_bytes(dict_id);
+ new_dict.id = dict_id;
+
+ let raw_tables = &raw[8..];
+
+ let huf_size = new_dict.huf.table.build_decoder(raw_tables)?;
+ let raw_tables = &raw_tables[huf_size as usize..];
+
+ let of_size = new_dict.fse.offsets.build_decoder(
+ raw_tables,
+ crate::decoding::sequence_section_decoder::OF_MAX_LOG,
+ )?;
+ let raw_tables = &raw_tables[of_size..];
+
+ let ml_size = new_dict.fse.match_lengths.build_decoder(
+ raw_tables,
+ crate::decoding::sequence_section_decoder::ML_MAX_LOG,
+ )?;
+ let raw_tables = &raw_tables[ml_size..];
+
+ let ll_size = new_dict.fse.literal_lengths.build_decoder(
+ raw_tables,
+ crate::decoding::sequence_section_decoder::LL_MAX_LOG,
+ )?;
+ let raw_tables = &raw_tables[ll_size..];
+
+ let offset1 = raw_tables[0..4].try_into().expect("optimized away");
+ let offset1 = u32::from_le_bytes(offset1);
+
+ let offset2 = raw_tables[4..8].try_into().expect("optimized away");
+ let offset2 = u32::from_le_bytes(offset2);
+
+ let offset3 = raw_tables[8..12].try_into().expect("optimized away");
+ let offset3 = u32::from_le_bytes(offset3);
+
+ new_dict.offset_hist[0] = offset1;
+ new_dict.offset_hist[1] = offset2;
+ new_dict.offset_hist[2] = offset3;
+
+ let raw_content = &raw_tables[12..];
+ new_dict.dict_content.extend(raw_content);
+
+ Ok(new_dict)
+ }
+}
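Editor's note: decode_dict above reads a fixed prefix, 4 magic bytes followed by the 32-bit dictionary id, both little-endian, before the entropy tables. A minimal sketch of just that prefix (not part of the vendored diff; dict_id is our illustrative helper):

use std::convert::TryInto;

const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];

fn dict_id(raw: &[u8]) -> Option<u32> {
    if raw.len() < 8 || raw[..4] != MAGIC_NUM {
        return None;
    }
    Some(u32::from_le_bytes(raw[4..8].try_into().ok()?))
}

fn main() {
    let mut raw = MAGIC_NUM.to_vec();
    raw.extend_from_slice(&0xDEAD_BEEFu32.to_le_bytes());
    assert_eq!(dict_id(&raw), Some(0xDEAD_BEEF));
}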
diff --git a/vendor/ruzstd/src/decoding/literals_section_decoder.rs b/vendor/ruzstd/src/decoding/literals_section_decoder.rs
new file mode 100644
index 000000000..d947f87eb
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/literals_section_decoder.rs
@@ -0,0 +1,180 @@
+use super::super::blocks::literals_section::{LiteralsSection, LiteralsSectionType};
+use super::bit_reader_reverse::{BitReaderReversed, GetBitsError};
+use super::scratch::HuffmanScratch;
+use crate::huff0::{HuffmanDecoder, HuffmanDecoderError, HuffmanTableError};
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecompressLiteralsError {
+ #[error(
+ "compressed size was none even though it must be set to something for compressed literals"
+ )]
+ MissingCompressedSize,
+ #[error("num_streams was none even though it must be set to something (1 or 4) for compressed literals")]
+ MissingNumStreams,
+ #[error(transparent)]
+ GetBitsError(#[from] GetBitsError),
+ #[error(transparent)]
+ HuffmanTableError(#[from] HuffmanTableError),
+ #[error(transparent)]
+ HuffmanDecoderError(#[from] HuffmanDecoderError),
+ #[error("Tried to reuse huffman table but it was never initialized")]
+ UninitializedHuffmanTable,
+ #[error("Need 6 bytes to decode jump header, got {got} bytes")]
+ MissingBytesForJumpHeader { got: usize },
+ #[error("Need at least {needed} bytes to decode literals. Have: {got} bytes")]
+ MissingBytesForLiterals { got: usize, needed: usize },
+ #[error("Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption")]
+ ExtraPadding { skipped_bits: i32 },
+ #[error("Bitstream was read till: {read_til}, should have been: {expected}")]
+ BitstreamReadMismatch { read_til: isize, expected: isize },
+ #[error("Did not decode enough literals: {decoded}, Should have been: {expected}")]
+ DecodedLiteralCountMismatch { decoded: usize, expected: usize },
+}
+
+pub fn decode_literals(
+ section: &LiteralsSection,
+ scratch: &mut HuffmanScratch,
+ source: &[u8],
+ target: &mut Vec<u8>,
+) -> Result<u32, DecompressLiteralsError> {
+ match section.ls_type {
+ LiteralsSectionType::Raw => {
+ target.extend(&source[0..section.regenerated_size as usize]);
+ Ok(section.regenerated_size)
+ }
+ LiteralsSectionType::RLE => {
+ target.resize(target.len() + section.regenerated_size as usize, source[0]);
+ Ok(1)
+ }
+ LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => {
+ let bytes_read = decompress_literals(section, scratch, source, target)?;
+
+ //return sum of used bytes
+ Ok(bytes_read)
+ }
+ }
+}
+
+fn decompress_literals(
+ section: &LiteralsSection,
+ scratch: &mut HuffmanScratch,
+ source: &[u8],
+ target: &mut Vec<u8>,
+) -> Result<u32, DecompressLiteralsError> {
+ use DecompressLiteralsError as err;
+
+ let compressed_size = section.compressed_size.ok_or(err::MissingCompressedSize)? as usize;
+ let num_streams = section.num_streams.ok_or(err::MissingNumStreams)?;
+
+ target.reserve(section.regenerated_size as usize);
+ let source = &source[0..compressed_size];
+ let mut bytes_read = 0;
+
+ match section.ls_type {
+ LiteralsSectionType::Compressed => {
+ //read Huffman tree description
+ bytes_read += scratch.table.build_decoder(source)?;
+ if crate::VERBOSE {
+ println!("Built huffman table using {} bytes", bytes_read);
+ }
+ }
+ LiteralsSectionType::Treeless => {
+ if scratch.table.max_num_bits == 0 {
+ return Err(err::UninitializedHuffmanTable);
+ }
+ }
+ _ => { /* nothing to do, huffman tree has been provided by previous block */ }
+ }
+
+ let source = &source[bytes_read as usize..];
+
+ if num_streams == 4 {
+ //build jumptable
+ if source.len() < 6 {
+ return Err(err::MissingBytesForJumpHeader { got: source.len() });
+ }
+ let jump1 = source[0] as usize + ((source[1] as usize) << 8);
+ let jump2 = jump1 + source[2] as usize + ((source[3] as usize) << 8);
+ let jump3 = jump2 + source[4] as usize + ((source[5] as usize) << 8);
+ bytes_read += 6;
+ let source = &source[6..];
+
+ if source.len() < jump3 {
+ return Err(err::MissingBytesForLiterals {
+ got: source.len(),
+ needed: jump3,
+ });
+ }
+
+ //decode 4 streams
+ let stream1 = &source[..jump1];
+ let stream2 = &source[jump1..jump2];
+ let stream3 = &source[jump2..jump3];
+ let stream4 = &source[jump3..];
+
+ for stream in &[stream1, stream2, stream3, stream4] {
+ let mut decoder = HuffmanDecoder::new(&scratch.table);
+ let mut br = BitReaderReversed::new(stream);
+ //skip the 0 padding at the end of the last byte of the bit stream and throw away the first 1 found
+ let mut skipped_bits = 0;
+ loop {
+ let val = br.get_bits(1)?;
+ skipped_bits += 1;
+ if val == 1 || skipped_bits > 8 {
+ break;
+ }
+ }
+ if skipped_bits > 8 {
+ //if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
+ return Err(DecompressLiteralsError::ExtraPadding { skipped_bits });
+ }
+ decoder.init_state(&mut br)?;
+
+ while br.bits_remaining() > -(scratch.table.max_num_bits as isize) {
+ target.push(decoder.decode_symbol());
+ decoder.next_state(&mut br)?;
+ }
+ if br.bits_remaining() != -(scratch.table.max_num_bits as isize) {
+ return Err(DecompressLiteralsError::BitstreamReadMismatch {
+ read_til: br.bits_remaining(),
+ expected: -(scratch.table.max_num_bits as isize),
+ });
+ }
+ }
+
+ bytes_read += source.len() as u32;
+ } else {
+ //just decode the one stream
+ assert!(num_streams == 1);
+ let mut decoder = HuffmanDecoder::new(&scratch.table);
+ let mut br = BitReaderReversed::new(source);
+ let mut skipped_bits = 0;
+ loop {
+ let val = br.get_bits(1)?;
+ skipped_bits += 1;
+ if val == 1 || skipped_bits > 8 {
+ break;
+ }
+ }
+ if skipped_bits > 8 {
+ //if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
+ return Err(DecompressLiteralsError::ExtraPadding { skipped_bits });
+ }
+ decoder.init_state(&mut br)?;
+ while br.bits_remaining() > -(scratch.table.max_num_bits as isize) {
+ target.push(decoder.decode_symbol());
+ decoder.next_state(&mut br)?;
+ }
+ bytes_read += source.len() as u32;
+ }
+
+ if target.len() != section.regenerated_size as usize {
+ return Err(DecompressLiteralsError::DecodedLiteralCountMismatch {
+ decoded: target.len(),
+ expected: section.regenerated_size as usize,
+ });
+ }
+
+ Ok(bytes_read)
+}
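Editor's note: the four-stream path above splits the compressed literals using a 6-byte jump table of three little-endian u16 stream sizes, accumulated into split points; the fourth stream takes the remainder. A standalone sketch (not part of the vendored diff; stream_bounds is our name):

fn stream_bounds(jump_table: [u8; 6], total: usize) -> [(usize, usize); 4] {
    let s1 = u16::from_le_bytes([jump_table[0], jump_table[1]]) as usize;
    let s2 = u16::from_le_bytes([jump_table[2], jump_table[3]]) as usize;
    let s3 = u16::from_le_bytes([jump_table[4], jump_table[5]]) as usize;
    let (j1, j2, j3) = (s1, s1 + s2, s1 + s2 + s3);
    [(0, j1), (j1, j2), (j2, j3), (j3, total)]
}

fn main() {
    // stream sizes 3, 4 and 5 in a 20-byte payload; stream 4 gets the rest
    assert_eq!(
        stream_bounds([3, 0, 4, 0, 5, 0], 20),
        [(0, 3), (3, 7), (7, 12), (12, 20)]
    );
}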
diff --git a/vendor/ruzstd/src/decoding/mod.rs b/vendor/ruzstd/src/decoding/mod.rs
new file mode 100644
index 000000000..b89df3510
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/mod.rs
@@ -0,0 +1,11 @@
+pub mod bit_reader;
+pub mod bit_reader_reverse;
+pub mod block_decoder;
+pub mod decodebuffer;
+pub mod dictionary;
+pub mod literals_section_decoder;
+mod ringbuffer;
+#[allow(dead_code)]
+pub mod scratch;
+pub mod sequence_execution;
+pub mod sequence_section_decoder;
diff --git a/vendor/ruzstd/src/decoding/ringbuffer.rs b/vendor/ruzstd/src/decoding/ringbuffer.rs
new file mode 100644
index 000000000..9e3e9ba5e
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/ringbuffer.rs
@@ -0,0 +1,632 @@
+use std::{alloc::Layout, ptr::NonNull, slice};
+
+pub struct RingBuffer {
+ buf: NonNull<u8>,
+ cap: usize,
+ head: usize,
+ tail: usize,
+}
+
+impl RingBuffer {
+ pub fn new() -> Self {
+ RingBuffer {
+ buf: NonNull::dangling(),
+ cap: 0,
+ head: 0,
+ tail: 0,
+ }
+ }
+
+ pub fn len(&self) -> usize {
+ let (x, y) = self.data_slice_lengths();
+ x + y
+ }
+
+ pub fn free(&self) -> usize {
+ let (x, y) = self.free_slice_lengths();
+ (x + y).saturating_sub(1)
+ }
+
+ pub fn clear(&mut self) {
+ self.head = 0;
+ self.tail = 0;
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.head == self.tail
+ }
+
+ pub fn reserve(&mut self, amount: usize) {
+ let free = self.free();
+ if free >= amount {
+ return;
+ }
+
+ self.reserve_amortized(amount - free);
+ }
+
+ #[inline(never)]
+ #[cold]
+ fn reserve_amortized(&mut self, amount: usize) {
+ // SAFETY: if we were successfully able to construct this layout when we allocated, then it's also valid to do so now
+ let current_layout = unsafe { Layout::array::<u8>(self.cap).unwrap_unchecked() };
+
+ // Always have at least 1 unused element as the sentinel.
+ let new_cap = usize::max(
+ self.cap.next_power_of_two(),
+ (self.cap + amount).next_power_of_two(),
+ ) + 1;
+
+ // Check that the capacity isn't bigger than isize::MAX, which is the max allowed by LLVM, or that
+ // we are on a >= 64 bit system which will never allow that much memory to be allocated
+ #[allow(clippy::assertions_on_constants)]
+ {
+ debug_assert!(usize::BITS >= 64 || new_cap < isize::MAX as usize);
+ }
+
+ let new_layout = Layout::array::<u8>(new_cap)
+ .unwrap_or_else(|_| panic!("Could not create layout for u8 array of size {}", new_cap));
+
+ // alloc the new memory region and panic if alloc fails
+ // TODO maybe rework this to generate an error?
+ let new_buf = unsafe {
+ let new_buf = std::alloc::alloc(new_layout);
+
+ NonNull::new(new_buf).expect("Allocating new space for the ringbuffer failed")
+ };
+
+ // If we had data before, copy it over to the newly alloced memory region
+ if self.cap > 0 {
+ let ((s1_ptr, s1_len), (s2_ptr, s2_len)) = self.data_slice_parts();
+
+ unsafe {
+ new_buf.as_ptr().copy_from_nonoverlapping(s1_ptr, s1_len);
+ new_buf
+ .as_ptr()
+ .add(s1_len)
+ .copy_from_nonoverlapping(s2_ptr, s2_len);
+ std::alloc::dealloc(self.buf.as_ptr(), current_layout);
+ }
+
+ self.tail = s1_len + s2_len;
+ self.head = 0;
+ }
+ self.buf = new_buf;
+ self.cap = new_cap;
+ }
+
+ #[allow(dead_code)]
+ pub fn push_back(&mut self, byte: u8) {
+ self.reserve(1);
+
+ unsafe { self.buf.as_ptr().add(self.tail).write(byte) };
+ self.tail = (self.tail + 1) % self.cap;
+ }
+
+ #[allow(dead_code)]
+ pub fn get(&self, idx: usize) -> Option<u8> {
+ if idx < self.len() {
+ let idx = (self.head + idx) % self.cap;
+ Some(unsafe { self.buf.as_ptr().add(idx).read() })
+ } else {
+ None
+ }
+ }
+
+ pub fn extend(&mut self, data: &[u8]) {
+ let len = data.len();
+ let ptr = data.as_ptr();
+ if len == 0 {
+ return;
+ }
+
+ self.reserve(len);
+
+ debug_assert!(self.len() + len <= self.cap - 1);
+ debug_assert!(self.free() >= len, "free: {} len: {}", self.free(), len);
+
+ let ((f1_ptr, f1_len), (f2_ptr, f2_len)) = self.free_slice_parts();
+ debug_assert!(f1_len + f2_len >= len, "{} + {} < {}", f1_len, f2_len, len);
+
+ let in_f1 = usize::min(len, f1_len);
+ let in_f2 = len - in_f1;
+
+ debug_assert!(in_f1 + in_f2 == len);
+
+ unsafe {
+ if in_f1 > 0 {
+ f1_ptr.copy_from_nonoverlapping(ptr, in_f1);
+ }
+ if in_f2 > 0 {
+ f2_ptr.copy_from_nonoverlapping(ptr.add(in_f1), in_f2);
+ }
+ }
+ self.tail = (self.tail + len) % self.cap;
+ }
+
+ pub fn drop_first_n(&mut self, amount: usize) {
+ debug_assert!(amount <= self.len());
+ let amount = usize::min(amount, self.len());
+ self.head = (self.head + amount) % self.cap;
+ }
+
+ fn data_slice_lengths(&self) -> (usize, usize) {
+ let len_after_head;
+ let len_to_tail;
+
+ // TODO can we do this branchless?
+ if self.tail >= self.head {
+ len_after_head = self.tail - self.head;
+ len_to_tail = 0;
+ } else {
+ len_after_head = self.cap - self.head;
+ len_to_tail = self.tail;
+ }
+ (len_after_head, len_to_tail)
+ }
+
+ fn data_slice_parts(&self) -> ((*const u8, usize), (*const u8, usize)) {
+ let (len_after_head, len_to_tail) = self.data_slice_lengths();
+
+ (
+ (unsafe { self.buf.as_ptr().add(self.head) }, len_after_head),
+ (self.buf.as_ptr(), len_to_tail),
+ )
+ }
+ pub fn as_slices(&self) -> (&[u8], &[u8]) {
+ let (s1, s2) = self.data_slice_parts();
+ unsafe {
+ let s1 = slice::from_raw_parts(s1.0, s1.1);
+ let s2 = slice::from_raw_parts(s2.0, s2.1);
+ (s1, s2)
+ }
+ }
+
+ fn free_slice_lengths(&self) -> (usize, usize) {
+ let len_to_head;
+ let len_after_tail;
+
+ // TODO can we do this branchless?
+ if self.tail < self.head {
+ len_after_tail = self.head - self.tail;
+ len_to_head = 0;
+ } else {
+ len_after_tail = self.cap - self.tail;
+ len_to_head = self.head;
+ }
+ (len_to_head, len_after_tail)
+ }
+
+ fn free_slice_parts(&self) -> ((*mut u8, usize), (*mut u8, usize)) {
+ let (len_to_head, len_after_tail) = self.free_slice_lengths();
+
+ (
+ (unsafe { self.buf.as_ptr().add(self.tail) }, len_after_tail),
+ (self.buf.as_ptr(), len_to_head),
+ )
+ }
+
+ #[allow(dead_code)]
+ pub fn extend_from_within(&mut self, start: usize, len: usize) {
+ if start + len > self.len() {
+ panic!(
+ "Calls to this functions must respect start ({}) + len ({}) <= self.len() ({})!",
+ start,
+ len,
+ self.len()
+ );
+ }
+
+ self.reserve(len);
+ unsafe { self.extend_from_within_unchecked(start, len) }
+ }
+
+ /// SAFETY:
+ /// Needs start + len <= self.len()
+ /// And more than len reserved space
+ #[warn(unsafe_op_in_unsafe_fn)]
+ pub unsafe fn extend_from_within_unchecked(&mut self, start: usize, len: usize) {
+ debug_assert!(!self.buf.as_ptr().is_null());
+
+ if self.head < self.tail {
+ // continuous data slice |____HDDDDDDDT_____|
+ let after_tail = usize::min(len, self.cap - self.tail);
+ unsafe {
+ self.buf
+ .as_ptr()
+ .add(self.tail)
+ .copy_from_nonoverlapping(self.buf.as_ptr().add(self.head + start), after_tail);
+ if after_tail < len {
+ self.buf.as_ptr().copy_from_nonoverlapping(
+ self.buf.as_ptr().add(self.head + start + after_tail),
+ len - after_tail,
+ );
+ }
+ }
+ } else {
+ // continuous free slice |DDDT_________HDDDD|
+ if self.head + start > self.cap {
+ let start = (self.head + start) % self.cap;
+ unsafe {
+ self.buf
+ .as_ptr()
+ .add(self.tail)
+ .copy_from_nonoverlapping(self.buf.as_ptr().add(start), len)
+ }
+ } else {
+ let after_start = usize::min(len, self.cap - self.head - start);
+ unsafe {
+ self.buf.as_ptr().add(self.tail).copy_from_nonoverlapping(
+ self.buf.as_ptr().add(self.head + start),
+ after_start,
+ );
+ if after_start < len {
+ self.buf
+ .as_ptr()
+ .add(self.tail + after_start)
+ .copy_from_nonoverlapping(self.buf.as_ptr(), len - after_start);
+ }
+ }
+ }
+ }
+
+ self.tail = (self.tail + len) % self.cap;
+ }
+
+ #[allow(dead_code)]
+ /// SAFETY:
+ /// Needs start + len <= self.len()
+ /// And more than len reserved space
+ pub unsafe fn extend_from_within_unchecked_branchless(&mut self, start: usize, len: usize) {
+ // data slices in raw parts
+ let ((s1_ptr, s1_len), (s2_ptr, s2_len)) = self.data_slice_parts();
+
+ debug_assert!(len <= s1_len + s2_len, "{} > {} + {}", len, s1_len, s2_len);
+
+ // calc the actually wanted slices in raw parts
+ let start_in_s1 = usize::min(s1_len, start);
+ let end_in_s1 = usize::min(s1_len, start + len);
+ let m1_ptr = s1_ptr.add(start_in_s1);
+ let m1_len = end_in_s1 - start_in_s1;
+
+ debug_assert!(end_in_s1 <= s1_len);
+ debug_assert!(start_in_s1 <= s1_len);
+
+ let start_in_s2 = start.saturating_sub(s1_len);
+ let end_in_s2 = start_in_s2 + (len - m1_len);
+ let m2_ptr = s2_ptr.add(start_in_s2);
+ let m2_len = end_in_s2 - start_in_s2;
+
+ debug_assert!(start_in_s2 <= s2_len);
+ debug_assert!(end_in_s2 <= s2_len);
+
+ debug_assert_eq!(len, m1_len + m2_len);
+
+ // the free slices, must hold: f1_len + f2_len >= m1_len + m2_len
+ let ((f1_ptr, f1_len), (f2_ptr, f2_len)) = self.free_slice_parts();
+
+ debug_assert!(f1_len + f2_len >= m1_len + m2_len);
+
+ // calc how many from where bytes go where
+ let m1_in_f1 = usize::min(m1_len, f1_len);
+ let m1_in_f2 = m1_len - m1_in_f1;
+ let m2_in_f1 = usize::min(f1_len - m1_in_f1, m2_len);
+ let m2_in_f2 = m2_len - m2_in_f1;
+
+ debug_assert_eq!(m1_len, m1_in_f1 + m1_in_f2);
+ debug_assert_eq!(m2_len, m2_in_f1 + m2_in_f2);
+ debug_assert!(f1_len >= m1_in_f1 + m2_in_f1);
+ debug_assert!(f2_len >= m1_in_f2 + m2_in_f2);
+ debug_assert_eq!(len, m1_in_f1 + m2_in_f1 + m1_in_f2 + m2_in_f2);
+
+ debug_assert!(self.buf.as_ptr().add(self.cap) > f1_ptr.add(m1_in_f1 + m2_in_f1));
+ debug_assert!(self.buf.as_ptr().add(self.cap) > f2_ptr.add(m1_in_f2 + m2_in_f2));
+
+ debug_assert!((m1_in_f2 > 0) ^ (m2_in_f1 > 0) || (m1_in_f2 == 0 && m2_in_f1 == 0));
+
+ copy_with_checks(
+ m1_ptr, m2_ptr, f1_ptr, f2_ptr, m1_in_f1, m2_in_f1, m1_in_f2, m2_in_f2,
+ );
+ self.tail = (self.tail + len) % self.cap;
+ }
+}
+
+impl Drop for RingBuffer {
+ fn drop(&mut self) {
+ if self.cap == 0 {
+ return;
+ }
+
+ // SAFETY: if we were successfully able to construct this layout when we allocated, then it's also valid to do so now
+ let current_layout = unsafe { Layout::array::<u8>(self.cap).unwrap_unchecked() };
+
+ unsafe {
+ std::alloc::dealloc(self.buf.as_ptr(), current_layout);
+ }
+ }
+}
+
+#[allow(dead_code)]
+#[inline(always)]
+#[allow(clippy::too_many_arguments)]
+unsafe fn copy_without_checks(
+ m1_ptr: *const u8,
+ m2_ptr: *const u8,
+ f1_ptr: *mut u8,
+ f2_ptr: *mut u8,
+ m1_in_f1: usize,
+ m2_in_f1: usize,
+ m1_in_f2: usize,
+ m2_in_f2: usize,
+) {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ f1_ptr
+ .add(m1_in_f1)
+ .copy_from_nonoverlapping(m2_ptr, m2_in_f1);
+
+ f2_ptr.copy_from_nonoverlapping(m1_ptr.add(m1_in_f1), m1_in_f2);
+ f2_ptr
+ .add(m1_in_f2)
+ .copy_from_nonoverlapping(m2_ptr.add(m2_in_f1), m2_in_f2);
+}
+
+#[allow(dead_code)]
+#[inline(always)]
+#[allow(clippy::too_many_arguments)]
+unsafe fn copy_with_checks(
+ m1_ptr: *const u8,
+ m2_ptr: *const u8,
+ f1_ptr: *mut u8,
+ f2_ptr: *mut u8,
+ m1_in_f1: usize,
+ m2_in_f1: usize,
+ m1_in_f2: usize,
+ m2_in_f2: usize,
+) {
+ if m1_in_f1 != 0 {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ }
+ if m2_in_f1 != 0 {
+ f1_ptr
+ .add(m1_in_f1)
+ .copy_from_nonoverlapping(m2_ptr, m2_in_f1);
+ }
+
+ if m1_in_f2 != 0 {
+ f2_ptr.copy_from_nonoverlapping(m1_ptr.add(m1_in_f1), m1_in_f2);
+ }
+ if m2_in_f2 != 0 {
+ f2_ptr
+ .add(m1_in_f2)
+ .copy_from_nonoverlapping(m2_ptr.add(m2_in_f1), m2_in_f2);
+ }
+}
+
+#[allow(dead_code)]
+#[inline(always)]
+#[allow(clippy::too_many_arguments)]
+unsafe fn copy_with_nobranch_check(
+ m1_ptr: *const u8,
+ m2_ptr: *const u8,
+ f1_ptr: *mut u8,
+ f2_ptr: *mut u8,
+ m1_in_f1: usize,
+ m2_in_f1: usize,
+ m1_in_f2: usize,
+ m2_in_f2: usize,
+) {
+ let case = (m1_in_f1 > 0) as usize
+ | (((m2_in_f1 > 0) as usize) << 1)
+ | (((m1_in_f2 > 0) as usize) << 2)
+ | (((m2_in_f2 > 0) as usize) << 3);
+
+ match case {
+ 0 => {}
+
+ // one bit set
+ 1 => {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ }
+ 2 => {
+ f1_ptr.copy_from_nonoverlapping(m2_ptr, m2_in_f1);
+ }
+ 4 => {
+ f2_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f2);
+ }
+ 8 => {
+ f2_ptr.copy_from_nonoverlapping(m2_ptr, m2_in_f2);
+ }
+
+ // two bit set
+ 3 => {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ f1_ptr
+ .add(m1_in_f1)
+ .copy_from_nonoverlapping(m2_ptr, m2_in_f1);
+ }
+ 5 => {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ f2_ptr.copy_from_nonoverlapping(m1_ptr.add(m1_in_f1), m1_in_f2);
+ }
+ 6 => std::hint::unreachable_unchecked(),
+ 7 => std::hint::unreachable_unchecked(),
+ 9 => {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ f2_ptr.copy_from_nonoverlapping(m2_ptr, m2_in_f2);
+ }
+ 10 => {
+ f1_ptr.copy_from_nonoverlapping(m2_ptr, m2_in_f1);
+ f2_ptr.copy_from_nonoverlapping(m2_ptr.add(m2_in_f1), m2_in_f2);
+ }
+ 12 => {
+ f2_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f2);
+ f2_ptr
+ .add(m1_in_f2)
+ .copy_from_nonoverlapping(m2_ptr, m2_in_f2);
+ }
+
+ // three bit set
+ 11 => {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ f1_ptr
+ .add(m1_in_f1)
+ .copy_from_nonoverlapping(m2_ptr, m2_in_f1);
+ f2_ptr.copy_from_nonoverlapping(m2_ptr.add(m2_in_f1), m2_in_f2);
+ }
+ 13 => {
+ f1_ptr.copy_from_nonoverlapping(m1_ptr, m1_in_f1);
+ f2_ptr.copy_from_nonoverlapping(m1_ptr.add(m1_in_f1), m1_in_f2);
+ f2_ptr
+ .add(m1_in_f2)
+ .copy_from_nonoverlapping(m2_ptr, m2_in_f2);
+ }
+ 14 => std::hint::unreachable_unchecked(),
+ 15 => std::hint::unreachable_unchecked(),
+ _ => std::hint::unreachable_unchecked(),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::RingBuffer;
+
+ #[test]
+ fn smoke() {
+ let mut rb = RingBuffer::new();
+
+ rb.reserve(15);
+ assert_eq!(17, rb.cap);
+
+ rb.extend(b"0123456789");
+ assert_eq!(rb.len(), 10);
+ assert_eq!(rb.as_slices().0, b"0123456789");
+ assert_eq!(rb.as_slices().1, b"");
+
+ rb.drop_first_n(5);
+ assert_eq!(rb.len(), 5);
+ assert_eq!(rb.as_slices().0, b"56789");
+ assert_eq!(rb.as_slices().1, b"");
+
+ rb.extend_from_within(2, 3);
+ assert_eq!(rb.len(), 8);
+ assert_eq!(rb.as_slices().0, b"56789789");
+ assert_eq!(rb.as_slices().1, b"");
+
+ rb.extend_from_within(0, 3);
+ assert_eq!(rb.len(), 11);
+ assert_eq!(rb.as_slices().0, b"56789789567");
+ assert_eq!(rb.as_slices().1, b"");
+
+ rb.extend_from_within(0, 2);
+ assert_eq!(rb.len(), 13);
+ assert_eq!(rb.as_slices().0, b"567897895675");
+ assert_eq!(rb.as_slices().1, b"6");
+
+ rb.drop_first_n(11);
+ assert_eq!(rb.len(), 2);
+ assert_eq!(rb.as_slices().0, b"5");
+ assert_eq!(rb.as_slices().1, b"6");
+
+ rb.extend(b"0123456789");
+ assert_eq!(rb.len(), 12);
+ assert_eq!(rb.as_slices().0, b"5");
+ assert_eq!(rb.as_slices().1, b"60123456789");
+
+ rb.drop_first_n(11);
+ assert_eq!(rb.len(), 1);
+ assert_eq!(rb.as_slices().0, b"9");
+ assert_eq!(rb.as_slices().1, b"");
+
+ rb.extend(b"0123456789");
+ assert_eq!(rb.len(), 11);
+ assert_eq!(rb.as_slices().0, b"9012345");
+ assert_eq!(rb.as_slices().1, b"6789");
+ }
+
+ #[test]
+ fn edge_cases() {
+        // Fill exactly, then empty, then fill again
+ let mut rb = RingBuffer::new();
+ rb.reserve(16);
+ assert_eq!(17, rb.cap);
+ rb.extend(b"0123456789012345");
+ assert_eq!(17, rb.cap);
+ assert_eq!(16, rb.len());
+ assert_eq!(0, rb.free());
+ rb.drop_first_n(16);
+ assert_eq!(0, rb.len());
+ assert_eq!(16, rb.free());
+ rb.extend(b"0123456789012345");
+ assert_eq!(16, rb.len());
+ assert_eq!(0, rb.free());
+ assert_eq!(17, rb.cap);
+ assert_eq!(1, rb.as_slices().0.len());
+ assert_eq!(15, rb.as_slices().1.len());
+
+ rb.clear();
+
+ // data in both slices and then reserve
+ rb.extend(b"0123456789012345");
+ rb.drop_first_n(8);
+ rb.extend(b"67890123");
+ assert_eq!(16, rb.len());
+ assert_eq!(0, rb.free());
+ assert_eq!(17, rb.cap);
+ assert_eq!(9, rb.as_slices().0.len());
+ assert_eq!(7, rb.as_slices().1.len());
+ rb.reserve(1);
+ assert_eq!(16, rb.len());
+ assert_eq!(16, rb.free());
+ assert_eq!(33, rb.cap);
+ assert_eq!(16, rb.as_slices().0.len());
+ assert_eq!(0, rb.as_slices().1.len());
+
+ rb.clear();
+
+ // fill exactly, then extend from within
+ rb.extend(b"0123456789012345");
+ rb.extend_from_within(0, 16);
+ assert_eq!(32, rb.len());
+ assert_eq!(0, rb.free());
+ assert_eq!(33, rb.cap);
+ assert_eq!(32, rb.as_slices().0.len());
+ assert_eq!(0, rb.as_slices().1.len());
+
+ // extend from within cases
+ let mut rb = RingBuffer::new();
+ rb.reserve(8);
+ rb.extend(b"01234567");
+ rb.drop_first_n(5);
+ rb.extend_from_within(0, 3);
+ assert_eq!(4, rb.as_slices().0.len());
+ assert_eq!(2, rb.as_slices().1.len());
+
+ rb.drop_first_n(2);
+ assert_eq!(2, rb.as_slices().0.len());
+ assert_eq!(2, rb.as_slices().1.len());
+ rb.extend_from_within(0, 4);
+ assert_eq!(2, rb.as_slices().0.len());
+ assert_eq!(6, rb.as_slices().1.len());
+
+ rb.drop_first_n(2);
+ assert_eq!(6, rb.as_slices().0.len());
+ assert_eq!(0, rb.as_slices().1.len());
+ rb.drop_first_n(2);
+ assert_eq!(4, rb.as_slices().0.len());
+ assert_eq!(0, rb.as_slices().1.len());
+ rb.extend_from_within(0, 4);
+ assert_eq!(7, rb.as_slices().0.len());
+ assert_eq!(1, rb.as_slices().1.len());
+
+ let mut rb = RingBuffer::new();
+ rb.reserve(8);
+ rb.extend(b"11111111");
+ rb.drop_first_n(7);
+ rb.extend(b"111");
+ assert_eq!(2, rb.as_slices().0.len());
+ assert_eq!(2, rb.as_slices().1.len());
+ rb.extend_from_within(0, 4);
+ assert_eq!(b"11", rb.as_slices().0);
+ assert_eq!(b"111111", rb.as_slices().1);
+ }
+}
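// A sketch, editorial and not from the vendored source: the four-region copy that
// copy_without_checks / copy_with_checks / copy_with_nobranch_check above perform with
// raw pointers, redone with safe slices. A source range that spans the ring buffer's
// two memory slices (m1, m2) is copied into a free area that may itself span two
// regions (f1, f2), so at most four disjoint copies happen. Assumes, as the callers
// must guarantee, that m1.len() + m2.len() == f1.len() + f2.len().
fn copy_four_regions_sketch(m1: &[u8], m2: &[u8], f1: &mut [u8], f2: &mut [u8]) {
    assert_eq!(m1.len() + m2.len(), f1.len() + f2.len());

    // How much of m1 and m2 lands in f1; the remainder spills over into f2.
    let m1_in_f1 = m1.len().min(f1.len());
    let m2_in_f1 = f1.len() - m1_in_f1;
    let m1_in_f2 = m1.len() - m1_in_f1;

    f1[..m1_in_f1].copy_from_slice(&m1[..m1_in_f1]);
    f1[m1_in_f1..].copy_from_slice(&m2[..m2_in_f1]);
    f2[..m1_in_f2].copy_from_slice(&m1[m1_in_f1..]);
    f2[m1_in_f2..].copy_from_slice(&m2[m2_in_f1..]);
}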
diff --git a/vendor/ruzstd/src/decoding/scratch.rs b/vendor/ruzstd/src/decoding/scratch.rs
new file mode 100644
index 000000000..2bd753bde
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/scratch.rs
@@ -0,0 +1,128 @@
+use super::super::blocks::sequence_section::Sequence;
+use super::decodebuffer::Decodebuffer;
+use crate::decoding::dictionary::Dictionary;
+use crate::fse::FSETable;
+use crate::huff0::HuffmanTable;
+
+pub struct DecoderScratch {
+ pub huf: HuffmanScratch,
+ pub fse: FSEScratch,
+ pub buffer: Decodebuffer,
+ pub offset_hist: [u32; 3],
+
+ pub literals_buffer: Vec<u8>,
+ pub sequences: Vec<Sequence>,
+ pub block_content_buffer: Vec<u8>,
+}
+
+impl DecoderScratch {
+ pub fn new(window_size: usize) -> DecoderScratch {
+ DecoderScratch {
+ huf: HuffmanScratch {
+ table: HuffmanTable::new(),
+ },
+ fse: FSEScratch {
+ offsets: FSETable::new(),
+ of_rle: None,
+ literal_lengths: FSETable::new(),
+ ll_rle: None,
+ match_lengths: FSETable::new(),
+ ml_rle: None,
+ },
+ buffer: Decodebuffer::new(window_size),
+ offset_hist: [1, 4, 8],
+
+ block_content_buffer: Vec::new(),
+ literals_buffer: Vec::new(),
+ sequences: Vec::new(),
+ }
+ }
+
+ pub fn reset(&mut self, window_size: usize) {
+ self.offset_hist = [1, 4, 8];
+ self.literals_buffer.clear();
+ self.sequences.clear();
+ self.block_content_buffer.clear();
+
+ self.buffer.reset(window_size);
+
+ self.fse.literal_lengths.reset();
+ self.fse.match_lengths.reset();
+ self.fse.offsets.reset();
+ self.fse.ll_rle = None;
+ self.fse.ml_rle = None;
+ self.fse.of_rle = None;
+
+ self.huf.table.reset();
+ }
+
+ pub fn use_dict(&mut self, dict: &Dictionary) {
+ self.fse = dict.fse.clone();
+ self.huf = dict.huf.clone();
+ self.offset_hist = dict.offset_hist;
+ self.buffer.dict_content = dict.dict_content.clone();
+ }
+
+    /// Parses the dictionary and sets the tables accordingly.
+    /// Returns the dict_id so it can be checked against the frame's dict_id.
+ pub fn load_dict(
+ &mut self,
+ raw: &[u8],
+ ) -> Result<u32, super::dictionary::DictionaryDecodeError> {
+ let dict = super::dictionary::Dictionary::decode_dict(raw)?;
+
+ self.huf = dict.huf.clone();
+ self.fse = dict.fse.clone();
+ self.offset_hist = dict.offset_hist;
+ self.buffer.dict_content = dict.dict_content.clone();
+ Ok(dict.id)
+ }
+}
+
+#[derive(Clone)]
+pub struct HuffmanScratch {
+ pub table: HuffmanTable,
+}
+
+impl HuffmanScratch {
+ pub fn new() -> HuffmanScratch {
+ HuffmanScratch {
+ table: HuffmanTable::new(),
+ }
+ }
+}
+
+impl Default for HuffmanScratch {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+#[derive(Clone)]
+pub struct FSEScratch {
+ pub offsets: FSETable,
+ pub of_rle: Option<u8>,
+ pub literal_lengths: FSETable,
+ pub ll_rle: Option<u8>,
+ pub match_lengths: FSETable,
+ pub ml_rle: Option<u8>,
+}
+
+impl FSEScratch {
+ pub fn new() -> FSEScratch {
+ FSEScratch {
+ offsets: FSETable::new(),
+ of_rle: None,
+ literal_lengths: FSETable::new(),
+ ll_rle: None,
+ match_lengths: FSETable::new(),
+ ml_rle: None,
+ }
+ }
+}
+
+impl Default for FSEScratch {
+ fn default() -> Self {
+ Self::new()
+ }
+}
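// A usage sketch, editorial and not from the vendored source: how a frame decoder
// might drive DecoderScratch across frames. The module path and window size are
// assumptions for illustration; the real call sites live in the frame decoder.
fn decoder_scratch_lifecycle_sketch() {
    use crate::decoding::scratch::DecoderScratch; // path assumed from this diff's layout

    let window_size: usize = 1 << 20; // placeholder; normally read from the frame header
    let mut scratch = DecoderScratch::new(window_size);

    // ... decode the blocks of one frame, filling scratch.buffer ...

    // Reuse the allocations for the next frame: tables, buffers, and the
    // offset history (back to [1, 4, 8]) are reset to their initial state.
    scratch.reset(window_size);
}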
diff --git a/vendor/ruzstd/src/decoding/sequence_execution.rs b/vendor/ruzstd/src/decoding/sequence_execution.rs
new file mode 100644
index 000000000..19247ec62
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/sequence_execution.rs
@@ -0,0 +1,123 @@
+use super::{decodebuffer::DecodebufferError, scratch::DecoderScratch};
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum ExecuteSequencesError {
+ #[error(transparent)]
+ DecodebufferError(#[from] DecodebufferError),
+ #[error("Sequence wants to copy up to byte {wanted}. Bytes in literalsbuffer: {have}")]
+ NotEnoughBytesForSequence { wanted: usize, have: usize },
+ #[error("Illegal offset: 0 found")]
+ ZeroOffset,
+}
+
+pub fn execute_sequences(scratch: &mut DecoderScratch) -> Result<(), ExecuteSequencesError> {
+ let mut literals_copy_counter = 0;
+ let old_buffer_size = scratch.buffer.len();
+ let mut seq_sum = 0;
+
+ for idx in 0..scratch.sequences.len() {
+ let seq = scratch.sequences[idx];
+        if crate::VERBOSE {
+            //println!("{}: {}", idx, seq);
+        }
+
+ if seq.ll > 0 {
+ let high = literals_copy_counter + seq.ll as usize;
+ if high > scratch.literals_buffer.len() {
+ return Err(ExecuteSequencesError::NotEnoughBytesForSequence {
+ wanted: high,
+ have: scratch.literals_buffer.len(),
+ });
+ }
+ let literals = &scratch.literals_buffer[literals_copy_counter..high];
+ literals_copy_counter += seq.ll as usize;
+
+ scratch.buffer.push(literals);
+ }
+
+ let actual_offset = do_offset_history(seq.of, seq.ll, &mut scratch.offset_hist);
+ if actual_offset == 0 {
+ return Err(ExecuteSequencesError::ZeroOffset);
+ }
+ if seq.ml > 0 {
+ scratch
+ .buffer
+ .repeat(actual_offset as usize, seq.ml as usize)?;
+ }
+
+ seq_sum += seq.ml;
+ seq_sum += seq.ll;
+ }
+ if literals_copy_counter < scratch.literals_buffer.len() {
+ let rest_literals = &scratch.literals_buffer[literals_copy_counter..];
+ scratch.buffer.push(rest_literals);
+ seq_sum += rest_literals.len() as u32;
+ }
+
+ let diff = scratch.buffer.len() - old_buffer_size;
+ assert!(
+ seq_sum as usize == diff,
+ "Seq_sum: {} is different from the difference in buffersize: {}",
+ seq_sum,
+ diff
+ );
+ Ok(())
+}
+
+fn do_offset_history(offset_value: u32, lit_len: u32, scratch: &mut [u32; 3]) -> u32 {
+ let actual_offset = if lit_len > 0 {
+ match offset_value {
+ 1..=3 => scratch[offset_value as usize - 1],
+ _ => {
+ //new offset
+ offset_value - 3
+ }
+ }
+ } else {
+ match offset_value {
+ 1..=2 => scratch[offset_value as usize],
+ 3 => scratch[0] - 1,
+ _ => {
+ //new offset
+ offset_value - 3
+ }
+ }
+ };
+
+ //update history
+ if lit_len > 0 {
+ match offset_value {
+ 1 => {
+ //nothing
+ }
+ 2 => {
+ scratch[1] = scratch[0];
+ scratch[0] = actual_offset;
+ }
+ _ => {
+ scratch[2] = scratch[1];
+ scratch[1] = scratch[0];
+ scratch[0] = actual_offset;
+ }
+ }
+    } else {
+        match offset_value {
+            1 => {
+                scratch[1] = scratch[0];
+                scratch[0] = actual_offset;
+            }
+            // offset values 2 and above all rotate the full history
+            _ => {
+                scratch[2] = scratch[1];
+                scratch[1] = scratch[0];
+                scratch[0] = actual_offset;
+            }
+        }
+    }
+
+ actual_offset
+}
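// Worked examples, editorial and not from the vendored source, for do_offset_history
// above, written as if they lived in this module (the function is private). They follow
// the zstd repeat-offset rules: with literals present, offset values 1..=3 select the
// three history entries; without literals the indices shift by one and offset value 3
// means "most recent offset minus one"; values above 3 encode a fresh offset of
// offset_value - 3.
#[test]
fn offset_history_worked_examples() {
    let mut hist = [1u32, 4, 8]; // fresh history, as initialized in DecoderScratch::new

    // lit_len > 0, offset_value 2: second repeat offset (4), which then moves to the front.
    assert_eq!(do_offset_history(2, 5, &mut hist), 4);
    assert_eq!(hist, [4, 1, 8]);

    // lit_len == 0, offset_value 1: the indices shift, so this reads hist[1] (1),
    // which is then promoted to the front.
    assert_eq!(do_offset_history(1, 0, &mut hist), 1);
    assert_eq!(hist, [1, 4, 8]);

    // offset_value > 3: a brand-new offset of offset_value - 3, pushed onto the history.
    assert_eq!(do_offset_history(10, 5, &mut hist), 7);
    assert_eq!(hist, [7, 1, 4]);
}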
diff --git a/vendor/ruzstd/src/decoding/sequence_section_decoder.rs b/vendor/ruzstd/src/decoding/sequence_section_decoder.rs
new file mode 100644
index 000000000..3d5990c05
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/sequence_section_decoder.rs
@@ -0,0 +1,495 @@
+use super::super::blocks::sequence_section::ModeType;
+use super::super::blocks::sequence_section::Sequence;
+use super::super::blocks::sequence_section::SequencesHeader;
+use super::bit_reader_reverse::{BitReaderReversed, GetBitsError};
+use super::scratch::FSEScratch;
+use crate::fse::{FSEDecoder, FSEDecoderError, FSETableError};
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecodeSequenceError {
+ #[error(transparent)]
+ GetBitsError(#[from] GetBitsError),
+ #[error(transparent)]
+ FSEDecoderError(#[from] FSEDecoderError),
+ #[error(transparent)]
+ FSETableError(#[from] FSETableError),
+ #[error("Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption")]
+ ExtraPadding { skipped_bits: i32 },
+ #[error("Do not support offsets bigger than 1<<32; got: {offset_code}")]
+ UnsupportedOffset { offset_code: u8 },
+ #[error("Read an offset == 0. That is an illegal value for offsets")]
+ ZeroOffset,
+ #[error("Bytestream did not contain enough bytes to decode num_sequences")]
+ NotEnoughBytesForNumSequences,
+ #[error("Did not use full bitstream. Bits left: {bits_remaining} ({} bytes)", bits_remaining / 8)]
+ ExtraBits { bits_remaining: isize },
+ #[error("compression modes are none but they must be set to something")]
+ MissingCompressionMode,
+ #[error("Need a byte to read for RLE ll table")]
+ MissingByteForRleLlTable,
+ #[error("Need a byte to read for RLE of table")]
+ MissingByteForRleOfTable,
+ #[error("Need a byte to read for RLE ml table")]
+ MissingByteForRleMlTable,
+}
+
+pub fn decode_sequences(
+ section: &SequencesHeader,
+ source: &[u8],
+ scratch: &mut FSEScratch,
+ target: &mut Vec<Sequence>,
+) -> Result<(), DecodeSequenceError> {
+ let bytes_read = maybe_update_fse_tables(section, source, scratch)?;
+
+ if crate::VERBOSE {
+ println!("Updating tables used {} bytes", bytes_read);
+ }
+
+ let bit_stream = &source[bytes_read..];
+
+ let mut br = BitReaderReversed::new(bit_stream);
+
+ //skip the 0 padding at the end of the last byte of the bit stream and throw away the first 1 found
+ let mut skipped_bits = 0;
+ loop {
+ let val = br.get_bits(1)?;
+ skipped_bits += 1;
+ if val == 1 || skipped_bits > 8 {
+ break;
+ }
+ }
+ if skipped_bits > 8 {
+ //if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
+ return Err(DecodeSequenceError::ExtraPadding { skipped_bits });
+ }
+
+ if scratch.ll_rle.is_some() || scratch.ml_rle.is_some() || scratch.of_rle.is_some() {
+ decode_sequences_with_rle(section, &mut br, scratch, target)
+ } else {
+ decode_sequences_without_rle(section, &mut br, scratch, target)
+ }
+}
+
+fn decode_sequences_with_rle(
+ section: &SequencesHeader,
+ br: &mut BitReaderReversed<'_>,
+ scratch: &mut FSEScratch,
+ target: &mut Vec<Sequence>,
+) -> Result<(), DecodeSequenceError> {
+ let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths);
+ let mut ml_dec = FSEDecoder::new(&scratch.match_lengths);
+ let mut of_dec = FSEDecoder::new(&scratch.offsets);
+
+ if scratch.ll_rle.is_none() {
+ ll_dec.init_state(br)?;
+ }
+ if scratch.of_rle.is_none() {
+ of_dec.init_state(br)?;
+ }
+ if scratch.ml_rle.is_none() {
+ ml_dec.init_state(br)?;
+ }
+
+ target.clear();
+ target.reserve(section.num_sequences as usize);
+
+ for _seq_idx in 0..section.num_sequences {
+ //get the codes from either the RLE byte or from the decoder
+        let ll_code = if let Some(byte) = scratch.ll_rle {
+            byte
+        } else {
+            ll_dec.decode_symbol()
+        };
+        let ml_code = if let Some(byte) = scratch.ml_rle {
+            byte
+        } else {
+            ml_dec.decode_symbol()
+        };
+        let of_code = if let Some(byte) = scratch.of_rle {
+            byte
+        } else {
+            of_dec.decode_symbol()
+        };
+
+ let (ll_value, ll_num_bits) = lookup_ll_code(ll_code);
+ let (ml_value, ml_num_bits) = lookup_ml_code(ml_code);
+
+ //println!("Sequence: {}", i);
+ //println!("of stat: {}", of_dec.state);
+ //println!("of Code: {}", of_code);
+ //println!("ll stat: {}", ll_dec.state);
+ //println!("ll bits: {}", ll_num_bits);
+ //println!("ll Code: {}", ll_value);
+ //println!("ml stat: {}", ml_dec.state);
+ //println!("ml bits: {}", ml_num_bits);
+ //println!("ml Code: {}", ml_value);
+ //println!("");
+
+ if of_code >= 32 {
+ return Err(DecodeSequenceError::UnsupportedOffset {
+ offset_code: of_code,
+ });
+ }
+
+ let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits)?;
+ let offset = obits as u32 + (1u32 << of_code);
+
+ if offset == 0 {
+ return Err(DecodeSequenceError::ZeroOffset);
+ }
+
+ target.push(Sequence {
+ ll: ll_value + ll_add as u32,
+ ml: ml_value + ml_add as u32,
+ of: offset,
+ });
+
+ if target.len() < section.num_sequences as usize {
+ //println!(
+ // "Bits left: {} ({} bytes)",
+ // br.bits_remaining(),
+ // br.bits_remaining() / 8,
+ //);
+ if scratch.ll_rle.is_none() {
+ ll_dec.update_state(br)?;
+ }
+ if scratch.ml_rle.is_none() {
+ ml_dec.update_state(br)?;
+ }
+ if scratch.of_rle.is_none() {
+ of_dec.update_state(br)?;
+ }
+ }
+
+ if br.bits_remaining() < 0 {
+ return Err(DecodeSequenceError::NotEnoughBytesForNumSequences);
+ }
+ }
+
+ if br.bits_remaining() > 0 {
+ Err(DecodeSequenceError::ExtraBits {
+ bits_remaining: br.bits_remaining(),
+ })
+ } else {
+ Ok(())
+ }
+}
+
+fn decode_sequences_without_rle(
+ section: &SequencesHeader,
+ br: &mut BitReaderReversed<'_>,
+ scratch: &mut FSEScratch,
+ target: &mut Vec<Sequence>,
+) -> Result<(), DecodeSequenceError> {
+ let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths);
+ let mut ml_dec = FSEDecoder::new(&scratch.match_lengths);
+ let mut of_dec = FSEDecoder::new(&scratch.offsets);
+
+ ll_dec.init_state(br)?;
+ of_dec.init_state(br)?;
+ ml_dec.init_state(br)?;
+
+ target.clear();
+ target.reserve(section.num_sequences as usize);
+
+ for _seq_idx in 0..section.num_sequences {
+ let ll_code = ll_dec.decode_symbol();
+ let ml_code = ml_dec.decode_symbol();
+ let of_code = of_dec.decode_symbol();
+
+ let (ll_value, ll_num_bits) = lookup_ll_code(ll_code);
+ let (ml_value, ml_num_bits) = lookup_ml_code(ml_code);
+
+ if of_code >= 32 {
+ return Err(DecodeSequenceError::UnsupportedOffset {
+ offset_code: of_code,
+ });
+ }
+
+ let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits)?;
+ let offset = obits as u32 + (1u32 << of_code);
+
+ if offset == 0 {
+ return Err(DecodeSequenceError::ZeroOffset);
+ }
+
+ target.push(Sequence {
+ ll: ll_value + ll_add as u32,
+ ml: ml_value + ml_add as u32,
+ of: offset,
+ });
+
+ if target.len() < section.num_sequences as usize {
+ //println!(
+ // "Bits left: {} ({} bytes)",
+ // br.bits_remaining(),
+ // br.bits_remaining() / 8,
+ //);
+ ll_dec.update_state(br)?;
+ ml_dec.update_state(br)?;
+ of_dec.update_state(br)?;
+ }
+
+ if br.bits_remaining() < 0 {
+ return Err(DecodeSequenceError::NotEnoughBytesForNumSequences);
+ }
+ }
+
+ if br.bits_remaining() > 0 {
+ Err(DecodeSequenceError::ExtraBits {
+ bits_remaining: br.bits_remaining(),
+ })
+ } else {
+ Ok(())
+ }
+}
+
+fn lookup_ll_code(code: u8) -> (u32, u8) {
+ match code {
+ 0..=15 => (u32::from(code), 0),
+ 16 => (16, 1),
+ 17 => (18, 1),
+ 18 => (20, 1),
+ 19 => (22, 1),
+ 20 => (24, 2),
+ 21 => (28, 2),
+ 22 => (32, 3),
+ 23 => (40, 3),
+ 24 => (48, 4),
+ 25 => (64, 6),
+ 26 => (128, 7),
+ 27 => (256, 8),
+ 28 => (512, 9),
+ 29 => (1024, 10),
+ 30 => (2048, 11),
+ 31 => (4096, 12),
+ 32 => (8192, 13),
+ 33 => (16384, 14),
+ 34 => (32768, 15),
+ 35 => (65536, 16),
+ _ => (0, 255),
+ }
+}
+
+fn lookup_ml_code(code: u8) -> (u32, u8) {
+ match code {
+ 0..=31 => (u32::from(code) + 3, 0),
+ 32 => (35, 1),
+ 33 => (37, 1),
+ 34 => (39, 1),
+ 35 => (41, 1),
+ 36 => (43, 2),
+ 37 => (47, 2),
+ 38 => (51, 3),
+ 39 => (59, 3),
+ 40 => (67, 4),
+ 41 => (83, 4),
+ 42 => (99, 5),
+ 43 => (131, 7),
+ 44 => (259, 8),
+ 45 => (515, 9),
+ 46 => (1027, 10),
+ 47 => (2051, 11),
+ 48 => (4099, 12),
+ 49 => (8195, 13),
+ 50 => (16387, 14),
+ 51 => (32771, 15),
+ 52 => (65539, 16),
+ _ => (0, 255),
+ }
+}
+
+pub const LL_MAX_LOG: u8 = 9;
+pub const ML_MAX_LOG: u8 = 9;
+pub const OF_MAX_LOG: u8 = 8;
+
+fn maybe_update_fse_tables(
+ section: &SequencesHeader,
+ source: &[u8],
+ scratch: &mut FSEScratch,
+) -> Result<usize, DecodeSequenceError> {
+ let modes = section
+ .modes
+ .ok_or(DecodeSequenceError::MissingCompressionMode)?;
+
+ let mut bytes_read = 0;
+
+ match modes.ll_mode() {
+ ModeType::FSECompressed => {
+ let bytes = scratch.literal_lengths.build_decoder(source, LL_MAX_LOG)?;
+ bytes_read += bytes;
+ if crate::VERBOSE {
+ println!("Updating ll table");
+ println!("Used bytes: {}", bytes);
+ }
+ scratch.ll_rle = None;
+ }
+ ModeType::RLE => {
+ if crate::VERBOSE {
+ println!("Use RLE ll table");
+ }
+ if source.is_empty() {
+ return Err(DecodeSequenceError::MissingByteForRleLlTable);
+ }
+ bytes_read += 1;
+ scratch.ll_rle = Some(source[0]);
+ }
+ ModeType::Predefined => {
+ if crate::VERBOSE {
+ println!("Use predefined ll table");
+ }
+ scratch.literal_lengths.build_from_probabilities(
+ LL_DEFAULT_ACC_LOG,
+ &Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]),
+ )?;
+ scratch.ll_rle = None;
+ }
+ ModeType::Repeat => {
+ if crate::VERBOSE {
+ println!("Repeat ll table");
+ }
+ /* Nothing to do */
+ }
+ };
+
+ let of_source = &source[bytes_read..];
+
+ match modes.of_mode() {
+ ModeType::FSECompressed => {
+ let bytes = scratch.offsets.build_decoder(of_source, OF_MAX_LOG)?;
+ if crate::VERBOSE {
+ println!("Updating of table");
+ println!("Used bytes: {}", bytes);
+ }
+ bytes_read += bytes;
+ scratch.of_rle = None;
+ }
+ ModeType::RLE => {
+ if crate::VERBOSE {
+ println!("Use RLE of table");
+ }
+ if of_source.is_empty() {
+ return Err(DecodeSequenceError::MissingByteForRleOfTable);
+ }
+ bytes_read += 1;
+ scratch.of_rle = Some(of_source[0]);
+ }
+ ModeType::Predefined => {
+ if crate::VERBOSE {
+ println!("Use predefined of table");
+ }
+ scratch.offsets.build_from_probabilities(
+ OF_DEFAULT_ACC_LOG,
+ &Vec::from(&OFFSET_DEFAULT_DISTRIBUTION[..]),
+ )?;
+ scratch.of_rle = None;
+ }
+ ModeType::Repeat => {
+ if crate::VERBOSE {
+ println!("Repeat of table");
+ }
+ /* Nothing to do */
+ }
+ };
+
+ let ml_source = &source[bytes_read..];
+
+ match modes.ml_mode() {
+ ModeType::FSECompressed => {
+ let bytes = scratch.match_lengths.build_decoder(ml_source, ML_MAX_LOG)?;
+ bytes_read += bytes;
+ if crate::VERBOSE {
+ println!("Updating ml table");
+ println!("Used bytes: {}", bytes);
+ }
+ scratch.ml_rle = None;
+ }
+ ModeType::RLE => {
+ if crate::VERBOSE {
+ println!("Use RLE ml table");
+ }
+ if ml_source.is_empty() {
+ return Err(DecodeSequenceError::MissingByteForRleMlTable);
+ }
+ bytes_read += 1;
+ scratch.ml_rle = Some(ml_source[0]);
+ }
+ ModeType::Predefined => {
+ if crate::VERBOSE {
+ println!("Use predefined ml table");
+ }
+ scratch.match_lengths.build_from_probabilities(
+ ML_DEFAULT_ACC_LOG,
+ &Vec::from(&MATCH_LENGTH_DEFAULT_DISTRIBUTION[..]),
+ )?;
+ scratch.ml_rle = None;
+ }
+ ModeType::Repeat => {
+ if crate::VERBOSE {
+ println!("Repeat ml table");
+ }
+ /* Nothing to do */
+ }
+ };
+
+ Ok(bytes_read)
+}
+
+const LL_DEFAULT_ACC_LOG: u8 = 6;
+const LITERALS_LENGTH_DEFAULT_DISTRIBUTION: [i32; 36] = [
+ 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+ -1, -1, -1, -1,
+];
+
+const ML_DEFAULT_ACC_LOG: u8 = 6;
+const MATCH_LENGTH_DEFAULT_DISTRIBUTION: [i32; 53] = [
+ 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
+];
+
+const OF_DEFAULT_ACC_LOG: u8 = 5;
+const OFFSET_DEFAULT_DISTRIBUTION: [i32; 29] = [
+ 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
+];
+
+#[test]
+fn test_ll_default() {
+ let mut table = crate::fse::FSETable::new();
+ table
+ .build_from_probabilities(
+ LL_DEFAULT_ACC_LOG,
+ &Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]),
+ )
+ .unwrap();
+
+ for idx in 0..table.decode.len() {
+ println!(
+ "{:3}: {:3} {:3} {:3}",
+ idx, table.decode[idx].symbol, table.decode[idx].num_bits, table.decode[idx].base_line
+ );
+ }
+
+ assert!(table.decode.len() == 64);
+
+ //just test a few values. TODO test all values
+ assert!(table.decode[0].symbol == 0);
+ assert!(table.decode[0].num_bits == 4);
+ assert!(table.decode[0].base_line == 0);
+
+ assert!(table.decode[19].symbol == 27);
+ assert!(table.decode[19].num_bits == 6);
+ assert!(table.decode[19].base_line == 0);
+
+ assert!(table.decode[39].symbol == 25);
+ assert!(table.decode[39].num_bits == 4);
+ assert!(table.decode[39].base_line == 16);
+
+ assert!(table.decode[60].symbol == 35);
+ assert!(table.decode[60].num_bits == 6);
+ assert!(table.decode[60].base_line == 0);
+
+ assert!(table.decode[59].symbol == 24);
+ assert!(table.decode[59].num_bits == 5);
+ assert!(table.decode[59].base_line == 32);
+}
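// A sketch, editorial and not from the vendored source, of how the lookup tables are
// consumed: decode_sequences reads `num_bits` extra bits from the reversed bitstream
// and adds them to the baseline, so ll_code 26 (baseline 128, 7 extra bits) covers
// literal lengths 128..=255.
#[test]
fn baseline_plus_extra_bits_example() {
    let (baseline, num_bits) = lookup_ll_code(26);
    assert_eq!((baseline, num_bits), (128, 7));

    let extra_bits = 90u64; // as if 0b1011010 had just been read from the bitstream
    assert_eq!(baseline + extra_bits as u32, 218);
}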