summaryrefslogtreecommitdiffstats
path: root/vendor/ruzstd/src/decoding/block_decoder.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/ruzstd/src/decoding/block_decoder.rs')
-rw-r--r--vendor/ruzstd/src/decoding/block_decoder.rs378
1 files changed, 378 insertions, 0 deletions
diff --git a/vendor/ruzstd/src/decoding/block_decoder.rs b/vendor/ruzstd/src/decoding/block_decoder.rs
new file mode 100644
index 000000000..11a4c28c1
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/block_decoder.rs
@@ -0,0 +1,378 @@
+use super::super::blocks::block::BlockHeader;
+use super::super::blocks::block::BlockType;
+use super::super::blocks::literals_section::LiteralsSection;
+use super::super::blocks::literals_section::LiteralsSectionType;
+use super::super::blocks::sequence_section::SequencesHeader;
+use super::literals_section_decoder::{decode_literals, DecompressLiteralsError};
+use super::sequence_execution::ExecuteSequencesError;
+use super::sequence_section_decoder::decode_sequences;
+use super::sequence_section_decoder::DecodeSequenceError;
+use crate::blocks::literals_section::LiteralsSectionParseError;
+use crate::blocks::sequence_section::SequencesHeaderParseError;
+use crate::decoding::scratch::DecoderScratch;
+use crate::decoding::sequence_execution::execute_sequences;
+use std::io::{self, Read};
+
/// Decodes zstd blocks one at a time: first the 3-byte block header, then the block body.
/// One instance is meant to be reused across all blocks of a frame.
pub struct BlockDecoder {
    // Scratch space for the raw 3-byte block header between reads.
    header_buffer: [u8; 3],
    // Enforces that header and body decoding strictly alternate.
    internal_state: DecoderState,
}
+
/// Which step the decoder expects next; `read_block_header` and
/// `decode_block_content` must be called in strict alternation.
enum DecoderState {
    ReadyToDecodeNextHeader,
    ReadyToDecodeNextBody,
    #[allow(dead_code)]
    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockHeaderReadError {
+ #[error("Error while reading the block header")]
+ ReadError(#[from] io::Error),
+ #[error("Reserved block occured. This is considered corruption by the documentation")]
+ FoundReservedBlock,
+ #[error("Error getting block type: {0}")]
+ BlockTypeError(#[from] BlockTypeError),
+ #[error("Error getting block content size: {0}")]
+ BlockSizeError(#[from] BlockSizeError),
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockTypeError {
+ #[error(
+ "Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though"
+ )]
+ InvalidBlocktypeNumber { num: u8 },
+}
+
/// Error raised when a block header advertises a content size above the
/// format's absolute maximum (`ABSOLUTE_MAXIMUM_BLOCK_SIZE`, 128kb).
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum BlockSizeError {
    #[error("Blocksize was bigger than the absolute maximum {ABSOLUTE_MAXIMUM_BLOCK_SIZE} (128kb). Is: {size}")]
    BlockSizeTooLarge { size: u32 },
}
+
/// Errors that can occur while decompressing the body of a `Compressed` block.
/// Most variants transparently wrap errors from the section decoders.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum DecompressBlockError {
    /// The reader failed while supplying the block's content bytes.
    #[error("Error while reading the block content: {0}")]
    BlockContentReadError(#[from] io::Error),
    /// The literals section claims more bytes than remain in the block.
    #[error("Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left")]
    MalformedSectionHeader {
        expected_len: usize,
        remaining_bytes: usize,
    },
    #[error(transparent)]
    DecompressLiteralsError(#[from] DecompressLiteralsError),
    #[error(transparent)]
    LiteralsSectionParseError(#[from] LiteralsSectionParseError),
    #[error(transparent)]
    SequencesHeaderParseError(#[from] SequencesHeaderParseError),
    #[error(transparent)]
    DecodeSequenceError(#[from] DecodeSequenceError),
    #[error(transparent)]
    ExecuteSequencesError(#[from] ExecuteSequencesError),
}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecodeBlockContentError {
+ #[error("Can't decode next block if failed along the way. Results will be nonsense")]
+ DecoderStateIsFailed,
+ #[error("Cant decode next block body, while expecting to decode the header of the previous block. Results will be nonsense")]
+ ExpectedHeaderOfPreviousBlock,
+ #[error("Error while reading bytes for {step}: {source}")]
+ ReadError {
+ step: BlockType,
+ #[source]
+ source: io::Error,
+ },
+ #[error(transparent)]
+ DecompressBlockError(#[from] DecompressBlockError),
+}
+
+pub fn new() -> BlockDecoder {
+ BlockDecoder {
+ internal_state: DecoderState::ReadyToDecodeNextHeader,
+ header_buffer: [0u8; 3],
+ }
+}
+
+const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
+
+impl BlockDecoder {
    /// Decodes one block body according to `header`, appending the decompressed
    /// data to `workspace.buffer`.
    ///
    /// Must be called only after `read_block_header` parsed this block's header.
    /// Returns the number of bytes consumed from `source` (1 for an RLE block,
    /// the block's size for Raw and Compressed blocks).
    pub fn decode_block_content(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<u64, DecodeBlockContentError> {
        // A body may only be decoded directly after its header was read.
        match self.internal_state {
            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
            DecoderState::ReadyToDecodeNextHeader => {
                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
            }
        }

        let block_type = header.block_type;
        match block_type {
            BlockType::RLE => {
                // An RLE block stores one byte that is repeated `decompressed_size`
                // times; emit it in fixed-size batches.
                const BATCH_SIZE: usize = 512;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                source.read_exact(&mut buf[0..1]).map_err(|err| {
                    DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    }
                })?;
                self.internal_state = DecoderState::ReadyToDecodeNextHeader;

                // Replicate the single byte across the whole batch buffer.
                for i in 1..BATCH_SIZE {
                    buf[i] = buf[0];
                }

                for _ in 0..full_reads {
                    workspace.buffer.push(&buf[..]);
                }
                // Emit the remainder that does not fill a whole batch.
                let smaller = &mut buf[..single_read_size as usize];
                workspace.buffer.push(smaller);

                // Only the single RLE byte was consumed from the source.
                Ok(1)
            }
            BlockType::Raw => {
                // A raw block is copied through verbatim, in batches of up to 128kb.
                const BATCH_SIZE: usize = 128 * 1024;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                for _ in 0..full_reads {
                    source.read_exact(&mut buf[..]).map_err(|err| {
                        DecodeBlockContentError::ReadError {
                            step: block_type,
                            source: err,
                        }
                    })?;
                    workspace.buffer.push(&buf[..]);
                }

                // Copy the remaining partial batch.
                let smaller = &mut buf[..single_read_size as usize];
                source
                    .read_exact(smaller)
                    .map_err(|err| DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    })?;
                workspace.buffer.push(smaller);

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                Ok(u64::from(header.decompressed_size))
            }

            BlockType::Reserved => {
                // read_block_header rejects reserved blocks, so reaching this is a bug.
                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
            }

            BlockType::Compressed => {
                self.decompress_block(header, workspace, source)?;

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                Ok(u64::from(header.content_size))
            }
        }
    }
+
    /// Decompresses a `Compressed` block: reads `header.content_size` bytes from
    /// `source`, parses the literals section and the sequences header, decodes
    /// the sequences, and executes them against `workspace`.
    fn decompress_block(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<(), DecompressBlockError> {
        // Read the entire compressed block body into the scratch buffer.
        workspace
            .block_content_buffer
            .resize(header.content_size as usize, 0);

        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
        let raw = workspace.block_content_buffer.as_slice();

        let mut section = LiteralsSection::new();
        let bytes_in_literals_header = section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_literals_header as usize..];
        if crate::VERBOSE {
            println!(
                "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
                section.ls_type, section.regenerated_size, section.compressed_size
            );
        }

        // Bytes the literals occupy after the literals header: the compressed
        // size if one was given, else 1 for RLE or the regenerated size for Raw.
        let upper_limit_for_literals = match section.compressed_size {
            Some(x) => x as usize,
            None => match section.ls_type {
                LiteralsSectionType::RLE => 1,
                LiteralsSectionType::Raw => section.regenerated_size as usize,
                _ => panic!("Bug in this library"),
            },
        };

        if raw.len() < upper_limit_for_literals {
            return Err(DecompressBlockError::MalformedSectionHeader {
                expected_len: upper_limit_for_literals,
                remaining_bytes: raw.len(),
            });
        }

        let raw_literals = &raw[..upper_limit_for_literals];
        if crate::VERBOSE {
            println!("Slice for literals: {}", raw_literals.len());
        }

        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
        let bytes_used_in_literals_section = decode_literals(
            &section,
            &mut workspace.huf,
            raw_literals,
            &mut workspace.literals_buffer,
        )?;
        assert!(
            section.regenerated_size == workspace.literals_buffer.len() as u32,
            "Wrong number of literals: {}, Should have been: {}",
            workspace.literals_buffer.len(),
            section.regenerated_size
        );
        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);

        // Everything after the literals belongs to the sequences section.
        let raw = &raw[upper_limit_for_literals..];
        if crate::VERBOSE {
            println!("Slice for sequences with headers: {}", raw.len());
        }

        let mut seq_section = SequencesHeader::new();
        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_sequence_header as usize..];
        if crate::VERBOSE {
            println!(
                "Found sequencessection with sequences: {} and size: {}",
                seq_section.num_sequences,
                raw.len()
            );
        }

        // Sanity check: the parsed sections must account for the whole block.
        assert!(
            u32::from(bytes_in_literals_header)
                + bytes_used_in_literals_section
                + u32::from(bytes_in_sequence_header)
                + raw.len() as u32
                == header.content_size
        );
        if crate::VERBOSE {
            println!("Slice for sequences: {}", raw.len());
        }

        if seq_section.num_sequences != 0 {
            decode_sequences(
                &seq_section,
                raw,
                &mut workspace.fse,
                &mut workspace.sequences,
            )?;
            if crate::VERBOSE {
                println!("Executing sequences");
            }
            execute_sequences(workspace)?;
        } else {
            // No sequences: the block's output is exactly the decoded literals.
            workspace.buffer.push(&workspace.literals_buffer);
            workspace.sequences.clear();
        }

        Ok(())
    }
+
    /// Reads and parses the 3-byte block header from `r`.
    ///
    /// Returns the parsed `BlockHeader` together with the number of bytes
    /// consumed (always 3). Fails on I/O errors, on a reserved block type,
    /// and on an oversized block size.
    ///
    /// NOTE(review): unlike `decode_block_content`, this deliberately does not
    /// validate `internal_state` before reading (a former check here was
    /// commented out upstream).
    pub fn read_block_header(
        &mut self,
        mut r: impl Read,
    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
        r.read_exact(&mut self.header_buffer[0..3])?;

        let btype = self.block_type()?;
        if let BlockType::Reserved = btype {
            return Err(BlockHeaderReadError::FoundReservedBlock);
        }

        let block_size = self.block_content_size()?;
        // For Raw and RLE blocks the header's size field is the decompressed size.
        let decompressed_size = match btype {
            BlockType::Raw => block_size,
            BlockType::RLE => block_size,
            BlockType::Reserved => 0, //should have been caught above, this is an error state
            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
        };
        // Bytes the block body occupies in the stream: the size field for
        // Raw/Compressed, a single byte for RLE.
        let content_size = match btype {
            BlockType::Raw => block_size,
            BlockType::Compressed => block_size,
            BlockType::RLE => 1,
            BlockType::Reserved => 0, //should have been caught above, this is an error state
        };

        let last_block = self.is_last();

        self.reset_buffer();
        self.internal_state = DecoderState::ReadyToDecodeNextBody;

        //just return 3. Blockheaders always take 3 bytes
        Ok((
            BlockHeader {
                last_block,
                block_type: btype,
                decompressed_size,
                content_size,
            },
            3,
        ))
    }
+
+ fn reset_buffer(&mut self) {
+ self.header_buffer[0] = 0;
+ self.header_buffer[1] = 0;
+ self.header_buffer[2] = 0;
+ }
+
+ fn is_last(&self) -> bool {
+ self.header_buffer[0] & 0x1 == 1
+ }
+
+ fn block_type(&self) -> Result<BlockType, BlockTypeError> {
+ let t = (self.header_buffer[0] >> 1) & 0x3;
+ match t {
+ 0 => Ok(BlockType::Raw),
+ 1 => Ok(BlockType::RLE),
+ 2 => Ok(BlockType::Compressed),
+ 3 => Ok(BlockType::Reserved),
+ other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
+ }
+ }
+
+ fn block_content_size(&self) -> Result<u32, BlockSizeError> {
+ let val = self.block_content_size_unchecked();
+ if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
+ Err(BlockSizeError::BlockSizeTooLarge { size: val })
+ } else {
+ Ok(val)
+ }
+ }
+
+ fn block_content_size_unchecked(&self) -> u32 {
+ u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
+ | (u32::from(self.header_buffer[1]) << 5)
+ | (u32::from(self.header_buffer[2]) << 13)
+ }
+}