Diffstat (limited to 'vendor/ruzstd/src/decoding/block_decoder.rs')
-rw-r--r-- | vendor/ruzstd/src/decoding/block_decoder.rs | 378 |
1 file changed, 378 insertions, 0 deletions
diff --git a/vendor/ruzstd/src/decoding/block_decoder.rs b/vendor/ruzstd/src/decoding/block_decoder.rs
new file mode 100644
index 000000000..11a4c28c1
--- /dev/null
+++ b/vendor/ruzstd/src/decoding/block_decoder.rs
@@ -0,0 +1,378 @@
+use super::super::blocks::block::BlockHeader;
+use super::super::blocks::block::BlockType;
+use super::super::blocks::literals_section::LiteralsSection;
+use super::super::blocks::literals_section::LiteralsSectionType;
+use super::super::blocks::sequence_section::SequencesHeader;
+use super::literals_section_decoder::{decode_literals, DecompressLiteralsError};
+use super::sequence_execution::ExecuteSequencesError;
+use super::sequence_section_decoder::decode_sequences;
+use super::sequence_section_decoder::DecodeSequenceError;
+use crate::blocks::literals_section::LiteralsSectionParseError;
+use crate::blocks::sequence_section::SequencesHeaderParseError;
+use crate::decoding::scratch::DecoderScratch;
+use crate::decoding::sequence_execution::execute_sequences;
+use std::io::{self, Read};
+
+pub struct BlockDecoder {
+    header_buffer: [u8; 3],
+    internal_state: DecoderState,
+}
+
+enum DecoderState {
+    ReadyToDecodeNextHeader,
+    ReadyToDecodeNextBody,
+    #[allow(dead_code)]
+    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockHeaderReadError {
+    #[error("Error while reading the block header")]
+    ReadError(#[from] io::Error),
+    #[error("Reserved block occured. This is considered corruption by the documentation")]
+    FoundReservedBlock,
+    #[error("Error getting block type: {0}")]
+    BlockTypeError(#[from] BlockTypeError),
+    #[error("Error getting block content size: {0}")]
+    BlockSizeError(#[from] BlockSizeError),
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockTypeError {
+    #[error(
+        "Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though"
+    )]
+    InvalidBlocktypeNumber { num: u8 },
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum BlockSizeError {
+    #[error("Blocksize was bigger than the absolute maximum {ABSOLUTE_MAXIMUM_BLOCK_SIZE} (128kb). Is: {size}")]
+    BlockSizeTooLarge { size: u32 },
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecompressBlockError {
+    #[error("Error while reading the block content: {0}")]
+    BlockContentReadError(#[from] io::Error),
+    #[error("Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left")]
+    MalformedSectionHeader {
+        expected_len: usize,
+        remaining_bytes: usize,
+    },
+    #[error(transparent)]
+    DecompressLiteralsError(#[from] DecompressLiteralsError),
+    #[error(transparent)]
+    LiteralsSectionParseError(#[from] LiteralsSectionParseError),
+    #[error(transparent)]
+    SequencesHeaderParseError(#[from] SequencesHeaderParseError),
+    #[error(transparent)]
+    DecodeSequenceError(#[from] DecodeSequenceError),
+    #[error(transparent)]
+    ExecuteSequencesError(#[from] ExecuteSequencesError),
+}
+
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum DecodeBlockContentError {
+    #[error("Can't decode next block if failed along the way. Results will be nonsense")]
+    DecoderStateIsFailed,
+    #[error("Cant decode next block body, while expecting to decode the header of the previous block. Results will be nonsense")]
+    ExpectedHeaderOfPreviousBlock,
+    #[error("Error while reading bytes for {step}: {source}")]
+    ReadError {
+        step: BlockType,
+        #[source]
+        source: io::Error,
+    },
+    #[error(transparent)]
+    DecompressBlockError(#[from] DecompressBlockError),
+}
+
+pub fn new() -> BlockDecoder {
+    BlockDecoder {
+        internal_state: DecoderState::ReadyToDecodeNextHeader,
+        header_buffer: [0u8; 3],
+    }
+}
+
+const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
+
+impl BlockDecoder {
+    pub fn decode_block_content(
+        &mut self,
+        header: &BlockHeader,
+        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
+        mut source: impl Read,
+    ) -> Result<u64, DecodeBlockContentError> {
+        match self.internal_state {
+            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
+            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
+            DecoderState::ReadyToDecodeNextHeader => {
+                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
+            }
+        }
+
+        let block_type = header.block_type;
+        match block_type {
+            BlockType::RLE => {
+                const BATCH_SIZE: usize = 512;
+                let mut buf = [0u8; BATCH_SIZE];
+                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
+                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
+
+                source.read_exact(&mut buf[0..1]).map_err(|err| {
+                    DecodeBlockContentError::ReadError {
+                        step: block_type,
+                        source: err,
+                    }
+                })?;
+                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
+
+                for i in 1..BATCH_SIZE {
+                    buf[i] = buf[0];
+                }
+
+                for _ in 0..full_reads {
+                    workspace.buffer.push(&buf[..]);
+                }
+                let smaller = &mut buf[..single_read_size as usize];
+                workspace.buffer.push(smaller);
+
+                Ok(1)
+            }
+            BlockType::Raw => {
+                const BATCH_SIZE: usize = 128 * 1024;
+                let mut buf = [0u8; BATCH_SIZE];
+                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
+                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;
+
+                for _ in 0..full_reads {
+                    source.read_exact(&mut buf[..]).map_err(|err| {
+                        DecodeBlockContentError::ReadError {
+                            step: block_type,
+                            source: err,
+                        }
+                    })?;
+                    workspace.buffer.push(&buf[..]);
+                }
+
+                let smaller = &mut buf[..single_read_size as usize];
+                source
+                    .read_exact(smaller)
+                    .map_err(|err| DecodeBlockContentError::ReadError {
+                        step: block_type,
+                        source: err,
+                    })?;
+                workspace.buffer.push(smaller);
+
+                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
+                Ok(u64::from(header.decompressed_size))
+            }
+
+            BlockType::Reserved => {
+                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
+            }
+
+            BlockType::Compressed => {
+                self.decompress_block(header, workspace, source)?;
+
+                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
+                Ok(u64::from(header.content_size))
+            }
+        }
+    }
+
+    fn decompress_block(
+        &mut self,
+        header: &BlockHeader,
+        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
+        mut source: impl Read,
+    ) -> Result<(), DecompressBlockError> {
+        workspace
+            .block_content_buffer
+            .resize(header.content_size as usize, 0);
+
+        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
+        let raw = workspace.block_content_buffer.as_slice();
+
+        let mut section = LiteralsSection::new();
+        let bytes_in_literals_header = section.parse_from_header(raw)?;
+        let raw = &raw[bytes_in_literals_header as usize..];
+        if crate::VERBOSE {
+            println!(
+                "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
+                section.ls_type, section.regenerated_size, section.compressed_size
+            );
+        }
+
+        let upper_limit_for_literals = match section.compressed_size {
+            Some(x) => x as usize,
+            None => match section.ls_type {
+                LiteralsSectionType::RLE => 1,
+                LiteralsSectionType::Raw => section.regenerated_size as usize,
+                _ => panic!("Bug in this library"),
+            },
+        };
+
+        if raw.len() < upper_limit_for_literals {
+            return Err(DecompressBlockError::MalformedSectionHeader {
+                expected_len: upper_limit_for_literals,
+                remaining_bytes: raw.len(),
+            });
+        }
+
+        let raw_literals = &raw[..upper_limit_for_literals];
+        if crate::VERBOSE {
+            println!("Slice for literals: {}", raw_literals.len());
+        }
+
+        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
+        let bytes_used_in_literals_section = decode_literals(
+            &section,
+            &mut workspace.huf,
+            raw_literals,
+            &mut workspace.literals_buffer,
+        )?;
+        assert!(
+            section.regenerated_size == workspace.literals_buffer.len() as u32,
+            "Wrong number of literals: {}, Should have been: {}",
+            workspace.literals_buffer.len(),
+            section.regenerated_size
+        );
+        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);
+
+        let raw = &raw[upper_limit_for_literals..];
+        if crate::VERBOSE {
+            println!("Slice for sequences with headers: {}", raw.len());
+        }
+
+        let mut seq_section = SequencesHeader::new();
+        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
+        let raw = &raw[bytes_in_sequence_header as usize..];
+        if crate::VERBOSE {
+            println!(
+                "Found sequencessection with sequences: {} and size: {}",
+                seq_section.num_sequences,
+                raw.len()
+            );
+        }
+
+        assert!(
+            u32::from(bytes_in_literals_header)
+                + bytes_used_in_literals_section
+                + u32::from(bytes_in_sequence_header)
+                + raw.len() as u32
+                == header.content_size
+        );
+        if crate::VERBOSE {
+            println!("Slice for sequences: {}", raw.len());
+        }
+
+        if seq_section.num_sequences != 0 {
+            decode_sequences(
+                &seq_section,
+                raw,
+                &mut workspace.fse,
+                &mut workspace.sequences,
+            )?;
+            if crate::VERBOSE {
+                println!("Executing sequences");
+            }
+            execute_sequences(workspace)?;
+        } else {
+            workspace.buffer.push(&workspace.literals_buffer);
+            workspace.sequences.clear();
+        }
+
+        Ok(())
+    }
+
+    pub fn read_block_header(
+        &mut self,
+        mut r: impl Read,
+    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
+        //match self.internal_state {
+        //    DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
+        //    DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
+        //    DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
+        //}
+
+        r.read_exact(&mut self.header_buffer[0..3])?;
+
+        let btype = self.block_type()?;
+        if let BlockType::Reserved = btype {
+            return Err(BlockHeaderReadError::FoundReservedBlock);
+        }
+
+        let block_size = self.block_content_size()?;
+        let decompressed_size = match btype {
+            BlockType::Raw => block_size,
+            BlockType::RLE => block_size,
+            BlockType::Reserved => 0, //should be catched above, this is an error state
+            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
+        };
+        let content_size = match btype {
+            BlockType::Raw => block_size,
+            BlockType::Compressed => block_size,
+            BlockType::RLE => 1,
+            BlockType::Reserved => 0, //should be catched above, this is an error state
+        };
+
+        let last_block = self.is_last();
+
+        self.reset_buffer();
+        self.internal_state = DecoderState::ReadyToDecodeNextBody;
+
+        //just return 3. Blockheaders always take 3 bytes
+        Ok((
+            BlockHeader {
+                last_block,
+                block_type: btype,
+                decompressed_size,
+                content_size,
+            },
+            3,
+        ))
+    }
+
+    fn reset_buffer(&mut self) {
+        self.header_buffer[0] = 0;
+        self.header_buffer[1] = 0;
+        self.header_buffer[2] = 0;
+    }
+
+    fn is_last(&self) -> bool {
+        self.header_buffer[0] & 0x1 == 1
+    }
+
+    fn block_type(&self) -> Result<BlockType, BlockTypeError> {
+        let t = (self.header_buffer[0] >> 1) & 0x3;
+        match t {
+            0 => Ok(BlockType::Raw),
+            1 => Ok(BlockType::RLE),
+            2 => Ok(BlockType::Compressed),
+            3 => Ok(BlockType::Reserved),
+            other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
+        }
+    }
+
+    fn block_content_size(&self) -> Result<u32, BlockSizeError> {
+        let val = self.block_content_size_unchecked();
+        if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
+            Err(BlockSizeError::BlockSizeTooLarge { size: val })
+        } else {
+            Ok(val)
+        }
+    }
+
+    fn block_content_size_unchecked(&self) -> u32 {
+        u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
+            | (u32::from(self.header_buffer[1]) << 5)
+            | (u32::from(self.header_buffer[2]) << 13)
+    }
+}