summaryrefslogtreecommitdiffstats
path: root/third_party/rust/deflate/src/output_writer.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/deflate/src/output_writer.rs
parentInitial commit. (diff)
downloadfirefox-upstream.tar.xz
firefox-upstream.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/deflate/src/output_writer.rs')
-rw-r--r--third_party/rust/deflate/src/output_writer.rs154
1 files changed, 154 insertions, 0 deletions
diff --git a/third_party/rust/deflate/src/output_writer.rs b/third_party/rust/deflate/src/output_writer.rs
new file mode 100644
index 0000000000..508ba5c675
--- /dev/null
+++ b/third_party/rust/deflate/src/output_writer.rs
@@ -0,0 +1,154 @@
+use std::u16;
+
+use lzvalue::LZValue;
+use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
+ get_distance_code, get_length_code};
+
+/// The type used for representing how many times a literal, length or distance code has been ouput
+/// to the current buffer.
+/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
+/// 16-bit values.
+pub type FrequencyType = u16;
+
+/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
+/// number of literals/lengths output before a new block is started.
+/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
+/// overflowing (which would degrade, or in the worst case break compression).
+pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
+
+#[derive(Debug, PartialEq)]
+pub enum BufferStatus {
+ NotFull,
+ Full,
+}
+
+/// Struct that buffers lz77 data and keeps track of the usage of different codes
+pub struct DynamicWriter {
+ buffer: Vec<LZValue>,
+ // The two last length codes are not actually used, but only participates in code construction
+ // Therefore, we ignore them to get the correct number of lengths
+ frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
+ distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
+}
+
+impl DynamicWriter {
+ #[inline]
+ pub fn check_buffer_length(&self) -> BufferStatus {
+ if self.buffer.len() >= MAX_BUFFER_LENGTH {
+ BufferStatus::Full
+ } else {
+ BufferStatus::NotFull
+ }
+ }
+
+ #[inline]
+ pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
+ debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
+ self.buffer.push(LZValue::literal(literal));
+ self.frequencies[usize::from(literal)] += 1;
+ self.check_buffer_length()
+ }
+
+ #[inline]
+ pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
+ self.buffer.push(LZValue::length_distance(length, distance));
+ let l_code_num = get_length_code(length);
+ // As we limit the buffer to 2^16 values, this should be safe from overflowing.
+ if cfg!(debug_assertions) {
+ self.frequencies[l_code_num] += 1;
+ } else {
+ // #Safety
+ // None of the values in the table of length code numbers will give a value
+ // that is out of bounds.
+ // There is a test to ensure that these functions can not produce too large values.
+ unsafe {
+ *self.frequencies.get_unchecked_mut(l_code_num) += 1;
+ }
+ }
+ let d_code_num = get_distance_code(distance);
+ // The compiler seems to be able to evade the bounds check here somehow.
+ self.distance_frequencies[usize::from(d_code_num)] += 1;
+ self.check_buffer_length()
+ }
+
+ pub fn buffer_length(&self) -> usize {
+ self.buffer.len()
+ }
+
+ pub fn get_buffer(&self) -> &[LZValue] {
+ &self.buffer
+ }
+
+ pub fn new() -> DynamicWriter {
+ let mut w = DynamicWriter {
+ buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
+ frequencies: [0; NUM_LITERALS_AND_LENGTHS],
+ distance_frequencies: [0; NUM_DISTANCE_CODES],
+ };
+ // This will always be 1,
+ // since there will always only be one end of block marker in each block
+ w.frequencies[END_OF_BLOCK_POSITION] = 1;
+ w
+ }
+
+ /// Special output function used with RLE compression
+ /// that avoids bothering to lookup a distance code.
+ #[inline]
+ pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
+ self.buffer.push(LZValue::length_distance(length, 1));
+ let l_code_num = get_length_code(length);
+ // As we limit the buffer to 2^16 values, this should be safe from overflowing.
+ if cfg!(debug_assertions) {
+ self.frequencies[l_code_num] += 1;
+ } else {
+ // #Safety
+ // None of the values in the table of length code numbers will give a value
+ // that is out of bounds.
+ // There is a test to ensure that these functions won't produce too large values.
+ unsafe {
+ *self.frequencies.get_unchecked_mut(l_code_num) += 1;
+ }
+ }
+ self.distance_frequencies[0] += 1;
+ self.check_buffer_length()
+ }
+
+ pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
+ (&self.frequencies, &self.distance_frequencies)
+ }
+
+ pub fn clear_frequencies(&mut self) {
+ self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
+ self.distance_frequencies = [0; NUM_DISTANCE_CODES];
+ self.frequencies[END_OF_BLOCK_POSITION] = 1;
+ }
+
+ pub fn clear_data(&mut self) {
+ self.buffer.clear()
+ }
+
+ pub fn clear(&mut self) {
+ self.clear_frequencies();
+ self.clear_data();
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use huffman_table::{get_length_code, get_distance_code};
+ #[test]
+ /// Ensure that these function won't produce values that would overflow the output_writer
+ /// tables since we use some unsafe indexing.
+ fn array_bounds() {
+ let w = DynamicWriter::new();
+
+ for i in 0..u16::max_value() {
+ get_length_code(i) < w.frequencies.len();
+ }
+
+ for i in 0..u16::max_value() {
+ get_distance_code(i) < w.distance_frequencies.len() as u8;
+ }
+ }
+}