diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/deflate/src/rle.rs | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/deflate/src/rle.rs')
-rw-r--r-- | third_party/rust/deflate/src/rle.rs | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/third_party/rust/deflate/src/rle.rs b/third_party/rust/deflate/src/rle.rs new file mode 100644 index 0000000000..d7c38078dd --- /dev/null +++ b/third_party/rust/deflate/src/rle.rs @@ -0,0 +1,107 @@ +use lz77::{ProcessStatus, buffer_full}; +use output_writer::{BufferStatus, DynamicWriter}; +use huffman_table; + +use std::ops::Range; +use std::cmp; + +const MIN_MATCH: usize = huffman_table::MIN_MATCH as usize; +const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize; + + +/// Simple match function for run-length encoding. +/// +/// Checks how many of the next bytes from the start of the slice `data` matches prev. +fn get_match_length_rle(data: &[u8], prev: u8) -> usize { + data.iter() + .take(MAX_MATCH) + .take_while(|&&b| b == prev) + .count() +} + +/// L77-Compress data using the RLE(Run-length encoding) strategy +/// +/// This function simply looks for runs of data of at least length 3. +pub fn process_chunk_greedy_rle( + data: &[u8], + iterated_data: &Range<usize>, + writer: &mut DynamicWriter, +) -> (usize, ProcessStatus) { + if data.is_empty() { + return (0, ProcessStatus::Ok); + }; + + let end = cmp::min(data.len(), iterated_data.end); + // Start on at least byte 1. + let start = cmp::max(iterated_data.start, 1); + // The previous byte. + let mut prev = data[start - 1]; + // Iterate through the requested range, but avoid going off the end. + let current_chunk = &data[cmp::min(start, end)..end]; + let mut insert_it = current_chunk.iter().enumerate(); + let mut overlap = 0; + // Make sure to output the first byte + if iterated_data.start == 0 && !data.is_empty() { + write_literal!(writer, data[0], 1); + } + + while let Some((n, &b)) = insert_it.next() { + let position = n + start; + let match_len = if prev == b { + //TODO: Avoid comparing with self here. + // Would use as_slice() but that doesn't work on an enumerated iterator. + get_match_length_rle(&data[position..], prev) + } else { + 0 + }; + if match_len >= MIN_MATCH { + if position + match_len > end { + overlap = position + match_len - end; + }; + let b_status = writer.write_length_rle(match_len as u16); + if b_status == BufferStatus::Full { + return (overlap, buffer_full(position + match_len)); + } + insert_it.nth(match_len - 2); + } else { + write_literal!(writer, b, position + 1); + } + prev = b; + } + + (overlap, ProcessStatus::Ok) +} + +#[cfg(test)] +mod test { + use super::*; + use lzvalue::{LZValue, lit, ld}; + + fn l(c: char) -> LZValue { + lit(c as u8) + } + + #[test] + fn rle_compress() { + let input = b"textaaaaaaaaatext"; + let mut w = DynamicWriter::new(); + let r = 0..input.len(); + let (overlap, _) = process_chunk_greedy_rle(input, &r, &mut w); + let expected = [ + l('t'), + l('e'), + l('x'), + l('t'), + l('a'), + ld(8, 1), + l('t'), + l('e'), + l('x'), + l('t'), + ]; + //println!("expected: {:?}", expected); + //println!("actual: {:?}", w.get_buffer()); + assert!(w.get_buffer() == expected); + assert_eq!(overlap, 0); + } +} |