Diffstat (limited to 'vendor/snap/src/compress.rs')
-rw-r--r--  vendor/snap/src/compress.rs | 539
1 file changed, 539 insertions(+), 0 deletions(-)
diff --git a/vendor/snap/src/compress.rs b/vendor/snap/src/compress.rs
new file mode 100644
index 000000000..1a6638df4
--- /dev/null
+++ b/vendor/snap/src/compress.rs
@@ -0,0 +1,539 @@
+use std::fmt;
+use std::ops::{Deref, DerefMut};
+use std::ptr;
+
+use crate::bytes;
+use crate::error::{Error, Result};
+use crate::{MAX_BLOCK_SIZE, MAX_INPUT_SIZE};
+
+/// The total number of slots we permit for our hash table of 4 byte repeat
+/// sequences.
+const MAX_TABLE_SIZE: usize = 1 << 14;
+
+/// The size of a small hash table. This is useful for reducing overhead when
+/// compressing very small blocks of bytes.
+const SMALL_TABLE_SIZE: usize = 1 << 10;
+
+/// The total number of bytes that we always leave uncompressed at the end
+/// of the buffer. This in particular affords us some wiggle room during
+/// compression such that faster copy operations can be used.
+const INPUT_MARGIN: usize = 16 - 1;
+
+/// The minimum block size that we're willing to consider for compression.
+/// Anything smaller than this gets emitted as a literal.
+const MIN_NON_LITERAL_BLOCK_SIZE: usize = 1 + 1 + INPUT_MARGIN;
+
+/// Nice names for the various Snappy tags.
+enum Tag {
+ Literal = 0b00,
+ Copy1 = 0b01,
+ Copy2 = 0b10,
+ // Compression never actually emits a Copy4 operation, and decompression
+ // uses tricks so that it never explicitly does case analysis on the copy
+ // operation type. As a result, Copy4 is never used.
+ #[allow(dead_code)]
+ Copy4 = 0b11,
+}
+
+/// Returns the maximum compressed size given the uncompressed size.
+///
+/// If the uncompressed size exceeds the maximum allowable size then this
+/// returns 0.
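+///
+/// # Example
+///
+/// A small sketch of the bound. It assumes this function is re-exported as
+/// `snap::raw::max_compress_len`, as in the published `snap` crate:
+///
+/// ```
+/// // 32 bytes of overhead, plus the input length, plus a sixth of it:
+/// // 32 + 100 + 16 == 148.
+/// assert_eq!(snap::raw::max_compress_len(100), 148);
+/// ```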
+pub fn max_compress_len(input_len: usize) -> usize {
+ let input_len = input_len as u64;
+ if input_len > MAX_INPUT_SIZE {
+ return 0;
+ }
+ let max = 32 + input_len + (input_len / 6);
+ if max > MAX_INPUT_SIZE {
+ 0
+ } else {
+ max as usize
+ }
+}
+
+/// Encoder is a raw encoder for compressing bytes in the Snappy format.
+///
+/// This encoder does not use the Snappy frame format and simply compresses the
+/// given bytes in one big Snappy block (that is, it has a single header).
+///
+/// Unless you explicitly need the low-level control, you should use
+/// [`read::FrameEncoder`](../read/struct.FrameEncoder.html)
+/// or
+/// [`write::FrameEncoder`](../write/struct.FrameEncoder.html)
+/// instead, which compresses to the Snappy frame format.
+///
+/// It is beneficial to reuse an Encoder when possible.
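+///
+/// # Example
+///
+/// A minimal usage sketch, assuming this type is re-exported as
+/// `snap::raw::Encoder` as in the published `snap` crate. Reusing one
+/// `Encoder` across calls lets it reuse its internal hash table storage:
+///
+/// ```
+/// let mut enc = snap::raw::Encoder::new();
+/// let c1 = enc.compress_vec(b"some bytes to compress").unwrap();
+/// let c2 = enc.compress_vec(b"some more bytes to compress").unwrap();
+/// assert!(!c1.is_empty() && !c2.is_empty());
+/// ```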
+pub struct Encoder {
+ small: [u16; SMALL_TABLE_SIZE],
+ big: Vec<u16>,
+}
+
+impl fmt::Debug for Encoder {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "Encoder(...)")
+ }
+}
+
+impl Encoder {
+ /// Return a new encoder that can be used for compressing bytes.
+ pub fn new() -> Encoder {
+ Encoder { small: [0; SMALL_TABLE_SIZE], big: vec![] }
+ }
+
+ /// Compresses all bytes in `input` into `output`.
+ ///
+ /// `input` can be any arbitrary sequence of bytes.
+ ///
+ /// `output` must be large enough to hold the maximum possible compressed
+ /// size of `input`, which can be computed using `max_compress_len`.
+ ///
+ /// On success, this returns the number of bytes written to `output`.
+ ///
+ /// # Errors
+ ///
+ /// This method returns an error in the following circumstances:
+ ///
+ /// * The total number of bytes to compress exceeds `2^32 - 1`.
+ /// * `output` has length less than `max_compress_len(input.len())`.
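+ ///
+ /// # Example
+ ///
+ /// A sketch of compressing into a caller-provided buffer, assuming the
+ /// `snap::raw` re-exports of the published `snap` crate:
+ ///
+ /// ```
+ /// use snap::raw::{max_compress_len, Encoder};
+ ///
+ /// let input = b"some bytes to compress";
+ /// let mut output = vec![0; max_compress_len(input.len())];
+ /// let n = Encoder::new().compress(input, &mut output).unwrap();
+ /// output.truncate(n);
+ /// assert!(n > 0);
+ /// ```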
+ pub fn compress(
+ &mut self,
+ mut input: &[u8],
+ output: &mut [u8],
+ ) -> Result<usize> {
+ match max_compress_len(input.len()) {
+ 0 => {
+ return Err(Error::TooBig {
+ given: input.len() as u64,
+ max: MAX_INPUT_SIZE,
+ });
+ }
+ min if output.len() < min => {
+ return Err(Error::BufferTooSmall {
+ given: output.len() as u64,
+ min: min as u64,
+ });
+ }
+ _ => {}
+ }
+ // Handle an edge case specially.
+ if input.is_empty() {
+ // Encodes a varint of 0, denoting the total size of uncompressed
+ // bytes.
+ output[0] = 0;
+ return Ok(1);
+ }
+ // Write the Snappy header, which is just the total number of
+ // uncompressed bytes.
+ let mut d = bytes::write_varu64(output, input.len() as u64);
+ while !input.is_empty() {
+ // Find the next block.
+ let mut src = input;
+ if src.len() > MAX_BLOCK_SIZE {
+ src = &src[..MAX_BLOCK_SIZE as usize];
+ }
+ input = &input[src.len()..];
+
+ // If the block is smallish, then don't waste time on it and just
+ // emit a literal.
+ let mut block = Block::new(src, output, d);
+ if block.src.len() < MIN_NON_LITERAL_BLOCK_SIZE {
+ let lit_end = block.src.len();
+ unsafe {
+ // SAFETY: next_emit is zero (in bounds) and the end is
+ // the length of the block (in bounds).
+ block.emit_literal(lit_end);
+ }
+ } else {
+ let table = self.block_table(block.src.len());
+ block.compress(table);
+ }
+ d = block.d;
+ }
+ Ok(d)
+ }
+
+ /// Compresses all bytes in `input` into a freshly allocated `Vec`.
+ ///
+ /// This is just like the `compress` method, except it allocates a `Vec`
+ /// with the right size for you. (This is intended to be a convenience
+ /// method.)
+ ///
+ /// This method returns an error under the same circumstances that
+ /// `compress` does.
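+ ///
+ /// # Example
+ ///
+ /// A round-trip sketch, assuming the `snap::raw` re-exports of the
+ /// published `snap` crate (including the corresponding `Decoder`):
+ ///
+ /// ```
+ /// use snap::raw::{Decoder, Encoder};
+ ///
+ /// let input = b"some bytes to compress";
+ /// let compressed = Encoder::new().compress_vec(input).unwrap();
+ /// let decompressed = Decoder::new().decompress_vec(&compressed).unwrap();
+ /// assert_eq!(&decompressed[..], &input[..]);
+ /// ```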
+ pub fn compress_vec(&mut self, input: &[u8]) -> Result<Vec<u8>> {
+ let mut buf = vec![0; max_compress_len(input.len())];
+ let n = self.compress(input, &mut buf)?;
+ buf.truncate(n);
+ Ok(buf)
+ }
+}
+
+struct Block<'s, 'd> {
+ src: &'s [u8],
+ s: usize,
+ s_limit: usize,
+ dst: &'d mut [u8],
+ d: usize,
+ next_emit: usize,
+}
+
+impl<'s, 'd> Block<'s, 'd> {
+ #[inline(always)]
+ fn new(src: &'s [u8], dst: &'d mut [u8], d: usize) -> Block<'s, 'd> {
+ Block {
+ src: src,
+ s: 0,
+ s_limit: src.len(),
+ dst: dst,
+ d: d,
+ next_emit: 0,
+ }
+ }
+
+ #[inline(always)]
+ fn compress(&mut self, mut table: BlockTable<'_>) {
+ debug_assert!(!table.is_empty());
+ debug_assert!(self.src.len() >= MIN_NON_LITERAL_BLOCK_SIZE);
+
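+ // The first byte can never begin a copy since there is nothing before
+ // it to match, so matching starts at position 1. Stopping INPUT_MARGIN
+ // bytes short of the end leaves room for the unaligned loads used
+ // below.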
+ self.s += 1;
+ self.s_limit -= INPUT_MARGIN;
+ let mut next_hash =
+ table.hash(bytes::read_u32_le(&self.src[self.s..]));
+ loop {
+ let mut skip = 32;
+ let mut candidate;
+ let mut s_next = self.s;
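+ // Probe the hash table once every (skip >> 5) bytes. skip starts at
+ // 32, so we initially advance one byte per probe; every miss grows
+ // skip, so mostly incompressible input is stepped over quickly rather
+ // than hashed at every byte.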
+ loop {
+ self.s = s_next;
+ let bytes_between_hash_lookups = skip >> 5;
+ s_next = self.s + bytes_between_hash_lookups;
+ skip += bytes_between_hash_lookups;
+ if s_next > self.s_limit {
+ return self.done();
+ }
+ unsafe {
+ // SAFETY: next_hash is always computed by table.hash
+ // which is guaranteed to be in bounds.
+ candidate = *table.get_unchecked(next_hash) as usize;
+ *table.get_unchecked_mut(next_hash) = self.s as u16;
+
+ let srcp = self.src.as_ptr();
+ // SAFETY: s_next is guaranteed to be less than s_limit by
+ // the conditional above, which implies s_next is in
+ // bounds.
+ let x = bytes::loadu_u32_le(srcp.add(s_next));
+ next_hash = table.hash(x);
+ // SAFETY: self.s is always less than s_next, so it is also
+ // in bounds by the argument above.
+ //
+ // candidate is extracted from table, which is only ever
+ // set to valid positions in the block and is therefore
+ // also in bounds.
+ //
+ // We only need to compare cur/cand for equality, so we don't
+ // need to bother with endianness. cur corresponds to the
+ // bytes at the current position and cand corresponds to
+ // a potential match. If they're equal, we declare victory
+ // and move below to try and extend the match.
+ let cur = bytes::loadu_u32_ne(srcp.add(self.s));
+ let cand = bytes::loadu_u32_ne(srcp.add(candidate));
+ if cur == cand {
+ break;
+ }
+ }
+ }
+ // While the above found a candidate for compression, before we
+ // emit a copy operation for it, we need to make sure that we emit
+ // any bytes between the last copy operation and this one as a
+ // literal.
+ let lit_end = self.s;
+ unsafe {
+ // SAFETY: next_emit is set to a previous value of self.s,
+ // which is guaranteed to be less than s_limit (in bounds).
+ // lit_end is set to the current value of self.s, also
+ // guaranteed to be less than s_limit (in bounds).
+ self.emit_literal(lit_end);
+ }
+ loop {
+ // Look for more matching bytes starting at the position of
+ // the candidate and the current src position. We increment
+ // self.s and candidate by 4 since we already know the first 4
+ // bytes match.
+ let base = self.s;
+ self.s += 4;
+ unsafe {
+ // SAFETY: candidate is always set to a value from our
+ // hash table, which only contains positions in self.src
+ // that have been seen for this block that occurred before
+ // self.s.
+ self.extend_match(candidate + 4);
+ }
+ let (offset, len) = (base - candidate, self.s - base);
+ self.emit_copy(offset, len);
+ self.next_emit = self.s;
+ if self.s >= self.s_limit {
+ return self.done();
+ }
+ // Update the hash table with the byte sequences
+ // self.src[self.s - 1..self.s + 3] and
+ // self.src[self.s..self.s + 4]. Instead of reading 4 bytes
+ // twice, we read 8 bytes once.
+ //
+ // If we happen to get a hit on self.src[self.s..self.s + 4],
+ // then continue this loop and extend the match.
+ unsafe {
+ let srcp = self.src.as_ptr();
+ // SAFETY: self.s can never exceed s_limit given by the
+ // conditional above and self.s is guaranteed to be
+ // non-zero and is therefore in bounds.
+ let x = bytes::loadu_u64_le(srcp.add(self.s - 1));
+ // The lower 4 bytes of x correspond to
+ // self.src[self.s - 1..self.s + 3].
+ let prev_hash = table.hash(x as u32);
+ // SAFETY: Hash values are guaranteed to be in bounds.
+ *table.get_unchecked_mut(prev_hash) = (self.s - 1) as u16;
+ // The lower 4 bytes of x>>8 correspond to
+ // self.src[self.s..self.s + 4].
+ let cur_hash = table.hash((x >> 8) as u32);
+ // SAFETY: Hash values are guaranteed to be in bounds.
+ candidate = *table.get_unchecked(cur_hash) as usize;
+ *table.get_unchecked_mut(cur_hash) = self.s as u16;
+
+ // SAFETY: candidate is set from table, which always
+ // contains valid positions in the current block.
+ let y = bytes::loadu_u32_le(srcp.add(candidate));
+ if (x >> 8) as u32 != y {
+ // If we didn't get a hit, update the next hash
+ // and move on. Our initial 8 byte read continues to
+ // pay off.
+ next_hash = table.hash((x >> 16) as u32);
+ self.s += 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /// Emits one or more copy operations with the given offset and length.
+ /// offset must be in the range [1, 65535] and len must be in the range
+ /// [4, 65535].
+ #[inline(always)]
+ fn emit_copy(&mut self, offset: usize, mut len: usize) {
+ debug_assert!(1 <= offset && offset <= 65535);
+ // Copy operations only allow lengths up to 64, but we'll allow bigger
+ // lengths and emit as many operations as we need.
+ //
+ // N.B. Since our block size is 64KB, we never actually emit a copy 4
+ // operation.
+ debug_assert!(4 <= len && len <= 65535);
+
+ // Emit copy 2 operations until we don't have to.
+ // We check against 68 here and emit a copy shorter than 64 below
+ // because it is cheaper to, e.g., encode a length 67 copy as a length
+ // 60 copy 2 followed by a length 7 copy 1 than as a length 64 copy 2
+ // followed by a length 3 copy 2. The key here is that a
+ // copy 1 operation requires at least length 4 which forces a length 3
+ // copy to use a copy 2 operation.
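+ //
+ // Concretely, a copy 2 costs 3 bytes and a copy 1 costs 2 bytes, so a
+ // length 67 copy emitted as 60 + 7 costs 3 + 2 = 5 bytes, while 64 + 3
+ // would need two copy 2 operations costing 3 + 3 = 6 bytes.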
+ while len >= 68 {
+ self.emit_copy2(offset, 64);
+ len -= 64;
+ }
+ if len > 64 {
+ self.emit_copy2(offset, 60);
+ len -= 60;
+ }
+ // If we can squeeze the last copy into a copy 1 operation, do it.
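+ // A copy 1 operation fits in two bytes: the tag byte packs the high 3
+ // bits of the offset (bits 5-7), the length minus 4 (bits 2-4) and the
+ // Copy1 tag (bits 0-1), while the second byte holds the low 8 bits of
+ // the offset.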
+ if len <= 11 && offset <= 2047 {
+ self.dst[self.d] = (((offset >> 8) as u8) << 5)
+ | (((len - 4) as u8) << 2)
+ | (Tag::Copy1 as u8);
+ self.dst[self.d + 1] = offset as u8;
+ self.d += 2;
+ } else {
+ self.emit_copy2(offset, len);
+ }
+ }
+
+ /// Emits a "copy 2" operation with the given offset and length. The
+ /// offset and length must be valid for a copy 2 operation. i.e., offset
+ /// must be in the range [1, 65535] and len must be in the range [1, 64].
+ #[inline(always)]
+ fn emit_copy2(&mut self, offset: usize, len: usize) {
+ debug_assert!(1 <= offset && offset <= 65535);
+ debug_assert!(1 <= len && len <= 64);
+ self.dst[self.d] = (((len - 1) as u8) << 2) | (Tag::Copy2 as u8);
+ bytes::write_u16_le(offset as u16, &mut self.dst[self.d + 1..]);
+ self.d += 3;
+ }
+
+ /// Attempts to extend a match from the current position in self.src with
+ /// the candidate position given.
+ ///
+ /// This method uses unaligned loads and elides bounds checks, so the
+ /// caller must guarantee that cand points to a valid location in self.src
+ /// and is less than the current position in src.
+ #[inline(always)]
+ unsafe fn extend_match(&mut self, mut cand: usize) {
+ debug_assert!(cand < self.s);
+ while self.s + 8 <= self.src.len() {
+ let srcp = self.src.as_ptr();
+ // SAFETY: The loop invariant guarantees that there is at least
+ // 8 bytes to read at self.src + self.s. Since cand must be
+ // guaranteed by the caller to be valid and less than self.s, it
+ // also has enough room to read 8 bytes.
+ //
+ // TODO(ag): Despite my best efforts, I couldn't get this to
+ // autovectorize with 128-bit loads. The logic after the loads
+ // appears to be a little too clever...
+ let x = bytes::loadu_u64_ne(srcp.add(self.s));
+ let y = bytes::loadu_u64_ne(srcp.add(cand));
+ if x == y {
+ // If all 8 bytes are equal, move on...
+ self.s += 8;
+ cand += 8;
+ } else {
+ // Otherwise, find the last byte that was equal. We can do
+ // this efficiently by interpreting x/y as little endian
+ // numbers, which lets us use the number of trailing zeroes
+ // as a proxy for the number of equivalent bits (after an XOR).
+ let z = x.to_le() ^ y.to_le();
+ self.s += z.trailing_zeros() as usize / 8;
+ return;
+ }
+ }
+ // When we have fewer than 8 bytes left in the block, fall back to the
+ // slow loop.
+ while self.s < self.src.len() && self.src[self.s] == self.src[cand] {
+ self.s += 1;
+ cand += 1;
+ }
+ }
+
+ /// Executes any cleanup when the current block has finished compressing.
+ /// In particular, it emits any leftover bytes as a literal.
+ #[inline(always)]
+ fn done(&mut self) {
+ if self.next_emit < self.src.len() {
+ let lit_end = self.src.len();
+ unsafe {
+ // SAFETY: Both next_emit and lit_end are trivially in bounds
+ // given the conditional and definition above.
+ self.emit_literal(lit_end);
+ }
+ }
+ }
+
+ /// Emits a literal from self.src[self.next_emit..lit_end].
+ ///
+ /// This uses unaligned loads and elides bounds checks, so the caller must
+ /// guarantee that self.src[self.next_emit..lit_end] is valid.
+ #[inline(always)]
+ unsafe fn emit_literal(&mut self, lit_end: usize) {
+ let lit_start = self.next_emit;
+ let len = lit_end - lit_start;
+ let n = len.checked_sub(1).unwrap();
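+ // Snappy stores the literal length minus one. A value of at most 59
+ // fits directly in the tag byte; otherwise tag values 60 and 61 signal
+ // that the length follows as one or two little endian bytes.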
+ if n <= 59 {
+ self.dst[self.d] = ((n as u8) << 2) | (Tag::Literal as u8);
+ self.d += 1;
+ if len <= 16 && lit_start + 16 <= self.src.len() {
+ // SAFETY: lit_start is equivalent to self.next_emit, which is
+ // only set to self.s immediately following a copy emit. The
+ // conditional above also ensures that there is at least 16
+ // bytes of room in both src and dst.
+ //
+ // dst is big enough because the buffer is guaranteed to
+ // be big enough to hold the biggest possible compressed size plus
+ // an extra 32 bytes, which exceeds the 16 byte copy here.
+ let srcp = self.src.as_ptr().add(lit_start);
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ ptr::copy_nonoverlapping(srcp, dstp, 16);
+ self.d += len;
+ return;
+ }
+ } else if n < 256 {
+ self.dst[self.d] = (60 << 2) | (Tag::Literal as u8);
+ self.dst[self.d + 1] = n as u8;
+ self.d += 2;
+ } else {
+ self.dst[self.d] = (61 << 2) | (Tag::Literal as u8);
+ bytes::write_u16_le(n as u16, &mut self.dst[self.d + 1..]);
+ self.d += 3;
+ }
+ // SAFETY: lit_start is equivalent to self.next_emit, which is only set
+ // to self.s immediately following a copy, which implies that it always
+ // points to valid bytes in self.src.
+ //
+ // We can't guarantee that there are at least len bytes though, which
+ // must be guaranteed by the caller and is why this method is unsafe.
+ let srcp = self.src.as_ptr().add(lit_start);
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ ptr::copy_nonoverlapping(srcp, dstp, len);
+ self.d += len;
+ }
+}
+
+/// `BlockTable` is a map from 4 byte sequences to positions of their most
+/// recent occurrence in a block. In particular, this table lets us quickly
+/// find candidates for compression.
+///
+/// We expose the `hash` method so that callers can be fastidious about the
+/// number of times a hash is computed.
+struct BlockTable<'a> {
+ table: &'a mut [u16],
+ /// The number of bits required to shift the hash such that the result
+ /// is less than table.len().
+ shift: u32,
+}
+
+impl Encoder {
+ fn block_table(&mut self, block_size: usize) -> BlockTable<'_> {
+ let mut shift: u32 = 32 - 8;
+ let mut table_size = 256;
+ while table_size < MAX_TABLE_SIZE && table_size < block_size {
+ shift -= 1;
+ table_size *= 2;
+ }
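+ // For example, a 3000 byte block stops the loop at table_size = 4096,
+ // leaving shift = 20, so hash values occupy the top 12 bits of the
+ // 32-bit product and always index within the table.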
+ // If our block size is small, then use a small stack allocated table
+ // instead of putting a bigger one on the heap. This particular
+ // optimization is important if the caller is using Snappy to compress
+ // many small blocks. (The memset savings alone is considerable.)
+ let table: &mut [u16] = if table_size <= SMALL_TABLE_SIZE {
+ &mut self.small[0..table_size]
+ } else {
+ if self.big.is_empty() {
+ // Interestingly, using `self.big.resize` here led to some
+ // very weird code getting generated that led to a large
+ // slow down. Forcing the issue with a new vec seems to
+ // fix it. ---AG
+ self.big = vec![0; MAX_TABLE_SIZE];
+ }
+ &mut self.big[0..table_size]
+ };
+ for x in &mut *table {
+ *x = 0;
+ }
+ BlockTable { table: table, shift: shift }
+ }
+}
+
+impl<'a> BlockTable<'a> {
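+ /// Hashes the given 4 byte little-endian integer into an index that is
+ /// always less than the table length: the multiply spreads the input
+ /// bits into the high bits of the product, and the shift (computed in
+ /// `block_table`) keeps just enough of them to index the table.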
+ #[inline(always)]
+ fn hash(&self, x: u32) -> usize {
+ (x.wrapping_mul(0x1E35A7BD) >> self.shift) as usize
+ }
+}
+
+impl<'a> Deref for BlockTable<'a> {
+ type Target = [u16];
+ fn deref(&self) -> &[u16] {
+ self.table
+ }
+}
+
+impl<'a> DerefMut for BlockTable<'a> {
+ fn deref_mut(&mut self) -> &mut [u16] {
+ self.table
+ }
+}