summaryrefslogtreecommitdiffstats
path: root/vendor/snap/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /vendor/snap/src
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--vendor/snap/src/bytes.rs118
-rw-r--r--vendor/snap/src/compress.rs539
-rw-r--r--vendor/snap/src/crc32.rs111
-rw-r--r--vendor/snap/src/crc32_table.rs2
-rw-r--r--vendor/snap/src/decompress.rs470
-rw-r--r--vendor/snap/src/error.rs333
-rw-r--r--vendor/snap/src/frame.rs104
-rw-r--r--vendor/snap/src/lib.rs109
-rw-r--r--vendor/snap/src/raw.rs14
-rw-r--r--vendor/snap/src/read.rs450
-rw-r--r--vendor/snap/src/tag.rs2
-rw-r--r--vendor/snap/src/varint.rs31
-rw-r--r--vendor/snap/src/write.rs215
13 files changed, 2498 insertions, 0 deletions
diff --git a/vendor/snap/src/bytes.rs b/vendor/snap/src/bytes.rs
new file mode 100644
index 000000000..4f198c668
--- /dev/null
+++ b/vendor/snap/src/bytes.rs
@@ -0,0 +1,118 @@
+use std::convert::TryInto;
+use std::io;
+
+/// Read a u16 in little endian format from the beginning of the given slice.
+/// This panics if the slice has length less than 2.
+pub fn read_u16_le(slice: &[u8]) -> u16 {
+ u16::from_le_bytes(slice[..2].try_into().unwrap())
+}
+
+/// Read a u24 (returned as a u32 with the most significant 8 bits always set
+/// to 0) in little endian format from the beginning of the given slice. This
+/// panics if the slice has length less than 3.
+pub fn read_u24_le(slice: &[u8]) -> u32 {
+ slice[0] as u32 | (slice[1] as u32) << 8 | (slice[2] as u32) << 16
+}
+
+/// Read a u32 in little endian format from the beginning of the given slice.
+/// This panics if the slice has length less than 4.
+pub fn read_u32_le(slice: &[u8]) -> u32 {
+ u32::from_le_bytes(slice[..4].try_into().unwrap())
+}
+
+/// Like read_u32_le, but from an io::Read implementation. If io::Read does
+/// not yield at least 4 bytes, then this returns an unexpected EOF error.
+pub fn io_read_u32_le<R: io::Read>(mut rdr: R) -> io::Result<u32> {
+ let mut buf = [0; 4];
+ rdr.read_exact(&mut buf)?;
+ Ok(u32::from_le_bytes(buf))
+}
+
+/// Write a u16 in little endian format to the beginning of the given slice.
+/// This panics if the slice has length less than 2.
+pub fn write_u16_le(n: u16, slice: &mut [u8]) {
+ assert!(slice.len() >= 2);
+ let bytes = n.to_le_bytes();
+ slice[0] = bytes[0];
+ slice[1] = bytes[1];
+}
+
+/// Write a u24 (given as a u32 where the most significant 8 bits are ignored)
+/// in little endian format to the beginning of the given slice. This panics
+/// if the slice has length less than 3.
+pub fn write_u24_le(n: u32, slice: &mut [u8]) {
+ slice[0] = n as u8;
+ slice[1] = (n >> 8) as u8;
+ slice[2] = (n >> 16) as u8;
+}
+
+/// Write a u32 in little endian format to the beginning of the given slice.
+/// This panics if the slice has length less than 4.
+pub fn write_u32_le(n: u32, slice: &mut [u8]) {
+ assert!(slice.len() >= 4);
+ let bytes = n.to_le_bytes();
+ slice[0] = bytes[0];
+ slice[1] = bytes[1];
+ slice[2] = bytes[2];
+ slice[3] = bytes[3];
+}
+
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+pub fn write_varu64(data: &mut [u8], mut n: u64) -> usize {
+ let mut i = 0;
+ while n >= 0b1000_0000 {
+ data[i] = (n as u8) | 0b1000_0000;
+ n >>= 7;
+ i += 1;
+ }
+ data[i] = n as u8;
+ i + 1
+}
+
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+pub fn read_varu64(data: &[u8]) -> (u64, usize) {
+ let mut n: u64 = 0;
+ let mut shift: u32 = 0;
+ for (i, &b) in data.iter().enumerate() {
+ if b < 0b1000_0000 {
+ return match (b as u64).checked_shl(shift) {
+ None => (0, 0),
+ Some(b) => (n | b, i + 1),
+ };
+ }
+ match ((b as u64) & 0b0111_1111).checked_shl(shift) {
+ None => return (0, 0),
+ Some(b) => n |= b,
+ }
+ shift += 7;
+ }
+ (0, 0)
+}
+
+/// Does an unaligned load of a little endian encoded u32.
+///
+/// This is unsafe because `data` must point to some memory of size at least 4.
+pub unsafe fn loadu_u32_le(data: *const u8) -> u32 {
+ loadu_u32_ne(data).to_le()
+}
+
+/// Does an unaligned load of a native endian encoded u32.
+///
+/// This is unsafe because `data` must point to some memory of size at least 4.
+pub unsafe fn loadu_u32_ne(data: *const u8) -> u32 {
+ (data as *const u32).read_unaligned()
+}
+
+/// Does an unaligned load of a little endian encoded u64.
+///
+/// This is unsafe because `data` must point to some memory of size at least 8.
+pub unsafe fn loadu_u64_le(data: *const u8) -> u64 {
+ loadu_u64_ne(data).to_le()
+}
+
+/// Does an unaligned load of a native endian encoded u64.
+///
+/// This is unsafe because `data` must point to some memory of size at least 8.
+pub unsafe fn loadu_u64_ne(data: *const u8) -> u64 {
+ (data as *const u64).read_unaligned()
+}
diff --git a/vendor/snap/src/compress.rs b/vendor/snap/src/compress.rs
new file mode 100644
index 000000000..1a6638df4
--- /dev/null
+++ b/vendor/snap/src/compress.rs
@@ -0,0 +1,539 @@
+use std::fmt;
+use std::ops::{Deref, DerefMut};
+use std::ptr;
+
+use crate::bytes;
+use crate::error::{Error, Result};
+use crate::{MAX_BLOCK_SIZE, MAX_INPUT_SIZE};
+
+/// The total number of slots we permit for our hash table of 4 byte repeat
+/// sequences.
+const MAX_TABLE_SIZE: usize = 1 << 14;
+
+/// The size of a small hash table. This is useful for reducing overhead when
+/// compressing very small blocks of bytes.
+const SMALL_TABLE_SIZE: usize = 1 << 10;
+
+/// The total number of bytes that we always leave uncompressed at the end
+/// of the buffer. This in particular affords us some wiggle room during
+/// compression such that faster copy operations can be used.
+const INPUT_MARGIN: usize = 16 - 1;
+
+/// The minimum block size that we're willing to consider for compression.
+/// Anything smaller than this gets emitted as a literal.
+const MIN_NON_LITERAL_BLOCK_SIZE: usize = 1 + 1 + INPUT_MARGIN;
+
+/// Nice names for the various Snappy tags.
+enum Tag {
+ Literal = 0b00,
+ Copy1 = 0b01,
+ Copy2 = 0b10,
+ // Compression never actually emits a Copy4 operation and decompression
+ // uses tricks so that we never explicitly do case analysis on the copy
+ // operation type, therefore leading to the fact that we never use Copy4.
+ #[allow(dead_code)]
+ Copy4 = 0b11,
+}
+
+/// Returns the maximum compressed size given the uncompressed size.
+///
+/// If the uncompressed size exceeds the maximum allowable size then this
+/// returns 0.
+pub fn max_compress_len(input_len: usize) -> usize {
+ let input_len = input_len as u64;
+ if input_len > MAX_INPUT_SIZE {
+ return 0;
+ }
+ let max = 32 + input_len + (input_len / 6);
+ if max > MAX_INPUT_SIZE {
+ 0
+ } else {
+ max as usize
+ }
+}
+
+/// Encoder is a raw encoder for compressing bytes in the Snappy format.
+///
+/// This encoder does not use the Snappy frame format and simply compresses the
+/// given bytes in one big Snappy block (that is, it has a single header).
+///
+/// Unless you explicitly need the low-level control, you should use
+/// [`read::FrameEncoder`](../read/struct.FrameEncoder.html)
+/// or
+/// [`write::FrameEncoder`](../write/struct.FrameEncoder.html)
+/// instead, which compresses to the Snappy frame format.
+///
+/// It is beneficial to reuse an Encoder when possible.
+pub struct Encoder {
+ small: [u16; SMALL_TABLE_SIZE],
+ big: Vec<u16>,
+}
+
+impl fmt::Debug for Encoder {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "Encoder(...)")
+ }
+}
+
+impl Encoder {
+ /// Return a new encoder that can be used for compressing bytes.
+ pub fn new() -> Encoder {
+ Encoder { small: [0; SMALL_TABLE_SIZE], big: vec![] }
+ }
+
+ /// Compresses all bytes in `input` into `output`.
+ ///
+ /// `input` can be any arbitrary sequence of bytes.
+ ///
+ /// `output` must be large enough to hold the maximum possible compressed
+ /// size of `input`, which can be computed using `max_compress_len`.
+ ///
+ /// On success, this returns the number of bytes written to `output`.
+ ///
+ /// # Errors
+ ///
+ /// This method returns an error in the following circumstances:
+ ///
+ /// * The total number of bytes to compress exceeds `2^32 - 1`.
+ /// * `output` has length less than `max_compress_len(input.len())`.
+ pub fn compress(
+ &mut self,
+ mut input: &[u8],
+ output: &mut [u8],
+ ) -> Result<usize> {
+ match max_compress_len(input.len()) {
+ 0 => {
+ return Err(Error::TooBig {
+ given: input.len() as u64,
+ max: MAX_INPUT_SIZE,
+ });
+ }
+ min if output.len() < min => {
+ return Err(Error::BufferTooSmall {
+ given: output.len() as u64,
+ min: min as u64,
+ });
+ }
+ _ => {}
+ }
+ // Handle an edge case specially.
+ if input.is_empty() {
+ // Encodes a varint of 0, denoting the total size of uncompressed
+ // bytes.
+ output[0] = 0;
+ return Ok(1);
+ }
+ // Write the Snappy header, which is just the total number of
+ // uncompressed bytes.
+ let mut d = bytes::write_varu64(output, input.len() as u64);
+ while !input.is_empty() {
+ // Find the next block.
+ let mut src = input;
+ if src.len() > MAX_BLOCK_SIZE {
+ src = &src[..MAX_BLOCK_SIZE as usize];
+ }
+ input = &input[src.len()..];
+
+ // If the block is smallish, then don't waste time on it and just
+ // emit a literal.
+ let mut block = Block::new(src, output, d);
+ if block.src.len() < MIN_NON_LITERAL_BLOCK_SIZE {
+ let lit_end = block.src.len();
+ unsafe {
+ // SAFETY: next_emit is zero (in bounds) and the end is
+ // the length of the block (in bounds).
+ block.emit_literal(lit_end);
+ }
+ } else {
+ let table = self.block_table(block.src.len());
+ block.compress(table);
+ }
+ d = block.d;
+ }
+ Ok(d)
+ }
+
+ /// Compresses all bytes in `input` into a freshly allocated `Vec`.
+ ///
+ /// This is just like the `compress` method, except it allocates a `Vec`
+ /// with the right size for you. (This is intended to be a convenience
+ /// method.)
+ ///
+ /// This method returns an error under the same circumstances that
+ /// `compress` does.
+ pub fn compress_vec(&mut self, input: &[u8]) -> Result<Vec<u8>> {
+ let mut buf = vec![0; max_compress_len(input.len())];
+ let n = self.compress(input, &mut buf)?;
+ buf.truncate(n);
+ Ok(buf)
+ }
+}
+
+struct Block<'s, 'd> {
+ src: &'s [u8],
+ s: usize,
+ s_limit: usize,
+ dst: &'d mut [u8],
+ d: usize,
+ next_emit: usize,
+}
+
+impl<'s, 'd> Block<'s, 'd> {
+ #[inline(always)]
+ fn new(src: &'s [u8], dst: &'d mut [u8], d: usize) -> Block<'s, 'd> {
+ Block {
+ src: src,
+ s: 0,
+ s_limit: src.len(),
+ dst: dst,
+ d: d,
+ next_emit: 0,
+ }
+ }
+
+ #[inline(always)]
+ fn compress(&mut self, mut table: BlockTable<'_>) {
+ debug_assert!(!table.is_empty());
+ debug_assert!(self.src.len() >= MIN_NON_LITERAL_BLOCK_SIZE);
+
+ self.s += 1;
+ self.s_limit -= INPUT_MARGIN;
+ let mut next_hash =
+ table.hash(bytes::read_u32_le(&self.src[self.s..]));
+ loop {
+ let mut skip = 32;
+ let mut candidate;
+ let mut s_next = self.s;
+ loop {
+ self.s = s_next;
+ let bytes_between_hash_lookups = skip >> 5;
+ s_next = self.s + bytes_between_hash_lookups;
+ skip += bytes_between_hash_lookups;
+ if s_next > self.s_limit {
+ return self.done();
+ }
+ unsafe {
+ // SAFETY: next_hash is always computed by table.hash
+ // which is guaranteed to be in bounds.
+ candidate = *table.get_unchecked(next_hash) as usize;
+ *table.get_unchecked_mut(next_hash) = self.s as u16;
+
+ let srcp = self.src.as_ptr();
+ // SAFETY: s_next is guaranteed to be less than s_limit by
+ // the conditional above, which implies s_next is in
+ // bounds.
+ let x = bytes::loadu_u32_le(srcp.add(s_next));
+ next_hash = table.hash(x);
+ // SAFETY: self.s is always less than s_next, so it is also
+ // in bounds by the argument above.
+ //
+ // candidate is extracted from table, which is only ever
+ // set to valid positions in the block and is therefore
+ // also in bounds.
+ //
+                    // We only need to compare y/z for equality, so we don't
+                    // need to bother with endianness. cur corresponds to the
+ // bytes at the current position and cand corresponds to
+ // a potential match. If they're equal, we declare victory
+ // and move below to try and extend the match.
+ let cur = bytes::loadu_u32_ne(srcp.add(self.s));
+ let cand = bytes::loadu_u32_ne(srcp.add(candidate));
+ if cur == cand {
+ break;
+ }
+ }
+ }
+ // While the above found a candidate for compression, before we
+ // emit a copy operation for it, we need to make sure that we emit
+ // any bytes between the last copy operation and this one as a
+ // literal.
+ let lit_end = self.s;
+ unsafe {
+ // SAFETY: next_emit is set to a previous value of self.s,
+ // which is guaranteed to be less than s_limit (in bounds).
+ // lit_end is set to the current value of self.s, also
+ // guaranteed to be less than s_limit (in bounds).
+ self.emit_literal(lit_end);
+ }
+ loop {
+ // Look for more matching bytes starting at the position of
+ // the candidate and the current src position. We increment
+ // self.s and candidate by 4 since we already know the first 4
+ // bytes match.
+ let base = self.s;
+ self.s += 4;
+ unsafe {
+ // SAFETY: candidate is always set to a value from our
+ // hash table, which only contains positions in self.src
+ // that have been seen for this block that occurred before
+ // self.s.
+ self.extend_match(candidate + 4);
+ }
+ let (offset, len) = (base - candidate, self.s - base);
+ self.emit_copy(offset, len);
+ self.next_emit = self.s;
+ if self.s >= self.s_limit {
+ return self.done();
+ }
+ // Update the hash table with the byte sequences
+ // self.src[self.s - 1..self.s + 3] and
+ // self.src[self.s..self.s + 4]. Instead of reading 4 bytes
+ // twice, we read 8 bytes once.
+ //
+ // If we happen to get a hit on self.src[self.s..self.s + 4],
+ // then continue this loop and extend the match.
+ unsafe {
+ let srcp = self.src.as_ptr();
+ // SAFETY: self.s can never exceed s_limit given by the
+ // conditional above and self.s is guaranteed to be
+ // non-zero and is therefore in bounds.
+ let x = bytes::loadu_u64_le(srcp.add(self.s - 1));
+ // The lower 4 bytes of x correspond to
+ // self.src[self.s - 1..self.s + 3].
+ let prev_hash = table.hash(x as u32);
+ // SAFETY: Hash values are guaranteed to be in bounds.
+ *table.get_unchecked_mut(prev_hash) = (self.s - 1) as u16;
+ // The lower 4 bytes of x>>8 correspond to
+ // self.src[self.s..self.s + 4].
+ let cur_hash = table.hash((x >> 8) as u32);
+ // SAFETY: Hash values are guaranteed to be in bounds.
+ candidate = *table.get_unchecked(cur_hash) as usize;
+ *table.get_unchecked_mut(cur_hash) = self.s as u16;
+
+ // SAFETY: candidate is set from table, which always
+ // contains valid positions in the current block.
+ let y = bytes::loadu_u32_le(srcp.add(candidate));
+ if (x >> 8) as u32 != y {
+ // If we didn't get a hit, update the next hash
+ // and move on. Our initial 8 byte read continues to
+ // pay off.
+ next_hash = table.hash((x >> 16) as u32);
+ self.s += 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /// Emits one or more copy operations with the given offset and length.
+ /// offset must be in the range [1, 65535] and len must be in the range
+ /// [4, 65535].
+ #[inline(always)]
+ fn emit_copy(&mut self, offset: usize, mut len: usize) {
+ debug_assert!(1 <= offset && offset <= 65535);
+ // Copy operations only allow lengths up to 64, but we'll allow bigger
+ // lengths and emit as many operations as we need.
+ //
+ // N.B. Since our block size is 64KB, we never actually emit a copy 4
+ // operation.
+ debug_assert!(4 <= len && len <= 65535);
+
+ // Emit copy 2 operations until we don't have to.
+ // We check on 68 here and emit a shorter copy than 64 below because
+ // it is cheaper to, e.g., encode a length 67 copy as a length 60
+ // copy 2 followed by a length 7 copy 1 than to encode it as a length
+        // copy 2 followed by a length 3 copy 2. The key here is that a
+ // copy 1 operation requires at least length 4 which forces a length 3
+ // copy to use a copy 2 operation.
+ while len >= 68 {
+ self.emit_copy2(offset, 64);
+ len -= 64;
+ }
+ if len > 64 {
+ self.emit_copy2(offset, 60);
+ len -= 60;
+ }
+ // If we can squeeze the last copy into a copy 1 operation, do it.
+ if len <= 11 && offset <= 2047 {
+ self.dst[self.d] = (((offset >> 8) as u8) << 5)
+ | (((len - 4) as u8) << 2)
+ | (Tag::Copy1 as u8);
+ self.dst[self.d + 1] = offset as u8;
+ self.d += 2;
+ } else {
+ self.emit_copy2(offset, len);
+ }
+ }
+
+ /// Emits a "copy 2" operation with the given offset and length. The
+ /// offset and length must be valid for a copy 2 operation. i.e., offset
+ /// must be in the range [1, 65535] and len must be in the range [1, 64].
+ #[inline(always)]
+ fn emit_copy2(&mut self, offset: usize, len: usize) {
+ debug_assert!(1 <= offset && offset <= 65535);
+ debug_assert!(1 <= len && len <= 64);
+ self.dst[self.d] = (((len - 1) as u8) << 2) | (Tag::Copy2 as u8);
+ bytes::write_u16_le(offset as u16, &mut self.dst[self.d + 1..]);
+ self.d += 3;
+ }
+
+ /// Attempts to extend a match from the current position in self.src with
+ /// the candidate position given.
+ ///
+ /// This method uses unaligned loads and elides bounds checks, so the
+ /// caller must guarantee that cand points to a valid location in self.src
+ /// and is less than the current position in src.
+ #[inline(always)]
+ unsafe fn extend_match(&mut self, mut cand: usize) {
+ debug_assert!(cand < self.s);
+ while self.s + 8 <= self.src.len() {
+ let srcp = self.src.as_ptr();
+ // SAFETY: The loop invariant guarantees that there is at least
+ // 8 bytes to read at self.src + self.s. Since cand must be
+ // guaranteed by the caller to be valid and less than self.s, it
+ // also has enough room to read 8 bytes.
+ //
+ // TODO(ag): Despite my best efforts, I couldn't get this to
+ // autovectorize with 128-bit loads. The logic after the loads
+ // appears to be a little too clever...
+ let x = bytes::loadu_u64_ne(srcp.add(self.s));
+ let y = bytes::loadu_u64_ne(srcp.add(cand));
+ if x == y {
+ // If all 8 bytes are equal, move on...
+ self.s += 8;
+ cand += 8;
+ } else {
+ // Otherwise, find the last byte that was equal. We can do
+                // this efficiently by interpreting x/y as little endian
+ // numbers, which lets us use the number of trailing zeroes
+ // as a proxy for the number of equivalent bits (after an XOR).
+ let z = x.to_le() ^ y.to_le();
+ self.s += z.trailing_zeros() as usize / 8;
+ return;
+ }
+ }
+ // When we have fewer than 8 bytes left in the block, fall back to the
+ // slow loop.
+ while self.s < self.src.len() && self.src[self.s] == self.src[cand] {
+ self.s += 1;
+ cand += 1;
+ }
+ }
+
+ /// Executes any cleanup when the current block has finished compressing.
+ /// In particular, it emits any leftover bytes as a literal.
+ #[inline(always)]
+ fn done(&mut self) {
+ if self.next_emit < self.src.len() {
+ let lit_end = self.src.len();
+ unsafe {
+ // SAFETY: Both next_emit and lit_end are trivially in bounds
+ // given the conditional and definition above.
+ self.emit_literal(lit_end);
+ }
+ }
+ }
+
+ /// Emits a literal from self.src[self.next_emit..lit_end].
+ ///
+ /// This uses unaligned loads and elides bounds checks, so the caller must
+ /// guarantee that self.src[self.next_emit..lit_end] is valid.
+ #[inline(always)]
+ unsafe fn emit_literal(&mut self, lit_end: usize) {
+ let lit_start = self.next_emit;
+ let len = lit_end - lit_start;
+ let n = len.checked_sub(1).unwrap();
+ if n <= 59 {
+ self.dst[self.d] = ((n as u8) << 2) | (Tag::Literal as u8);
+ self.d += 1;
+ if len <= 16 && lit_start + 16 <= self.src.len() {
+ // SAFETY: lit_start is equivalent to self.next_emit, which is
+ // only set to self.s immediately following a copy emit. The
+ // conditional above also ensures that there is at least 16
+ // bytes of room in both src and dst.
+ //
+ // dst is big enough because the buffer is guaranteed to
+ // be big enough to hold biggest possible compressed size plus
+ // an extra 32 bytes, which exceeds the 16 byte copy here.
+ let srcp = self.src.as_ptr().add(lit_start);
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ ptr::copy_nonoverlapping(srcp, dstp, 16);
+ self.d += len;
+ return;
+ }
+ } else if n < 256 {
+ self.dst[self.d] = (60 << 2) | (Tag::Literal as u8);
+ self.dst[self.d + 1] = n as u8;
+ self.d += 2;
+ } else {
+ self.dst[self.d] = (61 << 2) | (Tag::Literal as u8);
+ bytes::write_u16_le(n as u16, &mut self.dst[self.d + 1..]);
+ self.d += 3;
+ }
+ // SAFETY: lit_start is equivalent to self.next_emit, which is only set
+ // to self.s immediately following a copy, which implies that it always
+ // points to valid bytes in self.src.
+ //
+ // We can't guarantee that there are at least len bytes though, which
+ // must be guaranteed by the caller and is why this method is unsafe.
+ let srcp = self.src.as_ptr().add(lit_start);
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ ptr::copy_nonoverlapping(srcp, dstp, len);
+ self.d += len;
+ }
+}
+
+/// `BlockTable` is a map from 4 byte sequences to positions of their most
+/// recent occurrence in a block. In particular, this table lets us quickly
+/// find candidates for compression.
+///
+/// We expose the `hash` method so that callers can be fastidious about the
+/// number of times a hash is computed.
+struct BlockTable<'a> {
+ table: &'a mut [u16],
+ /// The number of bits required to shift the hash such that the result
+ /// is less than table.len().
+ shift: u32,
+}
+
+impl Encoder {
+ fn block_table(&mut self, block_size: usize) -> BlockTable<'_> {
+ let mut shift: u32 = 32 - 8;
+ let mut table_size = 256;
+ while table_size < MAX_TABLE_SIZE && table_size < block_size {
+ shift -= 1;
+ table_size *= 2;
+ }
+ // If our block size is small, then use a small stack allocated table
+ // instead of putting a bigger one on the heap. This particular
+ // optimization is important if the caller is using Snappy to compress
+ // many small blocks. (The memset savings alone is considerable.)
+ let table: &mut [u16] = if table_size <= SMALL_TABLE_SIZE {
+ &mut self.small[0..table_size]
+ } else {
+ if self.big.is_empty() {
+ // Interestingly, using `self.big.resize` here led to some
+ // very weird code getting generated that led to a large
+ // slow down. Forcing the issue with a new vec seems to
+ // fix it. ---AG
+ self.big = vec![0; MAX_TABLE_SIZE];
+ }
+ &mut self.big[0..table_size]
+ };
+ for x in &mut *table {
+ *x = 0;
+ }
+ BlockTable { table: table, shift: shift }
+ }
+}
+
+impl<'a> BlockTable<'a> {
+ #[inline(always)]
+ fn hash(&self, x: u32) -> usize {
+ (x.wrapping_mul(0x1E35A7BD) >> self.shift) as usize
+ }
+}
+
+impl<'a> Deref for BlockTable<'a> {
+ type Target = [u16];
+ fn deref(&self) -> &[u16] {
+ self.table
+ }
+}
+
+impl<'a> DerefMut for BlockTable<'a> {
+ fn deref_mut(&mut self) -> &mut [u16] {
+ self.table
+ }
+}
diff --git a/vendor/snap/src/crc32.rs b/vendor/snap/src/crc32.rs
new file mode 100644
index 000000000..1298ef04a
--- /dev/null
+++ b/vendor/snap/src/crc32.rs
@@ -0,0 +1,111 @@
+use crate::bytes;
+use crate::crc32_table::{TABLE, TABLE16};
+
+/// Provides a simple API to generate "masked" CRC32C checksums specifically
+/// for use in Snappy. When available, this will make use of SSE 4.2 to compute
+/// checksums. Otherwise, it falls back to only-marginally-slower "slicing by
+/// 16" technique.
+///
+/// The main purpose of this type is to cache the CPU feature check and expose
+/// a safe API.
+#[derive(Clone, Copy, Debug)]
+pub struct CheckSummer {
+ sse42: bool,
+}
+
+impl CheckSummer {
+ /// Create a new checksummer that can compute CRC32C checksums on arbitrary
+ /// bytes.
+ #[cfg(not(target_arch = "x86_64"))]
+ pub fn new() -> CheckSummer {
+ CheckSummer { sse42: false }
+ }
+
+ /// Create a new checksummer that can compute CRC32C checksums on arbitrary
+ /// bytes.
+ #[cfg(target_arch = "x86_64")]
+ pub fn new() -> CheckSummer {
+ CheckSummer { sse42: is_x86_feature_detected!("sse4.2") }
+ }
+
+ /// Returns the "masked" CRC32 checksum of `buf` using the Castagnoli
+ /// polynomial. This "masked" checksum is defined by the Snappy frame
+ /// format. Masking is supposed to make the checksum robust with respect to
+ /// the data that contains the checksum itself.
+ pub fn crc32c_masked(&self, buf: &[u8]) -> u32 {
+ let sum = self.crc32c(buf);
+ (sum.wrapping_shr(15) | sum.wrapping_shl(17)).wrapping_add(0xA282EAD8)
+ }
+
+ /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
+ #[cfg(not(target_arch = "x86_64"))]
+ fn crc32c(&self, buf: &[u8]) -> u32 {
+ crc32c_slice16(buf)
+ }
+
+ /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
+ #[cfg(target_arch = "x86_64")]
+ fn crc32c(&self, buf: &[u8]) -> u32 {
+ if self.sse42 {
+ // SAFETY: When sse42 is true, we are guaranteed to be running on
+ // a CPU that supports SSE 4.2.
+ unsafe { crc32c_sse(buf) }
+ } else {
+ crc32c_slice16(buf)
+ }
+ }
+}
+
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "sse4.2")]
+unsafe fn crc32c_sse(buf: &[u8]) -> u32 {
+ use std::arch::x86_64::*;
+
+ let mut crc = !0u32;
+ // SAFETY: This is safe since alignment is handled by align_to (oh how I
+ // love you) and since 8 adjacent u8's are guaranteed to have the same
+ // in-memory representation as u64 for all possible values.
+ let (prefix, u64s, suffix) = buf.align_to::<u64>();
+ for &b in prefix {
+ // SAFETY: Safe since we have sse4.2 enabled.
+ crc = _mm_crc32_u8(crc, b);
+ }
+ for &n in u64s {
+ // SAFETY: Safe since we have sse4.2 enabled.
+ crc = _mm_crc32_u64(crc as u64, n) as u32;
+ }
+ for &b in suffix {
+ // SAFETY: Safe since we have sse4.2 enabled.
+ crc = _mm_crc32_u8(crc, b);
+ }
+ !crc
+}
+
+/// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
+fn crc32c_slice16(mut buf: &[u8]) -> u32 {
+ let mut crc: u32 = !0;
+ while buf.len() >= 16 {
+ crc ^= bytes::read_u32_le(buf);
+ crc = TABLE16[0][buf[15] as usize]
+ ^ TABLE16[1][buf[14] as usize]
+ ^ TABLE16[2][buf[13] as usize]
+ ^ TABLE16[3][buf[12] as usize]
+ ^ TABLE16[4][buf[11] as usize]
+ ^ TABLE16[5][buf[10] as usize]
+ ^ TABLE16[6][buf[9] as usize]
+ ^ TABLE16[7][buf[8] as usize]
+ ^ TABLE16[8][buf[7] as usize]
+ ^ TABLE16[9][buf[6] as usize]
+ ^ TABLE16[10][buf[5] as usize]
+ ^ TABLE16[11][buf[4] as usize]
+ ^ TABLE16[12][(crc >> 24) as u8 as usize]
+ ^ TABLE16[13][(crc >> 16) as u8 as usize]
+ ^ TABLE16[14][(crc >> 8) as u8 as usize]
+ ^ TABLE16[15][(crc) as u8 as usize];
+ buf = &buf[16..];
+ }
+ for &b in buf {
+ crc = TABLE[((crc as u8) ^ b) as usize] ^ (crc >> 8);
+ }
+ !crc
+}
diff --git a/vendor/snap/src/crc32_table.rs b/vendor/snap/src/crc32_table.rs
new file mode 100644
index 000000000..7821b5d86
--- /dev/null
+++ b/vendor/snap/src/crc32_table.rs
@@ -0,0 +1,2 @@
+// Generated by build.rs.
+include!(concat!(env!("OUT_DIR"), "/crc32_table.rs"));
diff --git a/vendor/snap/src/decompress.rs b/vendor/snap/src/decompress.rs
new file mode 100644
index 000000000..07ab16b4c
--- /dev/null
+++ b/vendor/snap/src/decompress.rs
@@ -0,0 +1,470 @@
+use std::ptr;
+
+use crate::bytes;
+use crate::error::{Error, Result};
+use crate::tag;
+use crate::MAX_INPUT_SIZE;
+
+/// A lookup table for quickly computing the various attributes derived from a
+/// tag byte.
+const TAG_LOOKUP_TABLE: TagLookupTable = TagLookupTable(tag::TAG_LOOKUP_TABLE);
+
+/// `WORD_MASK` is a map from the size of an integer in bytes to its
+/// corresponding mask on a 32 bit integer. This is used when we need to read an
+/// integer and we know there are at least 4 bytes to read from a buffer. In
+/// this case, we can read a 32 bit little endian integer and mask out only the
+/// bits we need. This in particular saves a branch.
+const WORD_MASK: [usize; 5] = [0, 0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF];
+
+/// Returns the decompressed size (in bytes) of the compressed bytes given.
+///
+/// `input` must be a sequence of bytes returned by a conforming Snappy
+/// compressor.
+///
+/// # Errors
+///
+/// This function returns an error in the following circumstances:
+///
+/// * An invalid Snappy header was seen.
+/// * The total space required for decompression exceeds `2^32 - 1`.
+pub fn decompress_len(input: &[u8]) -> Result<usize> {
+ if input.is_empty() {
+ // An empty input decompresses to nothing.
+ return Ok(0);
+ }
+ // The decompressed length is recorded in the varint header.
+ Ok(Header::read(input)?.decompress_len)
+}
+
+/// Decoder is a raw decoder for decompressing bytes in the Snappy format.
+///
+/// This decoder does not use the Snappy frame format and simply decompresses
+/// the given bytes as if it were returned from `Encoder`.
+///
+/// Unless you explicitly need the low-level control, you should use
+/// [`read::FrameDecoder`](../read/struct.FrameDecoder.html)
+/// instead, which decompresses the Snappy frame format.
+#[derive(Clone, Debug, Default)]
+pub struct Decoder {
+ // Place holder for potential future fields; the private field also
+ // prevents construction via a struct literal outside this module.
+ _dummy: (),
+}
+
+impl Decoder {
+ /// Return a new decoder that can be used for decompressing bytes.
+ pub fn new() -> Decoder {
+ Decoder { _dummy: () }
+ }
+
+ /// Decompresses all bytes in `input` into `output`.
+ ///
+ /// `input` must be a sequence of bytes returned by a conforming Snappy
+ /// compressor.
+ ///
+ /// The size of `output` must be large enough to hold all decompressed
+ /// bytes from the `input`. The size required can be queried with the
+ /// `decompress_len` function.
+ ///
+ /// On success, this returns the number of bytes written to `output`.
+ ///
+ /// # Errors
+ ///
+ /// This method returns an error in the following circumstances:
+ ///
+ /// * Invalid compressed Snappy data was seen.
+ /// * The total space required for decompression exceeds `2^32 - 1`.
+ /// * `output` has length less than `decompress_len(input)`.
+ pub fn decompress(
+ &mut self,
+ input: &[u8],
+ output: &mut [u8],
+ ) -> Result<usize> {
+ if input.is_empty() {
+ return Err(Error::Empty);
+ }
+ let hdr = Header::read(input)?;
+ if hdr.decompress_len > output.len() {
+ return Err(Error::BufferTooSmall {
+ given: output.len() as u64,
+ min: hdr.decompress_len as u64,
+ });
+ }
+ // Restrict the destination to exactly the promised decompressed
+ // size; `Decompress::decompress` verifies it fills it completely.
+ let dst = &mut output[..hdr.decompress_len];
+ let mut dec =
+ Decompress { src: &input[hdr.len..], s: 0, dst: dst, d: 0 };
+ dec.decompress()?;
+ // On success the whole of `dst` was written, so its length is the
+ // number of bytes produced.
+ Ok(dec.dst.len())
+ }
+
+ /// Decompresses all bytes in `input` into a freshly allocated `Vec`.
+ ///
+ /// This is just like the `decompress` method, except it allocates a `Vec`
+ /// with the right size for you. (This is intended to be a convenience
+ /// method.)
+ ///
+ /// This method returns an error under the same circumstances that
+ /// `decompress` does.
+ pub fn decompress_vec(&mut self, input: &[u8]) -> Result<Vec<u8>> {
+ let mut buf = vec![0; decompress_len(input)?];
+ let n = self.decompress(input, &mut buf)?;
+ // Trim to the number of bytes actually written.
+ buf.truncate(n);
+ Ok(buf)
+ }
+}
+
+/// Decompress is the state of the Snappy decompressor.
+struct Decompress<'s, 'd> {
+ /// The original compressed bytes not including the header.
+ src: &'s [u8],
+ /// The current position in the compressed bytes.
+ s: usize,
+ /// The output buffer to write the decompressed bytes.
+ dst: &'d mut [u8],
+ /// The current position in the decompressed buffer.
+ d: usize,
+}
+
+impl<'s, 'd> Decompress<'s, 'd> {
+ /// Decompresses snappy compressed bytes in `src` to `dst`.
+ ///
+ /// This assumes that the header has already been read and that `dst` is
+ /// big enough to store all decompressed bytes.
+ fn decompress(&mut self) -> Result<()> {
+ while self.s < self.src.len() {
+ let byte = self.src[self.s];
+ self.s += 1;
+ // The two least significant bits of the tag byte select the
+ // element type: 00 is a literal, anything else is a copy.
+ if byte & 0b000000_11 == 0 {
+ let len = (byte >> 2) as usize + 1;
+ self.read_literal(len)?;
+ } else {
+ self.read_copy(byte)?;
+ }
+ }
+ // The header promised exactly `dst.len()` decompressed bytes; if we
+ // produced fewer, the input was truncated or corrupt.
+ if self.d != self.dst.len() {
+ return Err(Error::HeaderMismatch {
+ expected_len: self.dst.len() as u64,
+ got_len: self.d as u64,
+ });
+ }
+ Ok(())
+ }
+
+ /// Decompresses a literal from `src` starting at `s` to `dst` starting at
+ /// `d` and returns the updated values of `s` and `d`. `s` should point to
+ /// the byte immediately following the literal tag byte.
+ ///
+ /// `len` is the length of the literal if it's <=60. Otherwise, it's the
+ /// length tag, indicating the number of bytes needed to read a little
+ /// endian integer at `src[s..]`. i.e., `61 => 1 byte`, `62 => 2 bytes`,
+ /// `63 => 3 bytes` and `64 => 4 bytes`.
+ ///
+ /// `len` must be <=64.
+ #[inline(always)]
+ fn read_literal(&mut self, len: usize) -> Result<()> {
+ debug_assert!(len <= 64);
+ let mut len = len as u64;
+ // As an optimization for the common case, if the literal length is
+ // <=16 and we have enough room in both `src` and `dst`, copy the
+ // literal using unaligned loads and stores.
+ //
+ // We pick 16 bytes with the hope that it optimizes down to a 128 bit
+ // load/store.
+ if len <= 16
+ && self.s + 16 <= self.src.len()
+ && self.d + 16 <= self.dst.len()
+ {
+ unsafe {
+ // SAFETY: We know both src and dst have at least 16 bytes of
+ // wiggle room after s/d, even if `len` is <16, so the copy is
+ // safe.
+ let srcp = self.src.as_ptr().add(self.s);
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ // Hopefully uses SIMD registers for 128 bit load/store.
+ ptr::copy_nonoverlapping(srcp, dstp, 16);
+ }
+ // Advance by the true literal length, not the 16 bytes copied;
+ // any over-copied bytes are overwritten by subsequent elements.
+ self.d += len as usize;
+ self.s += len as usize;
+ return Ok(());
+ }
+ // When the length is bigger than 60, it indicates that we need to read
+ // an additional 1-4 bytes to get the real length of the literal.
+ if len >= 61 {
+ // If there aren't at least 4 bytes left to read then we know this
+ // is corrupt because the literal must have length >=61.
+ if self.s as u64 + 4 > self.src.len() as u64 {
+ return Err(Error::Literal {
+ len: 4,
+ src_len: (self.src.len() - self.s) as u64,
+ dst_len: (self.dst.len() - self.d) as u64,
+ });
+ }
+ // Since we know there are 4 bytes left to read, read a 32 bit LE
+ // integer and mask away the bits we don't need.
+ let byte_count = len as usize - 60;
+ len = bytes::read_u32_le(&self.src[self.s..]) as u64;
+ len = (len & (WORD_MASK[byte_count] as u64)) + 1;
+ self.s += byte_count;
+ }
+ // If there's not enough buffer left to load or store this literal,
+ // then the input is corrupt.
+ // This is equivalent to `s + len > src.len() || d + len > dst.len()`,
+ // but written with subtraction (in u64) so it cannot overflow.
+ if ((self.src.len() - self.s) as u64) < len
+ || ((self.dst.len() - self.d) as u64) < len
+ {
+ return Err(Error::Literal {
+ len: len,
+ src_len: (self.src.len() - self.s) as u64,
+ dst_len: (self.dst.len() - self.d) as u64,
+ });
+ }
+ unsafe {
+ // SAFETY: We've already checked the bounds, so we know this copy
+ // is correct.
+ let srcp = self.src.as_ptr().add(self.s);
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ ptr::copy_nonoverlapping(srcp, dstp, len as usize);
+ }
+ self.s += len as usize;
+ self.d += len as usize;
+ Ok(())
+ }
+
+ /// Reads a copy from `src` and writes the decompressed bytes to `dst`. `s`
+ /// should point to the byte immediately following the copy tag byte.
+ #[inline(always)]
+ fn read_copy(&mut self, tag_byte: u8) -> Result<()> {
+ // Find the copy offset and len, then advance the input past the copy.
+ // The rest of this function deals with reading/writing to output only.
+ let entry = TAG_LOOKUP_TABLE.entry(tag_byte);
+ let offset = entry.offset(self.src, self.s)?;
+ let len = entry.len();
+ self.s += entry.num_tag_bytes();
+
+ // What we really care about here is whether `d == 0` or `d < offset`.
+ // To save an extra branch, use `d < offset - 1` instead. If `d` is
+ // `0`, then `offset.wrapping_sub(1)` will be usize::MAX which is also
+ // the max value of `d`.
+ if self.d <= offset.wrapping_sub(1) {
+ return Err(Error::Offset {
+ offset: offset as u64,
+ dst_pos: self.d as u64,
+ });
+ }
+ // When all is said and done, dst is advanced to end.
+ let end = self.d + len;
+ // When the copy is small and the offset is at least 8 bytes away from
+ // `d`, then we can decompress the copy with two 64 bit unaligned
+ // loads/stores.
+ if offset >= 8 && len <= 16 && self.d + 16 <= self.dst.len() {
+ unsafe {
+ // SAFETY: We know dstp points to at least 16 bytes of memory
+ // from the condition above, and we also know that dstp is
+ // preceded by at least `offset` bytes from the `d <= offset`
+ // check above.
+ //
+ // We also know that dstp and dstp-8 do not overlap from the
+ // check above, justifying the use of copy_nonoverlapping.
+ let dstp = self.dst.as_mut_ptr().add(self.d);
+ let srcp = dstp.sub(offset);
+ // We can't do a single 16 byte load/store because src/dst may
+ // overlap with each other. Namely, the second copy here may
+ // copy bytes written in the first copy!
+ ptr::copy_nonoverlapping(srcp, dstp, 8);
+ ptr::copy_nonoverlapping(srcp.add(8), dstp.add(8), 8);
+ }
+ // If we have some wiggle room, try to decompress the copy 16 bytes
+ // at a time with 128 bit unaligned loads/stores. Remember, we can't
+ // just do a memcpy because decompressing copies may require copying
+ // overlapping memory.
+ //
+ // We need the extra wiggle room to make effective use of 128 bit
+ // loads/stores. Even if the store ends up copying more data than we
+ // need, we're careful to advance `d` by the correct amount at the end.
+ } else if end + 24 <= self.dst.len() {
+ unsafe {
+ // SAFETY: We know that dstp is preceded by at least `offset`
+ // bytes from the `d <= offset` check above.
+ //
+ // We don't know whether dstp overlaps with srcp, so we start
+ // by copying from srcp to dstp until they no longer overlap.
+ // The worst case is when dstp-src = 3 and copy length = 1. The
+ // first loop will issue these copy operations before stopping:
+ //
+ // [-1, 14] -> [0, 15]
+ // [-1, 14] -> [3, 18]
+ // [-1, 14] -> [9, 24]
+ //
+ // But the copy had length 1, so it was only supposed to write
+ // to [0, 0]. But the last copy wrote to [9, 24], which is 24
+ // extra bytes in dst *beyond* the end of the copy, which is
+ // guaranteed by the conditional above.
+ let mut dstp = self.dst.as_mut_ptr().add(self.d);
+ let mut srcp = dstp.sub(offset);
+ loop {
+ debug_assert!(dstp >= srcp);
+ let diff = (dstp as usize) - (srcp as usize);
+ if diff >= 16 {
+ break;
+ }
+ // srcp and dstp can overlap, so use ptr::copy.
+ debug_assert!(self.d + 16 <= self.dst.len());
+ ptr::copy(srcp, dstp, 16);
+ self.d += diff as usize;
+ dstp = dstp.add(diff);
+ }
+ while self.d < end {
+ ptr::copy_nonoverlapping(srcp, dstp, 16);
+ srcp = srcp.add(16);
+ dstp = dstp.add(16);
+ self.d += 16;
+ }
+ // At this point, `d` is likely wrong. We correct it before
+ // returning. Its correct value is `end`.
+ }
+ } else {
+ if end > self.dst.len() {
+ return Err(Error::CopyWrite {
+ len: len as u64,
+ dst_len: (self.dst.len() - self.d) as u64,
+ });
+ }
+ // Finally, the slow byte-by-byte case, which should only be used
+ // for the last few bytes of decompression.
+ while self.d != end {
+ self.dst[self.d] = self.dst[self.d - offset];
+ self.d += 1;
+ }
+ }
+ self.d = end;
+ Ok(())
+ }
+}
+
+/// Header represents the single varint that starts every Snappy compressed
+/// block.
+#[derive(Debug)]
+struct Header {
+ /// The length of the header in bytes (i.e., the varint).
+ len: usize,
+ /// The length of the original decompressed input in bytes.
+ decompress_len: usize,
+}
+
+impl Header {
+ /// Reads the varint header from the given input.
+ ///
+ /// If there was a problem reading the header then an error is returned.
+ /// If a header is returned then it is guaranteed to be valid.
+ #[inline(always)]
+ fn read(input: &[u8]) -> Result<Header> {
+ let (decompress_len, header_len) = bytes::read_varu64(input);
+ // A zero header length means the varint was empty or malformed.
+ if header_len == 0 {
+ return Err(Error::Header);
+ }
+ // Cap the claimed decompressed size so callers can't be tricked
+ // into huge allocations by a corrupt header.
+ if decompress_len > MAX_INPUT_SIZE {
+ return Err(Error::TooBig {
+ given: decompress_len as u64,
+ max: MAX_INPUT_SIZE,
+ });
+ }
+ Ok(Header { len: header_len, decompress_len: decompress_len as usize })
+ }
+}
+
+/// A lookup table for quickly computing the various attributes derived from
+/// a tag byte. The attributes are most useful for the three "copy" tags
+/// and include the length of the copy, part of the offset (for copy 1-byte
+/// only) and the total number of bytes following the tag byte that encode
+/// the other part of the offset (1 for copy 1, 2 for copy 2 and 4 for copy 4).
+///
+/// More specifically, the keys of the table are u8s and the values are u16s.
+/// The bits of the values are laid out as follows:
+///
+/// xxaa abbb xxcc cccc
+///
+/// Where `a` is the number of bytes, `b` are the three bits of the offset
+/// for copy 1 (the other 8 bits are in the byte following the tag byte; for
+/// copy 2 and copy 4, `b = 0`), and `c` is the length of the copy (max of 64).
+///
+/// We could pack this in fewer bits, but the position of the three `b` bits
+/// lines up with the most significant three bits in the total offset for copy
+/// 1, which avoids an extra shift instruction.
+///
+/// In sum, this table is useful because it reduces branches and various
+/// arithmetic operations.
+struct TagLookupTable([u16; 256]);
+
+impl TagLookupTable {
+ /// Look up the tag entry given the tag `byte`.
+ #[inline(always)]
+ fn entry(&self, byte: u8) -> TagEntry {
+ // Widen to usize once so the bit extractions below need no casts.
+ TagEntry(self.0[byte as usize] as usize)
+ }
+}
+
+/// Represents a single entry in the tag lookup table.
+///
+/// See the documentation in `TagLookupTable` for the bit layout.
+///
+/// The type is a `usize` for convenience.
+struct TagEntry(usize);
+
+impl TagEntry {
+ /// Return the total number of bytes following this tag byte required to
+ /// encode the offset.
+ fn num_tag_bytes(&self) -> usize {
+ // The `aa a` bits from the layout described on `TagLookupTable`.
+ self.0 >> 11
+ }
+
+ /// Return the total copy length, capped at 255.
+ fn len(&self) -> usize {
+ // The `cc cccc` bits from the layout described on `TagLookupTable`.
+ self.0 & 0xFF
+ }
+
+ /// Return the copy offset corresponding to this copy operation. `s` should
+ /// point to the position just after the tag byte that this entry was read
+ /// from.
+ ///
+ /// This requires reading from the compressed input since the offset is
+ /// encoded in bytes following the tag byte.
+ fn offset(&self, src: &[u8], s: usize) -> Result<usize> {
+ let num_tag_bytes = self.num_tag_bytes();
+ let trailer =
+ // It is critical for this case to come first, since it is the
+ // fast path. We really hope that this case gets branch
+ // predicted.
+ if s + 4 <= src.len() {
+ unsafe {
+ // SAFETY: The conditional above guarantees that
+ // src[s..s+4] is valid to read from.
+ let p = src.as_ptr().add(s);
+ // We use WORD_MASK here to mask out the bits we don't
+ // need. While we're guaranteed to read 4 valid bytes,
+ // not all of those bytes are necessarily part of the
+ // offset. This is the key optimization: we don't need to
+ // branch on num_tag_bytes.
+ bytes::loadu_u32_le(p) as usize & WORD_MASK[num_tag_bytes]
+ }
+ } else if num_tag_bytes == 1 {
+ if s >= src.len() {
+ return Err(Error::CopyRead {
+ len: 1,
+ src_len: (src.len() - s) as u64,
+ });
+ }
+ src[s] as usize
+ } else if num_tag_bytes == 2 {
+ if s + 1 >= src.len() {
+ return Err(Error::CopyRead {
+ len: 2,
+ src_len: (src.len() - s) as u64,
+ });
+ }
+ bytes::read_u16_le(&src[s..]) as usize
+ } else {
+ // A 4 byte offset this close to the end of the input cannot
+ // be satisfied, so the input is corrupt.
+ return Err(Error::CopyRead {
+ len: num_tag_bytes as u64,
+ src_len: (src.len() - s) as u64,
+ });
+ };
+ // For copy 1, OR in the three high offset bits stored in the tag
+ // byte itself (`b` bits); for copy 2/4 these bits are zero.
+ Ok((self.0 & 0b0000_0111_0000_0000) | trailer)
+ }
+}
diff --git a/vendor/snap/src/error.rs b/vendor/snap/src/error.rs
new file mode 100644
index 000000000..8f0b400a6
--- /dev/null
+++ b/vendor/snap/src/error.rs
@@ -0,0 +1,333 @@
+use std::fmt;
+use std::io;
+use std::result;
+
+/// A convenient type alias for `Result<T, snap::Error>`.
+pub type Result<T> = result::Result<T, Error>;
+
+/// `IntoInnerError` occurs when consuming an encoder fails.
+///
+/// Consuming the encoder causes a flush to happen. If the flush fails, then
+/// this error is returned, which contains both the original encoder and the
+/// error that occurred.
+///
+/// The type parameter `W` is the unconsumed writer.
+pub struct IntoInnerError<W> {
+ // The writer that was being consumed, returned to the caller for
+ // recovery via `into_inner`.
+ wtr: W,
+ // The flush error that aborted the consumption.
+ err: io::Error,
+}
+
+impl<W> IntoInnerError<W> {
+ pub(crate) fn new(wtr: W, err: io::Error) -> IntoInnerError<W> {
+ IntoInnerError { wtr, err }
+ }
+
+ /// Returns the error which caused the call to `into_inner` to fail.
+ ///
+ /// This error was returned when attempting to flush the internal buffer.
+ pub fn error(&self) -> &io::Error {
+ &self.err
+ }
+
+ /// Returns the underlying writer which generated the error.
+ ///
+ /// The returned value can be used for error recovery, such as
+ /// re-inspecting the buffer.
+ pub fn into_inner(self) -> W {
+ self.wtr
+ }
+}
+
+// `W: Any` is required because `Error` demands `Debug`+`Display`, which are
+// delegated below to the underlying `io::Error` regardless of `W`.
+impl<W: std::any::Any> std::error::Error for IntoInnerError<W> {}
+
+// Both formatting impls delegate to the wrapped `io::Error`; the writer
+// itself is intentionally not printed.
+impl<W> fmt::Display for IntoInnerError<W> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.err.fmt(f)
+ }
+}
+
+impl<W> fmt::Debug for IntoInnerError<W> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.err.fmt(f)
+ }
+}
+
+/// Error describes all the possible errors that may occur during Snappy
+/// compression or decompression.
+///
+/// Note that it's unlikely that you'll need to care about the specific error
+/// reported since all of them indicate a corrupt Snappy data or a limitation
+/// that cannot be worked around. Therefore,
+/// `From<snap::Error> for std::io::Error` is provided so that any Snappy
+/// errors will be converted to a `std::io::Error` automatically when using
+/// the `?` operator.
+#[derive(Clone, Debug)]
+pub enum Error {
+ /// This error occurs when the given input is too big. This can happen
+ /// during compression or decompression.
+ TooBig {
+ /// The size of the given input.
+ given: u64,
+ /// The maximum allowed size of an input buffer.
+ max: u64,
+ },
+ /// This error occurs when the given buffer is too small to contain the
+ /// maximum possible compressed bytes or the total number of decompressed
+ /// bytes.
+ BufferTooSmall {
+ /// The size of the given output buffer.
+ given: u64,
+ /// The minimum size of the output buffer.
+ min: u64,
+ },
+ /// This error occurs when trying to decompress a zero length buffer.
+ Empty,
+ /// This error occurs when an invalid header is found during decompression.
+ Header,
+ /// This error occurs when there is a mismatch between the number of
+ /// decompressed bytes reported in the header and the number of
+ /// actual decompressed bytes. In this error case, the number of actual
+ /// decompressed bytes is always less than the number reported in the
+ /// header.
+ HeaderMismatch {
+ /// The total number of decompressed bytes expected (i.e., the header
+ /// value).
+ expected_len: u64,
+ /// The total number of actual decompressed bytes.
+ got_len: u64,
+ },
+ /// This error occurs during decompression when there was a problem
+ /// reading a literal.
+ Literal {
+ /// The expected length of the literal.
+ len: u64,
+ /// The number of remaining bytes in the compressed bytes.
+ src_len: u64,
+ /// The number of remaining slots in the decompression buffer.
+ dst_len: u64,
+ },
+ /// This error occurs during decompression when there was a problem
+ /// reading a copy.
+ CopyRead {
+ /// The expected length of the copy (as encoded in the compressed
+ /// bytes).
+ len: u64,
+ /// The number of remaining bytes in the compressed bytes.
+ src_len: u64,
+ },
+ /// This error occurs during decompression when there was a problem
+ /// writing a copy to the decompression buffer.
+ CopyWrite {
+ /// The length of the copy (i.e., the total number of bytes to be
+ /// produced by this copy in the decompression buffer).
+ len: u64,
+ /// The number of remaining bytes in the decompression buffer.
+ dst_len: u64,
+ },
+ /// This error occurs during decompression when an invalid copy offset
+ /// is found. An offset is invalid if it is zero or if it is out of bounds.
+ Offset {
+ /// The offset that was read.
+ offset: u64,
+ /// The current position in the decompression buffer. If the offset is
+ /// non-zero, then the offset must be greater than this position.
+ dst_pos: u64,
+ },
+ /// This error occurs when a stream header chunk type was expected but got
+ /// a different chunk type.
+ /// This error only occurs when reading a Snappy frame formatted stream.
+ StreamHeader {
+ /// The chunk type byte that was read.
+ byte: u8,
+ },
+ /// This error occurs when the magic stream headers bytes do not match
+ /// what is expected.
+ /// This error only occurs when reading a Snappy frame formatted stream.
+ StreamHeaderMismatch {
+ /// The bytes that were read.
+ bytes: Vec<u8>,
+ },
+ /// This error occurs when an unsupported chunk type is seen.
+ /// This error only occurs when reading a Snappy frame formatted stream.
+ UnsupportedChunkType {
+ /// The chunk type byte that was read.
+ byte: u8,
+ },
+ /// This error occurs when trying to read a chunk with an unexpected or
+ /// incorrect length when reading a Snappy frame formatted stream.
+ /// This error only occurs when reading a Snappy frame formatted stream.
+ UnsupportedChunkLength {
+ /// The length of the chunk encountered.
+ len: u64,
+ /// True when this error occurred while reading the stream header.
+ header: bool,
+ },
+ /// This error occurs when a checksum validity check fails.
+ /// This error only occurs when reading a Snappy frame formatted stream.
+ Checksum {
+ /// The expected checksum read from the stream.
+ expected: u32,
+ /// The computed checksum.
+ got: u32,
+ },
+}
+
+// Wrap any Snappy error as `io::ErrorKind::Other` so `?` can convert it
+// in functions returning `io::Result`.
+impl From<Error> for io::Error {
+ fn from(err: Error) -> io::Error {
+ io::Error::new(io::ErrorKind::Other, err)
+ }
+}
+
+impl Eq for Error {}
+
+// NOTE(review): this manual impl matches what `#[derive(PartialEq)]` would
+// produce (same-variant, field-wise comparison); kept as written upstream.
+impl PartialEq for Error {
+ fn eq(&self, other: &Error) -> bool {
+ use self::Error::*;
+ match (self, other) {
+ (
+ &TooBig { given: given1, max: max1 },
+ &TooBig { given: given2, max: max2 },
+ ) => (given1, max1) == (given2, max2),
+ (
+ &BufferTooSmall { given: given1, min: min1 },
+ &BufferTooSmall { given: given2, min: min2 },
+ ) => (given1, min1) == (given2, min2),
+ (&Empty, &Empty) | (&Header, &Header) => true,
+ (
+ &HeaderMismatch { expected_len: elen1, got_len: glen1 },
+ &HeaderMismatch { expected_len: elen2, got_len: glen2 },
+ ) => (elen1, glen1) == (elen2, glen2),
+ (
+ &Literal { len: len1, src_len: src_len1, dst_len: dst_len1 },
+ &Literal { len: len2, src_len: src_len2, dst_len: dst_len2 },
+ ) => (len1, src_len1, dst_len1) == (len2, src_len2, dst_len2),
+ (
+ &CopyRead { len: len1, src_len: src_len1 },
+ &CopyRead { len: len2, src_len: src_len2 },
+ ) => (len1, src_len1) == (len2, src_len2),
+ (
+ &CopyWrite { len: len1, dst_len: dst_len1 },
+ &CopyWrite { len: len2, dst_len: dst_len2 },
+ ) => (len1, dst_len1) == (len2, dst_len2),
+ (
+ &Offset { offset: offset1, dst_pos: dst_pos1 },
+ &Offset { offset: offset2, dst_pos: dst_pos2 },
+ ) => (offset1, dst_pos1) == (offset2, dst_pos2),
+ (&StreamHeader { byte: byte1 }, &StreamHeader { byte: byte2 }) => {
+ byte1 == byte2
+ }
+ (
+ &StreamHeaderMismatch { bytes: ref bytes1 },
+ &StreamHeaderMismatch { bytes: ref bytes2 },
+ ) => bytes1 == bytes2,
+ (
+ &UnsupportedChunkType { byte: byte1 },
+ &UnsupportedChunkType { byte: byte2 },
+ ) => byte1 == byte2,
+ (
+ &UnsupportedChunkLength { len: len1, header: header1 },
+ &UnsupportedChunkLength { len: len2, header: header2 },
+ ) => (len1, header1) == (len2, header2),
+ (
+ &Checksum { expected: e1, got: g1 },
+ &Checksum { expected: e2, got: g2 },
+ ) => (e1, g1) == (e2, g2),
+ _ => false,
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+// All messages are prefixed with "snappy:" so converted `io::Error`s are
+// attributable to this crate.
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match *self {
+ Error::TooBig { given, max } => write!(
+ f,
+ "snappy: input buffer (size = {}) is larger than \
+ allowed (size = {})",
+ given, max
+ ),
+ Error::BufferTooSmall { given, min } => write!(
+ f,
+ "snappy: output buffer (size = {}) is smaller than \
+ required (size = {})",
+ given, min
+ ),
+ Error::Empty => write!(f, "snappy: corrupt input (empty)"),
+ Error::Header => {
+ write!(f, "snappy: corrupt input (invalid header)")
+ }
+ Error::HeaderMismatch { expected_len, got_len } => write!(
+ f,
+ "snappy: corrupt input (header mismatch; expected \
+ {} decompressed bytes but got {})",
+ expected_len, got_len
+ ),
+ Error::Literal { len, src_len, dst_len } => write!(
+ f,
+ "snappy: corrupt input (expected literal read of \
+ length {}; remaining src: {}; remaining dst: {})",
+ len, src_len, dst_len
+ ),
+ Error::CopyRead { len, src_len } => write!(
+ f,
+ "snappy: corrupt input (expected copy read of \
+ length {}; remaining src: {})",
+ len, src_len
+ ),
+ Error::CopyWrite { len, dst_len } => write!(
+ f,
+ "snappy: corrupt input (expected copy write of \
+ length {}; remaining dst: {})",
+ len, dst_len
+ ),
+ Error::Offset { offset, dst_pos } => write!(
+ f,
+ "snappy: corrupt input (expected valid offset but \
+ got offset {}; dst position: {})",
+ offset, dst_pos
+ ),
+ Error::StreamHeader { byte } => write!(
+ f,
+ "snappy: corrupt input (expected stream header but \
+ got unexpected chunk type byte {})",
+ byte
+ ),
+ Error::StreamHeaderMismatch { ref bytes } => write!(
+ f,
+ "snappy: corrupt input (expected sNaPpY stream \
+ header but got {})",
+ escape(&**bytes)
+ ),
+ Error::UnsupportedChunkType { byte } => write!(
+ f,
+ "snappy: corrupt input (unsupported chunk type: {})",
+ byte
+ ),
+ Error::UnsupportedChunkLength { len, header: false } => write!(
+ f,
+ "snappy: corrupt input \
+ (unsupported chunk length: {})",
+ len
+ ),
+ Error::UnsupportedChunkLength { len, header: true } => write!(
+ f,
+ "snappy: corrupt input \
+ (invalid stream header length: {})",
+ len
+ ),
+ Error::Checksum { expected, got } => write!(
+ f,
+ "snappy: corrupt input (bad checksum; \
+ expected: {}, got: {})",
+ expected, got
+ ),
+ }
+ }
+}
+
+/// Renders arbitrary bytes as a printable string, replacing non-printable
+/// bytes with standard ASCII escapes (used in error messages above).
+fn escape(bytes: &[u8]) -> String {
+ use std::ascii::escape_default;
+ bytes.iter().flat_map(|&b| escape_default(b)).map(|b| b as char).collect()
+}
diff --git a/vendor/snap/src/frame.rs b/vendor/snap/src/frame.rs
new file mode 100644
index 000000000..3c56f9a88
--- /dev/null
+++ b/vendor/snap/src/frame.rs
@@ -0,0 +1,104 @@
+use crate::bytes;
+use crate::compress::{max_compress_len, Encoder};
+use crate::crc32::CheckSummer;
+use crate::error::Error;
+use crate::MAX_BLOCK_SIZE;
+
+/// The maximum chunk of compressed bytes that can be processed at one time.
+///
+/// This is computed via `max_compress_len(MAX_BLOCK_SIZE)`.
+///
+/// TODO(ag): Replace with const fn once they support nominal branching.
+pub const MAX_COMPRESS_BLOCK_SIZE: usize = 76490;
+
+/// The special magic string that starts any stream.
+///
+/// This may appear more than once in a stream in order to support easy
+/// concatenation of files compressed in the Snappy frame format.
+///
+/// The first four bytes are the stream chunk type (0xFF) and the 24 bit
+/// little endian length (6) of the `sNaPpY` body that follows.
+pub const STREAM_IDENTIFIER: &'static [u8] = b"\xFF\x06\x00\x00sNaPpY";
+
+/// The body of the special stream identifier.
+pub const STREAM_BODY: &'static [u8] = b"sNaPpY";
+
+/// The length of a snappy chunk type (1 byte), packet length (3 bytes)
+/// and CRC field (4 bytes). This is technically the chunk header _plus_
+/// the CRC present in most chunks.
+pub const CHUNK_HEADER_AND_CRC_SIZE: usize = 8;
+
+/// An enumeration describing each of the 4 main chunk types.
+///
+/// The discriminant of each variant is the chunk type byte as it appears
+/// on the wire in the Snappy frame format.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum ChunkType {
+ Stream = 0xFF,
+ Compressed = 0x00,
+ Uncompressed = 0x01,
+ Padding = 0xFE,
+}
+
+impl ChunkType {
+ /// Converts a byte to one of the four defined chunk types represented by
+ /// a single byte. If the chunk type is reserved, then it is returned as
+ /// an Err.
+ pub fn from_u8(b: u8) -> Result<ChunkType, u8> {
+ match b {
+ 0xFF => Ok(ChunkType::Stream),
+ 0x00 => Ok(ChunkType::Compressed),
+ 0x01 => Ok(ChunkType::Uncompressed),
+ 0xFE => Ok(ChunkType::Padding),
+ // Reserved chunk type: hand the raw byte back to the caller.
+ b => Err(b),
+ }
+ }
+}
+
+/// Compress a single frame (or decide to pass it through uncompressed). This
+/// will output a frame header in `dst_chunk_header`, and it will return a slice
+/// pointing to the data to use in the frame. The `dst_chunk_header` array must
+/// always have a size of 8 bytes.
+///
+/// If `always_use_dst` is set to false, the return value may point into either
+/// `src` (for data we couldn't compress) or into `dst` (for data we could
+/// compress). If `always_use_dst` is true, the data will always be in `dst`.
+/// This is a bit weird, but because of Rust's ownership rules, it's easiest
+/// for a single function to always be in charge of writing to `dst`.
+pub fn compress_frame<'a>(
+ enc: &mut Encoder,
+ checksummer: CheckSummer,
+ src: &'a [u8],
+ dst_chunk_header: &mut [u8],
+ dst: &'a mut [u8],
+ always_use_dst: bool,
+) -> Result<&'a [u8], Error> {
+ // This is a purely internal function, with a bunch of preconditions.
+ assert!(src.len() <= MAX_BLOCK_SIZE);
+ assert!(dst.len() >= max_compress_len(MAX_BLOCK_SIZE));
+ assert_eq!(dst_chunk_header.len(), CHUNK_HEADER_AND_CRC_SIZE);
+
+ // Build a checksum of our _uncompressed_ data. The frame format always
+ // carries the checksum of the original bytes, whichever chunk type we
+ // end up emitting.
+ let checksum = checksummer.crc32c_masked(src);
+
+ // Compress the buffer. If compression sucked, throw it out and
+ // write uncompressed bytes instead. Since our buffer is at most
+ // MAX_BLOCK_SIZE and our dst buffer has size
+ // max_compress_len(MAX_BLOCK_SIZE), we have enough space.
+ let compress_len = enc.compress(src, dst)?;
+ let (chunk_type, chunk_len) =
+ // We add 4 to the chunk_len because of the checksum.
+ // Keep the compressed form only when it saves more than 1/8th of
+ // the input; otherwise store the block uncompressed.
+ if compress_len >= src.len() - (src.len() / 8) {
+ (ChunkType::Uncompressed, 4 + src.len())
+ } else {
+ (ChunkType::Compressed, 4 + compress_len)
+ };
+
+ // Header layout: type byte, 24 bit LE chunk length, 32 bit LE checksum.
+ dst_chunk_header[0] = chunk_type as u8;
+ bytes::write_u24_le(chunk_len as u32, &mut dst_chunk_header[1..]);
+ bytes::write_u32_le(checksum, &mut dst_chunk_header[4..]);
+
+ // Return the data to put in our frame.
+ if chunk_type == ChunkType::Compressed {
+ Ok(&dst[0..compress_len])
+ } else if always_use_dst {
+ dst[..src.len()].copy_from_slice(src);
+ Ok(&dst[..src.len()])
+ } else {
+ Ok(src)
+ }
+}
diff --git a/vendor/snap/src/lib.rs b/vendor/snap/src/lib.rs
new file mode 100644
index 000000000..231b04755
--- /dev/null
+++ b/vendor/snap/src/lib.rs
@@ -0,0 +1,109 @@
+/*!
+This crate provides an implementation of the
+[Snappy compression format](https://github.com/google/snappy/blob/master/format_description.txt),
+as well as the
+[framing format](https://github.com/google/snappy/blob/master/framing_format.txt).
+The goal of Snappy is to provide reasonable compression at high speed. On a
+modern CPU, Snappy can compress data at about 300 MB/sec or more and can
+decompress data at about 800 MB/sec or more.
+
+# Install
+
+To use this crate with
+[Cargo](https://doc.rust-lang.org/cargo/),
+simply add it as a dependency to your `Cargo.toml`:
+
+```ignore
+[dependencies]
+snap = "1"
+```
+
+# Overview
+
+This crate provides two ways to use Snappy. The first way is through the
+[`read::FrameDecoder`](read/struct.FrameDecoder.html)
+and
+[`write::FrameEncoder`](write/struct.FrameEncoder.html)
+types, which implement the `std::io::Read` and `std::io::Write` traits with the
+Snappy frame format. Unless you have a specific reason to the contrary, you
+should only use the Snappy frame format. Specifically, the Snappy frame format
+permits streaming compression or decompression.
+
+The second way is through the
+[`raw::Decoder`](raw/struct.Decoder.html)
+and
+[`raw::Encoder`](raw/struct.Encoder.html)
+types. These types provide lower level control to the raw Snappy format, and
+don't support a streaming interface directly. You should only use these types
+if you know you specifically need the Snappy raw format.
+
+Finally, the `Error` type in this crate provides an exhaustive list of error
+conditions that are probably useless in most circumstances. Therefore,
+`From<snap::Error> for io::Error` is implemented in this crate, which will let
+you automatically convert a Snappy error to an `std::io::Error` (when using
+`?`) with an appropriate error message to display to an end user.
+
+# Example: compress data on `stdin`
+
+This program reads data from `stdin`, compresses it and emits it to `stdout`.
+This example can be found in `examples/compress.rs`:
+
+```no_run
+use std::io;
+
+fn main() {
+ let stdin = io::stdin();
+ let stdout = io::stdout();
+
+ let mut rdr = stdin.lock();
+ // Wrap the stdout writer in a Snappy writer.
+ let mut wtr = snap::write::FrameEncoder::new(stdout.lock());
+ io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
+}
+```
+
+# Example: decompress data on `stdin`
+
+This program reads data from `stdin`, decompresses it and emits it to `stdout`.
+This example can be found in `examples/decompress.rs`:
+
+```no_run
+use std::io;
+
+fn main() {
+ let stdin = io::stdin();
+ let stdout = io::stdout();
+
+ // Wrap the stdin reader in a Snappy reader.
+ let mut rdr = snap::read::FrameDecoder::new(stdin.lock());
+ let mut wtr = stdout.lock();
+ io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
+}
+```
+*/
+
+#![deny(missing_docs)]
+
+// Run the code examples embedded in README.md as doctests.
+#[cfg(test)]
+doc_comment::doctest!("../README.md");
+
+pub use crate::error::{Error, Result};
+
+/// We don't permit compressing a block bigger than what can fit in a u32.
+const MAX_INPUT_SIZE: u64 = std::u32::MAX as u64;
+
+/// The maximum number of bytes that we process at once. A block is the unit
+/// at which we scan for candidates for compression.
+const MAX_BLOCK_SIZE: usize = 1 << 16;
+
+// Internal modules. Only `raw`, `read` and `write` are part of the public
+// API; everything else is an implementation detail.
+mod bytes;
+mod compress;
+mod crc32;
+mod crc32_table;
+mod decompress;
+mod error;
+mod frame;
+pub mod raw;
+pub mod read;
+mod tag;
+pub mod write;
diff --git a/vendor/snap/src/raw.rs b/vendor/snap/src/raw.rs
new file mode 100644
index 000000000..8b0eed263
--- /dev/null
+++ b/vendor/snap/src/raw.rs
@@ -0,0 +1,14 @@
+/*!
+This module provides a raw Snappy encoder and decoder.
+
+A raw Snappy encoder/decoder can only compress/decompress a fixed amount of
+data at a time. For this reason, this module is lower level and more difficult
+to use than the higher level streaming readers and writers exposed as part of
+the [`read`](../read/index.html) and [`write`](../write/index.html) modules.
+
+Generally, one only needs to use the raw format if some other source is
+generating raw Snappy compressed data and you have no choice but to do the
+same. Otherwise, the Snappy frame format should probably always be preferred.
+*/
+pub use crate::compress::{max_compress_len, Encoder};
+pub use crate::decompress::{decompress_len, Decoder};
diff --git a/vendor/snap/src/read.rs b/vendor/snap/src/read.rs
new file mode 100644
index 000000000..a924bf91d
--- /dev/null
+++ b/vendor/snap/src/read.rs
@@ -0,0 +1,450 @@
+/*!
+This module provides two `std::io::Read` implementations:
+
+* [`read::FrameDecoder`](struct.FrameDecoder.html)
+ wraps another `std::io::Read` implementation, and decompresses data encoded
+ using the Snappy frame format. Use this if you have a compressed data source
+ and wish to read it as uncompressed data.
+* [`read::FrameEncoder`](struct.FrameEncoder.html)
+ wraps another `std::io::Read` implementation, and compresses data encoded
+ using the Snappy frame format. Use this if you have an uncompressed data
+ source and wish to read it as compressed data.
+
+Typically, `read::FrameDecoder` is the version that you'll want.
+*/
+
+use std::cmp;
+use std::fmt;
+use std::io;
+
+use crate::bytes;
+use crate::compress::Encoder;
+use crate::crc32::CheckSummer;
+use crate::decompress::{decompress_len, Decoder};
+use crate::error::Error;
+use crate::frame::{
+ compress_frame, ChunkType, CHUNK_HEADER_AND_CRC_SIZE,
+ MAX_COMPRESS_BLOCK_SIZE, STREAM_BODY, STREAM_IDENTIFIER,
+};
+use crate::MAX_BLOCK_SIZE;
+
+/// The maximum size of a compressed block, including the header and stream
+/// identifier, that can be emitted by FrameEncoder. That is: the stream
+/// identifier, plus one chunk header (with CRC), plus one maximally-expanded
+/// compressed block.
+const MAX_READ_FRAME_ENCODER_BLOCK_SIZE: usize = STREAM_IDENTIFIER.len()
+ + CHUNK_HEADER_AND_CRC_SIZE
+ + MAX_COMPRESS_BLOCK_SIZE;
+
+/// A reader for decompressing a Snappy stream.
+///
+/// This `FrameDecoder` wraps any other reader that implements `std::io::Read`.
+/// Bytes read from this reader are decompressed using the
+/// [Snappy frame format](https://github.com/google/snappy/blob/master/framing_format.txt)
+/// (file extension `sz`, MIME type `application/x-snappy-framed`).
+///
+/// This reader can potentially make many small reads from the underlying
+/// stream depending on its format, therefore, passing in a buffered reader
+/// may be beneficial.
+pub struct FrameDecoder<R: io::Read> {
+ /// The underlying reader.
+ r: R,
+ /// A Snappy decoder that we reuse that does the actual block based
+ /// decompression.
+ dec: Decoder,
+ /// A CRC32 checksummer that is configured to either use the portable
+ /// fallback version or the SSE4.2 accelerated version when the right CPU
+ /// features are available.
+ checksummer: CheckSummer,
+ /// The compressed bytes buffer, taken from the underlying reader.
+ /// Allocated once at MAX_COMPRESS_BLOCK_SIZE and reused for every chunk.
+ src: Vec<u8>,
+ /// The decompressed bytes buffer. Bytes are decompressed from src to dst
+ /// before being passed back to the caller. Allocated once at
+ /// MAX_BLOCK_SIZE.
+ dst: Vec<u8>,
+ /// Index into dst: starting point of bytes not yet given back to caller.
+ dsts: usize,
+ /// Index into dst: ending point of bytes not yet given back to caller.
+ dste: usize,
+ /// Whether we've read the special stream header or not.
+ read_stream_ident: bool,
+}
+
+impl<R: io::Read> FrameDecoder<R> {
+ /// Create a new reader for streaming Snappy decompression.
+ ///
+ /// Both internal buffers are allocated eagerly here so that `read` never
+ /// needs to allocate.
+ pub fn new(rdr: R) -> FrameDecoder<R> {
+ FrameDecoder {
+ r: rdr,
+ dec: Decoder::new(),
+ checksummer: CheckSummer::new(),
+ src: vec![0; MAX_COMPRESS_BLOCK_SIZE],
+ dst: vec![0; MAX_BLOCK_SIZE],
+ dsts: 0,
+ dste: 0,
+ read_stream_ident: false,
+ }
+ }
+
+ /// Gets a reference to the underlying reader in this decoder.
+ pub fn get_ref(&self) -> &R {
+ &self.r
+ }
+
+ /// Gets a mutable reference to the underlying reader in this decoder.
+ ///
+ /// Note that mutation of the stream may result in surprising results if
+ /// this decoder is continued to be used.
+ pub fn get_mut(&mut self) -> &mut R {
+ &mut self.r
+ }
+}
+
+impl<R: io::Read> io::Read for FrameDecoder<R> {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ // Convert one of our internal Errors into an io::Error on return.
+ macro_rules! fail {
+ ($err:expr) => {
+ return Err(io::Error::from($err))
+ };
+ }
+ loop {
+ // First, drain any decompressed bytes we still owe the caller.
+ if self.dsts < self.dste {
+ let len = cmp::min(self.dste - self.dsts, buf.len());
+ let dste = self.dsts.checked_add(len).unwrap();
+ buf[0..len].copy_from_slice(&self.dst[self.dsts..dste]);
+ self.dsts = dste;
+ return Ok(len);
+ }
+ // Read the 4 byte chunk header: a 1 byte chunk type followed by
+ // a 3 byte little endian chunk length. A clean EOF here marks
+ // the end of the stream.
+ if !read_exact_eof(&mut self.r, &mut self.src[0..4])? {
+ return Ok(0);
+ }
+ let ty = ChunkType::from_u8(self.src[0]);
+ // The very first chunk of a stream must be the stream
+ // identifier.
+ if !self.read_stream_ident {
+ if ty != Ok(ChunkType::Stream) {
+ fail!(Error::StreamHeader { byte: self.src[0] });
+ }
+ self.read_stream_ident = true;
+ }
+ // The chunk length counts the chunk body only (it does not
+ // include the 4 byte header we just read).
+ let len64 = bytes::read_u24_le(&self.src[1..]) as u64;
+ if len64 > self.src.len() as u64 {
+ fail!(Error::UnsupportedChunkLength {
+ len: len64,
+ header: false,
+ });
+ }
+ let len = len64 as usize;
+ match ty {
+ Err(b) if 0x02 <= b && b <= 0x7F => {
+ // Spec says that chunk types 0x02-0x7F are reserved and
+ // conformant decoders must return an error.
+ fail!(Error::UnsupportedChunkType { byte: b });
+ }
+ Err(b) if 0x80 <= b && b <= 0xFD => {
+ // Spec says that chunk types 0x80-0xFD are reserved but
+ // skippable.
+ self.r.read_exact(&mut self.src[0..len])?;
+ }
+ Err(b) => {
+ // Can never happen. 0x02-0x7F and 0x80-0xFD are handled
+ // above in the error case. That leaves 0x00, 0x01, 0xFE
+ // and 0xFF, each of which correspond to one of the four
+ // defined chunk types.
+ unreachable!("BUG: unhandled chunk type: {}", b);
+ }
+ Ok(ChunkType::Padding) => {
+ // Just read and move on.
+ self.r.read_exact(&mut self.src[0..len])?;
+ }
+ Ok(ChunkType::Stream) => {
+ // A (possibly repeated) stream header; its body must
+ // match the magic bytes exactly.
+ if len != STREAM_BODY.len() {
+ fail!(Error::UnsupportedChunkLength {
+ len: len64,
+ header: true,
+ })
+ }
+ self.r.read_exact(&mut self.src[0..len])?;
+ if &self.src[0..len] != STREAM_BODY {
+ fail!(Error::StreamHeaderMismatch {
+ bytes: self.src[0..len].to_vec(),
+ });
+ }
+ }
+ Ok(ChunkType::Uncompressed) => {
+ // Chunk body is a 4 byte masked CRC32C checksum followed
+ // by the raw data, which we copy straight into dst.
+ if len < 4 {
+ fail!(Error::UnsupportedChunkLength {
+ len: len as u64,
+ header: false,
+ });
+ }
+ let expected_sum = bytes::io_read_u32_le(&mut self.r)?;
+ let n = len - 4;
+ if n > self.dst.len() {
+ fail!(Error::UnsupportedChunkLength {
+ len: n as u64,
+ header: false,
+ });
+ }
+ self.r.read_exact(&mut self.dst[0..n])?;
+ let got_sum =
+ self.checksummer.crc32c_masked(&self.dst[0..n]);
+ if expected_sum != got_sum {
+ fail!(Error::Checksum {
+ expected: expected_sum,
+ got: got_sum,
+ });
+ }
+ self.dsts = 0;
+ self.dste = n;
+ }
+ Ok(ChunkType::Compressed) => {
+ // As above, but the body after the checksum is Snappy
+ // compressed. Note that the checksum covers the
+ // *decompressed* bytes.
+ if len < 4 {
+ fail!(Error::UnsupportedChunkLength {
+ len: len as u64,
+ header: false,
+ });
+ }
+ let expected_sum = bytes::io_read_u32_le(&mut self.r)?;
+ let sn = len - 4;
+ if sn > self.src.len() {
+ fail!(Error::UnsupportedChunkLength {
+ len: len64,
+ header: false,
+ });
+ }
+ self.r.read_exact(&mut self.src[0..sn])?;
+ let dn = decompress_len(&self.src)?;
+ if dn > self.dst.len() {
+ fail!(Error::UnsupportedChunkLength {
+ len: dn as u64,
+ header: false,
+ });
+ }
+ self.dec
+ .decompress(&self.src[0..sn], &mut self.dst[0..dn])?;
+ let got_sum =
+ self.checksummer.crc32c_masked(&self.dst[0..dn]);
+ if expected_sum != got_sum {
+ fail!(Error::Checksum {
+ expected: expected_sum,
+ got: got_sum,
+ });
+ }
+ self.dsts = 0;
+ self.dste = dn;
+ }
+ }
+ }
+ }
+}
+
+impl<R: fmt::Debug + io::Read> fmt::Debug for FrameDecoder<R> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // The large src/dst buffers are elided to keep the output readable.
+ f.debug_struct("FrameDecoder")
+ .field("r", &self.r)
+ .field("dec", &self.dec)
+ .field("checksummer", &self.checksummer)
+ .field("src", &"[...]")
+ .field("dst", &"[...]")
+ .field("dsts", &self.dsts)
+ .field("dste", &self.dste)
+ .field("read_stream_ident", &self.read_stream_ident)
+ .finish()
+ }
+}
+
+/// A reader for compressing data using snappy as it is read.
+///
+/// This `FrameEncoder` wraps any other reader that implements `std::io::Read`.
+/// Bytes read from this reader are compressed using the
+/// [Snappy frame format](https://github.com/google/snappy/blob/master/framing_format.txt)
+/// (file extension `sz`, MIME type `application/x-snappy-framed`).
+///
+/// Usually you'll want
+/// [`read::FrameDecoder`](struct.FrameDecoder.html)
+/// (for decompressing while reading) or
+/// [`write::FrameEncoder`](../write/struct.FrameEncoder.html)
+/// (for compressing while writing) instead.
+///
+/// Unlike `FrameDecoder`, this will attempt to make large reads roughly
+/// equivalent to the size of a single Snappy block. Therefore, callers may not
+/// benefit from using a buffered reader.
+pub struct FrameEncoder<R: io::Read> {
+ /// Internally, we split `FrameEncoder` in two to keep the borrow checker
+ /// happy. The `inner` member contains everything that `read_frame` needs
+ /// to fetch a frame's worth of data and compress it.
+ inner: Inner<R>,
+ /// Data that we've encoded and are ready to return to our caller.
+ /// Sized at MAX_READ_FRAME_ENCODER_BLOCK_SIZE so it can always hold one
+ /// full frame.
+ dst: Vec<u8>,
+ /// Starting point of bytes in `dst` not yet given back to the caller.
+ dsts: usize,
+ /// Ending point of bytes in `dst` that we want to give to our caller.
+ dste: usize,
+}
+
+struct Inner<R: io::Read> {
+ /// The underlying data source.
+ r: R,
+ /// An encoder that we reuse that does the actual block based compression.
+ enc: Encoder,
+ /// A CRC32 checksummer that is configured to either use the portable
+ /// fallback version or the SSE4.2 accelerated version when the right CPU
+ /// features are available.
+ checksummer: CheckSummer,
+ /// Data taken from the underlying `r`, and not yet compressed.
+ /// Sized at MAX_BLOCK_SIZE, one block's worth.
+ src: Vec<u8>,
+ /// Have we written the standard snappy header to `dst` yet?
+ wrote_stream_ident: bool,
+}
+
+impl<R: io::Read> FrameEncoder<R> {
+ /// Create a new reader for streaming Snappy compression.
+ pub fn new(rdr: R) -> FrameEncoder<R> {
+ FrameEncoder {
+ inner: Inner {
+ r: rdr,
+ enc: Encoder::new(),
+ checksummer: CheckSummer::new(),
+ src: vec![0; MAX_BLOCK_SIZE],
+ wrote_stream_ident: false,
+ },
+ dst: vec![0; MAX_READ_FRAME_ENCODER_BLOCK_SIZE],
+ dsts: 0,
+ dste: 0,
+ }
+ }
+
+ /// Gets a reference to the underlying reader in this encoder.
+ pub fn get_ref(&self) -> &R {
+ &self.inner.r
+ }
+
+ /// Gets a mutable reference to the underlying reader in this encoder.
+ ///
+ /// Note that mutation of the stream may result in surprising results if
+ /// this encoder is continued to be used.
+ pub fn get_mut(&mut self) -> &mut R {
+ &mut self.inner.r
+ }
+
+ /// Read previously compressed data from `self.dst`, returning the number of
+ /// bytes read. If `self.dst` is empty (i.e., fully drained), returns 0.
+ fn read_from_dst(&mut self, buf: &mut [u8]) -> usize {
+ // `dste - dsts` is the number of bytes not yet handed to the caller.
+ let available_bytes = self.dste - self.dsts;
+ let count = cmp::min(available_bytes, buf.len());
+ buf[..count].copy_from_slice(&self.dst[self.dsts..self.dsts + count]);
+ self.dsts += count;
+ count
+ }
+}
+
+impl<R: io::Read> io::Read for FrameEncoder<R> {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ // Try reading previously compressed bytes from our `dst` buffer, if
+ // any.
+ let count = self.read_from_dst(buf);
+
+ if count > 0 {
+ // We had some bytes in our `dst` buffer that we used.
+ Ok(count)
+ } else if buf.len() >= MAX_READ_FRAME_ENCODER_BLOCK_SIZE {
+ // Our output `buf` is big enough that we can directly write into
+ // it, so bypass `dst` entirely.
+ self.inner.read_frame(buf)
+ } else {
+ // We need to refill `self.dst`, and then return some bytes from
+ // that. (A zero return from `read_frame` here means EOF on the
+ // underlying reader, which we propagate as our own EOF.)
+ let count = self.inner.read_frame(&mut self.dst)?;
+ self.dsts = 0;
+ self.dste = count;
+ Ok(self.read_from_dst(buf))
+ }
+ }
+}
+
+impl<R: io::Read> Inner<R> {
+ /// Read from `self.r`, and create a new frame, writing it to `dst`, which
+ /// must be at least `MAX_READ_FRAME_ENCODER_BLOCK_SIZE` bytes in size.
+ ///
+ /// Returns the total number of bytes written to `dst`, which is 0 only
+ /// when the underlying reader is at EOF.
+ fn read_frame(&mut self, dst: &mut [u8]) -> io::Result<usize> {
+ debug_assert!(dst.len() >= MAX_READ_FRAME_ENCODER_BLOCK_SIZE);
+
+ // We make one read to the underlying reader. If the underlying reader
+ // doesn't fill the buffer but there are still bytes to be read, then
+ // compression won't be optimal. The alternative would be to block
+ // until our buffer is maximally full (or we see EOF), but this seems
+ // more surprising. In general, io::Read implementations should try to
+ // fill the caller's buffer as much as they can, so this seems like the
+ // better choice.
+ let nread = self.r.read(&mut self.src)?;
+ if nread == 0 {
+ return Ok(0);
+ }
+
+ // If we haven't yet written the stream header to `dst`, write it.
+ // It only ever appears once, at the very start of the stream.
+ let mut dst_write_start = 0;
+ if !self.wrote_stream_ident {
+ dst[0..STREAM_IDENTIFIER.len()].copy_from_slice(STREAM_IDENTIFIER);
+ dst_write_start += STREAM_IDENTIFIER.len();
+ self.wrote_stream_ident = true;
+ }
+
+ // Reserve space for our chunk header. We need to use `split_at_mut` so
+ // that we can get two mutable slices pointing at non-overlapping parts
+ // of `dst`.
+ let (chunk_header, remaining_dst) =
+ dst[dst_write_start..].split_at_mut(CHUNK_HEADER_AND_CRC_SIZE);
+ dst_write_start += CHUNK_HEADER_AND_CRC_SIZE;
+
+ // Compress our frame if possible, telling `compress_frame` to always
+ // put the output in `dst` (we can't return a slice borrowed from
+ // `self.src` here).
+ let frame_data = compress_frame(
+ &mut self.enc,
+ self.checksummer,
+ &self.src[..nread],
+ chunk_header,
+ remaining_dst,
+ true,
+ )?;
+ Ok(dst_write_start + frame_data.len())
+ }
+}
+
+impl<R: fmt::Debug + io::Read> fmt::Debug for FrameEncoder<R> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // The large dst buffer is elided to keep the output readable.
+ f.debug_struct("FrameEncoder")
+ .field("inner", &self.inner)
+ .field("dst", &"[...]")
+ .field("dsts", &self.dsts)
+ .field("dste", &self.dste)
+ .finish()
+ }
+}
+
+impl<R: fmt::Debug + io::Read> fmt::Debug for Inner<R> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // The large src buffer is elided to keep the output readable.
+ f.debug_struct("Inner")
+ .field("r", &self.r)
+ .field("enc", &self.enc)
+ .field("checksummer", &self.checksummer)
+ .field("src", &"[...]")
+ .field("wrote_stream_ident", &self.wrote_stream_ident)
+ .finish()
+ }
+}
+
+// read_exact_eof is like Read::read_exact, except it detects EOF
+// and returns Ok(false) instead of an error.
+//
+// If buf was read successfully, it returns Ok(true).
+//
+// Note that only an EOF *before the first byte* maps to Ok(false); an EOF
+// after a partial read is delegated to read_exact, which reports it as an
+// io::Error (UnexpectedEof), since a truncated buffer is a real error here.
+fn read_exact_eof<R: io::Read>(
+ rdr: &mut R,
+ buf: &mut [u8],
+) -> io::Result<bool> {
+ match rdr.read(buf) {
+ // EOF
+ Ok(0) => Ok(false),
+ // Read everything w/ the read call
+ Ok(i) if i == buf.len() => Ok(true),
+ // There's some bytes left to fill, which can be deferred to read_exact
+ Ok(i) => {
+ rdr.read_exact(&mut buf[i..])?;
+ Ok(true)
+ }
+ Err(e) => Err(e),
+ }
+}
diff --git a/vendor/snap/src/tag.rs b/vendor/snap/src/tag.rs
new file mode 100644
index 000000000..6344a13fc
--- /dev/null
+++ b/vendor/snap/src/tag.rs
@@ -0,0 +1,2 @@
+// Generated by build.rs.
+include!(concat!(env!("OUT_DIR"), "/tag.rs"));
diff --git a/vendor/snap/src/varint.rs b/vendor/snap/src/varint.rs
new file mode 100644
index 000000000..d4a11384f
--- /dev/null
+++ b/vendor/snap/src/varint.rs
@@ -0,0 +1,31 @@
+/// Writes `n` to `data` as an unsigned varint (LEB128-style: 7 bits per
+/// byte, high bit set on continuation bytes) and returns the number of
+/// bytes written. Panics if `data` is too small; a u64 needs at most 10
+/// bytes.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+pub fn write_varu64(data: &mut [u8], mut n: u64) -> usize {
+ let mut i = 0;
+ while n >= 0b1000_0000 {
+ data[i] = (n as u8) | 0b1000_0000;
+ n >>= 7;
+ i += 1;
+ }
+ data[i] = n as u8;
+ i + 1
+}
+
+/// Reads an unsigned varint from the beginning of `data`, returning the
+/// decoded value and the number of bytes consumed. Returns `(0, 0)` when
+/// the input is truncated (no terminating byte) or the shift would
+/// overflow a u64; callers must treat a 0 byte count as a decode failure.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+pub fn read_varu64(data: &[u8]) -> (u64, usize) {
+ let mut n: u64 = 0;
+ let mut shift: u32 = 0;
+ for (i, &b) in data.iter().enumerate() {
+ if b < 0b1000_0000 {
+ // Final byte (high bit clear): merge it in and report length.
+ return match (b as u64).checked_shl(shift) {
+ None => (0, 0),
+ Some(b) => (n | b, i + 1),
+ };
+ }
+ match ((b as u64) & 0b0111_1111).checked_shl(shift) {
+ None => return (0, 0),
+ Some(b) => n |= b,
+ }
+ shift += 7;
+ }
+ (0, 0)
+}
diff --git a/vendor/snap/src/write.rs b/vendor/snap/src/write.rs
new file mode 100644
index 000000000..7975bd18e
--- /dev/null
+++ b/vendor/snap/src/write.rs
@@ -0,0 +1,215 @@
+/*!
+This module provides a `std::io::Write` implementation:
+
+- `write::FrameEncoder` wraps another `std::io::Write` implementation, and
+ compresses data encoded using the Snappy frame format. Use this if you have
+ an uncompressed data source and wish to write it as compressed data.
+
+It would also be possible to provide a `write::FrameDecoder`, which decompresses
+data as it writes it, but it hasn't been implemented yet.
+*/
+
+use std::fmt;
+use std::io::{self, Write};
+
+use crate::compress::Encoder;
+use crate::crc32::CheckSummer;
+pub use crate::error::IntoInnerError;
+use crate::frame::{
+ compress_frame, CHUNK_HEADER_AND_CRC_SIZE, MAX_COMPRESS_BLOCK_SIZE,
+ STREAM_IDENTIFIER,
+};
+use crate::MAX_BLOCK_SIZE;
+
+/// A writer for compressing a Snappy stream.
+///
+/// This `FrameEncoder` wraps any other writer that implements `io::Write`.
+/// Bytes written to this writer are compressed using the [Snappy frame
+/// format](https://github.com/google/snappy/blob/master/framing_format.txt)
+/// (file extension `sz`, MIME type `application/x-snappy-framed`).
+///
+/// Writes are buffered automatically, so there's no need to wrap the given
+/// writer in a `std::io::BufWriter`.
+///
+/// The writer will be flushed automatically when it is dropped. If an error
+/// occurs, it is ignored.
+pub struct FrameEncoder<W: io::Write> {
+ /// Our main internal state, split out for borrowck reasons (happily paid).
+ ///
+ /// Also, it's an `Option` so we can move out of it even though
+ /// `FrameEncoder` impls `Drop`. It is only ever `None` after
+ /// `into_inner` has taken it.
+ inner: Option<Inner<W>>,
+ /// Our buffer of uncompressed bytes. This isn't part of `inner` because
+ /// we may write bytes directly from the caller if the given buffer was
+ /// big enough. As a result, the main `write` implementation needs to
+ /// accept either the internal buffer or the caller's bytes directly. Since
+ /// `write` requires a mutable borrow, we satisfy the borrow checker by
+ /// separating `src` from the rest of the state.
+ src: Vec<u8>,
+}
+
+struct Inner<W> {
+ /// The underlying writer.
+ w: W,
+ /// An encoder that we reuse that does the actual block based compression.
+ enc: Encoder,
+ /// A CRC32 checksummer that is configured to either use the portable
+ /// fallback version or the SSE4.2 accelerated version when the right CPU
+ /// features are available.
+ checksummer: CheckSummer,
+ /// The compressed bytes buffer. Bytes are compressed from src (usually)
+ /// to dst before being written to w. Sized at MAX_COMPRESS_BLOCK_SIZE.
+ dst: Vec<u8>,
+ /// When false, the stream identifier (with magic bytes) must precede the
+ /// next write.
+ wrote_stream_ident: bool,
+ /// Space for writing the header of a chunk before writing it to the
+ /// underlying writer. (CHUNK_HEADER_AND_CRC_SIZE == 8 bytes.)
+ chunk_header: [u8; 8],
+}
+
+impl<W: io::Write> FrameEncoder<W> {
+ /// Create a new writer for streaming Snappy compression.
+ pub fn new(wtr: W) -> FrameEncoder<W> {
+ FrameEncoder {
+ inner: Some(Inner {
+ w: wtr,
+ enc: Encoder::new(),
+ checksummer: CheckSummer::new(),
+ dst: vec![0; MAX_COMPRESS_BLOCK_SIZE],
+ wrote_stream_ident: false,
+ chunk_header: [0; CHUNK_HEADER_AND_CRC_SIZE],
+ }),
+ src: Vec::with_capacity(MAX_BLOCK_SIZE),
+ }
+ }
+
+ /// Returns the underlying stream, consuming and flushing this writer.
+ ///
+ /// If flushing the writer caused an error, then an `IntoInnerError` is
+ /// returned, which contains both the error and the original writer.
+ pub fn into_inner(mut self) -> Result<W, IntoInnerError<FrameEncoder<W>>> {
+ match self.flush() {
+ Ok(()) => Ok(self.inner.take().unwrap().w),
+ Err(err) => Err(IntoInnerError::new(self, err)),
+ }
+ }
+
+ /// Gets a reference to the underlying writer in this encoder.
+ pub fn get_ref(&self) -> &W {
+ &self.inner.as_ref().unwrap().w
+ }
+
+ /// Gets a mutable reference to the underlying writer in this encoder.
+ ///
+ /// Note that mutating the output/input state of the stream may corrupt
+ /// this encoder, so care must be taken when using this method.
+ pub fn get_mut(&mut self) -> &mut W {
+ &mut self.inner.as_mut().unwrap().w
+ }
+}
+
+impl<W: io::Write> Drop for FrameEncoder<W> {
+ fn drop(&mut self) {
+ // `inner` is None only when `into_inner` already flushed and took it.
+ if self.inner.is_some() {
+ // Ignore errors because we can't conceivably return an error and
+ // panicking in a destructor is bad juju.
+ let _ = self.flush();
+ }
+ }
+}
+
+impl<W: io::Write> io::Write for FrameEncoder<W> {
+ fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
+ let mut total = 0;
+ // If there isn't enough room to add buf to src, then add only a piece
+ // of it, flush it and mush on.
+ loop {
+ let free = self.src.capacity() - self.src.len();
+ // n is the number of bytes extracted from buf.
+ let n = if buf.len() <= free {
+ // The remainder fits in src; buffer it after the loop.
+ break;
+ } else if self.src.is_empty() {
+ // If buf is bigger than our entire buffer then avoid
+ // the indirection and write the buffer directly.
+ self.inner.as_mut().unwrap().write(buf)?
+ } else {
+ // Top off src to a full block, then flush it out.
+ self.src.extend_from_slice(&buf[0..free]);
+ self.flush()?;
+ free
+ };
+ buf = &buf[n..];
+ total += n;
+ }
+ // We're only here if buf.len() will fit within the available space of
+ // self.src.
+ debug_assert!(buf.len() <= (self.src.capacity() - self.src.len()));
+ self.src.extend_from_slice(buf);
+ total += buf.len();
+ // We should never expand or contract self.src.
+ debug_assert!(self.src.capacity() == MAX_BLOCK_SIZE);
+ Ok(total)
+ }
+
+ fn flush(&mut self) -> io::Result<()> {
+ if self.src.is_empty() {
+ return Ok(());
+ }
+ // Compress and emit everything buffered so far, then reset src
+ // (keeping its capacity).
+ self.inner.as_mut().unwrap().write(&self.src)?;
+ self.src.truncate(0);
+ Ok(())
+ }
+}
+
<autocook>
+impl<W: io::Write> Inner<W> {
+ /// Compress `buf` one MAX_BLOCK_SIZE block at a time and write each
+ /// resulting frame (chunk header + data) to the underlying writer,
+ /// emitting the stream identifier first if it hasn't been written yet.
+ /// Returns the number of *input* bytes consumed, which is always
+ /// `buf.len()` on success.
+ fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
+ let mut total = 0;
+ if !self.wrote_stream_ident {
+ self.wrote_stream_ident = true;
+ self.w.write_all(STREAM_IDENTIFIER)?;
+ }
+ while !buf.is_empty() {
+ // Advance buf and get our block.
+ let mut src = buf;
+ if src.len() > MAX_BLOCK_SIZE {
+ src = &src[0..MAX_BLOCK_SIZE];
+ }
+ buf = &buf[src.len()..];
+
+ // `false` here lets compress_frame return `src` itself when
+ // compression doesn't pay off, avoiding a copy into dst.
+ let frame_data = compress_frame(
+ &mut self.enc,
+ self.checksummer,
+ src,
+ &mut self.chunk_header,
+ &mut self.dst,
+ false,
+ )?;
+ self.w.write_all(&self.chunk_header)?;
+ self.w.write_all(frame_data)?;
+ total += src.len();
+ }
+ Ok(total)
+ }
+}
</autocook>
+
+impl<W: fmt::Debug + io::Write> fmt::Debug for FrameEncoder<W> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // The large src buffer is elided to keep the output readable.
+ f.debug_struct("FrameEncoder")
+ .field("inner", &self.inner)
+ .field("src", &"[...]")
+ .finish()
+ }
+}
+
+impl<W: fmt::Debug + io::Write> fmt::Debug for Inner<W> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // The large dst buffer is elided to keep the output readable.
+ f.debug_struct("Inner")
+ .field("w", &self.w)
+ .field("enc", &self.enc)
+ .field("checksummer", &self.checksummer)
+ .field("dst", &"[...]")
+ .field("wrote_stream_ident", &self.wrote_stream_ident)
+ .field("chunk_header", &self.chunk_header)
+ .finish()
+ }
+}