summaryrefslogtreecommitdiffstats
path: root/other-licenses/snappy/src/snappy_unittest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'other-licenses/snappy/src/snappy_unittest.cc')
-rw-r--r--other-licenses/snappy/src/snappy_unittest.cc966
1 files changed, 966 insertions, 0 deletions
diff --git a/other-licenses/snappy/src/snappy_unittest.cc b/other-licenses/snappy/src/snappy_unittest.cc
new file mode 100644
index 0000000000..7a85635d73
--- /dev/null
+++ b/other-licenses/snappy/src/snappy_unittest.cc
@@ -0,0 +1,966 @@
+// Copyright 2005 and onwards Google Inc.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <random>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "snappy-test.h"
+
+#include "gtest/gtest.h"
+
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+#include "snappy.h"
+#include "snappy_test_data.h"
+
+SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
+ "If true, we print the decompression table during tests.");
+
+namespace snappy {
+
+namespace {
+
+#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
+
+// To test against code that reads beyond its input, this class copies a
+// string to a newly allocated group of pages, the last of which
+// is made unreadable via mprotect. Note that we need to allocate the
+// memory with mmap(), as POSIX allows mprotect() only on memory allocated
+// with mmap(), and some malloc/posix_memalign implementations expect to
+// be able to read previously allocated memory while doing heap allocations.
+class DataEndingAtUnreadablePage {
+ public:
+ explicit DataEndingAtUnreadablePage(const std::string& s) {
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ const size_t size = s.size();
+ // Round up space for string to a multiple of page_size.
+ size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
+ alloc_size_ = space_for_string + page_size;
+ mem_ = mmap(NULL, alloc_size_,
+ PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ CHECK_NE(MAP_FAILED, mem_);
+ protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
+ char* dst = protected_page_ - size;
+ std::memcpy(dst, s.data(), size);
+ data_ = dst;
+ size_ = size;
+ // Make guard page unreadable.
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
+ }
+
+ ~DataEndingAtUnreadablePage() {
+ const size_t page_size = sysconf(_SC_PAGESIZE);
+ // Undo the mprotect.
+ CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
+ CHECK_EQ(0, munmap(mem_, alloc_size_));
+ }
+
+ const char* data() const { return data_; }
+ size_t size() const { return size_; }
+
+ private:
+ size_t alloc_size_;
+ void* mem_;
+ char* protected_page_;
+ const char* data_;
+ size_t size_;
+};
+
+#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
+
+// Fallback for systems without mmap.
+using DataEndingAtUnreadablePage = std::string;
+
+#endif
+
+int VerifyString(const std::string& input) {
+ std::string compressed;
+ DataEndingAtUnreadablePage i(input);
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
+ CHECK_EQ(written, compressed.size());
+ CHECK_LE(compressed.size(),
+ snappy::MaxCompressedLength(input.size()));
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+
+ std::string uncompressed;
+ DataEndingAtUnreadablePage c(compressed);
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
+ CHECK_EQ(uncompressed, input);
+ return uncompressed.size();
+}
+
+void VerifyStringSink(const std::string& input) {
+ std::string compressed;
+ DataEndingAtUnreadablePage i(input);
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
+ CHECK_EQ(written, compressed.size());
+ CHECK_LE(compressed.size(),
+ snappy::MaxCompressedLength(input.size()));
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+
+ std::string uncompressed;
+ uncompressed.resize(input.size());
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
+ DataEndingAtUnreadablePage c(compressed);
+ snappy::ByteArraySource source(c.data(), c.size());
+ CHECK(snappy::Uncompress(&source, &sink));
+ CHECK_EQ(uncompressed, input);
+}
+
+void VerifyIOVec(const std::string& input) {
+ std::string compressed;
+ DataEndingAtUnreadablePage i(input);
+ const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
+ CHECK_EQ(written, compressed.size());
+ CHECK_LE(compressed.size(),
+ snappy::MaxCompressedLength(input.size()));
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+
+ // Try uncompressing into an iovec containing a random number of entries
+ // ranging from 1 to 10.
+ char* buf = new char[input.size()];
+ std::minstd_rand0 rng(input.size());
+ std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
+ size_t num = uniform_1_to_10(rng);
+ if (input.size() < num) {
+ num = input.size();
+ }
+ struct iovec* iov = new iovec[num];
+ size_t used_so_far = 0;
+ std::bernoulli_distribution one_in_five(1.0 / 5);
+ for (size_t i = 0; i < num; ++i) {
+ assert(used_so_far < input.size());
+ iov[i].iov_base = buf + used_so_far;
+ if (i == num - 1) {
+ iov[i].iov_len = input.size() - used_so_far;
+ } else {
+ // Randomly choose to insert a 0 byte entry.
+ if (one_in_five(rng)) {
+ iov[i].iov_len = 0;
+ } else {
+ std::uniform_int_distribution<size_t> uniform_not_used_so_far(
+ 0, input.size() - used_so_far - 1);
+ iov[i].iov_len = uniform_not_used_so_far(rng);
+ }
+ }
+ used_so_far += iov[i].iov_len;
+ }
+ CHECK(snappy::RawUncompressToIOVec(
+ compressed.data(), compressed.size(), iov, num));
+ CHECK(!memcmp(buf, input.data(), input.size()));
+ delete[] iov;
+ delete[] buf;
+}
+
+// Test that data compressed by a compressor that does not
+// obey block sizes is uncompressed properly.
+void VerifyNonBlockedCompression(const std::string& input) {
+ if (input.length() > snappy::kBlockSize) {
+ // We cannot test larger blocks than the maximum block size, obviously.
+ return;
+ }
+
+ std::string prefix;
+ Varint::Append32(&prefix, input.size());
+
+ // Setup compression table
+ snappy::internal::WorkingMemory wmem(input.size());
+ int table_size;
+ uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
+
+ // Compress entire input in one shot
+ std::string compressed;
+ compressed += prefix;
+ compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
+ char* dest = string_as_array(&compressed) + prefix.size();
+ char* end = snappy::internal::CompressFragment(input.data(), input.size(),
+ dest, table, table_size);
+ compressed.resize(end - compressed.data());
+
+ // Uncompress into std::string
+ std::string uncomp_str;
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
+ CHECK_EQ(uncomp_str, input);
+
+ // Uncompress using source/sink
+ std::string uncomp_str2;
+ uncomp_str2.resize(input.size());
+ snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
+ CHECK(snappy::Uncompress(&source, &sink));
+ CHECK_EQ(uncomp_str2, input);
+
+ // Uncompress into iovec
+ {
+ static const int kNumBlocks = 10;
+ struct iovec vec[kNumBlocks];
+ const int block_size = 1 + input.size() / kNumBlocks;
+ std::string iovec_data(block_size * kNumBlocks, 'x');
+ for (int i = 0; i < kNumBlocks; ++i) {
+ vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
+ vec[i].iov_len = block_size;
+ }
+ CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
+ vec, kNumBlocks));
+ CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
+ }
+}
+
+// Expand the input so that it is at least K times as big as block size
+std::string Expand(const std::string& input) {
+ static const int K = 3;
+ std::string data = input;
+ while (data.size() < K * snappy::kBlockSize) {
+ data += input;
+ }
+ return data;
+}
+
+int Verify(const std::string& input) {
+ VLOG(1) << "Verifying input of size " << input.size();
+
+ // Compress using string based routines
+ const int result = VerifyString(input);
+
+ // Verify using sink based routines
+ VerifyStringSink(input);
+
+ VerifyNonBlockedCompression(input);
+ VerifyIOVec(input);
+ if (!input.empty()) {
+ const std::string expanded = Expand(input);
+ VerifyNonBlockedCompression(expanded);
+ VerifyIOVec(input);
+ }
+
+ return result;
+}
+
+bool IsValidCompressedBuffer(const std::string& c) {
+ return snappy::IsValidCompressedBuffer(c.data(), c.size());
+}
+bool Uncompress(const std::string& c, std::string* u) {
+ return snappy::Uncompress(c.data(), c.size(), u);
+}
+
+// This test checks to ensure that snappy doesn't coredump if it gets
+// corrupted data.
+TEST(CorruptedTest, VerifyCorrupted) {
+ std::string source = "making sure we don't crash with corrupted input";
+ VLOG(1) << source;
+ std::string dest;
+ std::string uncmp;
+ snappy::Compress(source.data(), source.size(), &dest);
+
+ // Mess around with the data. It's hard to simulate all possible
+ // corruptions; this is just one example ...
+ CHECK_GT(dest.size(), 3);
+ dest[1]--;
+ dest[3]++;
+ // this really ought to fail.
+ CHECK(!IsValidCompressedBuffer(dest));
+ CHECK(!Uncompress(dest, &uncmp));
+
+ // This is testing for a security bug - a buffer that decompresses to 100k
+ // but we lie in the snappy header and only reserve 0 bytes of memory :)
+ source.resize(100000);
+ for (char& source_char : source) {
+ source_char = 'A';
+ }
+ snappy::Compress(source.data(), source.size(), &dest);
+ dest[0] = dest[1] = dest[2] = dest[3] = 0;
+ CHECK(!IsValidCompressedBuffer(dest));
+ CHECK(!Uncompress(dest, &uncmp));
+
+ if (sizeof(void *) == 4) {
+ // Another security check; check a crazy big length can't DoS us with an
+ // over-allocation.
+ // Currently this is done only for 32-bit builds. On 64-bit builds,
+ // where 3 GB might be an acceptable allocation size, Uncompress()
+ // attempts to decompress, and sometimes causes the test to run out of
+ // memory.
+ dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
+ // This decodes to a really large size, i.e., about 3 GB.
+ dest[4] = 'k';
+ CHECK(!IsValidCompressedBuffer(dest));
+ CHECK(!Uncompress(dest, &uncmp));
+ } else {
+ LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
+ }
+
+ // This decodes to about 2 MB; much smaller, but should still fail.
+ dest[0] = dest[1] = dest[2] = '\xff';
+ dest[3] = 0x00;
+ CHECK(!IsValidCompressedBuffer(dest));
+ CHECK(!Uncompress(dest, &uncmp));
+
+ // try reading stuff in from a bad file.
+ for (int i = 1; i <= 3; ++i) {
+ std::string data =
+ ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
+ std::string uncmp;
+ // check that we don't return a crazy length
+ size_t ulen;
+ CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
+ || (ulen < (1<<20)));
+ uint32_t ulen2;
+ snappy::ByteArraySource source(data.data(), data.size());
+ CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
+ (ulen2 < (1<<20)));
+ CHECK(!IsValidCompressedBuffer(data));
+ CHECK(!Uncompress(data, &uncmp));
+ }
+}
+
+// Helper routines to construct arbitrary compressed strings.
+// These mirror the compression code in snappy.cc, but are copied
+// here so that we can bypass some limitations in the how snappy.cc
+// invokes these routines.
+void AppendLiteral(std::string* dst, const std::string& literal) {
+ if (literal.empty()) return;
+ int n = literal.size() - 1;
+ if (n < 60) {
+ // Fit length in tag byte
+ dst->push_back(0 | (n << 2));
+ } else {
+ // Encode in upcoming bytes
+ char number[4];
+ int count = 0;
+ while (n > 0) {
+ number[count++] = n & 0xff;
+ n >>= 8;
+ }
+ dst->push_back(0 | ((59+count) << 2));
+ *dst += std::string(number, count);
+ }
+ *dst += literal;
+}
+
+void AppendCopy(std::string* dst, int offset, int length) {
+ while (length > 0) {
+ // Figure out how much to copy in one shot
+ int to_copy;
+ if (length >= 68) {
+ to_copy = 64;
+ } else if (length > 64) {
+ to_copy = 60;
+ } else {
+ to_copy = length;
+ }
+ length -= to_copy;
+
+ if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
+ assert(to_copy-4 < 8); // Must fit in 3 bits
+ dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
+ dst->push_back(offset & 0xff);
+ } else if (offset < 65536) {
+ dst->push_back(2 | ((to_copy-1) << 2));
+ dst->push_back(offset & 0xff);
+ dst->push_back(offset >> 8);
+ } else {
+ dst->push_back(3 | ((to_copy-1) << 2));
+ dst->push_back(offset & 0xff);
+ dst->push_back((offset >> 8) & 0xff);
+ dst->push_back((offset >> 16) & 0xff);
+ dst->push_back((offset >> 24) & 0xff);
+ }
+ }
+}
+
+TEST(Snappy, SimpleTests) {
+ Verify("");
+ Verify("a");
+ Verify("ab");
+ Verify("abc");
+
+ Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
+ Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
+ Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
+ Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
+ Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
+}
+
+// Regression test for cr/345340892.
+TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
+ Verify("abcabcabcabcabcabcab");
+ Verify("abcabcabcabcabcabcab0123456789ABCDEF");
+
+ Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
+ Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
+}
+
+// Regression test for cr/345340892.
+TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
+ std::mt19937 rng;
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
+ for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
+ for (int length = 1; length <= 64; ++length) {
+ for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
+ const int size = pattern_size + length + extra_bytes_after_pattern;
+ std::string input;
+ input.resize(size);
+ for (int i = 0; i < pattern_size; ++i) {
+ input[i] = 'a' + i;
+ }
+ for (int i = 0; i < length; ++i) {
+ input[pattern_size + i] = input[i];
+ }
+ for (int i = 0; i < extra_bytes_after_pattern; ++i) {
+ input[pattern_size + length + i] =
+ static_cast<char>(uniform_byte(rng));
+ }
+ Verify(input);
+ }
+ }
+ }
+}
+
+// Verify max blowup (lots of four-byte copies)
+TEST(Snappy, MaxBlowup) {
+ std::mt19937 rng;
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
+ std::string input;
+ for (int i = 0; i < 80000; ++i)
+ input.push_back(static_cast<char>(uniform_byte(rng)));
+
+ for (int i = 0; i < 80000; i += 4) {
+ std::string four_bytes(input.end() - i - 4, input.end() - i);
+ input.append(four_bytes);
+ }
+ Verify(input);
+}
+
+TEST(Snappy, RandomData) {
+ std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
+ std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
+ std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
+ std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
+ std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
+ std::bernoulli_distribution one_in_ten(1.0 / 10);
+
+ constexpr int num_ops = 20000;
+ for (int i = 0; i < num_ops; ++i) {
+ if ((i % 1000) == 0) {
+ VLOG(0) << "Random op " << i << " of " << num_ops;
+ }
+
+ std::string x;
+ size_t len = uniform_4k(rng);
+ if (i < 100) {
+ len = 65536 + uniform_64k(rng);
+ }
+ while (x.size() < len) {
+ int run_len = 1;
+ if (one_in_ten(rng)) {
+ int skewed_bits = uniform_0_to_8(rng);
+ // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
+ std::uniform_int_distribution<int> skewed_low(0,
+ (1 << skewed_bits) - 1);
+ run_len = skewed_low(rng);
+ }
+ char c = static_cast<char>(uniform_byte(rng));
+ if (i >= 100) {
+ int skewed_bits = uniform_0_to_3(rng);
+ // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
+ std::uniform_int_distribution<int> skewed_low(0,
+ (1 << skewed_bits) - 1);
+ c = static_cast<char>(skewed_low(rng));
+ }
+ while (run_len-- > 0 && x.size() < len) {
+ x.push_back(c);
+ }
+ }
+
+ Verify(x);
+ }
+}
+
+TEST(Snappy, FourByteOffset) {
+ // The new compressor cannot generate four-byte offsets since
+ // it chops up the input into 32KB pieces. So we hand-emit the
+ // copy manually.
+
+ // The two fragments that make up the input string.
+ std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
+ std::string fragment2 = "some other string";
+
+ // How many times each fragment is emitted.
+ const int n1 = 2;
+ const int n2 = 100000 / fragment2.size();
+ const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
+
+ std::string compressed;
+ Varint::Append32(&compressed, length);
+
+ AppendLiteral(&compressed, fragment1);
+ std::string src = fragment1;
+ for (int i = 0; i < n2; ++i) {
+ AppendLiteral(&compressed, fragment2);
+ src += fragment2;
+ }
+ AppendCopy(&compressed, src.size(), fragment1.size());
+ src += fragment1;
+ CHECK_EQ(length, src.size());
+
+ std::string uncompressed;
+ CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+ CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
+ &uncompressed));
+ CHECK_EQ(uncompressed, src);
+}
+
+TEST(Snappy, IOVecEdgeCases) {
+ // Test some tricky edge cases in the iovec output that are not necessarily
+ // exercised by random tests.
+
+ // Our output blocks look like this initially (the last iovec is bigger
+ // than depicted):
+ // [ ] [ ] [ ] [ ] [ ]
+ static const int kLengths[] = { 2, 1, 4, 8, 128 };
+
+ struct iovec iov[ARRAYSIZE(kLengths)];
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
+ iov[i].iov_base = new char[kLengths[i]];
+ iov[i].iov_len = kLengths[i];
+ }
+
+ std::string compressed;
+ Varint::Append32(&compressed, 22);
+
+ // A literal whose output crosses three blocks.
+ // [ab] [c] [123 ] [ ] [ ]
+ AppendLiteral(&compressed, "abc123");
+
+ // A copy whose output crosses two blocks (source and destination
+ // segments marked).
+ // [ab] [c] [1231] [23 ] [ ]
+ // ^--^ --
+ AppendCopy(&compressed, 3, 3);
+
+ // A copy where the input is, at first, in the block before the output:
+ //
+ // [ab] [c] [1231] [231231 ] [ ]
+ // ^--- ^---
+ // Then during the copy, the pointers move such that the input and
+ // output pointers are in the same block:
+ //
+ // [ab] [c] [1231] [23123123] [ ]
+ // ^- ^-
+ // And then they move again, so that the output pointer is no longer
+ // in the same block as the input pointer:
+ // [ab] [c] [1231] [23123123] [123 ]
+ // ^-- ^--
+ AppendCopy(&compressed, 6, 9);
+
+ // Finally, a copy where the input is from several blocks back,
+ // and it also crosses three blocks:
+ //
+ // [ab] [c] [1231] [23123123] [123b ]
+ // ^ ^
+ // [ab] [c] [1231] [23123123] [123bc ]
+ // ^ ^
+ // [ab] [c] [1231] [23123123] [123bc12 ]
+ // ^- ^-
+ AppendCopy(&compressed, 17, 4);
+
+ CHECK(snappy::RawUncompressToIOVec(
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
+ CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
+ CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
+ CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
+ CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
+ CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
+
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
+ }
+}
+
+TEST(Snappy, IOVecLiteralOverflow) {
+ static const int kLengths[] = { 3, 4 };
+
+ struct iovec iov[ARRAYSIZE(kLengths)];
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
+ iov[i].iov_base = new char[kLengths[i]];
+ iov[i].iov_len = kLengths[i];
+ }
+
+ std::string compressed;
+ Varint::Append32(&compressed, 8);
+
+ AppendLiteral(&compressed, "12345678");
+
+ CHECK(!snappy::RawUncompressToIOVec(
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
+
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
+ }
+}
+
+TEST(Snappy, IOVecCopyOverflow) {
+ static const int kLengths[] = { 3, 4 };
+
+ struct iovec iov[ARRAYSIZE(kLengths)];
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
+ iov[i].iov_base = new char[kLengths[i]];
+ iov[i].iov_len = kLengths[i];
+ }
+
+ std::string compressed;
+ Varint::Append32(&compressed, 8);
+
+ AppendLiteral(&compressed, "123");
+ AppendCopy(&compressed, 3, 5);
+
+ CHECK(!snappy::RawUncompressToIOVec(
+ compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
+
+ for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
+ delete[] reinterpret_cast<char *>(iov[i].iov_base);
+ }
+}
+
+bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
+ const bool result1 = snappy::GetUncompressedLength(compressed.data(),
+ compressed.size(),
+ ulength);
+
+ snappy::ByteArraySource source(compressed.data(), compressed.size());
+ uint32_t length;
+ const bool result2 = snappy::GetUncompressedLength(&source, &length);
+ CHECK_EQ(result1, result2);
+ return result1;
+}
+
+TEST(SnappyCorruption, TruncatedVarint) {
+ std::string compressed, uncompressed;
+ size_t ulength;
+ compressed.push_back('\xf0');
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
+ &uncompressed));
+}
+
+TEST(SnappyCorruption, UnterminatedVarint) {
+ std::string compressed, uncompressed;
+ size_t ulength;
+ compressed.push_back('\x80');
+ compressed.push_back('\x80');
+ compressed.push_back('\x80');
+ compressed.push_back('\x80');
+ compressed.push_back('\x80');
+ compressed.push_back(10);
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
+ &uncompressed));
+}
+
+TEST(SnappyCorruption, OverflowingVarint) {
+ std::string compressed, uncompressed;
+ size_t ulength;
+ compressed.push_back('\xfb');
+ compressed.push_back('\xff');
+ compressed.push_back('\xff');
+ compressed.push_back('\xff');
+ compressed.push_back('\x7f');
+ CHECK(!CheckUncompressedLength(compressed, &ulength));
+ CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+ CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
+ &uncompressed));
+}
+
+TEST(Snappy, ReadPastEndOfBuffer) {
+ // Check that we do not read past end of input
+
+ // Make a compressed string that ends with a single-byte literal
+ std::string compressed;
+ Varint::Append32(&compressed, 1);
+ AppendLiteral(&compressed, "x");
+
+ std::string uncompressed;
+ DataEndingAtUnreadablePage c(compressed);
+ CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
+ CHECK_EQ(uncompressed, std::string("x"));
+}
+
+// Check for an infinite loop caused by a copy with offset==0
+TEST(Snappy, ZeroOffsetCopy) {
+ const char* compressed = "\x40\x12\x00\x00";
+ // \x40 Length (must be > kMaxIncrementCopyOverflow)
+ // \x12\x00\x00 Copy with offset==0, length==5
+ char uncompressed[100];
+ EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
+}
+
+TEST(Snappy, ZeroOffsetCopyValidation) {
+ const char* compressed = "\x05\x12\x00\x00";
+ // \x05 Length
+ // \x12\x00\x00 Copy with offset==0, length==5
+ EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
+}
+
+int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
+ uint64_t data;
+ std::pair<size_t, bool> p =
+ snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
+ CHECK_EQ(p.first < 8, p.second);
+ return p.first;
+}
+
+TEST(Snappy, FindMatchLength) {
+ // Exercise all different code paths through the function.
+ // 64-bit version:
+
+ // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
+ EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
+
+ // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
+ EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
+
+ // Same, but edge cases.
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
+ EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
+
+ // Find non-match at once in first loop.
+ EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
+ EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
+ EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
+ EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
+
+ // Find non-match in first loop after one block.
+ EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
+ "abcdefgh?1234567xxxxxxxx", 24));
+ EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
+ "abcdefgh0?234567xxxxxxxx", 24));
+ EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
+ "abcdefgh01237654xxxxxxxx", 24));
+ EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
+ "abcdefgh0123456?xxxxxxxx", 24));
+
+ // 32-bit version:
+
+ // Short matches.
+ EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
+ EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
+ EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
+ EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
+ EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
+ EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
+ EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
+ EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
+ EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
+
+ // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
+
+ // Same, but edge cases.
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
+
+ // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
+
+ // Find non-match at once in first loop.
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
+ "xxxxxx?123xxxxxxxx", 18));
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
+ "xxxxxx0?23xxxxxxxx", 18));
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
+ "xxxxxx0132xxxxxxxx", 18));
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
+ "xxxxxx012?xxxxxxxx", 18));
+
+ // Same, but edge cases.
+ EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
+ EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
+ EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
+ EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
+
+ // Find non-match in first loop after one block.
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
+ "xxxxxxabcd?123xx", 16));
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
+ "xxxxxxabcd0?23xx", 16));
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
+ "xxxxxxabcd0132xx", 16));
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
+ "xxxxxxabcd012?xx", 16));
+
+ // Same, but edge cases.
+ EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
+ EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
+ EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
+ EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
+}
+
+TEST(Snappy, FindMatchLengthRandom) {
+ constexpr int kNumTrials = 10000;
+ constexpr int kTypicalLength = 10;
+ std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
+ std::uniform_int_distribution<int> uniform_byte(0, 255);
+ std::bernoulli_distribution one_in_two(1.0 / 2);
+ std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
+
+ for (int i = 0; i < kNumTrials; ++i) {
+ std::string s, t;
+ char a = static_cast<char>(uniform_byte(rng));
+ char b = static_cast<char>(uniform_byte(rng));
+ while (!one_in_typical_length(rng)) {
+ s.push_back(one_in_two(rng) ? a : b);
+ t.push_back(one_in_two(rng) ? a : b);
+ }
+ DataEndingAtUnreadablePage u(s);
+ DataEndingAtUnreadablePage v(t);
+ size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
+ if (matched == t.size()) {
+ EXPECT_EQ(s, t);
+ } else {
+ EXPECT_NE(s[matched], t[matched]);
+ for (size_t j = 0; j < matched; ++j) {
+ EXPECT_EQ(s[j], t[j]);
+ }
+ }
+ }
+}
+
+uint16_t MakeEntry(unsigned int extra, unsigned int len,
+ unsigned int copy_offset) {
+ // Check that all of the fields fit within the allocated space
+ assert(extra == (extra & 0x7)); // At most 3 bits
+ assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
+ assert(len == (len & 0x7f)); // At most 7 bits
+ return len | (copy_offset << 8) | (extra << 11);
+}
+
+// Check that the decompression table is correct, and optionally print out
+// the computed one.
+TEST(Snappy, VerifyCharTable) {
+ using snappy::internal::LITERAL;
+ using snappy::internal::COPY_1_BYTE_OFFSET;
+ using snappy::internal::COPY_2_BYTE_OFFSET;
+ using snappy::internal::COPY_4_BYTE_OFFSET;
+ using snappy::internal::char_table;
+
+ uint16_t dst[256];
+
+ // Place invalid entries in all places to detect missing initialization
+ int assigned = 0;
+ for (int i = 0; i < 256; ++i) {
+ dst[i] = 0xffff;
+ }
+
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
+ for (uint8_t len = 1; len <= 60; ++len) {
+ dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
+ assigned++;
+ }
+
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
+ // encode the number of bytes of length info that follow the opcode.
+ for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
+ // We set the length field in the lookup table to 1 because extra
+ // bytes encode len-1.
+ dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
+ assigned++;
+ }
+
+ // COPY_1_BYTE_OFFSET.
+ //
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
+ // offset/256 in 5 bits. offset%256 is stored in the next byte.
+ //
+ // This format is used for length in range [4..11] and offset in
+ // range [0..2047]
+ for (uint8_t len = 4; len < 12; ++len) {
+ for (uint16_t offset = 0; offset < 2048; offset += 256) {
+ uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
+ dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
+ MakeEntry(1, len, offset_high);
+ assigned++;
+ }
+ }
+
+ // COPY_2_BYTE_OFFSET.
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
+ for (uint8_t len = 1; len <= 64; ++len) {
+ dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
+ assigned++;
+ }
+
+ // COPY_4_BYTE_OFFSET.
+ // Tag contents len-1 in top 6 bits, and offset in next four bytes.
+ for (uint8_t len = 1; len <= 64; ++len) {
+ dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
+ assigned++;
+ }
+
+ // Check that each entry was initialized exactly once.
+ EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
+ }
+
+ if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
+ std::printf("static const uint16_t char_table[256] = {\n ");
+ for (int i = 0; i < 256; ++i) {
+ std::printf("0x%04x%s",
+ dst[i],
+ ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
+ }
+ std::printf("};\n");
+ }
+
+ // Check that computed table matched recorded table.
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
+ }
+}
+
+TEST(Snappy, TestBenchmarkFiles) {
+ for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
+ Verify(ReadTestDataFile(kTestDataFiles[i].filename,
+ kTestDataFiles[i].size_limit));
+ }
+}
+
+} // namespace
+
+} // namespace snappy