From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/rocksdb/util/aligned_buffer.h | 234 + src/rocksdb/util/async_file_reader.cc | 73 + src/rocksdb/util/async_file_reader.h | 144 + src/rocksdb/util/autovector.h | 406 ++ src/rocksdb/util/autovector_test.cc | 331 ++ src/rocksdb/util/bloom_impl.h | 489 ++ src/rocksdb/util/bloom_test.cc | 1175 +++++ src/rocksdb/util/build_version.cc.in | 81 + src/rocksdb/util/cast_util.h | 42 + src/rocksdb/util/channel.h | 69 + src/rocksdb/util/cleanable.cc | 181 + src/rocksdb/util/coding.cc | 90 + src/rocksdb/util/coding.h | 389 ++ src/rocksdb/util/coding_lean.h | 101 + src/rocksdb/util/coding_test.cc | 217 + src/rocksdb/util/compaction_job_stats_impl.cc | 100 + src/rocksdb/util/comparator.cc | 391 ++ src/rocksdb/util/compression.cc | 122 + src/rocksdb/util/compression.h | 1786 ++++++++ src/rocksdb/util/compression_context_cache.cc | 106 + src/rocksdb/util/compression_context_cache.h | 47 + src/rocksdb/util/concurrent_task_limiter_impl.cc | 64 + src/rocksdb/util/concurrent_task_limiter_impl.h | 67 + src/rocksdb/util/core_local.h | 83 + src/rocksdb/util/coro_utils.h | 112 + src/rocksdb/util/crc32c.cc | 1351 ++++++ src/rocksdb/util/crc32c.h | 56 + src/rocksdb/util/crc32c_arm64.cc | 215 + src/rocksdb/util/crc32c_arm64.h | 52 + src/rocksdb/util/crc32c_ppc.c | 94 + src/rocksdb/util/crc32c_ppc.h | 22 + src/rocksdb/util/crc32c_ppc_asm.S | 756 +++ src/rocksdb/util/crc32c_ppc_constants.h | 900 ++++ src/rocksdb/util/crc32c_test.cc | 213 + src/rocksdb/util/defer.h | 82 + src/rocksdb/util/defer_test.cc | 51 + src/rocksdb/util/distributed_mutex.h | 48 + src/rocksdb/util/duplicate_detector.h | 71 + src/rocksdb/util/dynamic_bloom.cc | 70 + src/rocksdb/util/dynamic_bloom.h | 214 + src/rocksdb/util/dynamic_bloom_test.cc | 325 ++ src/rocksdb/util/fastrange.h | 114 + src/rocksdb/util/file_checksum_helper.cc | 172 + src/rocksdb/util/file_checksum_helper.h | 100 + src/rocksdb/util/file_reader_writer_test.cc | 1066 +++++ src/rocksdb/util/filelock_test.cc | 148 + src/rocksdb/util/filter_bench.cc | 840 ++++ src/rocksdb/util/gflags_compat.h | 30 + src/rocksdb/util/hash.cc | 201 + src/rocksdb/util/hash.h | 137 + src/rocksdb/util/hash128.h | 26 + src/rocksdb/util/hash_containers.h | 51 + src/rocksdb/util/hash_map.h | 67 + src/rocksdb/util/hash_test.cc | 853 ++++ src/rocksdb/util/heap.h | 174 + src/rocksdb/util/heap_test.cc | 131 + src/rocksdb/util/kv_map.h | 33 + src/rocksdb/util/log_write_bench.cc | 88 + src/rocksdb/util/math.h | 294 ++ src/rocksdb/util/math128.h | 316 ++ src/rocksdb/util/murmurhash.cc | 196 + src/rocksdb/util/murmurhash.h | 43 + src/rocksdb/util/mutexlock.h | 180 + src/rocksdb/util/ppc-opcode.h | 27 + src/rocksdb/util/random.cc | 62 + src/rocksdb/util/random.h | 190 + src/rocksdb/util/random_test.cc | 107 + src/rocksdb/util/rate_limiter.cc | 378 ++ src/rocksdb/util/rate_limiter.h | 146 + src/rocksdb/util/rate_limiter_test.cc | 476 ++ src/rocksdb/util/repeatable_thread.h | 149 + src/rocksdb/util/repeatable_thread_test.cc | 111 + src/rocksdb/util/ribbon_alg.h | 1225 +++++ src/rocksdb/util/ribbon_config.cc | 506 ++ src/rocksdb/util/ribbon_config.h | 182 + src/rocksdb/util/ribbon_impl.h | 1137 +++++ src/rocksdb/util/ribbon_test.cc | 1308 ++++++ src/rocksdb/util/set_comparator.h | 24 + src/rocksdb/util/single_thread_executor.h | 56 + src/rocksdb/util/slice.cc | 405 ++ src/rocksdb/util/slice_test.cc | 191 + 
src/rocksdb/util/slice_transform_test.cc | 154 + src/rocksdb/util/status.cc | 154 + src/rocksdb/util/stderr_logger.cc | 30 + src/rocksdb/util/stderr_logger.h | 31 + src/rocksdb/util/stop_watch.h | 118 + src/rocksdb/util/string_util.cc | 504 ++ src/rocksdb/util/string_util.h | 177 + src/rocksdb/util/thread_guard.h | 41 + src/rocksdb/util/thread_list_test.cc | 360 ++ src/rocksdb/util/thread_local.cc | 521 +++ src/rocksdb/util/thread_local.h | 100 + src/rocksdb/util/thread_local_test.cc | 582 +++ src/rocksdb/util/thread_operation.h | 112 + src/rocksdb/util/threadpool_imp.cc | 551 +++ src/rocksdb/util/threadpool_imp.h | 120 + src/rocksdb/util/timer.h | 340 ++ src/rocksdb/util/timer_queue.h | 231 + src/rocksdb/util/timer_queue_test.cc | 73 + src/rocksdb/util/timer_test.cc | 402 ++ src/rocksdb/util/user_comparator_wrapper.h | 64 + src/rocksdb/util/vector_iterator.h | 118 + src/rocksdb/util/work_queue.h | 150 + src/rocksdb/util/work_queue_test.cc | 272 ++ src/rocksdb/util/xxhash.cc | 48 + src/rocksdb/util/xxhash.h | 5346 ++++++++++++++++++++++ src/rocksdb/util/xxph3.h | 1764 +++++++ 107 files changed, 35723 insertions(+) create mode 100644 src/rocksdb/util/aligned_buffer.h create mode 100644 src/rocksdb/util/async_file_reader.cc create mode 100644 src/rocksdb/util/async_file_reader.h create mode 100644 src/rocksdb/util/autovector.h create mode 100644 src/rocksdb/util/autovector_test.cc create mode 100644 src/rocksdb/util/bloom_impl.h create mode 100644 src/rocksdb/util/bloom_test.cc create mode 100644 src/rocksdb/util/build_version.cc.in create mode 100644 src/rocksdb/util/cast_util.h create mode 100644 src/rocksdb/util/channel.h create mode 100644 src/rocksdb/util/cleanable.cc create mode 100644 src/rocksdb/util/coding.cc create mode 100644 src/rocksdb/util/coding.h create mode 100644 src/rocksdb/util/coding_lean.h create mode 100644 src/rocksdb/util/coding_test.cc create mode 100644 src/rocksdb/util/compaction_job_stats_impl.cc create mode 100644 src/rocksdb/util/comparator.cc create mode 100644 src/rocksdb/util/compression.cc create mode 100644 src/rocksdb/util/compression.h create mode 100644 src/rocksdb/util/compression_context_cache.cc create mode 100644 src/rocksdb/util/compression_context_cache.h create mode 100644 src/rocksdb/util/concurrent_task_limiter_impl.cc create mode 100644 src/rocksdb/util/concurrent_task_limiter_impl.h create mode 100644 src/rocksdb/util/core_local.h create mode 100644 src/rocksdb/util/coro_utils.h create mode 100644 src/rocksdb/util/crc32c.cc create mode 100644 src/rocksdb/util/crc32c.h create mode 100644 src/rocksdb/util/crc32c_arm64.cc create mode 100644 src/rocksdb/util/crc32c_arm64.h create mode 100644 src/rocksdb/util/crc32c_ppc.c create mode 100644 src/rocksdb/util/crc32c_ppc.h create mode 100644 src/rocksdb/util/crc32c_ppc_asm.S create mode 100644 src/rocksdb/util/crc32c_ppc_constants.h create mode 100644 src/rocksdb/util/crc32c_test.cc create mode 100644 src/rocksdb/util/defer.h create mode 100644 src/rocksdb/util/defer_test.cc create mode 100644 src/rocksdb/util/distributed_mutex.h create mode 100644 src/rocksdb/util/duplicate_detector.h create mode 100644 src/rocksdb/util/dynamic_bloom.cc create mode 100644 src/rocksdb/util/dynamic_bloom.h create mode 100644 src/rocksdb/util/dynamic_bloom_test.cc create mode 100644 src/rocksdb/util/fastrange.h create mode 100644 src/rocksdb/util/file_checksum_helper.cc create mode 100644 src/rocksdb/util/file_checksum_helper.h create mode 100644 src/rocksdb/util/file_reader_writer_test.cc create mode 100644 
src/rocksdb/util/filelock_test.cc create mode 100644 src/rocksdb/util/filter_bench.cc create mode 100644 src/rocksdb/util/gflags_compat.h create mode 100644 src/rocksdb/util/hash.cc create mode 100644 src/rocksdb/util/hash.h create mode 100644 src/rocksdb/util/hash128.h create mode 100644 src/rocksdb/util/hash_containers.h create mode 100644 src/rocksdb/util/hash_map.h create mode 100644 src/rocksdb/util/hash_test.cc create mode 100644 src/rocksdb/util/heap.h create mode 100644 src/rocksdb/util/heap_test.cc create mode 100644 src/rocksdb/util/kv_map.h create mode 100644 src/rocksdb/util/log_write_bench.cc create mode 100644 src/rocksdb/util/math.h create mode 100644 src/rocksdb/util/math128.h create mode 100644 src/rocksdb/util/murmurhash.cc create mode 100644 src/rocksdb/util/murmurhash.h create mode 100644 src/rocksdb/util/mutexlock.h create mode 100644 src/rocksdb/util/ppc-opcode.h create mode 100644 src/rocksdb/util/random.cc create mode 100644 src/rocksdb/util/random.h create mode 100644 src/rocksdb/util/random_test.cc create mode 100644 src/rocksdb/util/rate_limiter.cc create mode 100644 src/rocksdb/util/rate_limiter.h create mode 100644 src/rocksdb/util/rate_limiter_test.cc create mode 100644 src/rocksdb/util/repeatable_thread.h create mode 100644 src/rocksdb/util/repeatable_thread_test.cc create mode 100644 src/rocksdb/util/ribbon_alg.h create mode 100644 src/rocksdb/util/ribbon_config.cc create mode 100644 src/rocksdb/util/ribbon_config.h create mode 100644 src/rocksdb/util/ribbon_impl.h create mode 100644 src/rocksdb/util/ribbon_test.cc create mode 100644 src/rocksdb/util/set_comparator.h create mode 100644 src/rocksdb/util/single_thread_executor.h create mode 100644 src/rocksdb/util/slice.cc create mode 100644 src/rocksdb/util/slice_test.cc create mode 100644 src/rocksdb/util/slice_transform_test.cc create mode 100644 src/rocksdb/util/status.cc create mode 100644 src/rocksdb/util/stderr_logger.cc create mode 100644 src/rocksdb/util/stderr_logger.h create mode 100644 src/rocksdb/util/stop_watch.h create mode 100644 src/rocksdb/util/string_util.cc create mode 100644 src/rocksdb/util/string_util.h create mode 100644 src/rocksdb/util/thread_guard.h create mode 100644 src/rocksdb/util/thread_list_test.cc create mode 100644 src/rocksdb/util/thread_local.cc create mode 100644 src/rocksdb/util/thread_local.h create mode 100644 src/rocksdb/util/thread_local_test.cc create mode 100644 src/rocksdb/util/thread_operation.h create mode 100644 src/rocksdb/util/threadpool_imp.cc create mode 100644 src/rocksdb/util/threadpool_imp.h create mode 100644 src/rocksdb/util/timer.h create mode 100644 src/rocksdb/util/timer_queue.h create mode 100644 src/rocksdb/util/timer_queue_test.cc create mode 100644 src/rocksdb/util/timer_test.cc create mode 100644 src/rocksdb/util/user_comparator_wrapper.h create mode 100644 src/rocksdb/util/vector_iterator.h create mode 100644 src/rocksdb/util/work_queue.h create mode 100644 src/rocksdb/util/work_queue_test.cc create mode 100644 src/rocksdb/util/xxhash.cc create mode 100644 src/rocksdb/util/xxhash.h create mode 100644 src/rocksdb/util/xxph3.h (limited to 'src/rocksdb/util') diff --git a/src/rocksdb/util/aligned_buffer.h b/src/rocksdb/util/aligned_buffer.h new file mode 100644 index 000000000..95ee5dfe8 --- /dev/null +++ b/src/rocksdb/util/aligned_buffer.h @@ -0,0 +1,234 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include + +#include "port/port.h" + +namespace ROCKSDB_NAMESPACE { + +// This file contains utilities to handle the alignment of pages and buffers. + +// Truncate to a multiple of page_size, which is also a page boundary. This +// helps to figuring out the right alignment. +// Example: +// TruncateToPageBoundary(4096, 5000) => 4096 +// TruncateToPageBoundary((4096, 10000) => 8192 +inline size_t TruncateToPageBoundary(size_t page_size, size_t s) { + s -= (s & (page_size - 1)); + assert((s % page_size) == 0); + return s; +} + +// Round up x to a multiple of y. +// Example: +// Roundup(13, 5) => 15 +// Roundup(201, 16) => 208 +inline size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; } + +// Round down x to a multiple of y. +// Example: +// Rounddown(13, 5) => 10 +// Rounddown(201, 16) => 192 +inline size_t Rounddown(size_t x, size_t y) { return (x / y) * y; } + +// AlignedBuffer manages a buffer by taking alignment into consideration, and +// aligns the buffer start and end positions. It is mainly used for direct I/O, +// though it can be used other purposes as well. +// It also supports expanding the managed buffer, and copying whole or part of +// the data from old buffer into the new expanded buffer. Such a copy especially +// helps in cases avoiding an IO to re-fetch the data from disk. +// +// Example: +// AlignedBuffer buf; +// buf.Alignment(alignment); +// buf.AllocateNewBuffer(user_requested_buf_size); +// ... +// buf.AllocateNewBuffer(2*user_requested_buf_size, /*copy_data*/ true, +// copy_offset, copy_len); +class AlignedBuffer { + size_t alignment_; + std::unique_ptr buf_; + size_t capacity_; + size_t cursize_; + char* bufstart_; + + public: + AlignedBuffer() + : alignment_(), capacity_(0), cursize_(0), bufstart_(nullptr) {} + + AlignedBuffer(AlignedBuffer&& o) noexcept { *this = std::move(o); } + + AlignedBuffer& operator=(AlignedBuffer&& o) noexcept { + alignment_ = std::move(o.alignment_); + buf_ = std::move(o.buf_); + capacity_ = std::move(o.capacity_); + cursize_ = std::move(o.cursize_); + bufstart_ = std::move(o.bufstart_); + return *this; + } + + AlignedBuffer(const AlignedBuffer&) = delete; + + AlignedBuffer& operator=(const AlignedBuffer&) = delete; + + static bool isAligned(const void* ptr, size_t alignment) { + return reinterpret_cast(ptr) % alignment == 0; + } + + static bool isAligned(size_t n, size_t alignment) { + return n % alignment == 0; + } + + size_t Alignment() const { return alignment_; } + + size_t Capacity() const { return capacity_; } + + size_t CurrentSize() const { return cursize_; } + + const char* BufferStart() const { return bufstart_; } + + char* BufferStart() { return bufstart_; } + + void Clear() { cursize_ = 0; } + + char* Release() { + cursize_ = 0; + capacity_ = 0; + bufstart_ = nullptr; + return buf_.release(); + } + + void Alignment(size_t alignment) { + assert(alignment > 0); + assert((alignment & (alignment - 1)) == 0); + alignment_ = alignment; + } + + // Allocates a new buffer and sets the start position to the first aligned + // byte. + // + // requested_capacity: requested new buffer capacity. 
This capacity will be + // rounded up based on alignment. + // copy_data: Copy data from old buffer to new buffer. If copy_offset and + // copy_len are not passed in and the new requested capacity is bigger + // than the existing buffer's capacity, the data in the exising buffer is + // fully copied over to the new buffer. + // copy_offset: Copy data from this offset in old buffer. + // copy_len: Number of bytes to copy. + // + // The function does nothing if the new requested_capacity is smaller than + // the current buffer capacity and copy_data is true i.e. the old buffer is + // retained as is. + void AllocateNewBuffer(size_t requested_capacity, bool copy_data = false, + uint64_t copy_offset = 0, size_t copy_len = 0) { + assert(alignment_ > 0); + assert((alignment_ & (alignment_ - 1)) == 0); + + copy_len = copy_len > 0 ? copy_len : cursize_; + if (copy_data && requested_capacity < copy_len) { + // If we are downsizing to a capacity that is smaller than the current + // data in the buffer -- Ignore the request. + return; + } + + size_t new_capacity = Roundup(requested_capacity, alignment_); + char* new_buf = new char[new_capacity + alignment_]; + char* new_bufstart = reinterpret_cast( + (reinterpret_cast(new_buf) + (alignment_ - 1)) & + ~static_cast(alignment_ - 1)); + + if (copy_data) { + assert(bufstart_ + copy_offset + copy_len <= bufstart_ + cursize_); + memcpy(new_bufstart, bufstart_ + copy_offset, copy_len); + cursize_ = copy_len; + } else { + cursize_ = 0; + } + + bufstart_ = new_bufstart; + capacity_ = new_capacity; + buf_.reset(new_buf); + } + + // Append to the buffer. + // + // src : source to copy the data from. + // append_size : number of bytes to copy from src. + // Returns the number of bytes appended. + // + // If append_size is more than the remaining buffer size only the + // remaining-size worth of bytes are copied. + size_t Append(const char* src, size_t append_size) { + size_t buffer_remaining = capacity_ - cursize_; + size_t to_copy = std::min(append_size, buffer_remaining); + + if (to_copy > 0) { + memcpy(bufstart_ + cursize_, src, to_copy); + cursize_ += to_copy; + } + return to_copy; + } + + // Read from the buffer. + // + // dest : destination buffer to copy the data to. + // offset : the buffer offset to start reading from. + // read_size : the number of bytes to copy from the buffer to dest. + // Returns the number of bytes read/copied to dest. + size_t Read(char* dest, size_t offset, size_t read_size) const { + assert(offset < cursize_); + + size_t to_read = 0; + if (offset < cursize_) { + to_read = std::min(cursize_ - offset, read_size); + } + if (to_read > 0) { + memcpy(dest, bufstart_ + offset, to_read); + } + return to_read; + } + + // Pad to the end of alignment with "padding" + void PadToAlignmentWith(int padding) { + size_t total_size = Roundup(cursize_, alignment_); + size_t pad_size = total_size - cursize_; + + if (pad_size > 0) { + assert((pad_size + cursize_) <= capacity_); + memset(bufstart_ + cursize_, padding, pad_size); + cursize_ += pad_size; + } + } + + void PadWith(size_t pad_size, int padding) { + assert((pad_size + cursize_) <= capacity_); + memset(bufstart_ + cursize_, padding, pad_size); + cursize_ += pad_size; + } + + // After a partial flush move the tail to the beginning of the buffer. + void RefitTail(size_t tail_offset, size_t tail_size) { + if (tail_size > 0) { + memmove(bufstart_, bufstart_ + tail_offset, tail_size); + } + cursize_ = tail_size; + } + + // Returns a place to start appending. 
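// Editorial sketch, not part of the upstream patch: a self-contained
// illustration of the alignment arithmetic AlignedBuffer relies on --
// Roundup() to size the allocation and the (ptr + align - 1) & ~(align - 1)
// trick AllocateNewBuffer uses to find the first aligned byte. All names in
// this sketch (aligned_buffer_sketch, AlignUp, Demo) are invented for
// illustration only.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>

namespace aligned_buffer_sketch {

// Round x up to the next multiple of y (same formula as Roundup above).
inline size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; }

// Round a pointer up to the next multiple of a power-of-two alignment.
inline char* AlignUp(char* p, size_t alignment) {
  return reinterpret_cast<char*>(
      (reinterpret_cast<uintptr_t>(p) + (alignment - 1)) &
      ~static_cast<uintptr_t>(alignment - 1));
}

inline void Demo() {
  constexpr size_t kAlignment = 4096;  // e.g. a direct-I/O page size
  // Requesting 5000 usable bytes rounds the capacity up to 8192.
  const size_t capacity = Roundup(5000, kAlignment);
  // Over-allocate by one alignment unit so an aligned start always fits.
  std::unique_ptr<char[]> raw(new char[capacity + kAlignment]);
  char* start = AlignUp(raw.get(), kAlignment);
  assert(reinterpret_cast<uintptr_t>(start) % kAlignment == 0);
  (void)start;
}

}  // namespace aligned_buffer_sketch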
+ // WARNING: Note that it is possible to write past the end of the buffer if + // the buffer is modified without using the write APIs or encapsulation + // offered by AlignedBuffer. It is up to the user to guard against such + // errors. + char* Destination() { return bufstart_ + cursize_; } + + void Size(size_t cursize) { cursize_ = cursize; } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/async_file_reader.cc b/src/rocksdb/util/async_file_reader.cc new file mode 100644 index 000000000..8401a6b44 --- /dev/null +++ b/src/rocksdb/util/async_file_reader.cc @@ -0,0 +1,73 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#if USE_COROUTINES +#include "util/async_file_reader.h" + +namespace ROCKSDB_NAMESPACE { +bool AsyncFileReader::MultiReadAsyncImpl(ReadAwaiter* awaiter) { + if (tail_) { + tail_->next_ = awaiter; + } + tail_ = awaiter; + if (!head_) { + head_ = awaiter; + } + num_reqs_ += awaiter->num_reqs_; + awaiter->io_handle_.resize(awaiter->num_reqs_); + awaiter->del_fn_.resize(awaiter->num_reqs_); + for (size_t i = 0; i < awaiter->num_reqs_; ++i) { + awaiter->file_ + ->ReadAsync( + awaiter->read_reqs_[i], awaiter->opts_, + [](const FSReadRequest& req, void* cb_arg) { + FSReadRequest* read_req = static_cast(cb_arg); + read_req->status = req.status; + read_req->result = req.result; + }, + &awaiter->read_reqs_[i], &awaiter->io_handle_[i], + &awaiter->del_fn_[i], /*aligned_buf=*/nullptr) + .PermitUncheckedError(); + } + return true; +} + +void AsyncFileReader::Wait() { + if (!head_) { + return; + } + ReadAwaiter* waiter; + std::vector io_handles; + io_handles.reserve(num_reqs_); + waiter = head_; + do { + for (size_t i = 0; i < waiter->num_reqs_; ++i) { + if (waiter->io_handle_[i]) { + io_handles.push_back(waiter->io_handle_[i]); + } + } + } while (waiter != tail_ && (waiter = waiter->next_)); + if (io_handles.size() > 0) { + StopWatch sw(SystemClock::Default().get(), stats_, POLL_WAIT_MICROS); + fs_->Poll(io_handles, io_handles.size()).PermitUncheckedError(); + } + do { + waiter = head_; + head_ = waiter->next_; + + for (size_t i = 0; i < waiter->num_reqs_; ++i) { + if (waiter->io_handle_[i] && waiter->del_fn_[i]) { + waiter->del_fn_[i](waiter->io_handle_[i]); + } + } + waiter->awaiting_coro_.resume(); + } while (waiter != tail_); + head_ = tail_ = nullptr; + RecordInHistogram(stats_, MULTIGET_IO_BATCH_SIZE, num_reqs_); + num_reqs_ = 0; +} +} // namespace ROCKSDB_NAMESPACE +#endif // USE_COROUTINES diff --git a/src/rocksdb/util/async_file_reader.h b/src/rocksdb/util/async_file_reader.h new file mode 100644 index 000000000..df69a840e --- /dev/null +++ b/src/rocksdb/util/async_file_reader.h @@ -0,0 +1,144 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory).#pragma once +#pragma once + +#if USE_COROUTINES +#include "file/random_access_file_reader.h" +#include "folly/experimental/coro/ViaIfAsync.h" +#include "port/port.h" +#include "rocksdb/file_system.h" +#include "rocksdb/statistics.h" +#include "util/autovector.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { +class SingleThreadExecutor; + +// AsyncFileReader implements the Awaitable concept, which allows calling +// coroutines to co_await it. When the AsyncFileReader Awaitable is +// resumed, it initiates the fie reads requested by the awaiting caller +// by calling RandomAccessFileReader's ReadAsync. It then suspends the +// awaiting coroutine. The suspended awaiter is later resumed by Wait(). +class AsyncFileReader { + class ReadAwaiter; + template + class ReadOperation; + + public: + AsyncFileReader(FileSystem* fs, Statistics* stats) : fs_(fs), stats_(stats) {} + + ~AsyncFileReader() {} + + ReadOperation MultiReadAsync(RandomAccessFileReader* file, + const IOOptions& opts, + FSReadRequest* read_reqs, + size_t num_reqs, + AlignedBuf* aligned_buf) noexcept { + return ReadOperation{*this, file, opts, + read_reqs, num_reqs, aligned_buf}; + } + + private: + friend SingleThreadExecutor; + + // Implementation of the Awaitable concept + class ReadAwaiter { + public: + explicit ReadAwaiter(AsyncFileReader& reader, RandomAccessFileReader* file, + const IOOptions& opts, FSReadRequest* read_reqs, + size_t num_reqs, AlignedBuf* /*aligned_buf*/) noexcept + : reader_(reader), + file_(file), + opts_(opts), + read_reqs_(read_reqs), + num_reqs_(num_reqs), + next_(nullptr) {} + + bool await_ready() noexcept { return false; } + + // A return value of true means suspend the awaiter (calling coroutine). The + // awaiting_coro parameter is the handle of the awaiter. The handle can be + // resumed later, so we cache it here. + bool await_suspend( + folly::coro::impl::coroutine_handle<> awaiting_coro) noexcept { + awaiting_coro_ = awaiting_coro; + // MultiReadAsyncImpl always returns true, so caller will be suspended + return reader_.MultiReadAsyncImpl(this); + } + + void await_resume() noexcept {} + + private: + friend AsyncFileReader; + + // The parameters passed to MultiReadAsync are cached here when the caller + // calls MultiReadAsync. Later, when the execution of this awaitable is + // started, these are used to do the actual IO + AsyncFileReader& reader_; + RandomAccessFileReader* file_; + const IOOptions& opts_; + FSReadRequest* read_reqs_; + size_t num_reqs_; + autovector io_handle_; + autovector del_fn_; + folly::coro::impl::coroutine_handle<> awaiting_coro_; + // Use this to link to the next ReadAwaiter in the suspended coroutine + // list. The head and tail of the list are tracked by AsyncFileReader. + // We use this approach rather than an STL container in order to avoid + // extra memory allocations. The coroutine call already allocates a + // ReadAwaiter object. + ReadAwaiter* next_; + }; + + // An instance of ReadOperation is returned to the caller of MultiGetAsync. + // This represents an awaitable that can be started later. 
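// Editorial sketch, not part of the upstream patch: the intrusive
// singly-linked queue idea behind ReadAwaiter::next_ and the head_/tail_
// pointers in AsyncFileReader. Each suspended awaiter carries its own link,
// so enqueueing needs no extra allocation. Node and Queue are invented names
// for illustration only; the drain loop mirrors Wait() above.

namespace awaiter_queue_sketch {

struct Node {
  Node* next_ = nullptr;  // link to the next suspended awaiter, if any
};

class Queue {
 public:
  // Mirrors MultiReadAsyncImpl's enqueue: append at the tail, fix up head.
  void Append(Node* n) {
    if (tail_) {
      tail_->next_ = n;
    }
    tail_ = n;
    if (!head_) {
      head_ = n;
    }
  }

  // Mirrors Wait()'s drain: visit every queued node once, then reset.
  template <typename Fn>
  void Drain(Fn&& fn) {
    if (!head_) {
      return;
    }
    Node* n;
    do {
      n = head_;
      head_ = n->next_;
      fn(n);  // e.g. release IO handles and resume the awaiting coroutine
    } while (n != tail_);
    head_ = tail_ = nullptr;
  }

 private:
  Node* head_ = nullptr;
  Node* tail_ = nullptr;
};

}  // namespace awaiter_queue_sketch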
+ template + class ReadOperation { + public: + explicit ReadOperation(AsyncFileReader& reader, + RandomAccessFileReader* file, const IOOptions& opts, + FSReadRequest* read_reqs, size_t num_reqs, + AlignedBuf* aligned_buf) noexcept + : reader_(reader), + file_(file), + opts_(opts), + read_reqs_(read_reqs), + num_reqs_(num_reqs), + aligned_buf_(aligned_buf) {} + + auto viaIfAsync(folly::Executor::KeepAlive<> executor) const { + return folly::coro::co_viaIfAsync( + std::move(executor), + Awaiter{reader_, file_, opts_, read_reqs_, num_reqs_, aligned_buf_}); + } + + private: + AsyncFileReader& reader_; + RandomAccessFileReader* file_; + const IOOptions& opts_; + FSReadRequest* read_reqs_; + size_t num_reqs_; + AlignedBuf* aligned_buf_; + }; + + // This function does the actual work when this awaitable starts execution + bool MultiReadAsyncImpl(ReadAwaiter* awaiter); + + // Called by the SingleThreadExecutor to poll for async IO completion. + // This also resumes the awaiting coroutines. + void Wait(); + + // Head of the queue of awaiters waiting for async IO completion + ReadAwaiter* head_ = nullptr; + // Tail of the awaiter queue + ReadAwaiter* tail_ = nullptr; + // Total number of pending async IOs + size_t num_reqs_ = 0; + FileSystem* fs_; + Statistics* stats_; +}; +} // namespace ROCKSDB_NAMESPACE +#endif // USE_COROUTINES diff --git a/src/rocksdb/util/autovector.h b/src/rocksdb/util/autovector.h new file mode 100644 index 000000000..f758473b7 --- /dev/null +++ b/src/rocksdb/util/autovector.h @@ -0,0 +1,406 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "port/lang.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +#ifdef ROCKSDB_LITE +template +class autovector : public std::vector { + using std::vector::vector; + + public: + autovector() { + // Make sure the initial vector has space for kSize elements + std::vector::reserve(kSize); + } +}; +#else +// A vector that leverages pre-allocated stack-based array to achieve better +// performance for array with small amount of items. +// +// The interface resembles that of vector, but with less features since we aim +// to solve the problem that we have in hand, rather than implementing a +// full-fledged generic container. +// +// Currently we don't support: +// * shrink_to_fit() +// If used correctly, in most cases, people should not touch the +// underlying vector at all. +// * random insert()/erase(), please only use push_back()/pop_back(). +// * No move/swap operations. Each autovector instance has a +// stack-allocated array and if we want support move/swap operations, we +// need to copy the arrays other than just swapping the pointers. In this +// case we'll just explicitly forbid these operations since they may +// lead users to make false assumption by thinking they are inexpensive +// operations. +// +// Naming style of public methods almost follows that of the STL's. +template +class autovector { + public: + // General STL-style container member types. 
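// Editorial sketch, not part of the upstream patch: the core small-buffer
// idea of autovector in miniature -- keep the first kSize elements in an
// in-object array and spill any extras into a std::vector. This toy version
// (SmallVector, an invented name) requires a default-constructible, copyable
// T and omits iterators, resize() and the other autovector features.

#include <cstddef>
#include <vector>

namespace small_vector_sketch {

template <class T, size_t kSize = 8>
class SmallVector {
 public:
  void push_back(const T& v) {
    if (num_stack_items_ < kSize) {
      stack_[num_stack_items_++] = v;  // fast path: no heap allocation
    } else {
      heap_.push_back(v);              // slow path: spill to the heap vector
    }
  }

  size_t size() const { return num_stack_items_ + heap_.size(); }

  const T& operator[](size_t n) const {
    return n < kSize ? stack_[n] : heap_[n - kSize];
  }

  // True while nothing has ever spilled to the heap (cf. only_in_stack()).
  bool only_in_stack() const { return heap_.capacity() == 0; }

 private:
  size_t num_stack_items_ = 0;
  T stack_[kSize];       // first kSize items live inside the object
  std::vector<T> heap_;  // used only once kSize is exceeded
};

}  // namespace small_vector_sketch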
+ using value_type = T; + using difference_type = typename std::vector::difference_type; + using size_type = typename std::vector::size_type; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + + // This class is the base for regular/const iterator + template + class iterator_impl { + public: + // -- iterator traits + using self_type = iterator_impl; + using value_type = TValueType; + using reference = TValueType&; + using pointer = TValueType*; + using difference_type = typename TAutoVector::difference_type; + using iterator_category = std::random_access_iterator_tag; + + iterator_impl(TAutoVector* vect, size_t index) + : vect_(vect), index_(index){}; + iterator_impl(const iterator_impl&) = default; + ~iterator_impl() {} + iterator_impl& operator=(const iterator_impl&) = default; + + // -- Advancement + // ++iterator + self_type& operator++() { + ++index_; + return *this; + } + + // iterator++ + self_type operator++(int) { + auto old = *this; + ++index_; + return old; + } + + // --iterator + self_type& operator--() { + --index_; + return *this; + } + + // iterator-- + self_type operator--(int) { + auto old = *this; + --index_; + return old; + } + + self_type operator-(difference_type len) const { + return self_type(vect_, index_ - len); + } + + difference_type operator-(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ - other.index_; + } + + self_type operator+(difference_type len) const { + return self_type(vect_, index_ + len); + } + + self_type& operator+=(difference_type len) { + index_ += len; + return *this; + } + + self_type& operator-=(difference_type len) { + index_ -= len; + return *this; + } + + // -- Reference + reference operator*() const { + assert(vect_->size() >= index_); + return (*vect_)[index_]; + } + + pointer operator->() const { + assert(vect_->size() >= index_); + return &(*vect_)[index_]; + } + + reference operator[](difference_type len) const { return *(*this + len); } + + // -- Logical Operators + bool operator==(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ == other.index_; + } + + bool operator!=(const self_type& other) const { return !(*this == other); } + + bool operator>(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ > other.index_; + } + + bool operator<(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ < other.index_; + } + + bool operator>=(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ >= other.index_; + } + + bool operator<=(const self_type& other) const { + assert(vect_ == other.vect_); + return index_ <= other.index_; + } + + private: + TAutoVector* vect_ = nullptr; + size_t index_ = 0; + }; + + using iterator = iterator_impl; + using const_iterator = iterator_impl; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + autovector() : values_(reinterpret_cast(buf_)) {} + + autovector(std::initializer_list init_list) + : values_(reinterpret_cast(buf_)) { + for (const T& item : init_list) { + push_back(item); + } + } + + ~autovector() { clear(); } + + // -- Immutable operations + // Indicate if all data resides in in-stack data structure. + bool only_in_stack() const { + // If no element was inserted at all, the vector's capacity will be `0`. 
+ return vect_.capacity() == 0; + } + + size_type size() const { return num_stack_items_ + vect_.size(); } + + // resize does not guarantee anything about the contents of the newly + // available elements + void resize(size_type n) { + if (n > kSize) { + vect_.resize(n - kSize); + while (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_++])) value_type(); + } + num_stack_items_ = kSize; + } else { + vect_.clear(); + while (num_stack_items_ < n) { + new ((void*)(&values_[num_stack_items_++])) value_type(); + } + while (num_stack_items_ > n) { + values_[--num_stack_items_].~value_type(); + } + } + } + + bool empty() const { return size() == 0; } + + size_type capacity() const { return kSize + vect_.capacity(); } + + void reserve(size_t cap) { + if (cap > kSize) { + vect_.reserve(cap - kSize); + } + + assert(cap <= capacity()); + } + + const_reference operator[](size_type n) const { + assert(n < size()); + if (n < kSize) { + return values_[n]; + } + return vect_[n - kSize]; + } + + reference operator[](size_type n) { + assert(n < size()); + if (n < kSize) { + return values_[n]; + } + return vect_[n - kSize]; + } + + const_reference at(size_type n) const { + assert(n < size()); + return (*this)[n]; + } + + reference at(size_type n) { + assert(n < size()); + return (*this)[n]; + } + + reference front() { + assert(!empty()); + return *begin(); + } + + const_reference front() const { + assert(!empty()); + return *begin(); + } + + reference back() { + assert(!empty()); + return *(end() - 1); + } + + const_reference back() const { + assert(!empty()); + return *(end() - 1); + } + + // -- Mutable Operations + void push_back(T&& item) { + if (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_])) value_type(); + values_[num_stack_items_++] = std::move(item); + } else { + vect_.push_back(item); + } + } + + void push_back(const T& item) { + if (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_])) value_type(); + values_[num_stack_items_++] = item; + } else { + vect_.push_back(item); + } + } + + template +#if _LIBCPP_STD_VER > 14 + reference emplace_back(Args&&... args) { + if (num_stack_items_ < kSize) { + return *(new ((void*)(&values_[num_stack_items_++])) + value_type(std::forward(args)...)); + } else { + return vect_.emplace_back(std::forward(args)...); + } + } +#else + void emplace_back(Args&&... 
args) { + if (num_stack_items_ < kSize) { + new ((void*)(&values_[num_stack_items_++])) + value_type(std::forward(args)...); + } else { + vect_.emplace_back(std::forward(args)...); + } + } +#endif + + void pop_back() { + assert(!empty()); + if (!vect_.empty()) { + vect_.pop_back(); + } else { + values_[--num_stack_items_].~value_type(); + } + } + + void clear() { + while (num_stack_items_ > 0) { + values_[--num_stack_items_].~value_type(); + } + vect_.clear(); + } + + // -- Copy and Assignment + autovector& assign(const autovector& other); + + autovector(const autovector& other) { assign(other); } + + autovector& operator=(const autovector& other) { return assign(other); } + + autovector(autovector&& other) noexcept { *this = std::move(other); } + autovector& operator=(autovector&& other); + + // -- Iterator Operations + iterator begin() { return iterator(this, 0); } + + const_iterator begin() const { return const_iterator(this, 0); } + + iterator end() { return iterator(this, this->size()); } + + const_iterator end() const { return const_iterator(this, this->size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + + reverse_iterator rend() { return reverse_iterator(begin()); } + + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + private: + size_type num_stack_items_ = 0; // current number of items + alignas(alignof( + value_type)) char buf_[kSize * + sizeof(value_type)]; // the first `kSize` items + pointer values_; + // used only if there are more than `kSize` items. + std::vector vect_; +}; + +template +autovector& autovector::assign( + const autovector& other) { + values_ = reinterpret_cast(buf_); + // copy the internal vector + vect_.assign(other.vect_.begin(), other.vect_.end()); + + // copy array + num_stack_items_ = other.num_stack_items_; + std::copy(other.values_, other.values_ + num_stack_items_, values_); + + return *this; +} + +template +autovector& autovector::operator=( + autovector&& other) { + values_ = reinterpret_cast(buf_); + vect_ = std::move(other.vect_); + size_t n = other.num_stack_items_; + num_stack_items_ = n; + other.num_stack_items_ = 0; + for (size_t i = 0; i < n; ++i) { + values_[i] = std::move(other.values_[i]); + } + return *this; +} + +#endif // ROCKSDB_LITE +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/autovector_test.cc b/src/rocksdb/util/autovector_test.cc new file mode 100644 index 000000000..8c7c39ce6 --- /dev/null +++ b/src/rocksdb/util/autovector_test.cc @@ -0,0 +1,331 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
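// Editorial sketch, not part of the upstream patch: the placement-new
// lifetime pattern that autovector's push_back/pop_back/clear use above --
// elements live in raw, suitably aligned in-object storage, are constructed
// with placement new, and must be destroyed explicitly. StackStorage and
// slot() are invented names for illustration only.

#include <cassert>
#include <cstddef>
#include <new>
#include <utility>

namespace placement_new_sketch {

template <class T, size_t kSize = 8>
class StackStorage {
 public:
  ~StackStorage() { clear(); }

  void push_back(T v) {
    assert(count_ < kSize);
    // Construct T in place inside the raw buffer; no heap allocation.
    new (static_cast<void*>(&slot(count_))) T(std::move(v));
    ++count_;
  }

  void pop_back() {
    assert(count_ > 0);
    // Raw storage never runs destructors on its own; call one explicitly.
    slot(--count_).~T();
  }

  void clear() {
    while (count_ > 0) {
      pop_back();
    }
  }

  size_t size() const { return count_; }

 private:
  T& slot(size_t i) { return *reinterpret_cast<T*>(buf_ + i * sizeof(T)); }

  size_t count_ = 0;
  alignas(alignof(T)) char buf_[kSize * sizeof(T)];  // raw, aligned storage
};

}  // namespace placement_new_sketch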
+ +#include "util/autovector.h" + +#include +#include +#include +#include + +#include "rocksdb/env.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/string_util.h" + +using std::cout; +using std::endl; + +namespace ROCKSDB_NAMESPACE { + +class AutoVectorTest : public testing::Test {}; +const unsigned long kSize = 8; + +namespace { +template +void AssertAutoVectorOnlyInStack(autovector* vec, bool result) { +#ifndef ROCKSDB_LITE + ASSERT_EQ(vec->only_in_stack(), result); +#else + (void)vec; + (void)result; +#endif // !ROCKSDB_LITE +} +} // namespace + +TEST_F(AutoVectorTest, PushBackAndPopBack) { + autovector vec; + ASSERT_TRUE(vec.empty()); + ASSERT_EQ(0ul, vec.size()); + + for (size_t i = 0; i < 1000 * kSize; ++i) { + vec.push_back(i); + ASSERT_TRUE(!vec.empty()); + if (i < kSize) { + AssertAutoVectorOnlyInStack(&vec, true); + } else { + AssertAutoVectorOnlyInStack(&vec, false); + } + ASSERT_EQ(i + 1, vec.size()); + ASSERT_EQ(i, vec[i]); + ASSERT_EQ(i, vec.at(i)); + } + + size_t size = vec.size(); + while (size != 0) { + vec.pop_back(); + // will always be in heap + AssertAutoVectorOnlyInStack(&vec, false); + ASSERT_EQ(--size, vec.size()); + } + + ASSERT_TRUE(vec.empty()); +} + +TEST_F(AutoVectorTest, EmplaceBack) { + using ValType = std::pair; + autovector vec; + + for (size_t i = 0; i < 1000 * kSize; ++i) { + vec.emplace_back(i, std::to_string(i + 123)); + ASSERT_TRUE(!vec.empty()); + if (i < kSize) { + AssertAutoVectorOnlyInStack(&vec, true); + } else { + AssertAutoVectorOnlyInStack(&vec, false); + } + + ASSERT_EQ(i + 1, vec.size()); + ASSERT_EQ(i, vec[i].first); + ASSERT_EQ(std::to_string(i + 123), vec[i].second); + } + + vec.clear(); + ASSERT_TRUE(vec.empty()); + AssertAutoVectorOnlyInStack(&vec, false); +} + +TEST_F(AutoVectorTest, Resize) { + autovector vec; + + vec.resize(kSize); + AssertAutoVectorOnlyInStack(&vec, true); + for (size_t i = 0; i < kSize; ++i) { + vec[i] = i; + } + + vec.resize(kSize * 2); + AssertAutoVectorOnlyInStack(&vec, false); + for (size_t i = 0; i < kSize; ++i) { + ASSERT_EQ(vec[i], i); + } + for (size_t i = 0; i < kSize; ++i) { + vec[i + kSize] = i; + } + + vec.resize(1); + ASSERT_EQ(1U, vec.size()); +} + +namespace { +void AssertEqual(const autovector& a, + const autovector& b) { + ASSERT_EQ(a.size(), b.size()); + ASSERT_EQ(a.empty(), b.empty()); +#ifndef ROCKSDB_LITE + ASSERT_EQ(a.only_in_stack(), b.only_in_stack()); +#endif // !ROCKSDB_LITE + for (size_t i = 0; i < a.size(); ++i) { + ASSERT_EQ(a[i], b[i]); + } +} +} // namespace + +TEST_F(AutoVectorTest, CopyAndAssignment) { + // Test both heap-allocated and stack-allocated cases. 
+ for (auto size : {kSize / 2, kSize * 1000}) { + autovector vec; + for (size_t i = 0; i < size; ++i) { + vec.push_back(i); + } + + { + autovector other; + other = vec; + AssertEqual(other, vec); + } + + { + autovector other(vec); + AssertEqual(other, vec); + } + } +} + +TEST_F(AutoVectorTest, Iterators) { + autovector vec; + for (size_t i = 0; i < kSize * 1000; ++i) { + vec.push_back(std::to_string(i)); + } + + // basic operator test + ASSERT_EQ(vec.front(), *vec.begin()); + ASSERT_EQ(vec.back(), *(vec.end() - 1)); + ASSERT_TRUE(vec.begin() < vec.end()); + + // non-const iterator + size_t index = 0; + for (const auto& item : vec) { + ASSERT_EQ(vec[index++], item); + } + + index = vec.size() - 1; + for (auto pos = vec.rbegin(); pos != vec.rend(); ++pos) { + ASSERT_EQ(vec[index--], *pos); + } + + // const iterator + const auto& cvec = vec; + index = 0; + for (const auto& item : cvec) { + ASSERT_EQ(cvec[index++], item); + } + + index = vec.size() - 1; + for (auto pos = cvec.rbegin(); pos != cvec.rend(); ++pos) { + ASSERT_EQ(cvec[index--], *pos); + } + + // forward and backward + auto pos = vec.begin(); + while (pos != vec.end()) { + auto old_val = *pos; + auto old = pos++; + // HACK: make sure -> works + ASSERT_TRUE(!old->empty()); + ASSERT_EQ(old_val, *old); + ASSERT_TRUE(pos == vec.end() || old_val != *pos); + } + + pos = vec.begin(); + for (size_t i = 0; i < vec.size(); i += 2) { + // Cannot use ASSERT_EQ since that macro depends on iostream serialization + ASSERT_TRUE(pos + 2 - 2 == pos); + pos += 2; + ASSERT_TRUE(pos >= vec.begin()); + ASSERT_TRUE(pos <= vec.end()); + + size_t diff = static_cast(pos - vec.begin()); + ASSERT_EQ(i + 2, diff); + } +} + +namespace { +std::vector GetTestKeys(size_t size) { + std::vector keys; + keys.resize(size); + + int index = 0; + for (auto& key : keys) { + key = "item-" + std::to_string(index++); + } + return keys; +} +} // namespace + +template +void BenchmarkVectorCreationAndInsertion( + std::string name, size_t ops, size_t item_size, + const std::vector& items) { + auto env = Env::Default(); + + int index = 0; + auto start_time = env->NowNanos(); + auto ops_remaining = ops; + while (ops_remaining--) { + TVector v; + for (size_t i = 0; i < item_size; ++i) { + v.push_back(items[index++]); + } + } + auto elapsed = env->NowNanos() - start_time; + cout << "created " << ops << " " << name << " instances:\n\t" + << "each was inserted with " << item_size << " elements\n\t" + << "total time elapsed: " << elapsed << " (ns)" << endl; +} + +template +size_t BenchmarkSequenceAccess(std::string name, size_t ops, size_t elem_size) { + TVector v; + for (const auto& item : GetTestKeys(elem_size)) { + v.push_back(item); + } + auto env = Env::Default(); + + auto ops_remaining = ops; + auto start_time = env->NowNanos(); + size_t total = 0; + while (ops_remaining--) { + auto end = v.end(); + for (auto pos = v.begin(); pos != end; ++pos) { + total += pos->size(); + } + } + auto elapsed = env->NowNanos() - start_time; + cout << "performed " << ops << " sequence access against " << name << "\n\t" + << "size: " << elem_size << "\n\t" + << "total time elapsed: " << elapsed << " (ns)" << endl; + // HACK avoid compiler's optimization to ignore total + return total; +} + +// This test case only reports the performance between std::vector +// and autovector. We chose string for comparison because in most +// of our use cases we used std::vector. +TEST_F(AutoVectorTest, PerfBench) { + // We run same operations for kOps times in order to get a more fair result. 
+ size_t kOps = 100000; + + // Creation and insertion test + // Test the case when there is: + // * no element inserted: internal array of std::vector may not really get + // initialize. + // * one element inserted: internal array of std::vector must have + // initialized. + // * kSize elements inserted. This shows the most time we'll spend if we + // keep everything in stack. + // * 2 * kSize elements inserted. The internal vector of + // autovector must have been initialized. + cout << "=====================================================" << endl; + cout << "Creation and Insertion Test (value type: std::string)" << endl; + cout << "=====================================================" << endl; + + // pre-generated unique keys + auto string_keys = GetTestKeys(kOps * 2 * kSize); + for (auto insertions : {0ul, 1ul, kSize / 2, kSize, 2 * kSize}) { + BenchmarkVectorCreationAndInsertion>( + "std::vector", kOps, insertions, string_keys); + BenchmarkVectorCreationAndInsertion>( + "autovector", kOps, insertions, string_keys); + cout << "-----------------------------------" << endl; + } + + cout << "=====================================================" << endl; + cout << "Creation and Insertion Test (value type: uint64_t)" << endl; + cout << "=====================================================" << endl; + + // pre-generated unique keys + std::vector int_keys(kOps * 2 * kSize); + for (size_t i = 0; i < kOps * 2 * kSize; ++i) { + int_keys[i] = i; + } + for (auto insertions : {0ul, 1ul, kSize / 2, kSize, 2 * kSize}) { + BenchmarkVectorCreationAndInsertion>( + "std::vector", kOps, insertions, int_keys); + BenchmarkVectorCreationAndInsertion>( + "autovector", kOps, insertions, int_keys); + cout << "-----------------------------------" << endl; + } + + // Sequence Access Test + cout << "=====================================================" << endl; + cout << "Sequence Access Test" << endl; + cout << "=====================================================" << endl; + for (auto elem_size : {kSize / 2, kSize, 2 * kSize}) { + BenchmarkSequenceAccess>("std::vector", kOps, + elem_size); + BenchmarkSequenceAccess>("autovector", kOps, + elem_size); + cout << "-----------------------------------" << endl; + } +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/bloom_impl.h b/src/rocksdb/util/bloom_impl.h new file mode 100644 index 000000000..fadd012d3 --- /dev/null +++ b/src/rocksdb/util/bloom_impl.h @@ -0,0 +1,489 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Implementation details of various Bloom filter implementations used in +// RocksDB. (DynamicBloom is in a separate file for now because it +// supports concurrent write.) + +#pragma once +#include +#include + +#include + +#include "port/port.h" // for PREFETCH +#include "rocksdb/slice.h" +#include "util/hash.h" + +#ifdef HAVE_AVX2 +#include +#endif + +namespace ROCKSDB_NAMESPACE { + +class BloomMath { + public: + // False positive rate of a standard Bloom filter, for given ratio of + // filter memory bits to added keys, and number of probes per operation. 
+ // (The false positive rate is effectively independent of scale, assuming + // the implementation scales OK.) + static double StandardFpRate(double bits_per_key, int num_probes) { + // Standard very-good-estimate formula. See + // https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives + return std::pow(1.0 - std::exp(-num_probes / bits_per_key), num_probes); + } + + // False positive rate of a "blocked"/"shareded"/"cache-local" Bloom filter, + // for given ratio of filter memory bits to added keys, number of probes per + // operation (all within the given block or cache line size), and block or + // cache line size. + static double CacheLocalFpRate(double bits_per_key, int num_probes, + int cache_line_bits) { + if (bits_per_key <= 0.0) { + // Fix a discontinuity + return 1.0; + } + double keys_per_cache_line = cache_line_bits / bits_per_key; + // A reasonable estimate is the average of the FP rates for one standard + // deviation above and below the mean bucket occupancy. See + // https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter#the-math + double keys_stddev = std::sqrt(keys_per_cache_line); + double crowded_fp = StandardFpRate( + cache_line_bits / (keys_per_cache_line + keys_stddev), num_probes); + double uncrowded_fp = StandardFpRate( + cache_line_bits / (keys_per_cache_line - keys_stddev), num_probes); + return (crowded_fp + uncrowded_fp) / 2; + } + + // False positive rate of querying a new item against `num_keys` items, all + // hashed to `fingerprint_bits` bits. (This assumes the fingerprint hashes + // themselves are stored losslessly. See Section 4 of + // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf) + static double FingerprintFpRate(size_t num_keys, int fingerprint_bits) { + double inv_fingerprint_space = std::pow(0.5, fingerprint_bits); + // Base estimate assumes each key maps to a unique fingerprint. + // Could be > 1 in extreme cases. + double base_estimate = num_keys * inv_fingerprint_space; + // To account for potential overlap, we choose between two formulas + if (base_estimate > 0.0001) { + // A very good formula assuming we don't construct a floating point + // number extremely close to 1. Always produces a probability < 1. + return 1.0 - std::exp(-base_estimate); + } else { + // A very good formula when base_estimate is far below 1. (Subtract + // away the integral-approximated sum that some key has same hash as + // one coming before it in a list.) + return base_estimate - (base_estimate * base_estimate * 0.5); + } + } + + // Returns the probably of either of two independent(-ish) events + // happening, given their probabilities. (This is useful for combining + // results from StandardFpRate or CacheLocalFpRate with FingerprintFpRate + // for a hash-efficient Bloom filter's FP rate. See Section 4 of + // http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf) + static double IndependentProbabilitySum(double rate1, double rate2) { + // Use formula that avoids floating point extremely close to 1 if + // rates are extremely small. + return rate1 + rate2 - (rate1 * rate2); + } +}; + +// A fast, flexible, and accurate cache-local Bloom implementation with +// SIMD-optimized query performance (currently using AVX2 on Intel). Write +// performance and non-SIMD read are very good, benefiting from FastRange32 +// used in place of % and single-cycle multiplication on recent processors. 
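// Editorial sketch, not part of the upstream patch: a numeric check of
// BloomMath::StandardFpRate above, p = (1 - e^(-k/b))^k for b bits per key
// and k probes. For the classic b = 10, k = 6 configuration this evaluates
// to roughly 0.0084 (about 0.84%) for an idealized unblocked Bloom filter;
// the cache-local (512-bit block) estimate comes out somewhat higher,
// consistent with the ~0.95% figures quoted for FastLocalBloomImpl below.
// Names in this sketch are local to it.

#include <cmath>
#include <cstdio>

namespace bloom_math_sketch {

// Same formula as BloomMath::StandardFpRate.
inline double StandardFpRate(double bits_per_key, int num_probes) {
  return std::pow(1.0 - std::exp(-num_probes / bits_per_key), num_probes);
}

inline void Demo() {
  // Prints approximately 0.0084 for 10 bits/key and 6 probes.
  std::printf("standard fp rate ~= %.4f\n", StandardFpRate(10.0, 6));
}

}  // namespace bloom_math_sketch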
+// +// Most other SIMD Bloom implementations sacrifice flexibility and/or +// accuracy by requiring num_probes to be a power of two and restricting +// where each probe can occur in a cache line. This implementation sacrifices +// SIMD-optimization for add (might still be possible, especially with AVX512) +// in favor of allowing any num_probes, not crossing cache line boundary, +// and accuracy close to theoretical best accuracy for a cache-local Bloom. +// E.g. theoretical best for 10 bits/key, num_probes=6, and 512-bit bucket +// (Intel cache line size) is 0.9535% FP rate. This implementation yields +// about 0.957%. (Compare to LegacyLocalityBloomImpl at 1.138%, or +// about 0.951% for 1024-bit buckets, cache line size for some ARM CPUs.) +// +// This implementation can use a 32-bit hash (let h2 be h1 * 0x9e3779b9) or +// a 64-bit hash (split into two uint32s). With many millions of keys, the +// false positive rate associated with using a 32-bit hash can dominate the +// false positive rate of the underlying filter. At 10 bits/key setting, the +// inflection point is about 40 million keys, so 32-bit hash is a bad idea +// with 10s of millions of keys or more. +// +// Despite accepting a 64-bit hash, this implementation uses 32-bit fastrange +// to pick a cache line, which can be faster than 64-bit in some cases. +// This only hurts accuracy as you get into 10s of GB for a single filter, +// and accuracy abruptly breaks down at 256GB (2^32 cache lines). Switch to +// 64-bit fastrange if you need filters so big. ;) +// +// Using only a 32-bit input hash within each cache line has negligible +// impact for any reasonable cache line / bucket size, for arbitrary filter +// size, and potentially saves intermediate data size in some cases vs. +// tracking full 64 bits. (Even in an implementation using 64-bit arithmetic +// to generate indices, I might do the same, as a single multiplication +// suffices to generate a sufficiently mixed 64 bits from 32 bits.) +// +// This implementation is currently tied to Intel cache line size, 64 bytes == +// 512 bits. If there's sufficient demand for other cache line sizes, this is +// a pretty good implementation to extend, but slight performance enhancements +// are possible with an alternate implementation (probably not very compatible +// with SIMD): +// (1) Use rotation in addition to multiplication for remixing +// (like murmur hash). (Using multiplication alone *slightly* hurts accuracy +// because lower bits never depend on original upper bits.) +// (2) Extract more than one bit index from each re-mix. (Only if rotation +// or similar is part of remix, because otherwise you're making the +// multiplication-only problem worse.) +// (3) Re-mix full 64 bit hash, to get maximum number of bit indices per +// re-mix. +// +class FastLocalBloomImpl { + public: + // NOTE: this has only been validated to enough accuracy for producing + // reasonable warnings / user feedback, not for making functional decisions. + static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes, + int hash_bits) { + return BloomMath::IndependentProbabilitySum( + BloomMath::CacheLocalFpRate(8.0 * bytes / keys, num_probes, + /*cache line bits*/ 512), + BloomMath::FingerprintFpRate(keys, hash_bits)); + } + + static inline int ChooseNumProbes(int millibits_per_key) { + // Since this implementation can (with AVX2) make up to 8 probes + // for the same cost, we pick the most accurate num_probes, based + // on actual tests of the implementation. 
Note that for higher + // bits/key, the best choice for cache-local Bloom can be notably + // smaller than standard bloom, e.g. 9 instead of 11 @ 16 b/k. + if (millibits_per_key <= 2080) { + return 1; + } else if (millibits_per_key <= 3580) { + return 2; + } else if (millibits_per_key <= 5100) { + return 3; + } else if (millibits_per_key <= 6640) { + return 4; + } else if (millibits_per_key <= 8300) { + return 5; + } else if (millibits_per_key <= 10070) { + return 6; + } else if (millibits_per_key <= 11720) { + return 7; + } else if (millibits_per_key <= 14001) { + // Would be something like <= 13800 but sacrificing *slightly* for + // more settings using <= 8 probes. + return 8; + } else if (millibits_per_key <= 16050) { + return 9; + } else if (millibits_per_key <= 18300) { + return 10; + } else if (millibits_per_key <= 22001) { + return 11; + } else if (millibits_per_key <= 25501) { + return 12; + } else if (millibits_per_key > 50000) { + // Top out at 24 probes (three sets of 8) + return 24; + } else { + // Roughly optimal choices for remaining range + // e.g. + // 28000 -> 12, 28001 -> 13 + // 50000 -> 23, 50001 -> 24 + return (millibits_per_key - 1) / 2000 - 1; + } + } + + static inline void AddHash(uint32_t h1, uint32_t h2, uint32_t len_bytes, + int num_probes, char *data) { + uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + AddHashPrepared(h2, num_probes, data + bytes_to_cache_line); + } + + static inline void AddHashPrepared(uint32_t h2, int num_probes, + char *data_at_cache_line) { + uint32_t h = h2; + for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) { + // 9-bit address within 512 bit cache line + int bitpos = h >> (32 - 9); + data_at_cache_line[bitpos >> 3] |= (uint8_t{1} << (bitpos & 7)); + } + } + + static inline void PrepareHash(uint32_t h1, uint32_t len_bytes, + const char *data, + uint32_t /*out*/ *byte_offset) { + uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + PREFETCH(data + bytes_to_cache_line, 0 /* rw */, 1 /* locality */); + PREFETCH(data + bytes_to_cache_line + 63, 0 /* rw */, 1 /* locality */); + *byte_offset = bytes_to_cache_line; + } + + static inline bool HashMayMatch(uint32_t h1, uint32_t h2, uint32_t len_bytes, + int num_probes, const char *data) { + uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + return HashMayMatchPrepared(h2, num_probes, data + bytes_to_cache_line); + } + + static inline bool HashMayMatchPrepared(uint32_t h2, int num_probes, + const char *data_at_cache_line) { + uint32_t h = h2; +#ifdef HAVE_AVX2 + int rem_probes = num_probes; + + // NOTE: For better performance for num_probes in {1, 2, 9, 10, 17, 18, + // etc.} one can insert specialized code for rem_probes <= 2, bypassing + // the SIMD code in those cases. There is a detectable but minor overhead + // applied to other values of num_probes (when not statically determined), + // but smoother performance curve vs. num_probes. But for now, when + // in doubt, don't add unnecessary code. + + // Powers of 32-bit golden ratio, mod 2**32. + const __m256i multipliers = + _mm256_setr_epi32(0x00000001, 0x9e3779b9, 0xe35e67b1, 0x734297e9, + 0x35fbe861, 0xdeb7c719, 0x448b211, 0x3459b749); + + for (;;) { + // Eight copies of hash + __m256i hash_vector = _mm256_set1_epi32(h); + + // Same effect as repeated multiplication by 0x9e3779b9 thanks to + // associativity of multiplication. 
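// Editorial sketch, not part of the upstream patch: how AddHashPrepared
// above derives its probe positions -- repeatedly multiply the 32-bit hash
// by the golden-ratio constant 0x9e3779b9 and take the top 9 bits as a bit
// index within the 512-bit cache line. The AVX2 path that follows computes
// the same eight products at once using a vector of precomputed powers of
// that constant. ShowProbes is an invented name for illustration only.

#include <cstdint>
#include <cstdio>

namespace probe_sketch {

// Print the bit positions (0..511) that num_probes probes set for a given
// 32-bit remixed hash h2, mirroring FastLocalBloomImpl::AddHashPrepared.
inline void ShowProbes(uint32_t h2, int num_probes) {
  uint32_t h = h2;
  for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) {
    int bitpos = static_cast<int>(h >> (32 - 9));  // top 9 bits -> 0..511
    std::printf("probe %d -> bit %d (byte %d, bit %d)\n", i, bitpos,
                bitpos >> 3, bitpos & 7);
  }
}

}  // namespace probe_sketch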
+ hash_vector = _mm256_mullo_epi32(hash_vector, multipliers); + + // Now the top 9 bits of each of the eight 32-bit values in + // hash_vector are bit addresses for probes within the cache line. + // While the platform-independent code uses byte addressing (6 bits + // to pick a byte + 3 bits to pick a bit within a byte), here we work + // with 32-bit words (4 bits to pick a word + 5 bits to pick a bit + // within a word) because that works well with AVX2 and is equivalent + // under little-endian. + + // Shift each right by 28 bits to get 4-bit word addresses. + const __m256i word_addresses = _mm256_srli_epi32(hash_vector, 28); + + // Gather 32-bit values spread over 512 bits by 4-bit address. In + // essence, we are dereferencing eight pointers within the cache + // line. + // + // Option 1: AVX2 gather (seems to be a little slow - understandable) + // const __m256i value_vector = + // _mm256_i32gather_epi32(static_cast(data_at_cache_line), + // word_addresses, + // /*bytes / i32*/ 4); + // END Option 1 + // Potentially unaligned as we're not *always* cache-aligned -> loadu + const __m256i *mm_data = + reinterpret_cast(data_at_cache_line); + __m256i lower = _mm256_loadu_si256(mm_data); + __m256i upper = _mm256_loadu_si256(mm_data + 1); + // Option 2: AVX512VL permute hack + // Only negligibly faster than Option 3, so not yet worth supporting + // const __m256i value_vector = + // _mm256_permutex2var_epi32(lower, word_addresses, upper); + // END Option 2 + // Option 3: AVX2 permute+blend hack + // Use lowest three bits to order probing values, as if all from same + // 256 bit piece. + lower = _mm256_permutevar8x32_epi32(lower, word_addresses); + upper = _mm256_permutevar8x32_epi32(upper, word_addresses); + // Just top 1 bit of address, to select between lower and upper. + const __m256i upper_lower_selector = _mm256_srai_epi32(hash_vector, 31); + // Finally: the next 8 probed 32-bit values, in probing sequence order. + const __m256i value_vector = + _mm256_blendv_epi8(lower, upper, upper_lower_selector); + // END Option 3 + + // We might not need to probe all 8, so build a mask for selecting only + // what we need. (The k_selector(s) could be pre-computed but that + // doesn't seem to make a noticeable performance difference.) + const __m256i zero_to_seven = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + // Subtract rem_probes from each of those constants + __m256i k_selector = + _mm256_sub_epi32(zero_to_seven, _mm256_set1_epi32(rem_probes)); + // Negative after subtract -> use/select + // Keep only high bit (logical shift right each by 31). + k_selector = _mm256_srli_epi32(k_selector, 31); + + // Strip off the 4 bit word address (shift left) + __m256i bit_addresses = _mm256_slli_epi32(hash_vector, 4); + // And keep only 5-bit (32 - 27) bit-within-32-bit-word addresses. + bit_addresses = _mm256_srli_epi32(bit_addresses, 27); + // Build a bit mask + const __m256i bit_mask = _mm256_sllv_epi32(k_selector, bit_addresses); + + // Like ((~value_vector) & bit_mask) == 0) + bool match = _mm256_testc_si256(value_vector, bit_mask) != 0; + + // This check first so that it's easy for branch predictor to optimize + // num_probes <= 8 case, making it free of unpredictable branches. + if (rem_probes <= 8) { + return match; + } else if (!match) { + return false; + } + // otherwise + // Need another iteration. 
0xab25f4c1 == golden ratio to the 8th power + h *= 0xab25f4c1; + rem_probes -= 8; + } +#else + for (int i = 0; i < num_probes; ++i, h *= uint32_t{0x9e3779b9}) { + // 9-bit address within 512 bit cache line + int bitpos = h >> (32 - 9); + if ((data_at_cache_line[bitpos >> 3] & (char(1) << (bitpos & 7))) == 0) { + return false; + } + } + return true; +#endif + } +}; + +// A legacy Bloom filter implementation with no locality of probes (slow). +// It uses double hashing to generate a sequence of hash values. +// Asymptotic analysis is in [Kirsch,Mitzenmacher 2006], but known to have +// subtle accuracy flaws for practical sizes [Dillinger,Manolios 2004]. +// +// DO NOT REUSE +// +class LegacyNoLocalityBloomImpl { + public: + static inline int ChooseNumProbes(int bits_per_key) { + // We intentionally round down to reduce probing cost a little bit + int num_probes = static_cast(bits_per_key * 0.69); // 0.69 =~ ln(2) + if (num_probes < 1) num_probes = 1; + if (num_probes > 30) num_probes = 30; + return num_probes; + } + + static inline void AddHash(uint32_t h, uint32_t total_bits, int num_probes, + char *data) { + const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits + for (int i = 0; i < num_probes; i++) { + const uint32_t bitpos = h % total_bits; + data[bitpos / 8] |= (1 << (bitpos % 8)); + h += delta; + } + } + + static inline bool HashMayMatch(uint32_t h, uint32_t total_bits, + int num_probes, const char *data) { + const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits + for (int i = 0; i < num_probes; i++) { + const uint32_t bitpos = h % total_bits; + if ((data[bitpos / 8] & (1 << (bitpos % 8))) == 0) { + return false; + } + h += delta; + } + return true; + } +}; + +// A legacy Bloom filter implementation with probes local to a single +// cache line (fast). Because SST files might be transported between +// platforms, the cache line size is a parameter rather than hard coded. +// (But if specified as a constant parameter, an optimizing compiler +// should take advantage of that.) +// +// When ExtraRotates is false, this implementation is notably deficient in +// accuracy. Specifically, it uses double hashing with a 1/512 chance of the +// increment being zero (when cache line size is 512 bits). Thus, there's a +// 1/512 chance of probing only one index, which we'd expect to incur about +// a 1/2 * 1/512 or absolute 0.1% FP rate penalty. More detail at +// https://github.com/facebook/rocksdb/issues/4120 +// +// DO NOT REUSE +// +template +class LegacyLocalityBloomImpl { + private: + static inline uint32_t GetLine(uint32_t h, uint32_t num_lines) { + uint32_t offset_h = ExtraRotates ? (h >> 11) | (h << 21) : h; + return offset_h % num_lines; + } + + public: + // NOTE: this has only been validated to enough accuracy for producing + // reasonable warnings / user feedback, not for making functional decisions. + static double EstimatedFpRate(size_t keys, size_t bytes, int num_probes) { + double bits_per_key = 8.0 * bytes / keys; + double filter_rate = BloomMath::CacheLocalFpRate(bits_per_key, num_probes, + /*cache line bits*/ 512); + if (!ExtraRotates) { + // Good estimate of impact of flaw in index computation. + // Adds roughly 0.002 around 50 bits/key and 0.001 around 100 bits/key. + // The + 22 shifts it nicely to fit for lower bits/key. 
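+      // For example: 0.1 / (50 * 0.75 + 22) ~= 0.0017 and
+      // 0.1 / (100 * 0.75 + 22) ~= 0.001.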
+ filter_rate += 0.1 / (bits_per_key * 0.75 + 22); + } else { + // Not yet validated + assert(false); + } + // Always uses 32-bit hash + double fingerprint_rate = BloomMath::FingerprintFpRate(keys, 32); + return BloomMath::IndependentProbabilitySum(filter_rate, fingerprint_rate); + } + + static inline void AddHash(uint32_t h, uint32_t num_lines, int num_probes, + char *data, int log2_cache_line_bytes) { + const int log2_cache_line_bits = log2_cache_line_bytes + 3; + + char *data_at_offset = + data + (GetLine(h, num_lines) << log2_cache_line_bytes); + const uint32_t delta = (h >> 17) | (h << 15); + for (int i = 0; i < num_probes; ++i) { + // Mask to bit-within-cache-line address + const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1); + data_at_offset[bitpos / 8] |= (1 << (bitpos % 8)); + if (ExtraRotates) { + h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits)); + } + h += delta; + } + } + + static inline void PrepareHashMayMatch(uint32_t h, uint32_t num_lines, + const char *data, + uint32_t /*out*/ *byte_offset, + int log2_cache_line_bytes) { + uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes; + PREFETCH(data + b, 0 /* rw */, 1 /* locality */); + PREFETCH(data + b + ((1 << log2_cache_line_bytes) - 1), 0 /* rw */, + 1 /* locality */); + *byte_offset = b; + } + + static inline bool HashMayMatch(uint32_t h, uint32_t num_lines, + int num_probes, const char *data, + int log2_cache_line_bytes) { + uint32_t b = GetLine(h, num_lines) << log2_cache_line_bytes; + return HashMayMatchPrepared(h, num_probes, data + b, log2_cache_line_bytes); + } + + static inline bool HashMayMatchPrepared(uint32_t h, int num_probes, + const char *data_at_offset, + int log2_cache_line_bytes) { + const int log2_cache_line_bits = log2_cache_line_bytes + 3; + + const uint32_t delta = (h >> 17) | (h << 15); + for (int i = 0; i < num_probes; ++i) { + // Mask to bit-within-cache-line address + const uint32_t bitpos = h & ((1 << log2_cache_line_bits) - 1); + if (((data_at_offset[bitpos / 8]) & (1 << (bitpos % 8))) == 0) { + return false; + } + if (ExtraRotates) { + h = (h >> log2_cache_line_bits) | (h << (32 - log2_cache_line_bits)); + } + h += delta; + } + return true; + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/bloom_test.cc b/src/rocksdb/util/bloom_test.cc new file mode 100644 index 000000000..9d509ac3d --- /dev/null +++ b/src/rocksdb/util/bloom_test.cc @@ -0,0 +1,1175 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef GFLAGS +#include +int main() { + fprintf(stderr, "Please install gflags to run this test... 
Skipping...\n"); + return 0; +} +#else + +#include +#include +#include + +#include "cache/cache_entry_roles.h" +#include "cache/cache_reservation_manager.h" +#include "memory/arena.h" +#include "port/jemalloc_helper.h" +#include "rocksdb/filter_policy.h" +#include "table/block_based/filter_policy_internal.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/gflags_compat.h" +#include "util/hash.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; + +// The test is not fully designed for bits_per_key other than 10, but with +// this parameter you can easily explore the behavior of other bits_per_key. +// See also filter_bench. +DEFINE_int32(bits_per_key, 10, ""); + +namespace ROCKSDB_NAMESPACE { + +namespace { +const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName(); +const std::string kFastLocalBloom = + test::FastLocalBloomFilterPolicy::kClassName(); +const std::string kStandard128Ribbon = + test::Standard128RibbonFilterPolicy::kClassName(); +} // namespace + +static const int kVerbose = 1; + +static Slice Key(int i, char* buffer) { + std::string s; + PutFixed32(&s, static_cast(i)); + memcpy(buffer, s.c_str(), sizeof(i)); + return Slice(buffer, sizeof(i)); +} + +static int NextLength(int length) { + if (length < 10) { + length += 1; + } else if (length < 100) { + length += 10; + } else if (length < 1000) { + length += 100; + } else { + length += 1000; + } + return length; +} + +class FullBloomTest : public testing::TestWithParam { + protected: + BlockBasedTableOptions table_options_; + + private: + std::shared_ptr& policy_; + std::unique_ptr bits_builder_; + std::unique_ptr bits_reader_; + std::unique_ptr buf_; + size_t filter_size_; + + public: + FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) { + ResetPolicy(); + } + + BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() { + // Throws on bad cast + return dynamic_cast(bits_builder_.get()); + } + + const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() { + // Throws on bad cast + return &dynamic_cast(*policy_); + } + + void Reset() { + bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext( + FilterBuildingContext(table_options_))); + bits_reader_.reset(nullptr); + buf_.reset(nullptr); + filter_size_ = 0; + } + + void ResetPolicy(double bits_per_key) { + policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key); + Reset(); + } + + void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); } + + void Add(const Slice& s) { bits_builder_->AddKey(s); } + + void OpenRaw(const Slice& s) { + bits_reader_.reset(policy_->GetFilterBitsReader(s)); + } + + void Build() { + Slice filter = bits_builder_->Finish(&buf_); + bits_reader_.reset(policy_->GetFilterBitsReader(filter)); + filter_size_ = filter.size(); + } + + size_t FilterSize() const { return filter_size_; } + + Slice FilterData() { return Slice(buf_.get(), filter_size_); } + + int GetNumProbesFromFilterData() { + assert(filter_size_ >= 5); + int8_t raw_num_probes = static_cast(buf_.get()[filter_size_ - 5]); + if (raw_num_probes == -1) { // New bloom filter marker + return static_cast(buf_.get()[filter_size_ - 3]); + } else { + return raw_num_probes; + } + } + + int GetRibbonSeedFromFilterData() { + assert(filter_size_ >= 5); + // Check for ribbon marker + assert(-2 == static_cast(buf_.get()[filter_size_ - 5])); + return static_cast(buf_.get()[filter_size_ - 4]); + } + + bool Matches(const Slice& s) { + if (bits_reader_ == nullptr) { + Build(); + } + return bits_reader_->MayMatch(s); + } + + // 
Provides a kind of fingerprint on the Bloom filter's + // behavior, for reasonbly high FP rates. + uint64_t PackedMatches() { + char buffer[sizeof(int)]; + uint64_t result = 0; + for (int i = 0; i < 64; i++) { + if (Matches(Key(i + 12345, buffer))) { + result |= uint64_t{1} << i; + } + } + return result; + } + + // Provides a kind of fingerprint on the Bloom filter's + // behavior, for lower FP rates. + std::string FirstFPs(int count) { + char buffer[sizeof(int)]; + std::string rv; + int fp_count = 0; + for (int i = 0; i < 1000000; i++) { + // Pack four match booleans into each hexadecimal digit + if (Matches(Key(i + 1000000, buffer))) { + ++fp_count; + rv += std::to_string(i); + if (fp_count == count) { + break; + } + rv += ','; + } + } + return rv; + } + + double FalsePositiveRate() { + char buffer[sizeof(int)]; + int result = 0; + for (int i = 0; i < 10000; i++) { + if (Matches(Key(i + 1000000000, buffer))) { + result++; + } + } + return result / 10000.0; + } +}; + +TEST_P(FullBloomTest, FilterSize) { + // In addition to checking the consistency of space computation, we are + // checking that denoted and computed doubles are interpreted as expected + // as bits_per_key values. + bool some_computed_less_than_denoted = false; + // Note: to avoid unproductive configurations, bits_per_key < 0.5 is rounded + // down to 0 (no filter), and 0.5 <= bits_per_key < 1.0 is rounded up to 1 + // bit per key (1000 millibits). Also, enforced maximum is 100 bits per key + // (100000 millibits). + for (auto bpk : std::vector >{{-HUGE_VAL, 0}, + {-INFINITY, 0}, + {0.0, 0}, + {0.499, 0}, + {0.5, 1000}, + {1.234, 1234}, + {3.456, 3456}, + {9.5, 9500}, + {10.0, 10000}, + {10.499, 10499}, + {21.345, 21345}, + {99.999, 99999}, + {1234.0, 100000}, + {HUGE_VAL, 100000}, + {INFINITY, 100000}, + {NAN, 100000}}) { + ResetPolicy(bpk.first); + auto bfp = GetBloomLikeFilterPolicy(); + EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); + EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); + + double computed = bpk.first; + // This transforms e.g. 9.5 -> 9.499999999999998, which we still + // round to 10 for whole bits per key. + computed += 0.5; + computed /= 1234567.0; + computed *= 1234567.0; + computed -= 0.5; + some_computed_less_than_denoted |= (computed < bpk.first); + ResetPolicy(computed); + bfp = GetBloomLikeFilterPolicy(); + EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); + EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); + + auto bits_builder = GetBuiltinFilterBitsBuilder(); + if (bpk.second == 0) { + ASSERT_EQ(bits_builder, nullptr); + continue; + } + + size_t n = 1; + size_t space = 0; + for (; n < 1000000; n += 1 + n / 1000) { + // Ensure consistency between CalculateSpace and ApproximateNumEntries + space = bits_builder->CalculateSpace(n); + size_t n2 = bits_builder->ApproximateNumEntries(space); + EXPECT_GE(n2, n); + size_t space2 = bits_builder->CalculateSpace(n2); + if (n > 12000 && GetParam() == kStandard128Ribbon) { + // TODO(peterd): better approximation? 
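+        // For now, only require the round-tripped size to be within about
+        // 0.2% above the original (space2 * 0.998 <= space), rather than
+        // exactly equal as in the non-Ribbon case below.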
+ EXPECT_GE(space2, space); + EXPECT_LE(space2 * 0.998, space * 1.0); + } else { + EXPECT_EQ(space2, space); + } + } + // Until size_t overflow + for (; n < (n + n / 3); n += n / 3) { + // Ensure space computation is not overflowing; capped is OK + size_t space2 = bits_builder->CalculateSpace(n); + EXPECT_GE(space2, space); + space = space2; + } + } + // Check that the compiler hasn't optimized our computation into nothing + EXPECT_TRUE(some_computed_less_than_denoted); + ResetPolicy(); +} + +TEST_P(FullBloomTest, FullEmptyFilter) { + // Empty filter is not match, at this level + ASSERT_TRUE(!Matches("hello")); + ASSERT_TRUE(!Matches("world")); +} + +TEST_P(FullBloomTest, FullSmall) { + Add("hello"); + Add("world"); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + ASSERT_TRUE(!Matches("x")); + ASSERT_TRUE(!Matches("foo")); +} + +TEST_P(FullBloomTest, FullVaryingLengths) { + char buffer[sizeof(int)]; + + // Count number of filters that significantly exceed the false positive rate + int mediocre_filters = 0; + int good_filters = 0; + + for (int length = 1; length <= 10000; length = NextLength(length)) { + Reset(); + for (int i = 0; i < length; i++) { + Add(Key(i, buffer)); + } + Build(); + + EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + + CACHE_LINE_SIZE * 2 + 5)); + + // All added keys must match + for (int i = 0; i < length; i++) { + ASSERT_TRUE(Matches(Key(i, buffer))) + << "Length " << length << "; key " << i; + } + + // Check false positive rate + double rate = FalsePositiveRate(); + if (kVerbose >= 1) { + fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", + rate * 100.0, length, static_cast(FilterSize())); + } + if (FLAGS_bits_per_key == 10) { + EXPECT_LE(rate, 0.02); // Must not be over 2% + if (rate > 0.0125) { + mediocre_filters++; // Allowed, but not too often + } else { + good_filters++; + } + } + } + if (kVerbose >= 1) { + fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters, + mediocre_filters); + } + EXPECT_LE(mediocre_filters, good_filters / 5); +} + +TEST_P(FullBloomTest, OptimizeForMemory) { + char buffer[sizeof(int)]; + for (bool offm : {true, false}) { + table_options_.optimize_filters_for_memory = offm; + ResetPolicy(); + Random32 rnd(12345); + uint64_t total_size = 0; + uint64_t total_mem = 0; + int64_t total_keys = 0; + double total_fp_rate = 0; + constexpr int nfilters = 100; + for (int i = 0; i < nfilters; ++i) { + int nkeys = static_cast(rnd.Uniformish(10000)) + 100; + Reset(); + for (int j = 0; j < nkeys; ++j) { + Add(Key(j, buffer)); + } + Build(); + size_t size = FilterData().size(); + total_size += size; + // optimize_filters_for_memory currently depends on malloc_usable_size + // but we run the rest of the test to ensure no bad behavior without it. +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + size = malloc_usable_size(const_cast(FilterData().data())); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + total_mem += size; + total_keys += nkeys; + total_fp_rate += FalsePositiveRate(); + } + if (FLAGS_bits_per_key == 10) { + EXPECT_LE(total_fp_rate / double{nfilters}, 0.011); + EXPECT_GE(total_fp_rate / double{nfilters}, + CACHE_LINE_SIZE >= 256 ? 0.007 : 0.008); + } + + int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8; + if (GetParam() == kStandard128Ribbon) { + // ~ 30% savings vs. 
Bloom filter + ex_min_total_size = 7 * ex_min_total_size / 10; + } + EXPECT_GE(static_cast(total_size), ex_min_total_size); + + int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5); + if (GetParam() == kLegacyBloom) { + // this config can add extra cache line to make odd number + blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE; + } + + EXPECT_GE(total_mem, total_size); + + // optimize_filters_for_memory not implemented with legacy Bloom + if (offm && GetParam() != kLegacyBloom) { + // This value can include a small extra penalty for kExtraPadding + fprintf(stderr, "Internal fragmentation (optimized): %g%%\n", + (total_mem - total_size) * 100.0 / total_size); + // Less than 1% internal fragmentation + EXPECT_LE(total_mem, total_size * 101 / 100); + // Up to 2% storage penalty + EXPECT_LE(static_cast(total_size), + ex_min_total_size * 102 / 100 + blocked_bloom_overhead); + } else { + fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n", + (total_mem - total_size) * 100.0 / total_size); + // TODO: add control checks for more allocators? +#ifdef ROCKSDB_JEMALLOC + fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc()); + if (HasJemalloc()) { +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + // More than 5% internal fragmentation + EXPECT_GE(total_mem, total_size * 105 / 100); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + } +#endif // ROCKSDB_JEMALLOC + // No storage penalty, just usual overhead + EXPECT_LE(static_cast(total_size), + ex_min_total_size + blocked_bloom_overhead); + } + } +} + +class ChargeFilterConstructionTest : public testing::Test {}; +TEST_F(ChargeFilterConstructionTest, RibbonFilterFallBackOnLargeBanding) { + constexpr std::size_t kCacheCapacity = + 8 * CacheReservationManagerImpl< + CacheEntryRole::kFilterConstruction>::GetDummyEntrySize(); + constexpr std::size_t num_entries_for_cache_full = kCacheCapacity / 8; + + for (CacheEntryRoleOptions::Decision charge_filter_construction_mem : + {CacheEntryRoleOptions::Decision::kEnabled, + CacheEntryRoleOptions::Decision::kDisabled}) { + bool will_fall_back = charge_filter_construction_mem == + CacheEntryRoleOptions::Decision::kEnabled; + + BlockBasedTableOptions table_options; + table_options.cache_usage_options.options_overrides.insert( + {CacheEntryRole::kFilterConstruction, + {/*.charged = */ charge_filter_construction_mem}}); + LRUCacheOptions lo; + lo.capacity = kCacheCapacity; + lo.num_shard_bits = 0; // 2^0 shard + lo.strict_capacity_limit = true; + std::shared_ptr cache(NewLRUCache(lo)); + table_options.block_cache = cache; + table_options.filter_policy = + BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key); + FilterBuildingContext ctx(table_options); + std::unique_ptr filter_bits_builder( + table_options.filter_policy->GetBuilderWithContext(ctx)); + + char key_buffer[sizeof(int)]; + for (std::size_t i = 0; i < num_entries_for_cache_full; ++i) { + filter_bits_builder->AddKey(Key(static_cast(i), key_buffer)); + } + + std::unique_ptr buf; + Slice filter = filter_bits_builder->Finish(&buf); + + // To verify Ribbon Filter fallbacks to Bloom Filter properly + // based on cache charging result + // See BloomFilterPolicy::GetBloomBitsReader re: metadata + // -1 = Marker for newer Bloom implementations + // -2 = Marker for Standard128 Ribbon + if (will_fall_back) { + EXPECT_EQ(filter.data()[filter.size() - 5], static_cast(-1)); + } else { + EXPECT_EQ(filter.data()[filter.size() - 5], static_cast(-2)); + } + + if (charge_filter_construction_mem == + CacheEntryRoleOptions::Decision::kEnabled) { + 
const size_t dummy_entry_num = static_cast(std::ceil( + filter.size() * 1.0 / + CacheReservationManagerImpl< + CacheEntryRole::kFilterConstruction>::GetDummyEntrySize())); + EXPECT_GE( + cache->GetPinnedUsage(), + dummy_entry_num * + CacheReservationManagerImpl< + CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()); + EXPECT_LT( + cache->GetPinnedUsage(), + (dummy_entry_num + 1) * + CacheReservationManagerImpl< + CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()); + } else { + EXPECT_EQ(cache->GetPinnedUsage(), 0); + } + } +} + +namespace { +inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128, + uint32_t for256) { + (void)for64; + (void)for128; + (void)for256; +#if CACHE_LINE_SIZE == 64 + return for64; +#elif CACHE_LINE_SIZE == 128 + return for128; +#elif CACHE_LINE_SIZE == 256 + return for256; +#else +#error "CACHE_LINE_SIZE unknown or unrecognized" +#endif +} +} // namespace + +// Ensure the implementation doesn't accidentally change in an +// incompatible way. This test doesn't check the reading side +// (FirstFPs/PackedMatches) for LegacyBloom because it requires the +// ability to read filters generated using other cache line sizes. +// See RawSchema. +TEST_P(FullBloomTest, Schema) { +#define EXPECT_EQ_Bloom(a, b) \ + { \ + if (GetParam() != kStandard128Ribbon) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_Ribbon(a, b) \ + { \ + if (GetParam() == kStandard128Ribbon) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_FastBloom(a, b) \ + { \ + if (GetParam() == kFastLocalBloom) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_LegacyBloom(a, b) \ + { \ + if (GetParam() == kLegacyBloom) { \ + EXPECT_EQ(a, b); \ + } \ + } +#define EXPECT_EQ_NotLegacy(a, b) \ + { \ + if (GetParam() != kLegacyBloom) { \ + EXPECT_EQ(a, b); \ + } \ + } + + char buffer[sizeof(int)]; + + // First do a small number of keys, where Ribbon config will fall back on + // fast Bloom filter and generate the same data + ResetPolicy(5); // num_probes = 3 + for (int key = 0; key < 87; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ(GetNumProbesFromFilterData(), 3); + + EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U); + + EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10)); + + // Now use enough keys so that changing bits / key by 1 is guaranteed to + // change number of allocated cache lines. So keys > max cache line bits. 
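+  // (Here that means 2087 keys, more than the 2048 bits in even a 256-byte
+  // cache line.)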
+ + // Note that the first attempted Ribbon seed is determined by the hash + // of the first key added (for pseudorandomness in practice, determinism in + // testing) + + ResetPolicy(2); // num_probes = 1 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(1567096579, 1964771444, 2659542661U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228U); + + EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10)); + EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10)); + + ResetPolicy(3); // num_probes = 2 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(2707206547U, 2571983456U, 218344685)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358U); + + EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10)); + EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10)); + + ResetPolicy(5); // num_probes = 3 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(515748486, 94611728, 2436112214U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U); + + EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10)); + EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10)); + + ResetPolicy(8); // num_probes = 5 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(1302145999, 2811644657U, 756553699)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U); + + EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10)); + EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10)); + + ResetPolicy(9); // num_probes = 6 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(2092755149, 661139132, 1182970461)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675U); + + EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9)); + EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10)); + + ResetPolicy(11); // num_probes = 7 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(3755609649U, 1812694762, 1449142939)); + 
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U); + + EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10)); + EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10)); + + // This used to be 9 probes, but 8 is a better choice for speed, + // especially with SIMD groups of 8 probes, with essentially no + // change in FP rate. + // FP rate @ 9 probes, old Bloom: 0.4321% + // FP rate @ 9 probes, new Bloom: 0.1846% + // FP rate @ 8 probes, new Bloom: 0.1843% + ResetPolicy(14); // num_probes = 8 (new), 9 (old) + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9); + EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(178861123, 379087593, 2574136516U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875U); + + EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9)); + EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9)); + + // This used to be 11 probes, but 9 is a better choice for speed + // AND accuracy. + // FP rate @ 11 probes, old Bloom: 0.3571% + // FP rate @ 11 probes, new Bloom: 0.0884% + // FP rate @ 9 probes, new Bloom: 0.0843% + ResetPolicy(16); // num_probes = 9 (new), 11 (old) + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11); + EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(1129406313, 3049154394U, 1727750964)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967U); + + EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8)); + EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9)); + + ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 
9 + for (int key = 0; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(1478976371, 2910591341U, 1182970461)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667U); + + EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9)); + EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10)); + + ResetPolicy(10); + for (int key = /*CHANGED*/ 1; key < 2087; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U); + + EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9)); + EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10)); + + ResetPolicy(10); + for (int key = 1; key < /*CHANGED*/ 2088; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379U); + + EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9)); + EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10)); + + // With new fractional bits_per_key, check that we are rounding to + // whole bits per key for old Bloom filters but fractional for + // new Bloom filter. + ResetPolicy(9.5); + for (int key = 1; key < 2088; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663U); + + EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9)); + EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10)); + + ResetPolicy(10.499); + for (int key = 1; key < 2088; key++) { + Add(Key(key, buffer)); + } + Build(); + EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6); + EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7); + EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184); + + EXPECT_EQ_LegacyBloom( + BloomHash(FilterData()), + /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U)); + EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U); + EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188U); + + EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9)); + EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10)); + + ResetPolicy(); +} + +// A helper class for testing custom or corrupt filter bits as read by +// built-in FilterBitsReaders. +struct RawFilterTester { + // Buffer, from which we always return a tail Slice, so the + // last five bytes are always the metadata bytes. 
+ std::array data_; + // Points five bytes from the end + char* metadata_ptr_; + + RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {} + + Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines, + uint32_t num_probes) { + metadata_ptr_[0] = static_cast(num_probes); + EncodeFixed32(metadata_ptr_ + 1, num_lines); + uint32_t len = len_without_metadata + /*metadata*/ 5; + assert(len <= data_.size()); + return Slice(metadata_ptr_ - len_without_metadata, len); + } + + Slice Reset(uint32_t len_without_metadata, uint32_t num_lines, + uint32_t num_probes, bool fill_ones) { + data_.fill(fill_ones ? 0xff : 0); + return ResetNoFill(len_without_metadata, num_lines, num_probes); + } + + Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines, + uint32_t num_probes) { + for (uint32_t i = 0; i < data_.size(); ++i) { + data_[i] = static_cast(0x7b7b >> (i % 7)); + } + return ResetNoFill(len_without_metadata, num_lines, num_probes); + } +}; + +TEST_P(FullBloomTest, RawSchema) { + RawFilterTester cft; + // Legacy Bloom configurations + // Two probes, about 3/4 bits set: ~50% "FP" rate + // One 256-byte cache line. + OpenRaw(cft.ResetWeirdFill(256, 1, 2)); + EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches()); + + // Two 128-byte cache lines. + OpenRaw(cft.ResetWeirdFill(256, 2, 2)); + EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches()); + + // Four 64-byte cache lines. + OpenRaw(cft.ResetWeirdFill(256, 4, 2)); + EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches()); + + // Fast local Bloom configurations (marker 255 -> -1) + // Two probes, about 3/4 bits set: ~50% "FP" rate + // Four 64-byte cache lines. + OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255)); + EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches()); + + // Ribbon configurations (marker 254 -> -2) + + // Even though the builder never builds configurations this + // small (preferring Bloom), we can test that the configuration + // can be read, for possible future-proofing. 
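+  // As in CorruptFilters below, the num_lines word for a Ribbon filter
+  // encodes (num_blocks << 8) | seed, and each block covers 128 slots, so
+  // 2 blocks give the 256 slots used here.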
+ + // 256 slots, one result column = 32 bytes (2 blocks, seed 0) + // ~50% FP rate: + // 0b0101010111110101010000110000011011011111100100001110010011101010 + OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254)); + EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches()); + + // 256 slots, three-to-four result columns = 112 bytes + // ~ 1 in 10 FP rate: + // 0b0000000000100000000000000000000001000001000000010000101000000000 + OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254)); + EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches()); +} + +TEST_P(FullBloomTest, CorruptFilters) { + RawFilterTester cft; + + for (bool fill : {false, true}) { + // Legacy Bloom configurations + // Good filter bits - returns same as fill + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + // 256 is unusual but legal cache line size + OpenRaw(cft.Reset(256 * 3, 3, 6, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + // 30 should be max num_probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Good filter bits - returns same as fill + // 1 should be min num_probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Type 1 trivial filter bits - returns true as if FP by zero probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Type 2 trivial filter bits - returns false as if built from zero keys + OpenRaw(cft.Reset(0, 0, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Type 2 trivial filter bits - returns false as if built from zero keys + OpenRaw(cft.Reset(0, 37, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes + OpenRaw(cft.Reset(0, 0, 0, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Bad filter bits - returns true for safety + // No solution to 0 * x == CACHE_LINE_SIZE + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits - returns true for safety + // Can't have 3 * x == 4 for integer x + OpenRaw(cft.Reset(4, 3, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits - returns true for safety + // 97 bytes is not a power of two, so not a legal cache line size + OpenRaw(cft.Reset(97 * 3, 3, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits - returns true for safety + // 65 bytes is not a power of two, so not a legal cache line size + OpenRaw(cft.Reset(65 * 3, 3, 6, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad filter bits - returns false as if built from zero keys + // < 5 bytes overall means missing even metadata + OpenRaw(cft.Reset(static_cast(-1), 3, 6, fill)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + OpenRaw(cft.Reset(static_cast(-5), 3, 6, fill)); + ASSERT_FALSE(Matches("hello")); + 
ASSERT_FALSE(Matches("world")); + + // Dubious filter bits - returns same as fill (for now) + // 31 is not a useful num_probes, nor generated by RocksDB unless directly + // using filter bits API without BloomFilterPolicy. + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Dubious filter bits - returns same as fill (for now) + // Similar, with 127, largest positive char + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Dubious filter bits - returns true (for now) + // num_probes set to 128 / -128, lowest negative char + // NB: Bug in implementation interprets this as negative and has same + // effect as zero probes, but effectively reserves negative char values + // for future use. + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Dubious filter bits - returns true (for now) + // Similar, with 253 / -3 + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // ######################################################### + // Fast local Bloom configurations (marker 255 -> -1) + // Good config with six probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Becomes bad/reserved config (always true) if any other byte set + OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Good config, max 30 probes + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill)); + ASSERT_EQ(fill, Matches("hello")); + ASSERT_EQ(fill, Matches("world")); + + // Bad/reserved config (always true) if more than 30 + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + } + + // ######################################################### + // Ribbon configurations (marker 254 -> -2) + // ("fill" doesn't work to detect good configurations, we just + // have to rely on TN probability) + + // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes + // seed = 123 + OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes + OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false)); + ASSERT_FALSE(Matches("hello")); + ASSERT_FALSE(Matches("world")); + + // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits + // -> average close to 0 columns + OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false)); + // *Almost* all FPs + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); 
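+  // (1024 bits spread over 640,000 slots is ~0.0016 result columns per slot,
+  // so a query is a false positive with probability roughly 2^-0.0016, i.e.
+  // about 99.9% of the time.)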
+ // Need many queries to find a "true negative" + for (int i = 0; Matches(std::to_string(i)); ++i) { + ASSERT_LT(i, 1000); + } + + // Bad: 1 block not allowed (for implementation detail reasons) + OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); + + // Bad: 0 blocks not allowed + OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false)); + ASSERT_TRUE(Matches("hello")); + ASSERT_TRUE(Matches("world")); +} + +INSTANTIATE_TEST_CASE_P(Full, FullBloomTest, + testing::Values(kLegacyBloom, kFastLocalBloom, + kStandard128Ribbon)); + +static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) { + union { + uint64_t key_value = 0; + char key_bytes[8]; + }; + + const unsigned kNumKeys = 1000; + + Slice key_slice{key_bytes, 8}; + for (key_value = 0; key_value < kNumKeys; ++key_value) { + builder->AddKey(key_slice); + } + + std::unique_ptr buf; + auto filter = builder->Finish(&buf); + return filter.size() * /*bits per byte*/ 8 / (1.0 * kNumKeys); +} + +static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) { + if (levelish == -1) { + // Flush is treated as level -1 for this option but actually level 0 + ctx->level_at_creation = 0; + ctx->reason = TableFileCreationReason::kFlush; + } else { + ctx->level_at_creation = levelish; + ctx->reason = TableFileCreationReason::kCompaction; + } +} + +TEST(RibbonTest, RibbonTestLevelThreshold) { + BlockBasedTableOptions opts; + FilterBuildingContext ctx(opts); + // A few settings + for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal, + kCompactionStyleFIFO, kCompactionStyleNone}) { + ctx.compaction_style = cs; + for (int bloom_before_level : {-1, 0, 1, 10}) { + std::vector > policies; + policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level)); + + if (bloom_before_level == 0) { + // Also test new API default + policies.emplace_back(NewRibbonFilterPolicy(10)); + } + + for (std::unique_ptr& policy : policies) { + // Claim to be generating filter for this level + SetTestingLevel(bloom_before_level, &ctx); + + std::unique_ptr builder{ + policy->GetBuilderWithContext(ctx)}; + + // Must be Ribbon (more space efficient than 10 bits per key) + ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + + if (bloom_before_level >= 0) { + // Claim to be generating filter for previous level + SetTestingLevel(bloom_before_level - 1, &ctx); + + builder.reset(policy->GetBuilderWithContext(ctx)); + + if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) { + // Level is considered. + // Must be Bloom (~ 10 bits per key) + ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9); + } else { + // Level is ignored under non-traditional compaction styles. 
+ // Must be Ribbon (more space efficient than 10 bits per key) + ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + } + } + + // Like SST file writer + ctx.level_at_creation = -1; + ctx.reason = TableFileCreationReason::kMisc; + + builder.reset(policy->GetBuilderWithContext(ctx)); + + // Must be Ribbon (more space efficient than 10 bits per key) + ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + } + } + } +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} + +#endif // GFLAGS diff --git a/src/rocksdb/util/build_version.cc.in b/src/rocksdb/util/build_version.cc.in new file mode 100644 index 000000000..c1706dc1f --- /dev/null +++ b/src/rocksdb/util/build_version.cc.in @@ -0,0 +1,81 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +#include + +#include "rocksdb/version.h" +#include "rocksdb/utilities/object_registry.h" +#include "util/string_util.h" + +// The build script may replace these values with real values based +// on whether or not GIT is available and the platform settings +static const std::string rocksdb_build_git_sha = "rocksdb_build_git_sha:@GIT_SHA@"; +static const std::string rocksdb_build_git_tag = "rocksdb_build_git_tag:@GIT_TAG@"; +#define HAS_GIT_CHANGES @GIT_MOD@ +#if HAS_GIT_CHANGES == 0 +// If HAS_GIT_CHANGES is 0, the GIT date is used. +// Use the time the branch/tag was last modified +static const std::string rocksdb_build_date = "rocksdb_build_date:@GIT_DATE@"; +#else +// If HAS_GIT_CHANGES is > 0, the branch/tag has modifications. +// Use the time the build was created. +static const std::string rocksdb_build_date = "rocksdb_build_date:@BUILD_DATE@"; +#endif + +#ifndef ROCKSDB_LITE +extern "C" { +@ROCKSDB_PLUGIN_EXTERNS@ +} // extern "C" + +std::unordered_map ROCKSDB_NAMESPACE::ObjectRegistry::builtins_ = { + @ROCKSDB_PLUGIN_BUILTINS@ +}; +#endif //ROCKSDB_LITE + +namespace ROCKSDB_NAMESPACE { +static void AddProperty(std::unordered_map *props, const std::string& name) { + size_t colon = name.find(":"); + if (colon != std::string::npos && colon > 0 && colon < name.length() - 1) { + // If we found a "@:", then this property was a build-time substitution that failed. Skip it + size_t at = name.find("@", colon); + if (at != colon + 1) { + // Everything before the colon is the name, after is the value + (*props)[name.substr(0, colon)] = name.substr(colon + 1); + } + } +} + +static std::unordered_map* LoadPropertiesSet() { + auto * properties = new std::unordered_map(); + AddProperty(properties, rocksdb_build_git_sha); + AddProperty(properties, rocksdb_build_git_tag); + AddProperty(properties, rocksdb_build_date); + return properties; +} + +const std::unordered_map& GetRocksBuildProperties() { + static std::unique_ptr> props(LoadPropertiesSet()); + return *props; +} + +std::string GetRocksVersionAsString(bool with_patch) { + std::string version = std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR); + if (with_patch) { + return version + "." 
+ std::to_string(ROCKSDB_PATCH);
+  } else {
+    return version;
+  }
+}
+
+std::string GetRocksBuildInfoAsString(const std::string& program, bool verbose) {
+  std::string info = program + " (RocksDB) " + GetRocksVersionAsString(true);
+  if (verbose) {
+    for (const auto& it : GetRocksBuildProperties()) {
+      info.append("\n ");
+      info.append(it.first);
+      info.append(": ");
+      info.append(it.second);
+    }
+  }
+  return info;
+}
+} // namespace ROCKSDB_NAMESPACE
diff --git a/src/rocksdb/util/cast_util.h b/src/rocksdb/util/cast_util.h
new file mode 100644
index 000000000..c91b6ff1e
--- /dev/null
+++ b/src/rocksdb/util/cast_util.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <type_traits>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+// The helper function to assert the move from dynamic_cast<> to
+// static_cast<> is correct. This function is to deal with legacy code.
+// It is not recommended for new code to rely on class casting. The preferred
+// solution is to implement the functionality without a need for casting.
+template <class DestClass, class SrcClass>
+inline DestClass* static_cast_with_check(SrcClass* x) {
+  DestClass* ret = static_cast<DestClass*>(x);
+#ifdef ROCKSDB_USE_RTTI
+  assert(ret == dynamic_cast<DestClass*>(x));
+#endif
+  return ret;
+}
+
+// A wrapper around static_cast for lossless conversion between integral
+// types, including enum types. For example, this can be used for converting
+// between signed/unsigned or enum type and underlying type without fear of
+// stripping away data, now or in the future.
+template <typename To, typename From>
+inline To lossless_cast(From x) {
+  using FromValue = typename std::remove_reference<From>::type;
+  static_assert(
+      std::is_integral<FromValue>::value || std::is_enum<FromValue>::value,
+      "Only works on integral types");
+  static_assert(std::is_integral<To>::value || std::is_enum<To>::value,
+                "Only works on integral types");
+  static_assert(sizeof(To) >= sizeof(FromValue), "Must be lossless");
+  return static_cast<To>(x);
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/src/rocksdb/util/channel.h b/src/rocksdb/util/channel.h
new file mode 100644
index 000000000..19b956297
--- /dev/null
+++ b/src/rocksdb/util/channel.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <utility>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+template <class T>
+class channel {
+ public:
+  explicit channel() : eof_(false) {}
+
+  channel(const channel&) = delete;
+  void operator=(const channel&) = delete;
+
+  void sendEof() {
+    std::lock_guard<std::mutex> lk(lock_);
+    eof_ = true;
+    cv_.notify_all();
+  }
+
+  bool eof() {
+    std::lock_guard<std::mutex> lk(lock_);
+    return buffer_.empty() && eof_;
+  }
+
+  size_t size() const {
+    std::lock_guard<std::mutex> lk(lock_);
+    return buffer_.size();
+  }
+
+  // writes elem to the queue
+  void write(T&& elem) {
+    std::unique_lock<std::mutex> lk(lock_);
+    buffer_.emplace(std::forward<T>(elem));
+    cv_.notify_one();
+  }
+
+  /// Moves a dequeued element onto elem, blocking until an element
+  /// is available.
+  // returns false if EOF
+  bool read(T& elem) {
+    std::unique_lock<std::mutex> lk(lock_);
+    cv_.wait(lk, [&] { return eof_ || !buffer_.empty(); });
+    if (eof_ && buffer_.empty()) {
+      return false;
+    }
+    elem = std::move(buffer_.front());
+    buffer_.pop();
+    cv_.notify_one();
+    return true;
+  }
+
+ private:
+  std::condition_variable cv_;
+  mutable std::mutex lock_;
+  std::queue<T> buffer_;
+  bool eof_;
+};
+} // namespace ROCKSDB_NAMESPACE
diff --git a/src/rocksdb/util/cleanable.cc b/src/rocksdb/util/cleanable.cc
new file mode 100644
index 000000000..89a7ab9be
--- /dev/null
+++ b/src/rocksdb/util/cleanable.cc
@@ -0,0 +1,181 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "rocksdb/cleanable.h"
+
+#include <atomic>
+#include <cassert>
+#include <utility>
+
+namespace ROCKSDB_NAMESPACE {
+
+Cleanable::Cleanable() {
+  cleanup_.function = nullptr;
+  cleanup_.next = nullptr;
+}
+
+Cleanable::~Cleanable() { DoCleanup(); }
+
+Cleanable::Cleanable(Cleanable&& other) noexcept { *this = std::move(other); }
+
+Cleanable& Cleanable::operator=(Cleanable&& other) noexcept {
+  assert(this != &other);  // https://stackoverflow.com/a/9322542/454544
+  cleanup_ = other.cleanup_;
+  other.cleanup_.function = nullptr;
+  other.cleanup_.next = nullptr;
+  return *this;
+}
+
+// If the entire linked list was on the heap we could have simply attached one
+// linked list to another. However, the head is an embedded object, to avoid
+// the cost of creating objects for most of the use cases when the Cleanable
+// has only one Cleanup to do. We could put everything on the heap if
+// benchmarks show no negative impact on performance.
+// Also we need to iterate on the linked list since there is no pointer to the
+// tail. We can add a tail pointer, but maintaining it might negatively impact
+// the performance for the common case of one cleanup where the tail pointer is
+// not needed. Again, benchmarks could clarify that.
+// Even without a tail pointer we could iterate on the list, find the tail, and
+// have only that node updated without the need to insert the Cleanups one by
+// one. This however would be redundant when the source Cleanable has one or a
+// few Cleanups, which is the case most of the time.
+// TODO(myabandeh): if the list is too long we should maintain a tail pointer
+// and have the entire list (minus the head that has to be inserted separately)
+// merged with the target linked list at once.
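+// Usage sketch (illustrative only; DeleteCharBuf is a hypothetical helper
+// matching the CleanupFunction signature): a Cleanable can own a deferred
+// deallocation and later hand that obligation to another Cleanable:
+//
+//   static void DeleteCharBuf(void* arg1, void* /*arg2*/) {
+//     delete[] static_cast<char*>(arg1);
+//   }
+//
+//   Cleanable a;
+//   a.RegisterCleanup(&DeleteCharBuf, new char[4096], nullptr);
+//   Cleanable b;
+//   a.DelegateCleanupsTo(&b);  // the buffer is now freed when b cleans up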
+void Cleanable::DelegateCleanupsTo(Cleanable* other) { + assert(other != nullptr); + if (cleanup_.function == nullptr) { + return; + } + Cleanup* c = &cleanup_; + other->RegisterCleanup(c->function, c->arg1, c->arg2); + c = c->next; + while (c != nullptr) { + Cleanup* next = c->next; + other->RegisterCleanup(c); + c = next; + } + cleanup_.function = nullptr; + cleanup_.next = nullptr; +} + +void Cleanable::RegisterCleanup(Cleanable::Cleanup* c) { + assert(c != nullptr); + if (cleanup_.function == nullptr) { + cleanup_.function = c->function; + cleanup_.arg1 = c->arg1; + cleanup_.arg2 = c->arg2; + delete c; + } else { + c->next = cleanup_.next; + cleanup_.next = c; + } +} + +void Cleanable::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { + assert(func != nullptr); + Cleanup* c; + if (cleanup_.function == nullptr) { + c = &cleanup_; + } else { + c = new Cleanup; + c->next = cleanup_.next; + cleanup_.next = c; + } + c->function = func; + c->arg1 = arg1; + c->arg2 = arg2; +} + +struct SharedCleanablePtr::Impl : public Cleanable { + std::atomic ref_count{1}; // Start with 1 ref + void Ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } + void Unref() { + if (ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + // Last ref + delete this; + } + } + static void UnrefWrapper(void* arg1, void* /*arg2*/) { + static_cast(arg1)->Unref(); + } +}; + +void SharedCleanablePtr::Reset() { + if (ptr_) { + ptr_->Unref(); + ptr_ = nullptr; + } +} + +void SharedCleanablePtr::Allocate() { + Reset(); + ptr_ = new Impl(); +} + +SharedCleanablePtr::SharedCleanablePtr(const SharedCleanablePtr& from) { + *this = from; +} + +SharedCleanablePtr::SharedCleanablePtr(SharedCleanablePtr&& from) noexcept { + *this = std::move(from); +} + +SharedCleanablePtr& SharedCleanablePtr::operator=( + const SharedCleanablePtr& from) { + if (this != &from) { + Reset(); + ptr_ = from.ptr_; + if (ptr_) { + ptr_->Ref(); + } + } + return *this; +} + +SharedCleanablePtr& SharedCleanablePtr::operator=( + SharedCleanablePtr&& from) noexcept { + assert(this != &from); // https://stackoverflow.com/a/9322542/454544 + Reset(); + ptr_ = from.ptr_; + from.ptr_ = nullptr; + return *this; +} + +SharedCleanablePtr::~SharedCleanablePtr() { Reset(); } + +Cleanable& SharedCleanablePtr::operator*() { + return *ptr_; // implicit upcast +} + +Cleanable* SharedCleanablePtr::operator->() { + return ptr_; // implicit upcast +} + +Cleanable* SharedCleanablePtr::get() { + return ptr_; // implicit upcast +} + +void SharedCleanablePtr::RegisterCopyWith(Cleanable* target) { + if (ptr_) { + // "Virtual" copy of the pointer + ptr_->Ref(); + target->RegisterCleanup(&Impl::UnrefWrapper, ptr_, nullptr); + } +} + +void SharedCleanablePtr::MoveAsCleanupTo(Cleanable* target) { + if (ptr_) { + // "Virtual" move of the pointer + target->RegisterCleanup(&Impl::UnrefWrapper, ptr_, nullptr); + ptr_ = nullptr; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/coding.cc b/src/rocksdb/util/coding.cc new file mode 100644 index 000000000..3da8afaa2 --- /dev/null +++ b/src/rocksdb/util/coding.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/coding.h" + +#include + +#include "rocksdb/slice.h" +#include "rocksdb/slice_transform.h" + +namespace ROCKSDB_NAMESPACE { + +// conversion' conversion from 'type1' to 'type2', possible loss of data +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4244) +#endif +char* EncodeVarint32(char* dst, uint32_t v) { + // Operate on characters as unsigneds + unsigned char* ptr = reinterpret_cast(dst); + static const int B = 128; + if (v < (1 << 7)) { + *(ptr++) = v; + } else if (v < (1 << 14)) { + *(ptr++) = v | B; + *(ptr++) = v >> 7; + } else if (v < (1 << 21)) { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = v >> 14; + } else if (v < (1 << 28)) { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = v >> 21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = (v >> 21) | B; + *(ptr++) = v >> 28; + } + return reinterpret_cast(ptr); +} +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +const char* GetVarint32PtrFallback(const char* p, const char* limit, + uint32_t* value) { + uint32_t result = 0; + for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) { + uint32_t byte = *(reinterpret_cast(p)); + p++; + if (byte & 128) { + // More bytes are present + result |= ((byte & 127) << shift); + } else { + result |= (byte << shift); + *value = result; + return reinterpret_cast(p); + } + } + return nullptr; +} + +const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) { + uint64_t result = 0; + for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) { + uint64_t byte = *(reinterpret_cast(p)); + p++; + if (byte & 128) { + // More bytes are present + result |= ((byte & 127) << shift); + } else { + result |= (byte << shift); + *value = result; + return reinterpret_cast(p); + } + } + return nullptr; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/coding.h b/src/rocksdb/util/coding.h new file mode 100644 index 000000000..3168fd2fd --- /dev/null +++ b/src/rocksdb/util/coding.h @@ -0,0 +1,389 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Encoding independent of machine byte order: +// * Fixed-length numbers are encoded with least-significant byte first +// (little endian, native order on Intel and others) +// * In addition we support variable length "varint" encoding +// * Strings are encoded prefixed by their length in varint format +// +// Some related functions are provided in coding_lean.h + +#pragma once +#include +#include + +#include "port/port.h" +#include "rocksdb/slice.h" +#include "util/coding_lean.h" + +// Some processors does not allow unaligned access to memory +#if defined(__sparc) +#define PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED +#endif + +namespace ROCKSDB_NAMESPACE { + +// The maximum length of a varint in bytes for 64-bit. +const uint32_t kMaxVarint64Length = 10; + +// Standard Put... 
routines append to a string +extern void PutFixed16(std::string* dst, uint16_t value); +extern void PutFixed32(std::string* dst, uint32_t value); +extern void PutFixed64(std::string* dst, uint64_t value); +extern void PutVarint32(std::string* dst, uint32_t value); +extern void PutVarint32Varint32(std::string* dst, uint32_t value1, + uint32_t value2); +extern void PutVarint32Varint32Varint32(std::string* dst, uint32_t value1, + uint32_t value2, uint32_t value3); +extern void PutVarint64(std::string* dst, uint64_t value); +extern void PutVarint64Varint64(std::string* dst, uint64_t value1, + uint64_t value2); +extern void PutVarint32Varint64(std::string* dst, uint32_t value1, + uint64_t value2); +extern void PutVarint32Varint32Varint64(std::string* dst, uint32_t value1, + uint32_t value2, uint64_t value3); +extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value); +extern void PutLengthPrefixedSliceParts(std::string* dst, + const SliceParts& slice_parts); +extern void PutLengthPrefixedSlicePartsWithPadding( + std::string* dst, const SliceParts& slice_parts, size_t pad_sz); + +// Standard Get... routines parse a value from the beginning of a Slice +// and advance the slice past the parsed value. +extern bool GetFixed64(Slice* input, uint64_t* value); +extern bool GetFixed32(Slice* input, uint32_t* value); +extern bool GetFixed16(Slice* input, uint16_t* value); +extern bool GetVarint32(Slice* input, uint32_t* value); +extern bool GetVarint64(Slice* input, uint64_t* value); +extern bool GetVarsignedint64(Slice* input, int64_t* value); +extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); +// This function assumes data is well-formed. +extern Slice GetLengthPrefixedSlice(const char* data); + +extern Slice GetSliceUntil(Slice* slice, char delimiter); + +// Borrowed from +// https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208 +constexpr inline uint64_t i64ToZigzag(const int64_t l) { + return (static_cast(l) << 1) ^ static_cast(l >> 63); +} +inline int64_t zigzagToI64(uint64_t n) { + return (n >> 1) ^ -static_cast(n & 1); +} + +// Pointer-based variants of GetVarint... These either store a value +// in *v and return a pointer just past the parsed value, or return +// nullptr on error. These routines only look at bytes in the range +// [p..limit-1] +extern const char* GetVarint32Ptr(const char* p, const char* limit, + uint32_t* v); +extern const char* GetVarint64Ptr(const char* p, const char* limit, + uint64_t* v); +inline const char* GetVarsignedint64Ptr(const char* p, const char* limit, + int64_t* value) { + uint64_t u = 0; + const char* ret = GetVarint64Ptr(p, limit, &u); + *value = zigzagToI64(u); + return ret; +} + +// Returns the length of the varint32 or varint64 encoding of "v" +extern int VarintLength(uint64_t v); + +// Lower-level versions of Put... that write directly into a character buffer +// and return a pointer just past the last byte written. 
+// REQUIRES: dst has enough space for the value being written +extern char* EncodeVarint32(char* dst, uint32_t value); +extern char* EncodeVarint64(char* dst, uint64_t value); + +// Internal routine for use by fallback path of GetVarint32Ptr +extern const char* GetVarint32PtrFallback(const char* p, const char* limit, + uint32_t* value); +inline const char* GetVarint32Ptr(const char* p, const char* limit, + uint32_t* value) { + if (p < limit) { + uint32_t result = *(reinterpret_cast(p)); + if ((result & 128) == 0) { + *value = result; + return p + 1; + } + } + return GetVarint32PtrFallback(p, limit, value); +} + +// Pull the last 8 bits and cast it to a character +inline void PutFixed16(std::string* dst, uint16_t value) { + if (port::kLittleEndian) { + dst->append(const_cast(reinterpret_cast(&value)), + sizeof(value)); + } else { + char buf[sizeof(value)]; + EncodeFixed16(buf, value); + dst->append(buf, sizeof(buf)); + } +} + +inline void PutFixed32(std::string* dst, uint32_t value) { + if (port::kLittleEndian) { + dst->append(const_cast(reinterpret_cast(&value)), + sizeof(value)); + } else { + char buf[sizeof(value)]; + EncodeFixed32(buf, value); + dst->append(buf, sizeof(buf)); + } +} + +inline void PutFixed64(std::string* dst, uint64_t value) { + if (port::kLittleEndian) { + dst->append(const_cast(reinterpret_cast(&value)), + sizeof(value)); + } else { + char buf[sizeof(value)]; + EncodeFixed64(buf, value); + dst->append(buf, sizeof(buf)); + } +} + +inline void PutVarint32(std::string* dst, uint32_t v) { + char buf[5]; + char* ptr = EncodeVarint32(buf, v); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint32Varint32(std::string* dst, uint32_t v1, uint32_t v2) { + char buf[10]; + char* ptr = EncodeVarint32(buf, v1); + ptr = EncodeVarint32(ptr, v2); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint32Varint32Varint32(std::string* dst, uint32_t v1, + uint32_t v2, uint32_t v3) { + char buf[15]; + char* ptr = EncodeVarint32(buf, v1); + ptr = EncodeVarint32(ptr, v2); + ptr = EncodeVarint32(ptr, v3); + dst->append(buf, static_cast(ptr - buf)); +} + +inline char* EncodeVarint64(char* dst, uint64_t v) { + static const unsigned int B = 128; + unsigned char* ptr = reinterpret_cast(dst); + while (v >= B) { + *(ptr++) = (v & (B - 1)) | B; + v >>= 7; + } + *(ptr++) = static_cast(v); + return reinterpret_cast(ptr); +} + +inline void PutVarint64(std::string* dst, uint64_t v) { + char buf[kMaxVarint64Length]; + char* ptr = EncodeVarint64(buf, v); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarsignedint64(std::string* dst, int64_t v) { + char buf[kMaxVarint64Length]; + // Using Zigzag format to convert signed to unsigned + char* ptr = EncodeVarint64(buf, i64ToZigzag(v)); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint64Varint64(std::string* dst, uint64_t v1, uint64_t v2) { + char buf[20]; + char* ptr = EncodeVarint64(buf, v1); + ptr = EncodeVarint64(ptr, v2); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint32Varint64(std::string* dst, uint32_t v1, uint64_t v2) { + char buf[15]; + char* ptr = EncodeVarint32(buf, v1); + ptr = EncodeVarint64(ptr, v2); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void PutVarint32Varint32Varint64(std::string* dst, uint32_t v1, + uint32_t v2, uint64_t v3) { + char buf[20]; + char* ptr = EncodeVarint32(buf, v1); + ptr = EncodeVarint32(ptr, v2); + ptr = EncodeVarint64(ptr, v3); + dst->append(buf, static_cast(ptr - buf)); +} + +inline void 
PutLengthPrefixedSlice(std::string* dst, const Slice& value) { + PutVarint32(dst, static_cast(value.size())); + dst->append(value.data(), value.size()); +} + +inline void PutLengthPrefixedSliceParts(std::string* dst, size_t total_bytes, + const SliceParts& slice_parts) { + for (int i = 0; i < slice_parts.num_parts; ++i) { + total_bytes += slice_parts.parts[i].size(); + } + PutVarint32(dst, static_cast(total_bytes)); + for (int i = 0; i < slice_parts.num_parts; ++i) { + dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size()); + } +} + +inline void PutLengthPrefixedSliceParts(std::string* dst, + const SliceParts& slice_parts) { + PutLengthPrefixedSliceParts(dst, /*total_bytes=*/0, slice_parts); +} + +inline void PutLengthPrefixedSlicePartsWithPadding( + std::string* dst, const SliceParts& slice_parts, size_t pad_sz) { + PutLengthPrefixedSliceParts(dst, /*total_bytes=*/pad_sz, slice_parts); + dst->append(pad_sz, '\0'); +} + +inline int VarintLength(uint64_t v) { + int len = 1; + while (v >= 128) { + v >>= 7; + len++; + } + return len; +} + +inline bool GetFixed64(Slice* input, uint64_t* value) { + if (input->size() < sizeof(uint64_t)) { + return false; + } + *value = DecodeFixed64(input->data()); + input->remove_prefix(sizeof(uint64_t)); + return true; +} + +inline bool GetFixed32(Slice* input, uint32_t* value) { + if (input->size() < sizeof(uint32_t)) { + return false; + } + *value = DecodeFixed32(input->data()); + input->remove_prefix(sizeof(uint32_t)); + return true; +} + +inline bool GetFixed16(Slice* input, uint16_t* value) { + if (input->size() < sizeof(uint16_t)) { + return false; + } + *value = DecodeFixed16(input->data()); + input->remove_prefix(sizeof(uint16_t)); + return true; +} + +inline bool GetVarint32(Slice* input, uint32_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint32Ptr(p, limit, value); + if (q == nullptr) { + return false; + } else { + *input = Slice(q, static_cast(limit - q)); + return true; + } +} + +inline bool GetVarint64(Slice* input, uint64_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint64Ptr(p, limit, value); + if (q == nullptr) { + return false; + } else { + *input = Slice(q, static_cast(limit - q)); + return true; + } +} + +inline bool GetVarsignedint64(Slice* input, int64_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarsignedint64Ptr(p, limit, value); + if (q == nullptr) { + return false; + } else { + *input = Slice(q, static_cast(limit - q)); + return true; + } +} + +inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) { + uint32_t len = 0; + if (GetVarint32(input, &len) && input->size() >= len) { + *result = Slice(input->data(), len); + input->remove_prefix(len); + return true; + } else { + return false; + } +} + +inline Slice GetLengthPrefixedSlice(const char* data) { + uint32_t len = 0; + // +5: we assume "data" is not corrupted + // unsigned char is 7 bits, uint32_t is 32 bits, need 5 unsigned char + auto p = GetVarint32Ptr(data, data + 5 /* limit */, &len); + return Slice(p, len); +} + +inline Slice GetSliceUntil(Slice* slice, char delimiter) { + uint32_t len = 0; + for (len = 0; len < slice->size() && slice->data()[len] != delimiter; ++len) { + // nothing + } + + Slice ret(slice->data(), len); + slice->remove_prefix(len + ((len < slice->size()) ? 
1 : 0)); + return ret; +} + +template +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +inline void +PutUnaligned(T* memory, const T& value) { +#if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED) + char* nonAlignedMemory = reinterpret_cast(memory); + memcpy(nonAlignedMemory, reinterpret_cast(&value), sizeof(T)); +#else + *memory = value; +#endif +} + +template +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +inline void +GetUnaligned(const T* memory, T* value) { +#if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED) + char* nonAlignedMemory = reinterpret_cast(value); + memcpy(nonAlignedMemory, reinterpret_cast(memory), sizeof(T)); +#else + *value = *memory; +#endif +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/coding_lean.h b/src/rocksdb/util/coding_lean.h new file mode 100644 index 000000000..6966f7a66 --- /dev/null +++ b/src/rocksdb/util/coding_lean.h @@ -0,0 +1,101 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// Encoding independent of machine byte order: +// * Fixed-length numbers are encoded with least-significant byte first +// (little endian, native order on Intel and others) +// +// More functions in coding.h + +#pragma once + +#include +#include + +#include "port/port.h" // for port::kLittleEndian + +namespace ROCKSDB_NAMESPACE { + +// Lower-level versions of Put... that write directly into a character buffer +// REQUIRES: dst has enough space for the value being written +// -- Implementation of the functions declared above +inline void EncodeFixed16(char* buf, uint16_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + } +} + +inline void EncodeFixed32(char* buf, uint32_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + } +} + +inline void EncodeFixed64(char* buf, uint64_t value) { + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + buf[4] = (value >> 32) & 0xff; + buf[5] = (value >> 40) & 0xff; + buf[6] = (value >> 48) & 0xff; + buf[7] = (value >> 56) & 0xff; + } +} + +// Lower-level versions of Get... that read directly from a character buffer +// without any bounds checking. 
+ +inline uint16_t DecodeFixed16(const char* ptr) { + if (port::kLittleEndian) { + // Load the raw bytes + uint16_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + return ((static_cast(static_cast(ptr[0]))) | + (static_cast(static_cast(ptr[1])) << 8)); + } +} + +inline uint32_t DecodeFixed32(const char* ptr) { + if (port::kLittleEndian) { + // Load the raw bytes + uint32_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + return ((static_cast(static_cast(ptr[0]))) | + (static_cast(static_cast(ptr[1])) << 8) | + (static_cast(static_cast(ptr[2])) << 16) | + (static_cast(static_cast(ptr[3])) << 24)); + } +} + +inline uint64_t DecodeFixed64(const char* ptr) { + if (port::kLittleEndian) { + // Load the raw bytes + uint64_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + uint64_t lo = DecodeFixed32(ptr); + uint64_t hi = DecodeFixed32(ptr + 4); + return (hi << 32) | lo; + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/coding_test.cc b/src/rocksdb/util/coding_test.cc new file mode 100644 index 000000000..79dd7b82e --- /dev/null +++ b/src/rocksdb/util/coding_test.cc @@ -0,0 +1,217 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
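// Editorial sketch, not part of the patch: a compact round trip through the
// helpers declared in util/coding.h, foreshadowing what the tests below check
// in more depth. The byte values follow from EncodeVarint32 and EncodeFixed32
// above; the function name is illustrative only.
#include <cassert>
#include <cstdint>
#include <string>

#include "rocksdb/slice.h"
#include "util/coding.h"

static void CodingRoundTripSketch() {
  using namespace ROCKSDB_NAMESPACE;
  std::string buf;
  PutVarint32(&buf, 300);        // 300 = 0b10'0101100 -> bytes 0xAC 0x02
  PutVarsignedint64(&buf, -2);   // zigzag maps -2 to 3 before varint encoding
  PutFixed32(&buf, 0x04030201);  // little endian: bytes 01 02 03 04
  Slice in(buf);
  uint32_t u32 = 0;
  int64_t s64 = 0;
  uint32_t f32 = 0;
  assert(GetVarint32(&in, &u32) && u32 == 300);
  assert(GetVarsignedint64(&in, &s64) && s64 == -2);
  assert(GetFixed32(&in, &f32) && f32 == 0x04030201);
  assert(in.empty());  // every byte was consumed
}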
+ +#include "util/coding.h" + +#include "test_util/testharness.h" + +namespace ROCKSDB_NAMESPACE { + +class Coding {}; +TEST(Coding, Fixed16) { + std::string s; + for (uint16_t v = 0; v < 0xFFFF; v++) { + PutFixed16(&s, v); + } + + const char* p = s.data(); + for (uint16_t v = 0; v < 0xFFFF; v++) { + uint16_t actual = DecodeFixed16(p); + ASSERT_EQ(v, actual); + p += sizeof(uint16_t); + } +} + +TEST(Coding, Fixed32) { + std::string s; + for (uint32_t v = 0; v < 100000; v++) { + PutFixed32(&s, v); + } + + const char* p = s.data(); + for (uint32_t v = 0; v < 100000; v++) { + uint32_t actual = DecodeFixed32(p); + ASSERT_EQ(v, actual); + p += sizeof(uint32_t); + } +} + +TEST(Coding, Fixed64) { + std::string s; + for (int power = 0; power <= 63; power++) { + uint64_t v = static_cast(1) << power; + PutFixed64(&s, v - 1); + PutFixed64(&s, v + 0); + PutFixed64(&s, v + 1); + } + + const char* p = s.data(); + for (int power = 0; power <= 63; power++) { + uint64_t v = static_cast(1) << power; + uint64_t actual = 0; + actual = DecodeFixed64(p); + ASSERT_EQ(v - 1, actual); + p += sizeof(uint64_t); + + actual = DecodeFixed64(p); + ASSERT_EQ(v + 0, actual); + p += sizeof(uint64_t); + + actual = DecodeFixed64(p); + ASSERT_EQ(v + 1, actual); + p += sizeof(uint64_t); + } +} + +// Test that encoding routines generate little-endian encodings +TEST(Coding, EncodingOutput) { + std::string dst; + PutFixed32(&dst, 0x04030201); + ASSERT_EQ(4U, dst.size()); + ASSERT_EQ(0x01, static_cast(dst[0])); + ASSERT_EQ(0x02, static_cast(dst[1])); + ASSERT_EQ(0x03, static_cast(dst[2])); + ASSERT_EQ(0x04, static_cast(dst[3])); + + dst.clear(); + PutFixed64(&dst, 0x0807060504030201ull); + ASSERT_EQ(8U, dst.size()); + ASSERT_EQ(0x01, static_cast(dst[0])); + ASSERT_EQ(0x02, static_cast(dst[1])); + ASSERT_EQ(0x03, static_cast(dst[2])); + ASSERT_EQ(0x04, static_cast(dst[3])); + ASSERT_EQ(0x05, static_cast(dst[4])); + ASSERT_EQ(0x06, static_cast(dst[5])); + ASSERT_EQ(0x07, static_cast(dst[6])); + ASSERT_EQ(0x08, static_cast(dst[7])); +} + +TEST(Coding, Varint32) { + std::string s; + for (uint32_t i = 0; i < (32 * 32); i++) { + uint32_t v = (i / 32) << (i % 32); + PutVarint32(&s, v); + } + + const char* p = s.data(); + const char* limit = p + s.size(); + for (uint32_t i = 0; i < (32 * 32); i++) { + uint32_t expected = (i / 32) << (i % 32); + uint32_t actual = 0; + const char* start = p; + p = GetVarint32Ptr(p, limit, &actual); + ASSERT_TRUE(p != nullptr); + ASSERT_EQ(expected, actual); + ASSERT_EQ(VarintLength(actual), p - start); + } + ASSERT_EQ(p, s.data() + s.size()); +} + +TEST(Coding, Varint64) { + // Construct the list of values to check + std::vector values; + // Some special values + values.push_back(0); + values.push_back(100); + values.push_back(~static_cast(0)); + values.push_back(~static_cast(0) - 1); + for (uint32_t k = 0; k < 64; k++) { + // Test values near powers of two + const uint64_t power = 1ull << k; + values.push_back(power); + values.push_back(power - 1); + values.push_back(power + 1); + }; + + std::string s; + for (unsigned int i = 0; i < values.size(); i++) { + PutVarint64(&s, values[i]); + } + + const char* p = s.data(); + const char* limit = p + s.size(); + for (unsigned int i = 0; i < values.size(); i++) { + ASSERT_TRUE(p < limit); + uint64_t actual = 0; + const char* start = p; + p = GetVarint64Ptr(p, limit, &actual); + ASSERT_TRUE(p != nullptr); + ASSERT_EQ(values[i], actual); + ASSERT_EQ(VarintLength(actual), p - start); + } + ASSERT_EQ(p, limit); +} + +TEST(Coding, Varint32Overflow) { + uint32_t result; 
+ std::string input("\x81\x82\x83\x84\x85\x11"); + ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), + &result) == nullptr); +} + +TEST(Coding, Varint32Truncation) { + uint32_t large_value = (1u << 31) + 100; + std::string s; + PutVarint32(&s, large_value); + uint32_t result; + for (unsigned int len = 0; len + 1 < s.size(); len++) { + ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == nullptr); + } + ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != + nullptr); + ASSERT_EQ(large_value, result); +} + +TEST(Coding, Varint64Overflow) { + uint64_t result; + std::string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); + ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), + &result) == nullptr); +} + +TEST(Coding, Varint64Truncation) { + uint64_t large_value = (1ull << 63) + 100ull; + std::string s; + PutVarint64(&s, large_value); + uint64_t result; + for (unsigned int len = 0; len + 1 < s.size(); len++) { + ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == nullptr); + } + ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != + nullptr); + ASSERT_EQ(large_value, result); +} + +TEST(Coding, Strings) { + std::string s; + PutLengthPrefixedSlice(&s, Slice("")); + PutLengthPrefixedSlice(&s, Slice("foo")); + PutLengthPrefixedSlice(&s, Slice("bar")); + PutLengthPrefixedSlice(&s, Slice(std::string(200, 'x'))); + + Slice input(s); + Slice v; + ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); + ASSERT_EQ("", v.ToString()); + ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); + ASSERT_EQ("foo", v.ToString()); + ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); + ASSERT_EQ("bar", v.ToString()); + ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); + ASSERT_EQ(std::string(200, 'x'), v.ToString()); + ASSERT_EQ("", input.ToString()); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/compaction_job_stats_impl.cc b/src/rocksdb/util/compaction_job_stats_impl.cc new file mode 100644 index 000000000..cfab2a4fe --- /dev/null +++ b/src/rocksdb/util/compaction_job_stats_impl.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "rocksdb/compaction_job_stats.h" + +namespace ROCKSDB_NAMESPACE { + +#ifndef ROCKSDB_LITE + +void CompactionJobStats::Reset() { + elapsed_micros = 0; + cpu_micros = 0; + + num_input_records = 0; + num_blobs_read = 0; + num_input_files = 0; + num_input_files_at_output_level = 0; + + num_output_records = 0; + num_output_files = 0; + num_output_files_blob = 0; + + is_full_compaction = false; + is_manual_compaction = false; + + total_input_bytes = 0; + total_blob_bytes_read = 0; + total_output_bytes = 0; + total_output_bytes_blob = 0; + + num_records_replaced = 0; + + total_input_raw_key_bytes = 0; + total_input_raw_value_bytes = 0; + + num_input_deletion_records = 0; + num_expired_deletion_records = 0; + + num_corrupt_keys = 0; + + file_write_nanos = 0; + file_range_sync_nanos = 0; + file_fsync_nanos = 0; + file_prepare_write_nanos = 0; + + smallest_output_key_prefix.clear(); + largest_output_key_prefix.clear(); + + num_single_del_fallthru = 0; + num_single_del_mismatch = 0; +} + +void CompactionJobStats::Add(const CompactionJobStats& stats) { + elapsed_micros += stats.elapsed_micros; + cpu_micros += stats.cpu_micros; + + num_input_records += stats.num_input_records; + num_blobs_read += stats.num_blobs_read; + num_input_files += stats.num_input_files; + num_input_files_at_output_level += stats.num_input_files_at_output_level; + + num_output_records += stats.num_output_records; + num_output_files += stats.num_output_files; + num_output_files_blob += stats.num_output_files_blob; + + total_input_bytes += stats.total_input_bytes; + total_blob_bytes_read += stats.total_blob_bytes_read; + total_output_bytes += stats.total_output_bytes; + total_output_bytes_blob += stats.total_output_bytes_blob; + + num_records_replaced += stats.num_records_replaced; + + total_input_raw_key_bytes += stats.total_input_raw_key_bytes; + total_input_raw_value_bytes += stats.total_input_raw_value_bytes; + + num_input_deletion_records += stats.num_input_deletion_records; + num_expired_deletion_records += stats.num_expired_deletion_records; + + num_corrupt_keys += stats.num_corrupt_keys; + + file_write_nanos += stats.file_write_nanos; + file_range_sync_nanos += stats.file_range_sync_nanos; + file_fsync_nanos += stats.file_fsync_nanos; + file_prepare_write_nanos += stats.file_prepare_write_nanos; + + num_single_del_fallthru += stats.num_single_del_fallthru; + num_single_del_mismatch += stats.num_single_del_mismatch; +} + +#else + +void CompactionJobStats::Reset() {} + +void CompactionJobStats::Add(const CompactionJobStats& /*stats*/) {} + +#endif // !ROCKSDB_LITE + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/comparator.cc b/src/rocksdb/util/comparator.cc new file mode 100644 index 000000000..f85ed69ee --- /dev/null +++ b/src/rocksdb/util/comparator.cc @@ -0,0 +1,391 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "rocksdb/comparator.h" + +#include + +#include +#include +#include +#include + +#include "db/dbformat.h" +#include "port/lang.h" +#include "port/port.h" +#include "rocksdb/convenience.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { +class BytewiseComparatorImpl : public Comparator { + public: + BytewiseComparatorImpl() {} + static const char* kClassName() { return "leveldb.BytewiseComparator"; } + const char* Name() const override { return kClassName(); } + + int Compare(const Slice& a, const Slice& b) const override { + return a.compare(b); + } + + bool Equal(const Slice& a, const Slice& b) const override { return a == b; } + + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + // Find length of common prefix + size_t min_length = std::min(start->size(), limit.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && + ((*start)[diff_index] == limit[diff_index])) { + diff_index++; + } + + if (diff_index >= min_length) { + // Do not shorten if one string is a prefix of the other + } else { + uint8_t start_byte = static_cast((*start)[diff_index]); + uint8_t limit_byte = static_cast(limit[diff_index]); + if (start_byte >= limit_byte) { + // Cannot shorten since limit is smaller than start or start is + // already the shortest possible. + return; + } + assert(start_byte < limit_byte); + + if (diff_index < limit.size() - 1 || start_byte + 1 < limit_byte) { + (*start)[diff_index]++; + start->resize(diff_index + 1); + } else { + // v + // A A 1 A A A + // A A 2 + // + // Incrementing the current byte will make start bigger than limit, we + // will skip this byte, and find the first non 0xFF byte in start and + // increment it. + diff_index++; + + while (diff_index < start->size()) { + // Keep moving until we find the first non 0xFF byte to + // increment it + if (static_cast((*start)[diff_index]) < + static_cast(0xff)) { + (*start)[diff_index]++; + start->resize(diff_index + 1); + break; + } + diff_index++; + } + } + assert(Compare(*start, limit) < 0); + } + } + + void FindShortSuccessor(std::string* key) const override { + // Find first character that can be incremented + size_t n = key->size(); + for (size_t i = 0; i < n; i++) { + const uint8_t byte = (*key)[i]; + if (byte != static_cast(0xff)) { + (*key)[i] = byte + 1; + key->resize(i + 1); + return; + } + } + // *key is a run of 0xffs. Leave it alone. 
+ } + + bool IsSameLengthImmediateSuccessor(const Slice& s, + const Slice& t) const override { + if (s.size() != t.size() || s.size() == 0) { + return false; + } + size_t diff_ind = s.difference_offset(t); + // same slice + if (diff_ind >= s.size()) return false; + uint8_t byte_s = static_cast(s[diff_ind]); + uint8_t byte_t = static_cast(t[diff_ind]); + // first different byte must be consecutive, and remaining bytes must be + // 0xff for s and 0x00 for t + if (byte_s != uint8_t{0xff} && byte_s + 1 == byte_t) { + for (size_t i = diff_ind + 1; i < s.size(); ++i) { + byte_s = static_cast(s[i]); + byte_t = static_cast(t[i]); + if (byte_s != uint8_t{0xff} || byte_t != uint8_t{0x00}) { + return false; + } + } + return true; + } else { + return false; + } + } + + bool CanKeysWithDifferentByteContentsBeEqual() const override { + return false; + } + + using Comparator::CompareWithoutTimestamp; + int CompareWithoutTimestamp(const Slice& a, bool /*a_has_ts*/, const Slice& b, + bool /*b_has_ts*/) const override { + return a.compare(b); + } + + bool EqualWithoutTimestamp(const Slice& a, const Slice& b) const override { + return a == b; + } +}; + +class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { + public: + ReverseBytewiseComparatorImpl() {} + + static const char* kClassName() { + return "rocksdb.ReverseBytewiseComparator"; + } + const char* Name() const override { return kClassName(); } + + int Compare(const Slice& a, const Slice& b) const override { + return -a.compare(b); + } + + void FindShortestSeparator(std::string* start, + const Slice& limit) const override { + // Find length of common prefix + size_t min_length = std::min(start->size(), limit.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && + ((*start)[diff_index] == limit[diff_index])) { + diff_index++; + } + + assert(diff_index <= min_length); + if (diff_index == min_length) { + // Do not shorten if one string is a prefix of the other + // + // We could handle cases like: + // V + // A A 2 X Y + // A A 2 + // in a similar way as BytewiseComparator::FindShortestSeparator(). + // We keep it simple by not implementing it. We can come back to it + // later when needed. + } else { + uint8_t start_byte = static_cast((*start)[diff_index]); + uint8_t limit_byte = static_cast(limit[diff_index]); + if (start_byte > limit_byte && diff_index < start->size() - 1) { + // Case like + // V + // A A 3 A A + // A A 1 B B + // + // or + // v + // A A 2 A A + // A A 1 B B + // In this case "AA2" will be good. +#ifndef NDEBUG + std::string old_start = *start; +#endif + start->resize(diff_index + 1); +#ifndef NDEBUG + assert(old_start >= *start); +#endif + assert(Slice(*start).compare(limit) > 0); + } + } + } + + void FindShortSuccessor(std::string* /*key*/) const override { + // Don't do anything for simplicity. + } + + bool IsSameLengthImmediateSuccessor(const Slice& s, + const Slice& t) const override { + // Always returning false to prevent surfacing design flaws in + // auto_prefix_mode + (void)s, (void)t; + return false; + // "Correct" implementation: + // return BytewiseComparatorImpl::IsSameLengthImmediateSuccessor(t, s); + } + + bool CanKeysWithDifferentByteContentsBeEqual() const override { + return false; + } + + using Comparator::CompareWithoutTimestamp; + int CompareWithoutTimestamp(const Slice& a, bool /*a_has_ts*/, const Slice& b, + bool /*b_has_ts*/) const override { + return -a.compare(b); + } +}; + +// EXPERIMENTAL +// Comparator with 64-bit integer timestamp. +// We did not performance test this yet. 
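// (Editorial note, not part of the original comment: ordering under this
// wrapper is user-key-first, then newest-timestamp-first. For example, with
// the bytewise base comparator the entries ("k1", ts=7), ("k1", ts=5) and
// ("k2", ts=9) sort as ("k1", ts=7) < ("k1", ts=5) < ("k2", ts=9), because
// Compare() below negates CompareTimestamp() so that newer versions of the
// same user key come first.)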
+template +class ComparatorWithU64TsImpl : public Comparator { + static_assert(std::is_base_of::value, + "template type must be a inherited type of comparator"); + + public: + explicit ComparatorWithU64TsImpl() : Comparator(/*ts_sz=*/sizeof(uint64_t)) { + assert(cmp_without_ts_.timestamp_size() == 0); + } + + static const char* kClassName() { + static std::string class_name = kClassNameInternal(); + return class_name.c_str(); + } + + const char* Name() const override { return kClassName(); } + + void FindShortSuccessor(std::string*) const override {} + void FindShortestSeparator(std::string*, const Slice&) const override {} + int Compare(const Slice& a, const Slice& b) const override { + int ret = CompareWithoutTimestamp(a, b); + size_t ts_sz = timestamp_size(); + if (ret != 0) { + return ret; + } + // Compare timestamp. + // For the same user key with different timestamps, larger (newer) timestamp + // comes first. + return -CompareTimestamp(ExtractTimestampFromUserKey(a, ts_sz), + ExtractTimestampFromUserKey(b, ts_sz)); + } + using Comparator::CompareWithoutTimestamp; + int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b, + bool b_has_ts) const override { + const size_t ts_sz = timestamp_size(); + assert(!a_has_ts || a.size() >= ts_sz); + assert(!b_has_ts || b.size() >= ts_sz); + Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, ts_sz) : a; + Slice rhs = b_has_ts ? StripTimestampFromUserKey(b, ts_sz) : b; + return cmp_without_ts_.Compare(lhs, rhs); + } + int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override { + assert(ts1.size() == sizeof(uint64_t)); + assert(ts2.size() == sizeof(uint64_t)); + uint64_t lhs = DecodeFixed64(ts1.data()); + uint64_t rhs = DecodeFixed64(ts2.data()); + if (lhs < rhs) { + return -1; + } else if (lhs > rhs) { + return 1; + } else { + return 0; + } + } + + private: + static std::string kClassNameInternal() { + std::stringstream ss; + ss << TComparator::kClassName() << ".u64ts"; + return ss.str(); + } + + TComparator cmp_without_ts_; +}; + +} // namespace + +const Comparator* BytewiseComparator() { + STATIC_AVOID_DESTRUCTION(BytewiseComparatorImpl, bytewise); + return &bytewise; +} + +const Comparator* ReverseBytewiseComparator() { + STATIC_AVOID_DESTRUCTION(ReverseBytewiseComparatorImpl, rbytewise); + return &rbytewise; +} + +const Comparator* BytewiseComparatorWithU64Ts() { + STATIC_AVOID_DESTRUCTION(ComparatorWithU64TsImpl, + comp_with_u64_ts); + return &comp_with_u64_ts; +} + +#ifndef ROCKSDB_LITE +static int RegisterBuiltinComparators(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + BytewiseComparatorImpl::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* /*guard */, + std::string* /* errmsg */) { return BytewiseComparator(); }); + library.AddFactory( + ReverseBytewiseComparatorImpl::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* /*guard */, + std::string* /* errmsg */) { return ReverseBytewiseComparator(); }); + library.AddFactory( + ComparatorWithU64TsImpl::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* /*guard */, + std::string* /* errmsg */) { return BytewiseComparatorWithU64Ts(); }); + return 3; +} +#endif // ROCKSDB_LITE + +Status Comparator::CreateFromString(const ConfigOptions& config_options, + const std::string& value, + const Comparator** result) { +#ifndef ROCKSDB_LITE + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinComparators(*(ObjectLibrary::Default().get()), ""); + }); +#endif // 
ROCKSDB_LITE + std::string id; + std::unordered_map opt_map; + Status status = Customizable::GetOptionsMap(config_options, *result, value, + &id, &opt_map); + if (!status.ok()) { // GetOptionsMap failed + return status; + } + if (id == BytewiseComparatorImpl::kClassName()) { + *result = BytewiseComparator(); + } else if (id == ReverseBytewiseComparatorImpl::kClassName()) { + *result = ReverseBytewiseComparator(); + } else if (id == + ComparatorWithU64TsImpl::kClassName()) { + *result = BytewiseComparatorWithU64Ts(); + } else if (value.empty()) { + // No Id and no options. Clear the object + *result = nullptr; + return Status::OK(); + } else if (id.empty()) { // We have no Id but have options. Not good + return Status::NotSupported("Cannot reset object ", id); + } else { +#ifndef ROCKSDB_LITE + status = config_options.registry->NewStaticObject(id, result); +#else + status = Status::NotSupported("Cannot load object in LITE mode ", id); +#endif // ROCKSDB_LITE + if (!status.ok()) { + if (config_options.ignore_unsupported_options && + status.IsNotSupported()) { + return Status::OK(); + } else { + return status; + } + } else { + Comparator* comparator = const_cast(*result); + status = + Customizable::ConfigureNewObject(config_options, comparator, opt_map); + } + } + return status; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/compression.cc b/src/rocksdb/util/compression.cc new file mode 100644 index 000000000..8e2f01b12 --- /dev/null +++ b/src/rocksdb/util/compression.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2022-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/compression.h" + +namespace ROCKSDB_NAMESPACE { + +StreamingCompress* StreamingCompress::Create(CompressionType compression_type, + const CompressionOptions& opts, + uint32_t compress_format_version, + size_t max_output_len) { + switch (compression_type) { + case kZSTD: { + if (!ZSTD_Streaming_Supported()) { + return nullptr; + } + return new ZSTDStreamingCompress(opts, compress_format_version, + max_output_len); + } + default: + return nullptr; + } +} + +StreamingUncompress* StreamingUncompress::Create( + CompressionType compression_type, uint32_t compress_format_version, + size_t max_output_len) { + switch (compression_type) { + case kZSTD: { + if (!ZSTD_Streaming_Supported()) { + return nullptr; + } + return new ZSTDStreamingUncompress(compress_format_version, + max_output_len); + } + default: + return nullptr; + } +} + +int ZSTDStreamingCompress::Compress(const char* input, size_t input_size, + char* output, size_t* output_pos) { + assert(input != nullptr && output != nullptr && output_pos != nullptr); + *output_pos = 0; + // Don't need to compress an empty input + if (input_size == 0) { + return 0; + } +#ifndef ZSTD_STREAMING + (void)input; + (void)input_size; + (void)output; + return -1; +#else + if (input_buffer_.src == nullptr || input_buffer_.src != input) { + // New input + // Catch errors where the previous input was not fully decompressed. + assert(input_buffer_.pos == input_buffer_.size); + input_buffer_ = {input, input_size, /*pos=*/0}; + } else if (input_buffer_.src == input) { + // Same input, not fully compressed. 
+ } + ZSTD_outBuffer output_buffer = {output, max_output_len_, /*pos=*/0}; + const size_t remaining = + ZSTD_compressStream2(cctx_, &output_buffer, &input_buffer_, ZSTD_e_end); + if (ZSTD_isError(remaining)) { + // Failure + Reset(); + return -1; + } + // Success + *output_pos = output_buffer.pos; + return (int)remaining; +#endif +} + +void ZSTDStreamingCompress::Reset() { +#ifdef ZSTD_STREAMING + ZSTD_CCtx_reset(cctx_, ZSTD_ResetDirective::ZSTD_reset_session_only); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif +} + +int ZSTDStreamingUncompress::Uncompress(const char* input, size_t input_size, + char* output, size_t* output_pos) { + assert(input != nullptr && output != nullptr && output_pos != nullptr); + *output_pos = 0; + // Don't need to uncompress an empty input + if (input_size == 0) { + return 0; + } +#ifdef ZSTD_STREAMING + if (input_buffer_.src != input) { + // New input + input_buffer_ = {input, input_size, /*pos=*/0}; + } + ZSTD_outBuffer output_buffer = {output, max_output_len_, /*pos=*/0}; + size_t ret = ZSTD_decompressStream(dctx_, &output_buffer, &input_buffer_); + if (ZSTD_isError(ret)) { + Reset(); + return -1; + } + *output_pos = output_buffer.pos; + return (int)(input_buffer_.size - input_buffer_.pos); +#else + (void)input; + (void)input_size; + (void)output; + return -1; +#endif +} + +void ZSTDStreamingUncompress::Reset() { +#ifdef ZSTD_STREAMING + ZSTD_DCtx_reset(dctx_, ZSTD_ResetDirective::ZSTD_reset_session_only); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/compression.h b/src/rocksdb/util/compression.h new file mode 100644 index 000000000..0d4febcfb --- /dev/null +++ b/src/rocksdb/util/compression.h @@ -0,0 +1,1786 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
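// Editorial sketch, not part of the patch: the calling pattern implied by the
// streaming wrappers defined in compression.cc above. A positive return from
// Compress() means more output is pending for the same input, 0 means done,
// and -1 means failure. The base-class Compress() signature and ownership
// semantics are assumed to match the ZSTDStreamingCompress override shown
// above; names and buffer sizes are illustrative.
#include <cstddef>
#include <memory>
#include <string>

#include "util/compression.h"

static bool StreamingCompressSketch(const char* data, size_t len,
                                    std::string* out) {
  using namespace ROCKSDB_NAMESPACE;
  CompressionOptions opts;
  constexpr size_t kChunk = 16 * 1024;  // illustrative output chunk size
  std::unique_ptr<StreamingCompress> compress(StreamingCompress::Create(
      kZSTD, opts, /*compress_format_version=*/2, /*max_output_len=*/kChunk));
  if (compress == nullptr) {
    return false;  // streaming ZSTD not compiled in
  }
  char buf[kChunk];
  int pending = 0;
  do {
    size_t pos = 0;
    pending = compress->Compress(data, len, buf, &pos);
    if (pending < 0) {
      return false;  // compression error
    }
    out->append(buf, pos);
  } while (pending > 0);
  return true;
}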
+// +#pragma once + +#include +#include +#ifdef ROCKSDB_MALLOC_USABLE_SIZE +#ifdef OS_FREEBSD +#include +#else // OS_FREEBSD +#include +#endif // OS_FREEBSD +#endif // ROCKSDB_MALLOC_USABLE_SIZE +#include + +#include "memory/memory_allocator.h" +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "test_util/sync_point.h" +#include "util/coding.h" +#include "util/compression_context_cache.h" +#include "util/string_util.h" + +#ifdef SNAPPY +#include +#endif + +#ifdef ZLIB +#include +#endif + +#ifdef BZIP2 +#include +#endif + +#if defined(LZ4) +#include +#include +#endif + +#if defined(ZSTD) +#include +// v1.1.3+ +#if ZSTD_VERSION_NUMBER >= 10103 +#include +#endif // ZSTD_VERSION_NUMBER >= 10103 +// v1.4.0+ +#if ZSTD_VERSION_NUMBER >= 10400 +#define ZSTD_STREAMING +#endif // ZSTD_VERSION_NUMBER >= 10400 +namespace ROCKSDB_NAMESPACE { +// Need this for the context allocation override +// On windows we need to do this explicitly +#if (ZSTD_VERSION_NUMBER >= 500) +#if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \ + defined(ZSTD_STATIC_LINKING_ONLY) +#define ROCKSDB_ZSTD_CUSTOM_MEM +namespace port { +ZSTD_customMem GetJeZstdAllocationOverrides(); +} // namespace port +#endif // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && + // defined(ZSTD_STATIC_LINKING_ONLY) + +// We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use +// `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was +// introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came +// in v1.1.4, so that is the version we require. As of today's latest version +// (v1.3.8), they are both still in the experimental API, which means they are +// only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set. +#if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104 +#define ROCKSDB_ZSTD_DDICT +#endif // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104 + +// Cached data represents a portion that can be re-used +// If, in the future we have more than one native context to +// cache we can arrange this as a tuple +class ZSTDUncompressCachedData { + public: + using ZSTDNativeContext = ZSTD_DCtx*; + ZSTDUncompressCachedData() {} + // Init from cache + ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete; + ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete; + ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) noexcept + : ZSTDUncompressCachedData() { + *this = std::move(o); + } + ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o) noexcept { + assert(zstd_ctx_ == nullptr); + std::swap(zstd_ctx_, o.zstd_ctx_); + std::swap(cache_idx_, o.cache_idx_); + return *this; + } + ZSTDNativeContext Get() const { return zstd_ctx_; } + int64_t GetCacheIndex() const { return cache_idx_; } + void CreateIfNeeded() { + if (zstd_ctx_ == nullptr) { +#ifdef ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = + ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides()); +#else // ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = ZSTD_createDCtx(); +#endif // ROCKSDB_ZSTD_CUSTOM_MEM + cache_idx_ = -1; + } + } + void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) { + zstd_ctx_ = o.zstd_ctx_; + cache_idx_ = idx; + } + ~ZSTDUncompressCachedData() { + if (zstd_ctx_ != nullptr && cache_idx_ == -1) { + ZSTD_freeDCtx(zstd_ctx_); + } + } + + private: + ZSTDNativeContext zstd_ctx_ = nullptr; + int64_t cache_idx_ = -1; // -1 means this instance owns the context +}; +#endif // (ZSTD_VERSION_NUMBER >= 500) +} // namespace 
ROCKSDB_NAMESPACE +#endif // ZSTD + +#if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500) +namespace ROCKSDB_NAMESPACE { +class ZSTDUncompressCachedData { + void* padding; // unused + public: + using ZSTDNativeContext = void*; + ZSTDUncompressCachedData() {} + ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {} + ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete; + ZSTDUncompressCachedData(ZSTDUncompressCachedData&&) noexcept = default; + ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&) noexcept = + default; + ZSTDNativeContext Get() const { return nullptr; } + int64_t GetCacheIndex() const { return -1; } + void CreateIfNeeded() {} + void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {} + + private: + void ignore_padding__() { padding = nullptr; } +}; +} // namespace ROCKSDB_NAMESPACE +#endif + +#if defined(XPRESS) +#include "port/xpress.h" +#endif + +namespace ROCKSDB_NAMESPACE { + +// Holds dictionary and related data, like ZSTD's digested compression +// dictionary. +struct CompressionDict { +#if ZSTD_VERSION_NUMBER >= 700 + ZSTD_CDict* zstd_cdict_ = nullptr; +#endif // ZSTD_VERSION_NUMBER >= 700 + std::string dict_; + + public: +#if ZSTD_VERSION_NUMBER >= 700 + CompressionDict(std::string dict, CompressionType type, int level) { +#else // ZSTD_VERSION_NUMBER >= 700 + CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) { +#endif // ZSTD_VERSION_NUMBER >= 700 + dict_ = std::move(dict); +#if ZSTD_VERSION_NUMBER >= 700 + zstd_cdict_ = nullptr; + if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) { + if (level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } + // Should be safe (but slower) if below call fails as we'll use the + // raw dictionary to compress. + zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level); + assert(zstd_cdict_ != nullptr); + } +#endif // ZSTD_VERSION_NUMBER >= 700 + } + + ~CompressionDict() { +#if ZSTD_VERSION_NUMBER >= 700 + size_t res = 0; + if (zstd_cdict_ != nullptr) { + res = ZSTD_freeCDict(zstd_cdict_); + } + assert(res == 0); // Last I checked they can't fail + (void)res; // prevent unused var warning +#endif // ZSTD_VERSION_NUMBER >= 700 + } + +#if ZSTD_VERSION_NUMBER >= 700 + const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; } +#endif // ZSTD_VERSION_NUMBER >= 700 + + Slice GetRawDict() const { return dict_; } + + static const CompressionDict& GetEmptyDict() { + static CompressionDict empty_dict{}; + return empty_dict; + } + + CompressionDict() = default; + // Disable copy/move + CompressionDict(const CompressionDict&) = delete; + CompressionDict& operator=(const CompressionDict&) = delete; + CompressionDict(CompressionDict&&) = delete; + CompressionDict& operator=(CompressionDict&&) = delete; +}; + +// Holds dictionary and related data, like ZSTD's digested uncompression +// dictionary. +struct UncompressionDict { + // Block containing the data for the compression dictionary in case the + // constructor that takes a string parameter is used. + std::string dict_; + + // Block containing the data for the compression dictionary in case the + // constructor that takes a Slice parameter is used and the passed in + // CacheAllocationPtr is not nullptr. + CacheAllocationPtr allocation_; + + // Slice pointing to the compression dictionary data. 
Can point to + // dict_, allocation_, or some other memory location, depending on how + // the object was constructed. + Slice slice_; + +#ifdef ROCKSDB_ZSTD_DDICT + // Processed version of the contents of slice_ for ZSTD compression. + ZSTD_DDict* zstd_ddict_ = nullptr; +#endif // ROCKSDB_ZSTD_DDICT + +#ifdef ROCKSDB_ZSTD_DDICT + UncompressionDict(std::string dict, bool using_zstd) +#else // ROCKSDB_ZSTD_DDICT + UncompressionDict(std::string dict, bool /* using_zstd */) +#endif // ROCKSDB_ZSTD_DDICT + : dict_(std::move(dict)), slice_(dict_) { +#ifdef ROCKSDB_ZSTD_DDICT + if (!slice_.empty() && using_zstd) { + zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size()); + assert(zstd_ddict_ != nullptr); + } +#endif // ROCKSDB_ZSTD_DDICT + } + +#ifdef ROCKSDB_ZSTD_DDICT + UncompressionDict(Slice slice, CacheAllocationPtr&& allocation, + bool using_zstd) +#else // ROCKSDB_ZSTD_DDICT + UncompressionDict(Slice slice, CacheAllocationPtr&& allocation, + bool /* using_zstd */) +#endif // ROCKSDB_ZSTD_DDICT + : allocation_(std::move(allocation)), slice_(std::move(slice)) { +#ifdef ROCKSDB_ZSTD_DDICT + if (!slice_.empty() && using_zstd) { + zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size()); + assert(zstd_ddict_ != nullptr); + } +#endif // ROCKSDB_ZSTD_DDICT + } + + UncompressionDict(UncompressionDict&& rhs) + : dict_(std::move(rhs.dict_)), + allocation_(std::move(rhs.allocation_)), + slice_(std::move(rhs.slice_)) +#ifdef ROCKSDB_ZSTD_DDICT + , + zstd_ddict_(rhs.zstd_ddict_) +#endif + { +#ifdef ROCKSDB_ZSTD_DDICT + rhs.zstd_ddict_ = nullptr; +#endif + } + + ~UncompressionDict() { +#ifdef ROCKSDB_ZSTD_DDICT + size_t res = 0; + if (zstd_ddict_ != nullptr) { + res = ZSTD_freeDDict(zstd_ddict_); + } + assert(res == 0); // Last I checked they can't fail + (void)res; // prevent unused var warning +#endif // ROCKSDB_ZSTD_DDICT + } + + UncompressionDict& operator=(UncompressionDict&& rhs) { + if (this == &rhs) { + return *this; + } + + dict_ = std::move(rhs.dict_); + allocation_ = std::move(rhs.allocation_); + slice_ = std::move(rhs.slice_); + +#ifdef ROCKSDB_ZSTD_DDICT + zstd_ddict_ = rhs.zstd_ddict_; + rhs.zstd_ddict_ = nullptr; +#endif + + return *this; + } + + // The object is self-contained if the string constructor is used, or the + // Slice constructor is invoked with a non-null allocation. Otherwise, it + // is the caller's responsibility to ensure that the underlying storage + // outlives this object. 
+ bool own_bytes() const { return !dict_.empty() || allocation_; } + + const Slice& GetRawDict() const { return slice_; } + +#ifdef ROCKSDB_ZSTD_DDICT + const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; } +#endif // ROCKSDB_ZSTD_DDICT + + static const UncompressionDict& GetEmptyDict() { + static UncompressionDict empty_dict{}; + return empty_dict; + } + + size_t ApproximateMemoryUsage() const { + size_t usage = sizeof(struct UncompressionDict); + usage += dict_.size(); + if (allocation_) { + auto allocator = allocation_.get_deleter().allocator; + if (allocator) { + usage += allocator->UsableSize(allocation_.get(), slice_.size()); + } else { + usage += slice_.size(); + } + } +#ifdef ROCKSDB_ZSTD_DDICT + usage += ZSTD_sizeof_DDict(zstd_ddict_); +#endif // ROCKSDB_ZSTD_DDICT + return usage; + } + + UncompressionDict() = default; + // Disable copy + UncompressionDict(const CompressionDict&) = delete; + UncompressionDict& operator=(const CompressionDict&) = delete; +}; + +class CompressionContext { + private: +#if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500) + ZSTD_CCtx* zstd_ctx_ = nullptr; + void CreateNativeContext(CompressionType type) { + if (type == kZSTD || type == kZSTDNotFinalCompression) { +#ifdef ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = + ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides()); +#else // ROCKSDB_ZSTD_CUSTOM_MEM + zstd_ctx_ = ZSTD_createCCtx(); +#endif // ROCKSDB_ZSTD_CUSTOM_MEM + } + } + void DestroyNativeContext() { + if (zstd_ctx_ != nullptr) { + ZSTD_freeCCtx(zstd_ctx_); + } + } + + public: + // callable inside ZSTD_Compress + ZSTD_CCtx* ZSTDPreallocCtx() const { + assert(zstd_ctx_ != nullptr); + return zstd_ctx_; + } + +#else // ZSTD && (ZSTD_VERSION_NUMBER >= 500) + private: + void CreateNativeContext(CompressionType /* type */) {} + void DestroyNativeContext() {} +#endif // ZSTD && (ZSTD_VERSION_NUMBER >= 500) + public: + explicit CompressionContext(CompressionType type) { + CreateNativeContext(type); + } + ~CompressionContext() { DestroyNativeContext(); } + CompressionContext(const CompressionContext&) = delete; + CompressionContext& operator=(const CompressionContext&) = delete; +}; + +class CompressionInfo { + const CompressionOptions& opts_; + const CompressionContext& context_; + const CompressionDict& dict_; + const CompressionType type_; + const uint64_t sample_for_compression_; + + public: + CompressionInfo(const CompressionOptions& _opts, + const CompressionContext& _context, + const CompressionDict& _dict, CompressionType _type, + uint64_t _sample_for_compression) + : opts_(_opts), + context_(_context), + dict_(_dict), + type_(_type), + sample_for_compression_(_sample_for_compression) {} + + const CompressionOptions& options() const { return opts_; } + const CompressionContext& context() const { return context_; } + const CompressionDict& dict() const { return dict_; } + CompressionType type() const { return type_; } + uint64_t SampleForCompression() const { return sample_for_compression_; } +}; + +class UncompressionContext { + private: + CompressionContextCache* ctx_cache_ = nullptr; + ZSTDUncompressCachedData uncomp_cached_data_; + + public: + explicit UncompressionContext(CompressionType type) { + if (type == kZSTD || type == kZSTDNotFinalCompression) { + ctx_cache_ = CompressionContextCache::Instance(); + uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData(); + } + } + ~UncompressionContext() { + if (uncomp_cached_data_.GetCacheIndex() != -1) { + assert(ctx_cache_ != nullptr); + 
ctx_cache_->ReturnCachedZSTDUncompressData( + uncomp_cached_data_.GetCacheIndex()); + } + } + UncompressionContext(const UncompressionContext&) = delete; + UncompressionContext& operator=(const UncompressionContext&) = delete; + + ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const { + return uncomp_cached_data_.Get(); + } +}; + +class UncompressionInfo { + const UncompressionContext& context_; + const UncompressionDict& dict_; + const CompressionType type_; + + public: + UncompressionInfo(const UncompressionContext& _context, + const UncompressionDict& _dict, CompressionType _type) + : context_(_context), dict_(_dict), type_(_type) {} + + const UncompressionContext& context() const { return context_; } + const UncompressionDict& dict() const { return dict_; } + CompressionType type() const { return type_; } +}; + +inline bool Snappy_Supported() { +#ifdef SNAPPY + return true; +#else + return false; +#endif +} + +inline bool Zlib_Supported() { +#ifdef ZLIB + return true; +#else + return false; +#endif +} + +inline bool BZip2_Supported() { +#ifdef BZIP2 + return true; +#else + return false; +#endif +} + +inline bool LZ4_Supported() { +#ifdef LZ4 + return true; +#else + return false; +#endif +} + +inline bool XPRESS_Supported() { +#ifdef XPRESS + return true; +#else + return false; +#endif +} + +inline bool ZSTD_Supported() { +#ifdef ZSTD + // ZSTD format is finalized since version 0.8.0. + return (ZSTD_versionNumber() >= 800); +#else + return false; +#endif +} + +inline bool ZSTDNotFinal_Supported() { +#ifdef ZSTD + return true; +#else + return false; +#endif +} + +inline bool ZSTD_Streaming_Supported() { +#if defined(ZSTD) && defined(ZSTD_STREAMING) + return true; +#else + return false; +#endif +} + +inline bool StreamingCompressionTypeSupported( + CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return true; + case kZSTD: + return ZSTD_Streaming_Supported(); + default: + return false; + } +} + +inline bool CompressionTypeSupported(CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return true; + case kSnappyCompression: + return Snappy_Supported(); + case kZlibCompression: + return Zlib_Supported(); + case kBZip2Compression: + return BZip2_Supported(); + case kLZ4Compression: + return LZ4_Supported(); + case kLZ4HCCompression: + return LZ4_Supported(); + case kXpressCompression: + return XPRESS_Supported(); + case kZSTDNotFinalCompression: + return ZSTDNotFinal_Supported(); + case kZSTD: + return ZSTD_Supported(); + default: + assert(false); + return false; + } +} + +inline bool DictCompressionTypeSupported(CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return false; + case kSnappyCompression: + return false; + case kZlibCompression: + return Zlib_Supported(); + case kBZip2Compression: + return false; + case kLZ4Compression: + case kLZ4HCCompression: +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + return LZ4_Supported(); +#else + return false; +#endif + case kXpressCompression: + return false; + case kZSTDNotFinalCompression: +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + return ZSTDNotFinal_Supported(); +#else + return false; +#endif + case kZSTD: +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + return ZSTD_Supported(); +#else + return false; +#endif + default: + assert(false); + return false; + } +} + +inline std::string CompressionTypeToString(CompressionType compression_type) { + switch (compression_type) { + case kNoCompression: + return "NoCompression"; 
+ case kSnappyCompression: + return "Snappy"; + case kZlibCompression: + return "Zlib"; + case kBZip2Compression: + return "BZip2"; + case kLZ4Compression: + return "LZ4"; + case kLZ4HCCompression: + return "LZ4HC"; + case kXpressCompression: + return "Xpress"; + case kZSTD: + return "ZSTD"; + case kZSTDNotFinalCompression: + return "ZSTDNotFinal"; + case kDisableCompressionOption: + return "DisableOption"; + default: + assert(false); + return ""; + } +} + +inline std::string CompressionOptionsToString( + CompressionOptions& compression_options) { + std::string result; + result.reserve(512); + result.append("window_bits=") + .append(std::to_string(compression_options.window_bits)) + .append("; "); + result.append("level=") + .append(std::to_string(compression_options.level)) + .append("; "); + result.append("strategy=") + .append(std::to_string(compression_options.strategy)) + .append("; "); + result.append("max_dict_bytes=") + .append(std::to_string(compression_options.max_dict_bytes)) + .append("; "); + result.append("zstd_max_train_bytes=") + .append(std::to_string(compression_options.zstd_max_train_bytes)) + .append("; "); + result.append("enabled=") + .append(std::to_string(compression_options.enabled)) + .append("; "); + result.append("max_dict_buffer_bytes=") + .append(std::to_string(compression_options.max_dict_buffer_bytes)) + .append("; "); + result.append("use_zstd_dict_trainer=") + .append(std::to_string(compression_options.use_zstd_dict_trainer)) + .append("; "); + return result; +} + +// compress_format_version can have two values: +// 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed +// block. Also, decompressed sizes for LZ4 are encoded in platform-dependent +// way. +// 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the +// start of compressed block. Snappy format is the same as version 1. 
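// Editorial sketch, not part of the patch: what the two block layouts
// described above look like on the wire. Under compress_format_version == 2 a
// compressed block is framed as [varint32 decompressed size][compressed
// payload]; version 1 omits this prefix (LZ4 in version 1 encodes the size in
// a platform-dependent way instead). The helpers in the compression namespace
// just below, PutDecompressedSizeInfo and GetDecompressedSizeInfo, read and
// write exactly this prefix; the function names here are illustrative.
#include <cstddef>
#include <cstdint>
#include <string>

#include "util/coding.h"

static void FrameV2BlockSketch(const std::string& compressed_payload,
                               uint32_t decompressed_size, std::string* block) {
  ROCKSDB_NAMESPACE::PutVarint32(block, decompressed_size);  // the v2 prefix
  block->append(compressed_payload);                         // payload bytes
}

static bool ParseV2BlockHeaderSketch(const char* data, size_t size,
                                     uint32_t* decompressed_size,
                                     const char** payload) {
  const char* p = ROCKSDB_NAMESPACE::GetVarint32Ptr(data, data + size,
                                                    decompressed_size);
  if (p == nullptr) {
    return false;  // truncated or malformed size prefix
  }
  *payload = p;  // compressed payload starts right after the varint
  return true;
}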
+ +inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input, + size_t length, ::std::string* output) { +#ifdef SNAPPY + output->resize(snappy::MaxCompressedLength(length)); + size_t outlen; + snappy::RawCompress(input, length, &(*output)[0], &outlen); + output->resize(outlen); + return true; +#else + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +inline CacheAllocationPtr Snappy_Uncompress( + const char* input, size_t length, size_t* uncompressed_size, + MemoryAllocator* allocator = nullptr) { +#ifdef SNAPPY + size_t uncompressed_length = 0; + if (!snappy::GetUncompressedLength(input, length, &uncompressed_length)) { + return nullptr; + } + + CacheAllocationPtr output = AllocateBlock(uncompressed_length, allocator); + + if (!snappy::RawUncompress(input, length, output.get())) { + return nullptr; + } + + *uncompressed_size = uncompressed_length; + + return output; +#else + (void)input; + (void)length; + (void)uncompressed_size; + (void)allocator; + return nullptr; +#endif +} + +namespace compression { +// returns size +inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) { + PutVarint32(output, length); + return output->size(); +} + +inline bool GetDecompressedSizeInfo(const char** input_data, + size_t* input_length, + uint32_t* output_len) { + auto new_input_data = + GetVarint32Ptr(*input_data, *input_data + *input_length, output_len); + if (new_input_data == nullptr) { + return false; + } + *input_length -= (new_input_data - *input_data); + *input_data = new_input_data; + return true; +} +} // namespace compression + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. +inline bool Zlib_Compress(const CompressionInfo& info, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef ZLIB + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = 0; + if (compress_format_version == 2) { + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } + + // The memLevel parameter specifies how much memory should be allocated for + // the internal compression state. + // memLevel=1 uses minimum memory but is slow and reduces compression ratio. + // memLevel=9 uses maximum memory for optimal speed. + // The default value is 8. See zconf.h for more details. + static const int memLevel = 8; + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + level = Z_DEFAULT_COMPRESSION; + } else { + level = info.options().level; + } + z_stream _stream; + memset(&_stream, 0, sizeof(z_stream)); + int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits, + memLevel, info.options().strategy); + if (st != Z_OK) { + return false; + } + + Slice compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + // Initialize the compression library's dictionary + st = deflateSetDictionary( + &_stream, reinterpret_cast(compression_dict.data()), + static_cast(compression_dict.size())); + if (st != Z_OK) { + deflateEnd(&_stream); + return false; + } + } + + // Get an upper bound on the compressed size. 
+ size_t upper_bound = + deflateBound(&_stream, static_cast(length)); + output->resize(output_header_len + upper_bound); + + // Compress the input, and put compressed data in output. + _stream.next_in = (Bytef*)input; + _stream.avail_in = static_cast(length); + + // Initialize the output size. + _stream.avail_out = static_cast(upper_bound); + _stream.next_out = reinterpret_cast(&(*output)[output_header_len]); + + bool compressed = false; + st = deflate(&_stream, Z_FINISH); + if (st == Z_STREAM_END) { + compressed = true; + output->resize(output->size() - _stream.avail_out); + } + // The only return value we really care about is Z_STREAM_END. + // Z_OK means insufficient output space. This means the compression is + // bigger than decompressed size. Just fail the compression in that case. + + deflateEnd(&_stream); + return compressed; +#else + (void)info; + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. +inline CacheAllocationPtr Zlib_Uncompress( + const UncompressionInfo& info, const char* input_data, size_t input_length, + size_t* uncompressed_size, uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr, int windowBits = -14) { +#ifdef ZLIB + uint32_t output_len = 0; + if (compress_format_version == 2) { + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + } else { + // Assume the decompressed data size will 5x of compressed size, but round + // to the page size + size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096; + output_len = static_cast( + std::min(proposed_output_len, + static_cast(std::numeric_limits::max()))); + } + + z_stream _stream; + memset(&_stream, 0, sizeof(z_stream)); + + // For raw inflate, the windowBits should be -8..-15. + // If windowBits is bigger than zero, it will use either zlib + // header or gzip header. Adding 32 to it will do automatic detection. + int st = + inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits); + if (st != Z_OK) { + return nullptr; + } + + const Slice& compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + // Initialize the compression library's dictionary + st = inflateSetDictionary( + &_stream, reinterpret_cast(compression_dict.data()), + static_cast(compression_dict.size())); + if (st != Z_OK) { + return nullptr; + } + } + + _stream.next_in = (Bytef*)input_data; + _stream.avail_in = static_cast(input_length); + + auto output = AllocateBlock(output_len, allocator); + + _stream.next_out = (Bytef*)output.get(); + _stream.avail_out = static_cast(output_len); + + bool done = false; + while (!done) { + st = inflate(&_stream, Z_SYNC_FLUSH); + switch (st) { + case Z_STREAM_END: + done = true; + break; + case Z_OK: { + // No output space. Increase the output space by 20%. + // We should never run out of output space if + // compress_format_version == 2 + assert(compress_format_version != 2); + size_t old_sz = output_len; + uint32_t output_len_delta = output_len / 5; + output_len += output_len_delta < 10 ? 
10 : output_len_delta; + auto tmp = AllocateBlock(output_len, allocator); + memcpy(tmp.get(), output.get(), old_sz); + output = std::move(tmp); + + // Set more output. + _stream.next_out = (Bytef*)(output.get() + old_sz); + _stream.avail_out = static_cast(output_len - old_sz); + break; + } + case Z_BUF_ERROR: + default: + inflateEnd(&_stream); + return nullptr; + } + } + + // If we encoded decompressed block size, we should have no bytes left + assert(compress_format_version != 2 || _stream.avail_out == 0); + assert(output_len >= _stream.avail_out); + *uncompressed_size = output_len - _stream.avail_out; + inflateEnd(&_stream); + return output; +#else + (void)info; + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)compress_format_version; + (void)allocator; + (void)windowBits; + return nullptr; +#endif +} + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +inline bool BZip2_Compress(const CompressionInfo& /*info*/, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef BZIP2 + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + size_t output_header_len = 0; + if (compress_format_version == 2) { + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } + // Resize output to be the plain data length. + // This may not be big enough if the compression actually expands data. + output->resize(output_header_len + length); + + bz_stream _stream; + memset(&_stream, 0, sizeof(bz_stream)); + + // Block size 1 is 100K. + // 0 is for silent. + // 30 is the default workFactor + int st = BZ2_bzCompressInit(&_stream, 1, 0, 30); + if (st != BZ_OK) { + return false; + } + + // Compress the input, and put compressed data in output. + _stream.next_in = (char*)input; + _stream.avail_in = static_cast(length); + + // Initialize the output size. + _stream.avail_out = static_cast(length); + _stream.next_out = reinterpret_cast(&(*output)[output_header_len]); + + bool compressed = false; + st = BZ2_bzCompress(&_stream, BZ_FINISH); + if (st == BZ_STREAM_END) { + compressed = true; + output->resize(output->size() - _stream.avail_out); + } + // The only return value we really care about is BZ_STREAM_END. + // BZ_FINISH_OK means insufficient output space. This means the compression + // is bigger than decompressed size. Just fail the compression in that case. 
+ + BZ2_bzCompressEnd(&_stream); + return compressed; +#else + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// compress_format_version == 1 -- decompressed size is not included in the +// block header +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +inline CacheAllocationPtr BZip2_Uncompress( + const char* input_data, size_t input_length, size_t* uncompressed_size, + uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) { +#ifdef BZIP2 + uint32_t output_len = 0; + if (compress_format_version == 2) { + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + } else { + // Assume the decompressed data size will 5x of compressed size, but round + // to the next page size + size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096; + output_len = static_cast( + std::min(proposed_output_len, + static_cast(std::numeric_limits::max()))); + } + + bz_stream _stream; + memset(&_stream, 0, sizeof(bz_stream)); + + int st = BZ2_bzDecompressInit(&_stream, 0, 0); + if (st != BZ_OK) { + return nullptr; + } + + _stream.next_in = (char*)input_data; + _stream.avail_in = static_cast(input_length); + + auto output = AllocateBlock(output_len, allocator); + + _stream.next_out = (char*)output.get(); + _stream.avail_out = static_cast(output_len); + + bool done = false; + while (!done) { + st = BZ2_bzDecompress(&_stream); + switch (st) { + case BZ_STREAM_END: + done = true; + break; + case BZ_OK: { + // No output space. Increase the output space by 20%. + // We should never run out of output space if + // compress_format_version == 2 + assert(compress_format_version != 2); + uint32_t old_sz = output_len; + output_len = output_len * 1.2; + auto tmp = AllocateBlock(output_len, allocator); + memcpy(tmp.get(), output.get(), old_sz); + output = std::move(tmp); + + // Set more output. + _stream.next_out = (char*)(output.get() + old_sz); + _stream.avail_out = static_cast(output_len - old_sz); + break; + } + default: + BZ2_bzDecompressEnd(&_stream); + return nullptr; + } + } + + // If we encoded decompressed block size, we should have no bytes left + assert(compress_format_version != 2 || _stream.avail_out == 0); + assert(output_len >= _stream.avail_out); + *uncompressed_size = output_len - _stream.avail_out; + BZ2_bzDecompressEnd(&_stream); + return output; +#else + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)compress_format_version; + (void)allocator; + return nullptr; +#endif +} + +// compress_format_version == 1 -- decompressed size is included in the +// block header using memcpy, which makes database non-portable) +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. 
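// The comments above and below describe the two block-header layouts. This
// stand-alone sketch builds both headers for a given decompressed length so
// the difference is concrete; the varint writer is a local re-implementation
// of the wire format used by PutVarint32 in util/coding.h, and both function
// names are illustrative only.
#include <cstdint>
#include <cstring>
#include <string>

// 7 bits per byte, least-significant group first, MSB set on continuation.
void AppendVarint32Sketch(std::string* dst, uint32_t v) {
  while (v >= 0x80) {
    dst->push_back(static_cast<char>((v & 0x7f) | 0x80));
    v >>= 7;
  }
  dst->push_back(static_cast<char>(v));
}

std::string MakeSizeHeaderSketch(size_t decompressed_size,
                                 uint32_t compress_format_version) {
  std::string header;
  if (compress_format_version == 2) {
    // Version 2: varint32 of the decompressed size, read back by
    // compression::GetDecompressedSizeInfo().
    AppendVarint32Sketch(&header, static_cast<uint32_t>(decompressed_size));
  } else {
    // Version 1 (LZ4 legacy path): raw memcpy of a size_t, so the header is
    // 8 bytes on LP64 and depends on endianness -- the "non-portable" case.
    header.resize(sizeof(decompressed_size));
    memcpy(&header[0], &decompressed_size, sizeof(decompressed_size));
  }
  return header;
}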
+inline bool LZ4_Compress(const CompressionInfo& info, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef LZ4 + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = 0; + if (compress_format_version == 2) { + // new encoding, using varint32 to store size information + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } else { + // legacy encoding, which is not really portable (depends on big/little + // endianness) + output_header_len = 8; + output->resize(output_header_len); + char* p = const_cast(output->c_str()); + memcpy(p, &length, sizeof(length)); + } + int compress_bound = LZ4_compressBound(static_cast(length)); + output->resize(static_cast(output_header_len + compress_bound)); + + int outlen; +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + LZ4_stream_t* stream = LZ4_createStream(); + Slice compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + LZ4_loadDict(stream, compression_dict.data(), + static_cast(compression_dict.size())); + } +#if LZ4_VERSION_NUMBER >= 10700 // r129+ + outlen = + LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len], + static_cast(length), compress_bound, 1); +#else // up to r128 + outlen = LZ4_compress_limitedOutput_continue( + stream, input, &(*output)[output_header_len], static_cast(length), + compress_bound); +#endif + LZ4_freeStream(stream); +#else // up to r123 + outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len], + static_cast(length), compress_bound); +#endif // LZ4_VERSION_NUMBER >= 10400 + + if (outlen == 0) { + return false; + } + output->resize(static_cast(output_header_len + outlen)); + return true; +#else // LZ4 + (void)info; + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// compress_format_version == 1 -- decompressed size is included in the +// block header using memcpy, which makes database non-portable) +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. 
+inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info, + const char* input_data, + size_t input_length, + size_t* uncompressed_size, + uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr) { +#ifdef LZ4 + uint32_t output_len = 0; + if (compress_format_version == 2) { + // new encoding, using varint32 to store size information + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + } else { + // legacy encoding, which is not really portable (depends on big/little + // endianness) + if (input_length < 8) { + return nullptr; + } + if (port::kLittleEndian) { + memcpy(&output_len, input_data, sizeof(output_len)); + } else { + memcpy(&output_len, input_data + 4, sizeof(output_len)); + } + input_length -= 8; + input_data += 8; + } + + auto output = AllocateBlock(output_len, allocator); + + int decompress_bytes = 0; + +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + LZ4_streamDecode_t* stream = LZ4_createStreamDecode(); + const Slice& compression_dict = info.dict().GetRawDict(); + if (compression_dict.size()) { + LZ4_setStreamDecode(stream, compression_dict.data(), + static_cast(compression_dict.size())); + } + decompress_bytes = LZ4_decompress_safe_continue( + stream, input_data, output.get(), static_cast(input_length), + static_cast(output_len)); + LZ4_freeStreamDecode(stream); +#else // up to r123 + decompress_bytes = LZ4_decompress_safe(input_data, output.get(), + static_cast(input_length), + static_cast(output_len)); +#endif // LZ4_VERSION_NUMBER >= 10400 + + if (decompress_bytes < 0) { + return nullptr; + } + assert(decompress_bytes == static_cast(output_len)); + *uncompressed_size = decompress_bytes; + return output; +#else // LZ4 + (void)info; + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)compress_format_version; + (void)allocator; + return nullptr; +#endif +} + +// compress_format_version == 1 -- decompressed size is included in the +// block header using memcpy, which makes database non-portable) +// compress_format_version == 2 -- decompressed size is included in the block +// header in varint32 format +// @param compression_dict Data for presetting the compression library's +// dictionary. 
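// For reference before the LZ4HC variant below: a stand-alone sketch of the
// liblz4 stream-dictionary round trip that LZ4_Compress()/LZ4_Uncompress()
// above drive, using the r129+ API names. It assumes liblz4 headers and
// library are available; the function name and buffer handling are
// illustrative only, not the RocksDB code path.
#include <lz4.h>

#include <string>
#include <vector>

bool Lz4DictRoundTripSketch(const std::string& dict, const std::string& input,
                            std::string* restored) {
  // Compress with the dictionary preloaded into an LZ4_stream_t.
  std::vector<char> compressed(
      LZ4_compressBound(static_cast<int>(input.size())));
  LZ4_stream_t* cstream = LZ4_createStream();
  if (!dict.empty()) {
    LZ4_loadDict(cstream, dict.data(), static_cast<int>(dict.size()));
  }
  int clen = LZ4_compress_fast_continue(
      cstream, input.data(), compressed.data(), static_cast<int>(input.size()),
      static_cast<int>(compressed.size()), /*acceleration=*/1);
  LZ4_freeStream(cstream);
  if (clen <= 0) {
    return false;
  }

  // Decompress with the same dictionary preset on the decode stream.
  restored->resize(input.size());
  LZ4_streamDecode_t* dstream = LZ4_createStreamDecode();
  if (!dict.empty()) {
    LZ4_setStreamDecode(dstream, dict.data(), static_cast<int>(dict.size()));
  }
  int dlen = LZ4_decompress_safe_continue(dstream, compressed.data(),
                                          &(*restored)[0], clen,
                                          static_cast<int>(restored->size()));
  LZ4_freeStreamDecode(dstream);
  return dlen == static_cast<int>(input.size());
}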
+inline bool LZ4HC_Compress(const CompressionInfo& info, + uint32_t compress_format_version, const char* input, + size_t length, ::std::string* output) { +#ifdef LZ4 + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = 0; + if (compress_format_version == 2) { + // new encoding, using varint32 to store size information + output_header_len = compression::PutDecompressedSizeInfo( + output, static_cast(length)); + } else { + // legacy encoding, which is not really portable (depends on big/little + // endianness) + output_header_len = 8; + output->resize(output_header_len); + char* p = const_cast(output->c_str()); + memcpy(p, &length, sizeof(length)); + } + int compress_bound = LZ4_compressBound(static_cast(length)); + output->resize(static_cast(output_header_len + compress_bound)); + + int outlen; + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + level = 0; // lz4hc.h says any value < 1 will be sanitized to default + } else { + level = info.options().level; + } +#if LZ4_VERSION_NUMBER >= 10400 // r124+ + LZ4_streamHC_t* stream = LZ4_createStreamHC(); + LZ4_resetStreamHC(stream, level); + Slice compression_dict = info.dict().GetRawDict(); + const char* compression_dict_data = + compression_dict.size() > 0 ? compression_dict.data() : nullptr; + size_t compression_dict_size = compression_dict.size(); + if (compression_dict_data != nullptr) { + LZ4_loadDictHC(stream, compression_dict_data, + static_cast(compression_dict_size)); + } + +#if LZ4_VERSION_NUMBER >= 10700 // r129+ + outlen = + LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len], + static_cast(length), compress_bound); +#else // r124-r128 + outlen = LZ4_compressHC_limitedOutput_continue( + stream, input, &(*output)[output_header_len], static_cast(length), + compress_bound); +#endif // LZ4_VERSION_NUMBER >= 10700 + LZ4_freeStreamHC(stream); + +#elif LZ4_VERSION_MAJOR // r113-r123 + outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len], + static_cast(length), + compress_bound, level); +#else // up to r112 + outlen = + LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len], + static_cast(length), compress_bound); +#endif // LZ4_VERSION_NUMBER >= 10400 + + if (outlen == 0) { + return false; + } + output->resize(static_cast(output_header_len + outlen)); + return true; +#else // LZ4 + (void)info; + (void)compress_format_version; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +#ifdef XPRESS +inline bool XPRESS_Compress(const char* input, size_t length, + std::string* output) { + return port::xpress::Compress(input, length, output); +} +#else +inline bool XPRESS_Compress(const char* /*input*/, size_t /*length*/, + std::string* /*output*/) { + return false; +} +#endif + +#ifdef XPRESS +inline char* XPRESS_Uncompress(const char* input_data, size_t input_length, + size_t* uncompressed_size) { + return port::xpress::Decompress(input_data, input_length, uncompressed_size); +} +#else +inline char* XPRESS_Uncompress(const char* /*input_data*/, + size_t /*input_length*/, + size_t* /*uncompressed_size*/) { + return nullptr; +} +#endif + +inline bool ZSTD_Compress(const CompressionInfo& info, const char* input, + size_t length, ::std::string* output) { +#ifdef ZSTD + if (length > std::numeric_limits::max()) { + // Can't compress more than 4GB + return false; + } + + size_t output_header_len = compression::PutDecompressedSizeInfo( + output, 
static_cast(length)); + + size_t compressBound = ZSTD_compressBound(length); + output->resize(static_cast(output_header_len + compressBound)); + size_t outlen = 0; + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } else { + level = info.options().level; + } +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + ZSTD_CCtx* context = info.context().ZSTDPreallocCtx(); + assert(context != nullptr); +#if ZSTD_VERSION_NUMBER >= 700 // v0.7.0+ + if (info.dict().GetDigestedZstdCDict() != nullptr) { + outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len], + compressBound, input, length, + info.dict().GetDigestedZstdCDict()); + } +#endif // ZSTD_VERSION_NUMBER >= 700 + if (outlen == 0) { + outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len], + compressBound, input, length, + info.dict().GetRawDict().data(), + info.dict().GetRawDict().size(), level); + } +#else // up to v0.4.x + outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input, + length, level); +#endif // ZSTD_VERSION_NUMBER >= 500 + if (outlen == 0) { + return false; + } + output->resize(output_header_len + outlen); + return true; +#else // ZSTD + (void)info; + (void)input; + (void)length; + (void)output; + return false; +#endif +} + +// @param compression_dict Data for presetting the compression library's +// dictionary. +inline CacheAllocationPtr ZSTD_Uncompress( + const UncompressionInfo& info, const char* input_data, size_t input_length, + size_t* uncompressed_size, MemoryAllocator* allocator = nullptr) { +#ifdef ZSTD + uint32_t output_len = 0; + if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, + &output_len)) { + return nullptr; + } + + auto output = AllocateBlock(output_len, allocator); + size_t actual_output_length = 0; +#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ + ZSTD_DCtx* context = info.context().GetZSTDContext(); + assert(context != nullptr); +#ifdef ROCKSDB_ZSTD_DDICT + if (info.dict().GetDigestedZstdDDict() != nullptr) { + actual_output_length = ZSTD_decompress_usingDDict( + context, output.get(), output_len, input_data, input_length, + info.dict().GetDigestedZstdDDict()); + } +#endif // ROCKSDB_ZSTD_DDICT + if (actual_output_length == 0) { + actual_output_length = ZSTD_decompress_usingDict( + context, output.get(), output_len, input_data, input_length, + info.dict().GetRawDict().data(), info.dict().GetRawDict().size()); + } +#else // up to v0.4.x + (void)info; + actual_output_length = + ZSTD_decompress(output.get(), output_len, input_data, input_length); +#endif // ZSTD_VERSION_NUMBER >= 500 + assert(actual_output_length == output_len); + *uncompressed_size = actual_output_length; + return output; +#else // ZSTD + (void)info; + (void)input_data; + (void)input_length; + (void)uncompressed_size; + (void)allocator; + return nullptr; +#endif +} + +inline bool ZSTD_TrainDictionarySupported() { +#ifdef ZSTD + // Dictionary trainer is available since v0.6.1 for static linking, but not + // available for dynamic linking until v1.1.3. For now we enable the feature + // in v1.1.3+ only. 
+ return (ZSTD_versionNumber() >= 10103); +#else + return false; +#endif +} + +inline std::string ZSTD_TrainDictionary(const std::string& samples, + const std::vector& sample_lens, + size_t max_dict_bytes) { + // Dictionary trainer is available since v0.6.1 for static linking, but not + // available for dynamic linking until v1.1.3. For now we enable the feature + // in v1.1.3+ only. +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ + assert(samples.empty() == sample_lens.empty()); + if (samples.empty()) { + return ""; + } + std::string dict_data(max_dict_bytes, '\0'); + size_t dict_len = ZDICT_trainFromBuffer( + &dict_data[0], max_dict_bytes, &samples[0], &sample_lens[0], + static_cast(sample_lens.size())); + if (ZDICT_isError(dict_len)) { + return ""; + } + assert(dict_len <= max_dict_bytes); + dict_data.resize(dict_len); + return dict_data; +#else // up to v1.1.2 + assert(false); + (void)samples; + (void)sample_lens; + (void)max_dict_bytes; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10103 +} + +inline std::string ZSTD_TrainDictionary(const std::string& samples, + size_t sample_len_shift, + size_t max_dict_bytes) { + // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable + // only since v0.8.0. For now we enable the feature in stable versions only. +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ + // skips potential partial sample at the end of "samples" + size_t num_samples = samples.size() >> sample_len_shift; + std::vector sample_lens(num_samples, size_t(1) << sample_len_shift); + return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes); +#else // up to v1.1.2 + assert(false); + (void)samples; + (void)sample_len_shift; + (void)max_dict_bytes; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10103 +} + +inline bool ZSTD_FinalizeDictionarySupported() { +#ifdef ZSTD + // ZDICT_finalizeDictionary API is stable since v1.4.5 + return (ZSTD_versionNumber() >= 10405); +#else + return false; +#endif +} + +inline std::string ZSTD_FinalizeDictionary( + const std::string& samples, const std::vector& sample_lens, + size_t max_dict_bytes, int level) { + // ZDICT_finalizeDictionary is stable since version v1.4.5 +#if ZSTD_VERSION_NUMBER >= 10405 // v1.4.5+ + assert(samples.empty() == sample_lens.empty()); + if (samples.empty()) { + return ""; + } + if (level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } + std::string dict_data(max_dict_bytes, '\0'); + size_t dict_len = ZDICT_finalizeDictionary( + dict_data.data(), max_dict_bytes, samples.data(), + std::min(static_cast(samples.size()), max_dict_bytes), + samples.data(), sample_lens.data(), + static_cast(sample_lens.size()), + {level, 0 /* notificationLevel */, 0 /* dictID */}); + if (ZDICT_isError(dict_len)) { + return ""; + } else { + assert(dict_len <= max_dict_bytes); + dict_data.resize(dict_len); + return dict_data; + } +#else // up to v1.4.4 + (void)samples; + (void)sample_lens; + (void)max_dict_bytes; + (void)level; + return ""; +#endif // ZSTD_VERSION_NUMBER >= 10405 +} + +inline bool CompressData(const Slice& raw, + const CompressionInfo& compression_info, + uint32_t compress_format_version, + std::string* compressed_output) { + bool ret = false; + + // Will return compressed block contents if (1) the compression method is + // supported in this platform and (2) the compression rate is "good enough". 
+ switch (compression_info.type()) { + case kSnappyCompression: + ret = Snappy_Compress(compression_info, raw.data(), raw.size(), + compressed_output); + break; + case kZlibCompression: + ret = Zlib_Compress(compression_info, compress_format_version, raw.data(), + raw.size(), compressed_output); + break; + case kBZip2Compression: + ret = BZip2_Compress(compression_info, compress_format_version, + raw.data(), raw.size(), compressed_output); + break; + case kLZ4Compression: + ret = LZ4_Compress(compression_info, compress_format_version, raw.data(), + raw.size(), compressed_output); + break; + case kLZ4HCCompression: + ret = LZ4HC_Compress(compression_info, compress_format_version, + raw.data(), raw.size(), compressed_output); + break; + case kXpressCompression: + ret = XPRESS_Compress(raw.data(), raw.size(), compressed_output); + break; + case kZSTD: + case kZSTDNotFinalCompression: + ret = ZSTD_Compress(compression_info, raw.data(), raw.size(), + compressed_output); + break; + default: + // Do not recognize this compression type + break; + } + + TEST_SYNC_POINT_CALLBACK("CompressData:TamperWithReturnValue", + static_cast(&ret)); + + return ret; +} + +inline CacheAllocationPtr UncompressData( + const UncompressionInfo& uncompression_info, const char* data, size_t n, + size_t* uncompressed_size, uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr) { + switch (uncompression_info.type()) { + case kSnappyCompression: + return Snappy_Uncompress(data, n, uncompressed_size, allocator); + case kZlibCompression: + return Zlib_Uncompress(uncompression_info, data, n, uncompressed_size, + compress_format_version, allocator); + case kBZip2Compression: + return BZip2_Uncompress(data, n, uncompressed_size, + compress_format_version, allocator); + case kLZ4Compression: + case kLZ4HCCompression: + return LZ4_Uncompress(uncompression_info, data, n, uncompressed_size, + compress_format_version, allocator); + case kXpressCompression: + // XPRESS allocates memory internally, thus no support for custom + // allocator. + return CacheAllocationPtr(XPRESS_Uncompress(data, n, uncompressed_size)); + case kZSTD: + case kZSTDNotFinalCompression: + return ZSTD_Uncompress(uncompression_info, data, n, uncompressed_size, + allocator); + default: + return CacheAllocationPtr(); + } +} + +// Records the compression type for subsequent WAL records. +class CompressionTypeRecord { + public: + explicit CompressionTypeRecord(CompressionType compression_type) + : compression_type_(compression_type) {} + + CompressionType GetCompressionType() const { return compression_type_; } + + inline void EncodeTo(std::string* dst) const { + assert(dst != nullptr); + PutFixed32(dst, compression_type_); + } + + inline Status DecodeFrom(Slice* src) { + constexpr char class_name[] = "CompressionTypeRecord"; + + uint32_t val; + if (!GetFixed32(src, &val)) { + return Status::Corruption(class_name, + "Error decoding WAL compression type"); + } + CompressionType compression_type = static_cast(val); + if (!StreamingCompressionTypeSupported(compression_type)) { + return Status::Corruption(class_name, + "WAL compression type not supported"); + } + compression_type_ = compression_type; + return Status::OK(); + } + + inline std::string DebugString() const { + return "compression_type: " + CompressionTypeToString(compression_type_); + } + + private: + CompressionType compression_type_; +}; + +// Base class to implement compression for a stream of buffers. 
+// Instantiate an implementation of the class using Create() with the +// compression type and use Compress() repeatedly. +// The output buffer needs to be at least max_output_len. +// Call Reset() in between frame boundaries or in case of an error. +// NOTE: This class is not thread safe. +class StreamingCompress { + public: + StreamingCompress(CompressionType compression_type, + const CompressionOptions& opts, + uint32_t compress_format_version, size_t max_output_len) + : compression_type_(compression_type), + opts_(opts), + compress_format_version_(compress_format_version), + max_output_len_(max_output_len) {} + virtual ~StreamingCompress() = default; + // compress should be called repeatedly with the same input till the method + // returns 0 + // Parameters: + // input - buffer to compress + // input_size - size of input buffer + // output - compressed buffer allocated by caller, should be at least + // max_output_len + // output_size - size of the output buffer + // Returns -1 for errors, the remaining size of the input buffer that needs to + // be compressed + virtual int Compress(const char* input, size_t input_size, char* output, + size_t* output_pos) = 0; + // static method to create object of a class inherited from StreamingCompress + // based on the actual compression type. + static StreamingCompress* Create(CompressionType compression_type, + const CompressionOptions& opts, + uint32_t compress_format_version, + size_t max_output_len); + virtual void Reset() = 0; + + protected: + const CompressionType compression_type_; + const CompressionOptions opts_; + const uint32_t compress_format_version_; + const size_t max_output_len_; +}; + +// Base class to uncompress a stream of compressed buffers. +// Instantiate an implementation of the class using Create() with the +// compression type and use Uncompress() repeatedly. +// The output buffer needs to be at least max_output_len. +// Call Reset() in between frame boundaries or in case of an error. +// NOTE: This class is not thread safe. +class StreamingUncompress { + public: + StreamingUncompress(CompressionType compression_type, + uint32_t compress_format_version, size_t max_output_len) + : compression_type_(compression_type), + compress_format_version_(compress_format_version), + max_output_len_(max_output_len) {} + virtual ~StreamingUncompress() = default; + // uncompress should be called again with the same input if output_size is + // equal to max_output_len or with the next input fragment. + // Parameters: + // input - buffer to uncompress + // input_size - size of input buffer + // output - uncompressed buffer allocated by caller, should be at least + // max_output_len + // output_size - size of the output buffer + // Returns -1 for errors, remaining input to be processed otherwise. 
+ virtual int Uncompress(const char* input, size_t input_size, char* output, + size_t* output_pos) = 0; + static StreamingUncompress* Create(CompressionType compression_type, + uint32_t compress_format_version, + size_t max_output_len); + virtual void Reset() = 0; + + protected: + CompressionType compression_type_; + uint32_t compress_format_version_; + size_t max_output_len_; +}; + +class ZSTDStreamingCompress final : public StreamingCompress { + public: + explicit ZSTDStreamingCompress(const CompressionOptions& opts, + uint32_t compress_format_version, + size_t max_output_len) + : StreamingCompress(kZSTD, opts, compress_format_version, + max_output_len) { +#ifdef ZSTD_STREAMING + cctx_ = ZSTD_createCCtx(); + // Each compressed frame will have a checksum + ZSTD_CCtx_setParameter(cctx_, ZSTD_c_checksumFlag, 1); + assert(cctx_ != nullptr); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif + } + ~ZSTDStreamingCompress() override { +#ifdef ZSTD_STREAMING + ZSTD_freeCCtx(cctx_); +#endif + } + int Compress(const char* input, size_t input_size, char* output, + size_t* output_pos) override; + void Reset() override; +#ifdef ZSTD_STREAMING + ZSTD_CCtx* cctx_; + ZSTD_inBuffer input_buffer_; +#endif +}; + +class ZSTDStreamingUncompress final : public StreamingUncompress { + public: + explicit ZSTDStreamingUncompress(uint32_t compress_format_version, + size_t max_output_len) + : StreamingUncompress(kZSTD, compress_format_version, max_output_len) { +#ifdef ZSTD_STREAMING + dctx_ = ZSTD_createDCtx(); + assert(dctx_ != nullptr); + input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; +#endif + } + ~ZSTDStreamingUncompress() override { +#ifdef ZSTD_STREAMING + ZSTD_freeDCtx(dctx_); +#endif + } + int Uncompress(const char* input, size_t input_size, char* output, + size_t* output_size) override; + void Reset() override; + + private: +#ifdef ZSTD_STREAMING + ZSTD_DCtx* dctx_; + ZSTD_inBuffer input_buffer_; +#endif +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/compression_context_cache.cc b/src/rocksdb/util/compression_context_cache.cc new file mode 100644 index 000000000..52c3fac72 --- /dev/null +++ b/src/rocksdb/util/compression_context_cache.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// + +#include "util/compression_context_cache.h" + +#include + +#include "util/compression.h" +#include "util/core_local.h" + +namespace ROCKSDB_NAMESPACE { +namespace compression_cache { + +void* const SentinelValue = nullptr; +// Cache ZSTD uncompression contexts for reads +// if needed we can add ZSTD compression context caching +// which is currently is not done since BlockBasedTableBuilder +// simply creates one compression context per new SST file. 
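// The struct below parks the cached ZSTD context behind an atomic pointer and
// swaps in a sentinel while the context is lent out. This is a minimal
// stand-alone sketch of that borrow/return protocol, with a plain int standing
// in for the context; all names here are illustrative, not the RocksDB types.
#include <atomic>
#include <cassert>

struct CachedSlotSketch {
  int payload_ = 0;                     // stands in for the cached ZSTD context
  std::atomic<void*> slot_{&payload_};  // == &payload_ when available

  // Winner of the race gets the cached payload; losers get nullptr and are
  // expected to create a one-time-use instance instead, which is what
  // GetUncompressData() below does.
  int* TryBorrow() {
    void* expected = &payload_;
    if (slot_.compare_exchange_strong(expected, nullptr /* sentinel */)) {
      return &payload_;
    }
    return nullptr;
  }

  // Put the payload back into circulation; only valid after a successful
  // TryBorrow(), which is what the assert checks.
  void Return() {
    void* prev = slot_.exchange(&payload_);
    assert(prev == nullptr);
    (void)prev;
  }
};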
+struct ZSTDCachedData { + // We choose to cache the below structure instead of a ptr + // because we want to avoid a) native types leak b) make + // cache use transparent for the user + ZSTDUncompressCachedData uncomp_cached_data_; + std::atomic zstd_uncomp_sentinel_; + + char + padding[(CACHE_LINE_SIZE - + (sizeof(ZSTDUncompressCachedData) + sizeof(std::atomic)) % + CACHE_LINE_SIZE)]; // unused padding field + + ZSTDCachedData() : zstd_uncomp_sentinel_(&uncomp_cached_data_) {} + ZSTDCachedData(const ZSTDCachedData&) = delete; + ZSTDCachedData& operator=(const ZSTDCachedData&) = delete; + + ZSTDUncompressCachedData GetUncompressData(int64_t idx) { + ZSTDUncompressCachedData result; + void* expected = &uncomp_cached_data_; + if (zstd_uncomp_sentinel_.compare_exchange_strong(expected, + SentinelValue)) { + uncomp_cached_data_.CreateIfNeeded(); + result.InitFromCache(uncomp_cached_data_, idx); + } else { + // Creates one time use data + result.CreateIfNeeded(); + } + return result; + } + // Return the entry back into circulation + // This is executed only when we successfully obtained + // in the first place + void ReturnUncompressData() { + if (zstd_uncomp_sentinel_.exchange(&uncomp_cached_data_) != SentinelValue) { + // Means we are returning while not having it acquired. + assert(false); + } + } +}; +static_assert(sizeof(ZSTDCachedData) % CACHE_LINE_SIZE == 0, + "Expected CACHE_LINE_SIZE alignment"); +} // namespace compression_cache + +class CompressionContextCache::Rep { + public: + Rep() {} + ZSTDUncompressCachedData GetZSTDUncompressData() { + auto p = per_core_uncompr_.AccessElementAndIndex(); + int64_t idx = static_cast(p.second); + return p.first->GetUncompressData(idx); + } + void ReturnZSTDUncompressData(int64_t idx) { + assert(idx >= 0); + auto* cn = per_core_uncompr_.AccessAtCore(static_cast(idx)); + cn->ReturnUncompressData(); + } + + private: + CoreLocalArray per_core_uncompr_; +}; + +CompressionContextCache::CompressionContextCache() : rep_(new Rep()) {} + +CompressionContextCache* CompressionContextCache::Instance() { + static CompressionContextCache instance; + return &instance; +} + +void CompressionContextCache::InitSingleton() { Instance(); } + +ZSTDUncompressCachedData +CompressionContextCache::GetCachedZSTDUncompressData() { + return rep_->GetZSTDUncompressData(); +} + +void CompressionContextCache::ReturnCachedZSTDUncompressData(int64_t idx) { + rep_->ReturnZSTDUncompressData(idx); +} + +CompressionContextCache::~CompressionContextCache() { delete rep_; } + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/compression_context_cache.h b/src/rocksdb/util/compression_context_cache.h new file mode 100644 index 000000000..7b7b2d507 --- /dev/null +++ b/src/rocksdb/util/compression_context_cache.h @@ -0,0 +1,47 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// + +// Compression context cache allows to cache compression/uncompression contexts +// This helps with Random Read latencies and reduces CPU utilization +// Caching is implemented using CoreLocal facility. 
Compression/Uncompression +// instances are cached on a per core basis using CoreLocalArray. A borrowed +// instance is atomically replaced with a sentinel value for the time of being +// used. If it turns out that another thread is already makes use of the +// instance we still create one on the heap which is later is destroyed. + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +class ZSTDUncompressCachedData; + +class CompressionContextCache { + public: + // Singleton + static CompressionContextCache* Instance(); + static void InitSingleton(); + CompressionContextCache(const CompressionContextCache&) = delete; + CompressionContextCache& operator=(const CompressionContextCache&) = delete; + + ZSTDUncompressCachedData GetCachedZSTDUncompressData(); + void ReturnCachedZSTDUncompressData(int64_t idx); + + private: + // Singleton + CompressionContextCache(); + ~CompressionContextCache(); + + class Rep; + Rep* rep_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/concurrent_task_limiter_impl.cc b/src/rocksdb/util/concurrent_task_limiter_impl.cc new file mode 100644 index 000000000..a0fc7331f --- /dev/null +++ b/src/rocksdb/util/concurrent_task_limiter_impl.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/concurrent_task_limiter_impl.h" + +#include "rocksdb/concurrent_task_limiter.h" + +namespace ROCKSDB_NAMESPACE { + +ConcurrentTaskLimiterImpl::ConcurrentTaskLimiterImpl( + const std::string& name, int32_t max_outstanding_task) + : name_(name), + max_outstanding_tasks_{max_outstanding_task}, + outstanding_tasks_{0} {} + +ConcurrentTaskLimiterImpl::~ConcurrentTaskLimiterImpl() { + assert(outstanding_tasks_ == 0); +} + +const std::string& ConcurrentTaskLimiterImpl::GetName() const { return name_; } + +void ConcurrentTaskLimiterImpl::SetMaxOutstandingTask(int32_t limit) { + max_outstanding_tasks_.store(limit, std::memory_order_relaxed); +} + +void ConcurrentTaskLimiterImpl::ResetMaxOutstandingTask() { + max_outstanding_tasks_.store(-1, std::memory_order_relaxed); +} + +int32_t ConcurrentTaskLimiterImpl::GetOutstandingTask() const { + return outstanding_tasks_.load(std::memory_order_relaxed); +} + +std::unique_ptr ConcurrentTaskLimiterImpl::GetToken( + bool force) { + int32_t limit = max_outstanding_tasks_.load(std::memory_order_relaxed); + int32_t tasks = outstanding_tasks_.load(std::memory_order_relaxed); + // force = true, bypass the throttle. + // limit < 0 means unlimited tasks. 
+ while (force || limit < 0 || tasks < limit) { + if (outstanding_tasks_.compare_exchange_weak(tasks, tasks + 1)) { + return std::unique_ptr(new TaskLimiterToken(this)); + } + } + return nullptr; +} + +ConcurrentTaskLimiter* NewConcurrentTaskLimiter(const std::string& name, + int32_t limit) { + return new ConcurrentTaskLimiterImpl(name, limit); +} + +TaskLimiterToken::~TaskLimiterToken() { + --limiter_->outstanding_tasks_; + assert(limiter_->outstanding_tasks_ >= 0); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/concurrent_task_limiter_impl.h b/src/rocksdb/util/concurrent_task_limiter_impl.h new file mode 100644 index 000000000..4952ae23a --- /dev/null +++ b/src/rocksdb/util/concurrent_task_limiter_impl.h @@ -0,0 +1,67 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include +#include + +#include "rocksdb/concurrent_task_limiter.h" +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +class TaskLimiterToken; + +class ConcurrentTaskLimiterImpl : public ConcurrentTaskLimiter { + public: + explicit ConcurrentTaskLimiterImpl(const std::string& name, + int32_t max_outstanding_task); + // No copying allowed + ConcurrentTaskLimiterImpl(const ConcurrentTaskLimiterImpl&) = delete; + ConcurrentTaskLimiterImpl& operator=(const ConcurrentTaskLimiterImpl&) = + delete; + + virtual ~ConcurrentTaskLimiterImpl(); + + virtual const std::string& GetName() const override; + + virtual void SetMaxOutstandingTask(int32_t limit) override; + + virtual void ResetMaxOutstandingTask() override; + + virtual int32_t GetOutstandingTask() const override; + + // Request token for adding a new task. + // If force == true, it requests a token bypassing throttle. + // Returns nullptr if it got throttled. + virtual std::unique_ptr GetToken(bool force); + + private: + friend class TaskLimiterToken; + + std::string name_; + std::atomic max_outstanding_tasks_; + std::atomic outstanding_tasks_; +}; + +class TaskLimiterToken { + public: + explicit TaskLimiterToken(ConcurrentTaskLimiterImpl* limiter) + : limiter_(limiter) {} + ~TaskLimiterToken(); + + private: + ConcurrentTaskLimiterImpl* limiter_; + + // no copying allowed + TaskLimiterToken(const TaskLimiterToken&) = delete; + void operator=(const TaskLimiterToken&) = delete; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/core_local.h b/src/rocksdb/util/core_local.h new file mode 100644 index 000000000..b444a1152 --- /dev/null +++ b/src/rocksdb/util/core_local.h @@ -0,0 +1,83 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "port/likely.h" +#include "port/port.h" +#include "util/random.h" + +namespace ROCKSDB_NAMESPACE { + +// An array of core-local values. Ideally the value type, T, is cache aligned to +// prevent false sharing. 
+template +class CoreLocalArray { + public: + CoreLocalArray(); + + size_t Size() const; + // returns pointer to the element corresponding to the core that the thread + // currently runs on. + T* Access() const; + // same as above, but also returns the core index, which the client can cache + // to reduce how often core ID needs to be retrieved. Only do this if some + // inaccuracy is tolerable, as the thread may migrate to a different core. + std::pair AccessElementAndIndex() const; + // returns pointer to element for the specified core index. This can be used, + // e.g., for aggregation, or if the client caches core index. + T* AccessAtCore(size_t core_idx) const; + + private: + std::unique_ptr data_; + int size_shift_; +}; + +template +CoreLocalArray::CoreLocalArray() { + int num_cpus = static_cast(std::thread::hardware_concurrency()); + // find a power of two >= num_cpus and >= 8 + size_shift_ = 3; + while (1 << size_shift_ < num_cpus) { + ++size_shift_; + } + data_.reset(new T[static_cast(1) << size_shift_]); +} + +template +size_t CoreLocalArray::Size() const { + return static_cast(1) << size_shift_; +} + +template +T* CoreLocalArray::Access() const { + return AccessElementAndIndex().first; +} + +template +std::pair CoreLocalArray::AccessElementAndIndex() const { + int cpuid = port::PhysicalCoreID(); + size_t core_idx; + if (UNLIKELY(cpuid < 0)) { + // cpu id unavailable, just pick randomly + core_idx = Random::GetTLSInstance()->Uniform(1 << size_shift_); + } else { + core_idx = static_cast(cpuid & ((1 << size_shift_) - 1)); + } + return {AccessAtCore(core_idx), core_idx}; +} + +template +T* CoreLocalArray::AccessAtCore(size_t core_idx) const { + assert(core_idx < static_cast(1) << size_shift_); + return &data_[core_idx]; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/coro_utils.h b/src/rocksdb/util/coro_utils.h new file mode 100644 index 000000000..5b4211135 --- /dev/null +++ b/src/rocksdb/util/coro_utils.h @@ -0,0 +1,112 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#if defined(USE_COROUTINES) +#include "folly/experimental/coro/Coroutine.h" +#include "folly/experimental/coro/Task.h" +#endif +#include "rocksdb/rocksdb_namespace.h" + +// This file has two sctions. The first section applies to all instances of +// header file inclusion and has an include guard. The second section is +// meant for multiple inclusions in the same source file, and is idempotent. +namespace ROCKSDB_NAMESPACE { + +#ifndef UTIL_CORO_UTILS_H_ +#define UTIL_CORO_UTILS_H_ + +#if defined(USE_COROUTINES) + +// The follwoing macros expand to regular and coroutine function +// declarations for a given function +#define DECLARE_SYNC_AND_ASYNC(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__); \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine(__VA_ARGS__); + +#define DECLARE_SYNC_AND_ASYNC_OVERRIDE(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__) override; \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine(__VA_ARGS__) \ + override; + +#define DECLARE_SYNC_AND_ASYNC_CONST(__ret_type__, __func_name__, ...) 
\ + __ret_type__ __func_name__(__VA_ARGS__) const; \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine(__VA_ARGS__) const; + +constexpr bool using_coroutines() { return true; } +#else // !USE_COROUTINES + +// The follwoing macros expand to a regular function declaration for a given +// function +#define DECLARE_SYNC_AND_ASYNC(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__); + +#define DECLARE_SYNC_AND_ASYNC_OVERRIDE(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__) override; + +#define DECLARE_SYNC_AND_ASYNC_CONST(__ret_type__, __func_name__, ...) \ + __ret_type__ __func_name__(__VA_ARGS__) const; + +constexpr bool using_coroutines() { return false; } +#endif // USE_COROUTINES +#endif // UTIL_CORO_UTILS_H_ + +// The following section of the file is meant to be included twice in a +// source file - once defining WITH_COROUTINES and once defining +// WITHOUT_COROUTINES +#undef DEFINE_SYNC_AND_ASYNC +#undef CO_AWAIT +#undef CO_RETURN + +#if defined(WITH_COROUTINES) && defined(USE_COROUTINES) + +// This macro should be used in the beginning of the function +// definition. The declaration should have been done using one of the +// DECLARE_SYNC_AND_ASYNC* macros. It expands to the return type and +// the function name with the Coroutine suffix. For example - +// DEFINE_SYNC_AND_ASYNC(int, foo)(bool bar) {} +// would expand to - +// folly::coro::Task fooCoroutine(bool bar) {} +#define DEFINE_SYNC_AND_ASYNC(__ret_type__, __func_name__) \ + folly::coro::Task<__ret_type__> __func_name__##Coroutine + +// This macro should be used to call a function that might be a +// coroutine. It expands to the correct function name and prefixes +// the co_await operator if necessary. For example - +// s = CO_AWAIT(foo)(true); +// if the code is compiled WITH_COROUTINES, would expand to +// s = co_await fooCoroutine(true); +// if compiled WITHOUT_COROUTINES, would expand to +// s = foo(true); +#define CO_AWAIT(__func_name__) co_await __func_name__##Coroutine + +#define CO_RETURN co_return + +#elif defined(WITHOUT_COROUTINES) + +// This macro should be used in the beginning of the function +// definition. The declaration should have been done using one of the +// DECLARE_SYNC_AND_ASYNC* macros. It expands to the return type and +// the function name without the Coroutine suffix. For example - +// DEFINE_SYNC_AND_ASYNC(int, foo)(bool bar) {} +// would expand to - +// int foo(bool bar) {} +#define DEFINE_SYNC_AND_ASYNC(__ret_type__, __func_name__) \ + __ret_type__ __func_name__ + +// This macro should be used to call a function that might be a +// coroutine. It expands to the correct function name and prefixes +// the co_await operator if necessary. For example - +// s = CO_AWAIT(foo)(true); +// if the code is compiled WITH_COROUTINES, would expand to +// s = co_await fooCoroutine(true); +// if compiled WITHOUT_COROUTINES, would expand to +// s = foo(true); +#define CO_AWAIT(__func_name__) __func_name__ + +#define CO_RETURN return + +#endif // DO_NOT_USE_COROUTINES +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/crc32c.cc b/src/rocksdb/util/crc32c.cc new file mode 100644 index 000000000..d71c71c2e --- /dev/null +++ b/src/rocksdb/util/crc32c.cc @@ -0,0 +1,1351 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// A portable implementation of crc32c, optimized to handle +// four bytes at a time. +#include "util/crc32c.h" + +#include + +#include +#include +#ifdef HAVE_SSE42 +#include +#include +#endif + +#include "port/lang.h" +#include "util/coding.h" +#include "util/crc32c_arm64.h" +#include "util/math.h" + +#ifdef __powerpc64__ +#include "util/crc32c_ppc.h" +#include "util/crc32c_ppc_constants.h" + +#if __linux__ +#ifdef ROCKSDB_AUXV_GETAUXVAL_PRESENT +#include +#endif + +#ifndef PPC_FEATURE2_VEC_CRYPTO +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#elif __FreeBSD__ +#include +#include +#include +#endif /* __linux__ */ + +#endif + +#if defined(HAVE_ARM64_CRC) +bool pmull_runtime_flag = false; +#endif + +namespace ROCKSDB_NAMESPACE { +namespace crc32c { + +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) +#ifdef __powerpc64__ +static int arch_ppc_crc32 = 0; +#endif /* __powerpc64__ */ +#endif + +static const uint32_t table0_[256] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, + 0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, + 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, + 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512, + 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, + 0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf, + 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, + 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, + 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, + 0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e, + 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, + 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4, + 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, + 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5, + 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, + 0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, + 0xd9f75af1, 0x2b9cd9f2, 
0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, + 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, + 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, + 0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, + 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, + 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351}; +static const uint32_t table1_[256] = { + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, + 0x69cf5132, 0x7a6dc945, 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, + 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, 0x3fc5f181, 0x2c6769f6, + 0x1880c16f, 0x0b225918, 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, 0xec5b53e5, 0xfff9cb92, + 0xcb1e630b, 0xd8bcfb7c, 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, + 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, 0xe29f20ba, 0xf13db8cd, + 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, + 0x298143b1, 0x3a23dbc6, 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, + 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, 0xff17c604, 0xecb55e73, + 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, 0x3f2bfc17, + 0x0bcc548e, 0x186eccf9, 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, + 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, 0x5dc6f43d, 0x4e646c4a, + 0x7a83c4d3, 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, + 0xe9537434, 0xfaf1ec43, 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, + 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, 0xbf59d487, 0xacfb4cf0, + 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, + 0x4b82460d, 0x5820de7a, 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, + 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, 0x66d73941, 0x7575a136, + 0x419209af, 0x523091d8, 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, + 0xadc95a4a, 0xbe6bc23d, 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, + 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, 0x844819fb, 0x97ea818c, + 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, + 0x70938b71, 0x63311306, 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, + 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, 0x26992bc2, 0x353bb3b5, + 0x01dc1b2c, 0x127e835b, 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, 0x4a5e5d21, 0x59fcc556, + 0x6d1b6dcf, 0x7eb9f5b8, 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, + 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, 0x3b11cd7c, 0x28b3550b, + 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 
0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, + 0xcfca5ff6, 0xdc68c781, 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, + 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, 0xe64b1c47, 0xf5e98430, + 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, 0x19b2d7d5, + 0x2d557f4c, 0x3ef7e73b, 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, + 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483}; +static const uint32_t table2_[256] = { + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, + 0xd1b1f617, 0x74f06469, 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, + 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, 0x70a27d8a, 0xd5e3eff4, + 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, 0xd62de755, 0x736c752b, + 0x9942b558, 0x3c032726, 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, + 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, 0xd915c5d1, 0x7c5457af, + 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, + 0x40577089, 0xe516e2f7, 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, + 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, 0xc76580d9, 0x622412a7, + 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, 0xc4ab8878, + 0x2e85480b, 0x8bc4da75, 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, + 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, 0x8f96c396, 0x2ad751e8, + 0xc0f9919b, 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, + 0xf7908dda, 0x52d11fa4, 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, + 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, 0x56830647, 0xf3c29439, + 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, + 0xbf63ce95, 0x1a225ceb, 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, + 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, 0xb3764986, 0x1637dbf8, + 0xfc191b8b, 0x595889f5, 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, 0x655baed3, 0xc01a3cad, + 0x2a34fcde, 0x8f756ea0, 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, + 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, 0x6a638c57, 0xcf221e29, + 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, + 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, + 0x83834485, 0x26c2d6fb, 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, + 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, 0x2290cf18, 0x87d15d66, + 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, 0xd29c5380, 0x77ddc1fe, + 0x9df3018d, 0x38b293f3, 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, + 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, 0x3ce08a10, 0x99a1186e, + 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, + 0xd50042c2, 0x7041d0bc, 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, + 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, 0x9557324b, 0x3016a035, + 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, 
0xe63b4760, + 0x0c158713, 0xa954156d, 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, + 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8}; +static const uint32_t table3_[256] = { + 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, + 0xc4451272, 0x1900b8ca, 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, + 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, 0xe964b13d, 0x34211b85, + 0x560392bc, 0x8b463804, 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, 0x6402e328, 0xb9474990, + 0xdb65c0a9, 0x06206a11, 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, + 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, 0x2161776d, 0xfc24ddd5, + 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, + 0xfa04b7c4, 0x27411d7c, 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, + 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, 0xaba65fe7, 0x76e3f55f, + 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, 0xfb85a74a, + 0x99a72e73, 0x44e284cb, 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, + 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, 0xb4868d3c, 0x69c32784, + 0x0be1aebd, 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, + 0xb8c6591e, 0x6583f3a6, 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, + 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, 0x95e7fa51, 0x48a250e9, + 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, + 0xa7e68bc5, 0x7aa3217d, 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, + 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, 0xa4e4aad9, 0x79a10061, + 0x1b838958, 0xc6c623e0, 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, + 0x7f816a70, 0xa2c4c0c8, 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, + 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, 0x8585ddb4, 0x58c0770c, + 0x3ae2fe35, 0xe7a7548d, 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, + 0xb784ac20, 0x6ac10698, 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, + 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, 0x9aa50f6f, 0x47e0a5d7, + 0x25c22cee, 0xf8878656, 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, 0x8224a72b, 0x5f610d93, + 0x3d4384aa, 0xe0062e12, 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, + 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, 0x106227e5, 0xcd278d5d, + 0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, + 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48, + 0x22635671, 0xff26fcc9, 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, + 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, 0xd867e1b5, 0x05224b0d, + 0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, + 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, 0x6120a825, + 0x0302211c, 0xde478ba4, 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, + 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842}; + +// Used to fetch a naturally-aligned 32-bit word in little endian byte-order +static inline uint32_t LE_LOAD32(const uint8_t* p) { + return DecodeFixed32(reinterpret_cast(p)); +} + +#if defined(HAVE_SSE42) && 
(defined(__LP64__) || defined(_WIN64))
+static inline uint64_t LE_LOAD64(const uint8_t* p) {
+  return DecodeFixed64(reinterpret_cast<const char*>(p));
+}
+#endif
+
+static inline void Slow_CRC32(uint64_t* l, uint8_t const** p) {
+  uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
+  *p += 4;
+  *l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^
+       table1_[(c >> 16) & 0xff] ^ table0_[c >> 24];
+  // DO it twice.
+  c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
+  *p += 4;
+  *l = table3_[c & 0xff] ^ table2_[(c >> 8) & 0xff] ^
+       table1_[(c >> 16) & 0xff] ^ table0_[c >> 24];
+}
+
+#if (!(defined(HAVE_POWER8) && defined(HAS_ALTIVEC))) && \
+    (!defined(HAVE_ARM64_CRC)) || \
+    defined(NO_THREEWAY_CRC32C)
+static inline void Fast_CRC32(uint64_t* l, uint8_t const** p) {
+#ifndef HAVE_SSE42
+  Slow_CRC32(l, p);
+#elif defined(__LP64__) || defined(_WIN64)
+  *l = _mm_crc32_u64(*l, LE_LOAD64(*p));
+  *p += 8;
+#else
+  *l = _mm_crc32_u32(static_cast<uint32_t>(*l), LE_LOAD32(*p));
+  *p += 4;
+  *l = _mm_crc32_u32(static_cast<uint32_t>(*l), LE_LOAD32(*p));
+  *p += 4;
+#endif
+}
+#endif
+
+template <void (*CRC32)(uint64_t*, uint8_t const**)>
+uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
+  const uint8_t* p = reinterpret_cast<const uint8_t*>(buf);
+  const uint8_t* e = p + size;
+  uint64_t l = crc ^ 0xffffffffu;
+
+// Align n to (1 << m) byte boundary
+#define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1))
+
+#define STEP1 \
+  do { \
+    int c = (l & 0xff) ^ *p++; \
+    l = table0_[c] ^ (l >> 8); \
+  } while (0)
+
+  // Point x at first 16-byte aligned byte in string. This might be
+  // just past the end of the string.
+  const uintptr_t pval = reinterpret_cast<uintptr_t>(p);
+  const uint8_t* x = reinterpret_cast<const uint8_t*>(ALIGN(pval, 4));
+  if (x <= e) {
+    // Process bytes until finished or p is 16-byte aligned
+    while (p != x) {
+      STEP1;
+    }
+  }
+  // Process bytes 16 at a time
+  while ((e - p) >= 16) {
+    CRC32(&l, &p);
+    CRC32(&l, &p);
+  }
+  // Process bytes 8 at a time
+  while ((e - p) >= 8) {
+    CRC32(&l, &p);
+  }
+  // Process the last few bytes
+  while (p != e) {
+    STEP1;
+  }
+#undef STEP1
+#undef ALIGN
+  return static_cast<uint32_t>(l ^ 0xffffffffu);
+}
+
+// Detect if ARM64 CRC or not.
+#ifndef HAVE_ARM64_CRC
+// Detect if SSE42 or not.
+#ifndef HAVE_POWER8
+
+static bool isSSE42() {
+#ifndef HAVE_SSE42
+  return false;
+#elif defined(__GNUC__) && defined(__x86_64__) && !defined(IOS_CROSS_COMPILE)
+  uint32_t c_;
+  __asm__("cpuid" : "=c"(c_) : "a"(1) : "ebx", "edx");
+  return c_ & (1U << 20);  // copied from CpuId.h in Folly.
Test SSE42 +#elif defined(_WIN64) + int info[4]; + __cpuidex(info, 0x00000001, 0); + return (info[2] & ((int)1 << 20)) != 0; +#else + return false; +#endif +} + +static bool isPCLMULQDQ() { +#ifndef HAVE_SSE42 + // in build_detect_platform we set this macro when both SSE42 and PCLMULQDQ + // are supported by compiler + return false; +#elif defined(__GNUC__) && defined(__x86_64__) && !defined(IOS_CROSS_COMPILE) + uint32_t c_; + __asm__("cpuid" : "=c"(c_) : "a"(1) : "ebx", "edx"); + return c_ & (1U << 1); // PCLMULQDQ is in bit 1 (not bit 0) +#elif defined(_WIN64) + int info[4]; + __cpuidex(info, 0x00000001, 0); + return (info[2] & ((int)1 << 1)) != 0; +#else + return false; +#endif +} + +#endif // HAVE_POWER8 +#endif // HAVE_ARM64_CRC + +using Function = uint32_t (*)(uint32_t, const char*, size_t); + +#if defined(HAVE_POWER8) && defined(HAS_ALTIVEC) +uint32_t ExtendPPCImpl(uint32_t crc, const char* buf, size_t size) { + return crc32c_ppc(crc, (const unsigned char*)buf, size); +} + +#if __linux__ +static int arch_ppc_probe(void) { + arch_ppc_crc32 = 0; + +#if defined(__powerpc64__) && defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) + if (getauxval(AT_HWCAP2) & PPC_FEATURE2_VEC_CRYPTO) arch_ppc_crc32 = 1; +#endif /* __powerpc64__ */ + + return arch_ppc_crc32; +} +#elif __FreeBSD__ +static int arch_ppc_probe(void) { + unsigned long cpufeatures; + arch_ppc_crc32 = 0; + +#if defined(__powerpc64__) + elf_aux_info(AT_HWCAP2, &cpufeatures, sizeof(cpufeatures)); + if (cpufeatures & PPC_FEATURE2_HAS_VEC_CRYPTO) arch_ppc_crc32 = 1; +#endif /* __powerpc64__ */ + + return arch_ppc_crc32; +} +#endif // __linux__ + +static bool isAltiVec() { + if (arch_ppc_probe()) { + return true; + } else { + return false; + } +} +#endif + +#if defined(HAVE_ARM64_CRC) +uint32_t ExtendARMImpl(uint32_t crc, const char* buf, size_t size) { + return crc32c_arm64(crc, (const unsigned char*)buf, size); +} +#endif + +std::string IsFastCrc32Supported() { + bool has_fast_crc = false; + std::string fast_zero_msg; + std::string arch; +#ifdef HAVE_POWER8 +#ifdef HAS_ALTIVEC + if (arch_ppc_probe()) { + has_fast_crc = true; + arch = "PPC"; + } +#else + has_fast_crc = false; + arch = "PPC"; +#endif +#elif defined(HAVE_ARM64_CRC) + if (crc32c_runtime_check()) { + has_fast_crc = true; + arch = "Arm64"; + pmull_runtime_flag = crc32c_pmull_runtime_check(); + } else { + has_fast_crc = false; + arch = "Arm64"; + } +#else + has_fast_crc = isSSE42(); + arch = "x86"; +#endif + if (has_fast_crc) { + fast_zero_msg.append("Supported on " + arch); + } else { + fast_zero_msg.append("Not supported on " + arch); + } + return fast_zero_msg; +} + +/* + * Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ * Ferry Toth + * ftoth@exalondelft.nl + * + * https://github.com/htot/crc32c + * + * Modified by Facebook + * + * Original intel whitepaper: + * "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction" + * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf + * + * This version is from the folly library, created by Dave Watson + * + * + */ +#if defined HAVE_SSE42 && defined HAVE_PCLMUL + +#define CRCtriplet(crc, buf, offset) \ + crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ + crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \ + crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset)); + +#define CRCduplet(crc, buf, offset) \ + crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \ + crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); + +#define CRCsinglet(crc, buf, offset) \ + crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset)); + +// Numbers taken directly from intel whitepaper. +// clang-format off +const uint64_t clmul_constants[] = { + 0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6, + 0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e, + 0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da, + 0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8, + 0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296, + 0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2, + 0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6, + 0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092, + 0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0, + 0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456, + 0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e, + 0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a, + 0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574, + 0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832, + 0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124, + 0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86, + 0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e, + 0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a, + 0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46, + 0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a, + 0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a, + 0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4, + 0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56, + 0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2, + 0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c, + 0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac, + 0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64, + 0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e, + 0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c, + 0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28, + 0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26, + 0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c, + 0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c, + 0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c, + 0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4, + 0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844, + 0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c, + 0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730, + 0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c, + 0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2, + 0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2, + 0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e, + 0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a, + 0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a, + 0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a, + 0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768, + 0x1ed5a407a, 0x0e78eb416, 
0x0d2c3ed1a, 0x13c2b89c4, + 0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c, + 0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba, + 0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312, + 0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544, + 0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a, + 0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e, + 0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a, + 0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c, + 0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a, + 0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6, + 0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca, + 0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888, + 0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e, + 0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528, + 0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a, + 0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e, + 0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa, +}; + +// Compute the crc32c value for buffer smaller than 8 +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +inline void align_to_8( + size_t len, + uint64_t& crc0, // crc so far, updated on return + const unsigned char*& next) { // next data pointer, updated on return + uint32_t crc32bit = static_cast(crc0); + if (len & 0x04) { + crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next); + next += sizeof(uint32_t); + } + if (len & 0x02) { + crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next); + next += sizeof(uint16_t); + } + if (len & 0x01) { + crc32bit = _mm_crc32_u8(crc32bit, *(next)); + next++; + } + crc0 = crc32bit; +} + +// +// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well +// chosen constant and xor's these with the remaining CRC. +// +inline uint64_t CombineCRC( + size_t block_size, + uint64_t crc0, + uint64_t crc1, + uint64_t crc2, + const uint64_t* next2) { + const auto multiplier = + *(reinterpret_cast(clmul_constants) + block_size - 1); + const auto crc0_xmm = _mm_set_epi64x(0, crc0); + const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00); + const auto crc1_xmm = _mm_set_epi64x(0, crc1); + const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10); + const auto res = _mm_xor_si128(res0, res1); + crc0 = _mm_cvtsi128_si64(res); + crc0 = crc0 ^ *((uint64_t*)next2 - 1); + crc2 = _mm_crc32_u64(crc2, crc0); + return crc2; +} + +// Compute CRC-32C using the Intel hardware instruction. 
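The clmul constants and CombineCRC() above exist to stitch together three CRC streams that are computed in parallel. Below is a minimal sketch of the interleaving idea only, assuming an SSE4.2 target; the lane merge is deliberately left out, because merging is exactly what CombineCRC() does with a block-size-dependent carry-less-multiply constant.

    #include <nmmintrin.h>  // _mm_crc32_u64 (SSE4.2)
    #include <stddef.h>
    #include <stdint.h>

    struct ThreeLanes {
      uint64_t c0, c1, c2;
    };

    // Three independent CRC32C chains over three equal slices of one buffer.
    // Because the chains carry no data dependency on each other, the CPU can
    // overlap the multi-cycle latency of consecutive crc32 instructions.
    inline ThreeLanes ThreeWayLanes(const uint64_t* p, size_t words_per_lane) {
      ThreeLanes lanes{0xffffffffu, 0, 0};  // lane 0 carries the inverted seed
      const uint64_t* p0 = p;
      const uint64_t* p1 = p + words_per_lane;
      const uint64_t* p2 = p + 2 * words_per_lane;
      for (size_t i = 0; i < words_per_lane; ++i) {
        lanes.c0 = _mm_crc32_u64(lanes.c0, p0[i]);
        lanes.c1 = _mm_crc32_u64(lanes.c1, p1[i]);
        lanes.c2 = _mm_crc32_u64(lanes.c2, p2[i]);
      }
      return lanes;  // folding the lanes back together needs CombineCRC() above
    }

crc32c_3way() below is the production version of this loop, unrolled with a Duff's-device switch and folded back into a single value per block.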
+#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) { + const unsigned char* next = (const unsigned char*)buf; + uint64_t count; + uint64_t crc0, crc1, crc2; + crc0 = crc ^ 0xffffffffu; + + + if (len >= 8) { + // if len > 216 then align and use triplets + if (len > 216) { + { + // Work on the bytes (< 8) before the first 8-byte alignment addr starts + uint64_t align_bytes = (8 - (uintptr_t)next) & 7; + len -= align_bytes; + align_to_8(align_bytes, crc0, next); + } + + // Now work on the remaining blocks + count = len / 24; // number of triplets + len %= 24; // bytes remaining + uint64_t n = count >> 7; // #blocks = first block + full blocks + uint64_t block_size = count & 127; + if (block_size == 0) { + block_size = 128; + } else { + n++; + } + // points to the first byte of the next block + const uint64_t* next0 = (uint64_t*)next + block_size; + const uint64_t* next1 = next0 + block_size; + const uint64_t* next2 = next1 + block_size; + + crc1 = crc2 = 0; + // Use Duff's device, a for() loop inside a switch() + // statement. This needs to execute at least once, round len + // down to nearest triplet multiple + switch (block_size) { + case 128: + do { + // jumps here for a full block of len 128 + CRCtriplet(crc, next, -128); + FALLTHROUGH_INTENDED; + case 127: + // jumps here or below for the first block smaller + CRCtriplet(crc, next, -127); + FALLTHROUGH_INTENDED; + case 126: + CRCtriplet(crc, next, -126); // than 128 + FALLTHROUGH_INTENDED; + case 125: + CRCtriplet(crc, next, -125); + FALLTHROUGH_INTENDED; + case 124: + CRCtriplet(crc, next, -124); + FALLTHROUGH_INTENDED; + case 123: + CRCtriplet(crc, next, -123); + FALLTHROUGH_INTENDED; + case 122: + CRCtriplet(crc, next, -122); + FALLTHROUGH_INTENDED; + case 121: + CRCtriplet(crc, next, -121); + FALLTHROUGH_INTENDED; + case 120: + CRCtriplet(crc, next, -120); + FALLTHROUGH_INTENDED; + case 119: + CRCtriplet(crc, next, -119); + FALLTHROUGH_INTENDED; + case 118: + CRCtriplet(crc, next, -118); + FALLTHROUGH_INTENDED; + case 117: + CRCtriplet(crc, next, -117); + FALLTHROUGH_INTENDED; + case 116: + CRCtriplet(crc, next, -116); + FALLTHROUGH_INTENDED; + case 115: + CRCtriplet(crc, next, -115); + FALLTHROUGH_INTENDED; + case 114: + CRCtriplet(crc, next, -114); + FALLTHROUGH_INTENDED; + case 113: + CRCtriplet(crc, next, -113); + FALLTHROUGH_INTENDED; + case 112: + CRCtriplet(crc, next, -112); + FALLTHROUGH_INTENDED; + case 111: + CRCtriplet(crc, next, -111); + FALLTHROUGH_INTENDED; + case 110: + CRCtriplet(crc, next, -110); + FALLTHROUGH_INTENDED; + case 109: + CRCtriplet(crc, next, -109); + FALLTHROUGH_INTENDED; + case 108: + CRCtriplet(crc, next, -108); + FALLTHROUGH_INTENDED; + case 107: + CRCtriplet(crc, next, -107); + FALLTHROUGH_INTENDED; + case 106: + CRCtriplet(crc, next, -106); + FALLTHROUGH_INTENDED; + case 105: + CRCtriplet(crc, next, -105); + FALLTHROUGH_INTENDED; + case 104: + CRCtriplet(crc, next, -104); + FALLTHROUGH_INTENDED; + case 103: + CRCtriplet(crc, next, -103); + FALLTHROUGH_INTENDED; + case 102: + CRCtriplet(crc, next, -102); + FALLTHROUGH_INTENDED; + case 101: + CRCtriplet(crc, next, -101); + FALLTHROUGH_INTENDED; + case 100: + CRCtriplet(crc, next, -100); + FALLTHROUGH_INTENDED; + case 99: + CRCtriplet(crc, next, -99); + FALLTHROUGH_INTENDED; + case 98: + CRCtriplet(crc, next, -98); + FALLTHROUGH_INTENDED; + case 97: + 
CRCtriplet(crc, next, -97); + FALLTHROUGH_INTENDED; + case 96: + CRCtriplet(crc, next, -96); + FALLTHROUGH_INTENDED; + case 95: + CRCtriplet(crc, next, -95); + FALLTHROUGH_INTENDED; + case 94: + CRCtriplet(crc, next, -94); + FALLTHROUGH_INTENDED; + case 93: + CRCtriplet(crc, next, -93); + FALLTHROUGH_INTENDED; + case 92: + CRCtriplet(crc, next, -92); + FALLTHROUGH_INTENDED; + case 91: + CRCtriplet(crc, next, -91); + FALLTHROUGH_INTENDED; + case 90: + CRCtriplet(crc, next, -90); + FALLTHROUGH_INTENDED; + case 89: + CRCtriplet(crc, next, -89); + FALLTHROUGH_INTENDED; + case 88: + CRCtriplet(crc, next, -88); + FALLTHROUGH_INTENDED; + case 87: + CRCtriplet(crc, next, -87); + FALLTHROUGH_INTENDED; + case 86: + CRCtriplet(crc, next, -86); + FALLTHROUGH_INTENDED; + case 85: + CRCtriplet(crc, next, -85); + FALLTHROUGH_INTENDED; + case 84: + CRCtriplet(crc, next, -84); + FALLTHROUGH_INTENDED; + case 83: + CRCtriplet(crc, next, -83); + FALLTHROUGH_INTENDED; + case 82: + CRCtriplet(crc, next, -82); + FALLTHROUGH_INTENDED; + case 81: + CRCtriplet(crc, next, -81); + FALLTHROUGH_INTENDED; + case 80: + CRCtriplet(crc, next, -80); + FALLTHROUGH_INTENDED; + case 79: + CRCtriplet(crc, next, -79); + FALLTHROUGH_INTENDED; + case 78: + CRCtriplet(crc, next, -78); + FALLTHROUGH_INTENDED; + case 77: + CRCtriplet(crc, next, -77); + FALLTHROUGH_INTENDED; + case 76: + CRCtriplet(crc, next, -76); + FALLTHROUGH_INTENDED; + case 75: + CRCtriplet(crc, next, -75); + FALLTHROUGH_INTENDED; + case 74: + CRCtriplet(crc, next, -74); + FALLTHROUGH_INTENDED; + case 73: + CRCtriplet(crc, next, -73); + FALLTHROUGH_INTENDED; + case 72: + CRCtriplet(crc, next, -72); + FALLTHROUGH_INTENDED; + case 71: + CRCtriplet(crc, next, -71); + FALLTHROUGH_INTENDED; + case 70: + CRCtriplet(crc, next, -70); + FALLTHROUGH_INTENDED; + case 69: + CRCtriplet(crc, next, -69); + FALLTHROUGH_INTENDED; + case 68: + CRCtriplet(crc, next, -68); + FALLTHROUGH_INTENDED; + case 67: + CRCtriplet(crc, next, -67); + FALLTHROUGH_INTENDED; + case 66: + CRCtriplet(crc, next, -66); + FALLTHROUGH_INTENDED; + case 65: + CRCtriplet(crc, next, -65); + FALLTHROUGH_INTENDED; + case 64: + CRCtriplet(crc, next, -64); + FALLTHROUGH_INTENDED; + case 63: + CRCtriplet(crc, next, -63); + FALLTHROUGH_INTENDED; + case 62: + CRCtriplet(crc, next, -62); + FALLTHROUGH_INTENDED; + case 61: + CRCtriplet(crc, next, -61); + FALLTHROUGH_INTENDED; + case 60: + CRCtriplet(crc, next, -60); + FALLTHROUGH_INTENDED; + case 59: + CRCtriplet(crc, next, -59); + FALLTHROUGH_INTENDED; + case 58: + CRCtriplet(crc, next, -58); + FALLTHROUGH_INTENDED; + case 57: + CRCtriplet(crc, next, -57); + FALLTHROUGH_INTENDED; + case 56: + CRCtriplet(crc, next, -56); + FALLTHROUGH_INTENDED; + case 55: + CRCtriplet(crc, next, -55); + FALLTHROUGH_INTENDED; + case 54: + CRCtriplet(crc, next, -54); + FALLTHROUGH_INTENDED; + case 53: + CRCtriplet(crc, next, -53); + FALLTHROUGH_INTENDED; + case 52: + CRCtriplet(crc, next, -52); + FALLTHROUGH_INTENDED; + case 51: + CRCtriplet(crc, next, -51); + FALLTHROUGH_INTENDED; + case 50: + CRCtriplet(crc, next, -50); + FALLTHROUGH_INTENDED; + case 49: + CRCtriplet(crc, next, -49); + FALLTHROUGH_INTENDED; + case 48: + CRCtriplet(crc, next, -48); + FALLTHROUGH_INTENDED; + case 47: + CRCtriplet(crc, next, -47); + FALLTHROUGH_INTENDED; + case 46: + CRCtriplet(crc, next, -46); + FALLTHROUGH_INTENDED; + case 45: + CRCtriplet(crc, next, -45); + FALLTHROUGH_INTENDED; + case 44: + CRCtriplet(crc, next, -44); + FALLTHROUGH_INTENDED; + case 43: + CRCtriplet(crc, next, -43); + 
FALLTHROUGH_INTENDED; + case 42: + CRCtriplet(crc, next, -42); + FALLTHROUGH_INTENDED; + case 41: + CRCtriplet(crc, next, -41); + FALLTHROUGH_INTENDED; + case 40: + CRCtriplet(crc, next, -40); + FALLTHROUGH_INTENDED; + case 39: + CRCtriplet(crc, next, -39); + FALLTHROUGH_INTENDED; + case 38: + CRCtriplet(crc, next, -38); + FALLTHROUGH_INTENDED; + case 37: + CRCtriplet(crc, next, -37); + FALLTHROUGH_INTENDED; + case 36: + CRCtriplet(crc, next, -36); + FALLTHROUGH_INTENDED; + case 35: + CRCtriplet(crc, next, -35); + FALLTHROUGH_INTENDED; + case 34: + CRCtriplet(crc, next, -34); + FALLTHROUGH_INTENDED; + case 33: + CRCtriplet(crc, next, -33); + FALLTHROUGH_INTENDED; + case 32: + CRCtriplet(crc, next, -32); + FALLTHROUGH_INTENDED; + case 31: + CRCtriplet(crc, next, -31); + FALLTHROUGH_INTENDED; + case 30: + CRCtriplet(crc, next, -30); + FALLTHROUGH_INTENDED; + case 29: + CRCtriplet(crc, next, -29); + FALLTHROUGH_INTENDED; + case 28: + CRCtriplet(crc, next, -28); + FALLTHROUGH_INTENDED; + case 27: + CRCtriplet(crc, next, -27); + FALLTHROUGH_INTENDED; + case 26: + CRCtriplet(crc, next, -26); + FALLTHROUGH_INTENDED; + case 25: + CRCtriplet(crc, next, -25); + FALLTHROUGH_INTENDED; + case 24: + CRCtriplet(crc, next, -24); + FALLTHROUGH_INTENDED; + case 23: + CRCtriplet(crc, next, -23); + FALLTHROUGH_INTENDED; + case 22: + CRCtriplet(crc, next, -22); + FALLTHROUGH_INTENDED; + case 21: + CRCtriplet(crc, next, -21); + FALLTHROUGH_INTENDED; + case 20: + CRCtriplet(crc, next, -20); + FALLTHROUGH_INTENDED; + case 19: + CRCtriplet(crc, next, -19); + FALLTHROUGH_INTENDED; + case 18: + CRCtriplet(crc, next, -18); + FALLTHROUGH_INTENDED; + case 17: + CRCtriplet(crc, next, -17); + FALLTHROUGH_INTENDED; + case 16: + CRCtriplet(crc, next, -16); + FALLTHROUGH_INTENDED; + case 15: + CRCtriplet(crc, next, -15); + FALLTHROUGH_INTENDED; + case 14: + CRCtriplet(crc, next, -14); + FALLTHROUGH_INTENDED; + case 13: + CRCtriplet(crc, next, -13); + FALLTHROUGH_INTENDED; + case 12: + CRCtriplet(crc, next, -12); + FALLTHROUGH_INTENDED; + case 11: + CRCtriplet(crc, next, -11); + FALLTHROUGH_INTENDED; + case 10: + CRCtriplet(crc, next, -10); + FALLTHROUGH_INTENDED; + case 9: + CRCtriplet(crc, next, -9); + FALLTHROUGH_INTENDED; + case 8: + CRCtriplet(crc, next, -8); + FALLTHROUGH_INTENDED; + case 7: + CRCtriplet(crc, next, -7); + FALLTHROUGH_INTENDED; + case 6: + CRCtriplet(crc, next, -6); + FALLTHROUGH_INTENDED; + case 5: + CRCtriplet(crc, next, -5); + FALLTHROUGH_INTENDED; + case 4: + CRCtriplet(crc, next, -4); + FALLTHROUGH_INTENDED; + case 3: + CRCtriplet(crc, next, -3); + FALLTHROUGH_INTENDED; + case 2: + CRCtriplet(crc, next, -2); + FALLTHROUGH_INTENDED; + case 1: + CRCduplet(crc, next, -1); // the final triplet is actually only 2 + //{ CombineCRC(); } + crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2); + if (--n > 0) { + crc1 = crc2 = 0; + block_size = 128; + // points to the first byte of the next block + next0 = next2 + 128; + next1 = next0 + 128; // from here on all blocks are 128 long + next2 = next1 + 128; + } + FALLTHROUGH_INTENDED; + case 0:; + } while (n > 0); + } + next = (const unsigned char*)next2; + } + uint64_t count2 = len >> 3; // 216 of less bytes is 27 or less singlets + len = len & 7; + next += (count2 * 8); + switch (count2) { + case 27: + CRCsinglet(crc0, next, -27 * 8); + FALLTHROUGH_INTENDED; + case 26: + CRCsinglet(crc0, next, -26 * 8); + FALLTHROUGH_INTENDED; + case 25: + CRCsinglet(crc0, next, -25 * 8); + FALLTHROUGH_INTENDED; + case 24: + CRCsinglet(crc0, next, -24 * 8); + 
FALLTHROUGH_INTENDED; + case 23: + CRCsinglet(crc0, next, -23 * 8); + FALLTHROUGH_INTENDED; + case 22: + CRCsinglet(crc0, next, -22 * 8); + FALLTHROUGH_INTENDED; + case 21: + CRCsinglet(crc0, next, -21 * 8); + FALLTHROUGH_INTENDED; + case 20: + CRCsinglet(crc0, next, -20 * 8); + FALLTHROUGH_INTENDED; + case 19: + CRCsinglet(crc0, next, -19 * 8); + FALLTHROUGH_INTENDED; + case 18: + CRCsinglet(crc0, next, -18 * 8); + FALLTHROUGH_INTENDED; + case 17: + CRCsinglet(crc0, next, -17 * 8); + FALLTHROUGH_INTENDED; + case 16: + CRCsinglet(crc0, next, -16 * 8); + FALLTHROUGH_INTENDED; + case 15: + CRCsinglet(crc0, next, -15 * 8); + FALLTHROUGH_INTENDED; + case 14: + CRCsinglet(crc0, next, -14 * 8); + FALLTHROUGH_INTENDED; + case 13: + CRCsinglet(crc0, next, -13 * 8); + FALLTHROUGH_INTENDED; + case 12: + CRCsinglet(crc0, next, -12 * 8); + FALLTHROUGH_INTENDED; + case 11: + CRCsinglet(crc0, next, -11 * 8); + FALLTHROUGH_INTENDED; + case 10: + CRCsinglet(crc0, next, -10 * 8); + FALLTHROUGH_INTENDED; + case 9: + CRCsinglet(crc0, next, -9 * 8); + FALLTHROUGH_INTENDED; + case 8: + CRCsinglet(crc0, next, -8 * 8); + FALLTHROUGH_INTENDED; + case 7: + CRCsinglet(crc0, next, -7 * 8); + FALLTHROUGH_INTENDED; + case 6: + CRCsinglet(crc0, next, -6 * 8); + FALLTHROUGH_INTENDED; + case 5: + CRCsinglet(crc0, next, -5 * 8); + FALLTHROUGH_INTENDED; + case 4: + CRCsinglet(crc0, next, -4 * 8); + FALLTHROUGH_INTENDED; + case 3: + CRCsinglet(crc0, next, -3 * 8); + FALLTHROUGH_INTENDED; + case 2: + CRCsinglet(crc0, next, -2 * 8); + FALLTHROUGH_INTENDED; + case 1: + CRCsinglet(crc0, next, -1 * 8); + FALLTHROUGH_INTENDED; + case 0:; + } + } + { + align_to_8(len, crc0, next); + return (uint32_t)crc0 ^ 0xffffffffu; + } +} + +#endif //HAVE_SSE42 && HAVE_PCLMUL + +static inline Function Choose_Extend() { +#ifdef HAVE_POWER8 + return isAltiVec() ? ExtendPPCImpl : ExtendImpl; +#elif defined(HAVE_ARM64_CRC) + if(crc32c_runtime_check()) { + pmull_runtime_flag = crc32c_pmull_runtime_check(); + return ExtendARMImpl; + } else { + return ExtendImpl; + } +#else + if (isSSE42()) { + if (isPCLMULQDQ()) { +#if (defined HAVE_SSE42 && defined HAVE_PCLMUL) && !defined NO_THREEWAY_CRC32C + return crc32c_3way; +#else + return ExtendImpl; // Fast_CRC32 will check HAVE_SSE42 itself +#endif + } + else { // no runtime PCLMULQDQ support but has SSE42 support + return ExtendImpl; + } + } // end of isSSE42() + else { + return ExtendImpl; + } +#endif +} + +static Function ChosenExtend = Choose_Extend(); +uint32_t Extend(uint32_t crc, const char* buf, size_t size) { + return ChosenExtend(crc, buf, size); +} + +// The code for crc32c combine, copied with permission from folly + +// Standard galois-field multiply. The only modification is that a, +// b, m, and p are all bit-reflected. +// +// https://en.wikipedia.org/wiki/Finite_field_arithmetic +static constexpr uint32_t gf_multiply_sw_1( + size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) { + // clang-format off + return i == 32 ? 
p : gf_multiply_sw_1(
+      /* i = */ i + 1,
+      /* p = */ p ^ ((0u-((b >> 31) & 1)) & a),
+      /* a = */ (a >> 1) ^ ((0u-(a & 1)) & m),
+      /* b = */ b << 1,
+      /* m = */ m);
+  // clang-format on
+}
+static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
+  return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
+}
+
+static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
+  return gf_multiply_sw(a, a, m);
+}
+
+template <size_t i, uint32_t m>
+struct gf_powers_memo {
+  static constexpr uint32_t value =
+      gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
+};
+template <uint32_t m>
+struct gf_powers_memo<0, m> {
+  static constexpr uint32_t value = m;
+};
+
+template <typename T, T... Ints>
+struct integer_sequence {
+  using value_type = T;
+  static constexpr size_t size() { return sizeof...(Ints); }
+};
+
+template <typename T, size_t N, T... Is>
+struct make_integer_sequence : make_integer_sequence<T, N - 1, N - 1, Is...> {};
+
+template <typename T, T... Is>
+struct make_integer_sequence<T, 0, Is...> : integer_sequence<T, Is...> {};
+
+template <size_t N>
+using make_index_sequence = make_integer_sequence<size_t, N>;
+
+template <uint32_t m>
+struct gf_powers_make {
+  template <size_t... i>
+  using index_sequence = integer_sequence<size_t, i...>;
+  template <size_t... i>
+  constexpr std::array<uint32_t, sizeof...(i)> operator()(
+      index_sequence<i...>) const {
+    return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
+  }
+};
+
+static constexpr uint32_t crc32c_m = 0x82f63b78;
+
+static constexpr std::array<uint32_t, 62> const crc32c_powers =
+    gf_powers_make<crc32c_m>{}(make_index_sequence<62>{});
+
+// Expects a "pure" crc (see Crc32cCombine)
+static uint32_t Crc32AppendZeroes(
+    uint32_t crc, size_t len_over_4, uint32_t polynomial,
+    std::array<uint32_t, 62> const& powers_array) {
+  auto powers = powers_array.data();
+  // Append by multiplying by consecutive powers of two of the zeroes
+  // array
+  size_t len_bits = len_over_4;
+
+  while (len_bits) {
+    // Advance directly to next bit set.
+    auto r = CountTrailingZeroBits(len_bits);
+    len_bits >>= r;
+    powers += r;
+
+    crc = gf_multiply_sw(crc, *powers, polynomial);
+
+    len_bits >>= 1;
+    powers++;
+  }
+
+  return crc;
+}
+
+static inline uint32_t InvertedToPure(uint32_t crc) { return ~crc; }
+
+static inline uint32_t PureToInverted(uint32_t crc) { return ~crc; }
+
+static inline uint32_t PureExtend(uint32_t crc, const char* buf, size_t size) {
+  return InvertedToPure(Extend(PureToInverted(crc), buf, size));
+}
+
+// Background:
+// RocksDB uses two kinds of crc32c values: masked and unmasked. Neither is
+// a "pure" CRC because a pure CRC satisfies (^ for xor)
+// crc(a ^ b) = crc(a) ^ crc(b)
+// The unmasked is closest, and this function takes unmasked crc32c values.
+// The unmasked values are impure in two ways:
+// * The initial setting at the start of CRC computation is all 1 bits
+// (like -1) instead of zero.
+// * The result has all bits inverted.
+// Note that together, these result in the empty string having a crc32c of
+// zero. See
+// https://en.wikipedia.org/wiki/Computation_of_cyclic_redundancy_checks#CRC_variants
+//
+// Simplified version of strategy, using xor through pure CRCs (+ for concat):
+//
+// pure_crc(str1 + str2) = pure_crc(str1 + zeros(len(str2))) ^
+//                         pure_crc(zeros(len(str1)) + str2)
+//
+// because the xor of these two zero-padded strings is str1 + str2. For pure
+// CRC, leading zeros don't affect the result, so we only need
+//
+// pure_crc(str1 + str2) = pure_crc(str1 + zeros(len(str2))) ^
+//                         pure_crc(str2)
+//
+// Considering we aren't working with pure CRCs, what is actually in the input?
+// +// crc1 = PureToInverted(PureExtendCrc32c(-1, zeros, crc1len) ^ +// PureCrc32c(str1, crc1len)) +// crc2 = PureToInverted(PureExtendCrc32c(-1, zeros, crc2len) ^ +// PureCrc32c(str2, crc2len)) +// +// The result we want to compute is +// combined = PureToInverted(PureExtendCrc32c(PureExtendCrc32c(-1, zeros, +// crc1len) ^ +// PureCrc32c(str1, crc1len), +// zeros, crc2len) ^ +// PureCrc32c(str2, crc2len)) +// +// Thus, in addition to extending crc1 over the length of str2 in (virtual) +// zeros, we need to cancel out the -1 initializer that was used in computing +// crc2. To cancel it out, we also need to extend it over crc2len in zeros. +// To simplify, since the end of str1 and that -1 initializer for crc2 are at +// the same logical position, we can combine them before we extend over the +// zeros. +uint32_t Crc32cCombine(uint32_t crc1, uint32_t crc2, size_t crc2len) { + uint32_t pure_crc1_with_init = InvertedToPure(crc1); + uint32_t pure_crc2_with_init = InvertedToPure(crc2); + uint32_t pure_crc2_init = static_cast(-1); + + // Append up to 32 bits of zeroes in the normal way + char zeros[4] = {0, 0, 0, 0}; + auto len = crc2len & 3; + uint32_t tmp = pure_crc1_with_init ^ pure_crc2_init; + if (len) { + tmp = PureExtend(tmp, zeros, len); + } + return PureToInverted( + Crc32AppendZeroes(tmp, crc2len / 4, crc32c_m, crc32c_powers) ^ + pure_crc2_with_init); +} + +} // namespace crc32c +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/crc32c.h b/src/rocksdb/util/crc32c.h new file mode 100644 index 000000000..a08ad60af --- /dev/null +++ b/src/rocksdb/util/crc32c.h @@ -0,0 +1,56 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include +#include + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +namespace crc32c { + +extern std::string IsFastCrc32Supported(); + +// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the +// crc32c of some string A. Extend() is often used to maintain the +// crc32c of a stream of data. +extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); + +// Takes two unmasked crc32c values, and the length of the string from +// which `crc2` was computed, and computes a crc32c value for the +// concatenation of the original two input strings. Running time is +// ~ log(crc2len). +extern uint32_t Crc32cCombine(uint32_t crc1, uint32_t crc2, size_t crc2len); + +// Return the crc32c of data[0,n-1] +inline uint32_t Value(const char* data, size_t n) { return Extend(0, data, n); } + +static const uint32_t kMaskDelta = 0xa282ead8ul; + +// Return a masked representation of crc. +// +// Motivation: it is problematic to compute the CRC of a string that +// contains embedded CRCs. Therefore we recommend that CRCs stored +// somewhere (e.g., in files) should be masked before being stored. +inline uint32_t Mask(uint32_t crc) { + // Rotate right by 15 bits and add a constant. + return ((crc >> 15) | (crc << 17)) + kMaskDelta; +} + +// Return the crc whose masked representation is masked_crc. 
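Mask() above and Unmask(), defined next, are meant to bracket any CRC that is persisted alongside the data it covers. A minimal sketch of that round trip, assuming only the declarations in this header; the record-layout helpers are hypothetical and ignore byte order for brevity, they are not RocksDB APIs.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <string>

    // Store: 4-byte masked crc32c of the payload, then the payload itself.
    inline void AppendChecksummedRecord(std::string* dst, const char* data,
                                        size_t n) {
      uint32_t masked = ROCKSDB_NAMESPACE::crc32c::Mask(
          ROCKSDB_NAMESPACE::crc32c::Value(data, n));
      dst->append(reinterpret_cast<const char*>(&masked), sizeof(masked));
      dst->append(data, n);
    }

    // Verify: unmask the stored value before comparing, so a CRC computed
    // over the whole record never "contains" a raw CRC of its own payload.
    inline bool VerifyChecksummedRecord(const char* rec, size_t payload_len) {
      uint32_t masked;
      std::memcpy(&masked, rec, sizeof(masked));
      uint32_t expected = ROCKSDB_NAMESPACE::crc32c::Unmask(masked);
      return expected == ROCKSDB_NAMESPACE::crc32c::Value(rec + sizeof(masked),
                                                          payload_len);
    }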
+inline uint32_t Unmask(uint32_t masked_crc) { + uint32_t rot = masked_crc - kMaskDelta; + return ((rot >> 17) | (rot << 15)); +} + +} // namespace crc32c +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/crc32c_arm64.cc b/src/rocksdb/util/crc32c_arm64.cc new file mode 100644 index 000000000..4885f4fe1 --- /dev/null +++ b/src/rocksdb/util/crc32c_arm64.cc @@ -0,0 +1,215 @@ +// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/crc32c_arm64.h" + +#if defined(HAVE_ARM64_CRC) + +#if defined(__linux__) +#include +#endif +#ifdef ROCKSDB_AUXV_GETAUXVAL_PRESENT +#include +#endif +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1 << 4) +#endif +#if defined(__APPLE__) +#include +#endif +#if defined(__OpenBSD__) +#include +#include +#include +#include +#endif + +#ifdef HAVE_ARM64_CRYPTO +/* unfolding to compute 8 * 3 = 24 bytes parallelly */ +#define CRC32C24BYTES(ITR) \ + crc1 = crc32c_u64(crc1, *(buf64 + BLK_LENGTH + (ITR))); \ + crc2 = crc32c_u64(crc2, *(buf64 + BLK_LENGTH * 2 + (ITR))); \ + crc0 = crc32c_u64(crc0, *(buf64 + (ITR))); + +/* unfolding to compute 24 * 7 = 168 bytes parallelly */ +#define CRC32C7X24BYTES(ITR) \ + do { \ + CRC32C24BYTES((ITR)*7 + 0) \ + CRC32C24BYTES((ITR)*7 + 1) \ + CRC32C24BYTES((ITR)*7 + 2) \ + CRC32C24BYTES((ITR)*7 + 3) \ + CRC32C24BYTES((ITR)*7 + 4) \ + CRC32C24BYTES((ITR)*7 + 5) \ + CRC32C24BYTES((ITR)*7 + 6) \ + } while (0) +#endif + +extern bool pmull_runtime_flag; + +uint32_t crc32c_runtime_check(void) { +#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) || defined(__FreeBSD__) + uint64_t auxv = 0; +#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) + auxv = getauxval(AT_HWCAP); +#elif defined(__FreeBSD__) + elf_aux_info(AT_HWCAP, &auxv, sizeof(auxv)); +#endif + return (auxv & HWCAP_CRC32) != 0; +#elif defined(__APPLE__) + int r; + size_t l = sizeof(r); + if (sysctlbyname("hw.optional.armv8_crc32", &r, &l, NULL, 0) == -1) return 0; + return r == 1; +#elif defined(__OpenBSD__) + int r = 0; + const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 }; + uint64_t isar0; + size_t len = sizeof(isar0); + + if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) { + if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE) + r = 1; + } + return r; +#else + return 0; +#endif +} + +bool crc32c_pmull_runtime_check(void) { +#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) || defined(__FreeBSD__) + uint64_t auxv = 0; +#if defined(ROCKSDB_AUXV_GETAUXVAL_PRESENT) + auxv = getauxval(AT_HWCAP); +#elif defined(__FreeBSD__) + elf_aux_info(AT_HWCAP, &auxv, sizeof(auxv)); +#endif + return (auxv & HWCAP_PMULL) != 0; +#elif defined(__APPLE__) + return true; +#elif defined(__OpenBSD__) + bool r = false; + const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 }; + uint64_t isar0; + size_t len = sizeof(isar0); + + if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) { + if (ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL) + r = true; + } + return r; +#else + return false; +#endif +} + +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +uint32_t +crc32c_arm64(uint32_t crc, unsigned char const *data, size_t len) { + const uint8_t *buf8; + const uint64_t *buf64 = (uint64_t *)data; + 
int length = (int)len; + crc ^= 0xffffffff; + + /* + * Pmull runtime check here. + * Raspberry Pi supports crc32 but doesn't support pmull. + * Skip Crc32c Parallel computation if no crypto extension available. + */ + if (pmull_runtime_flag) { +/* Macro (HAVE_ARM64_CRYPTO) is used for compiling check */ +#ifdef HAVE_ARM64_CRYPTO +/* Crc32c Parallel computation + * Algorithm comes from Intel whitepaper: + * crc-iscsi-polynomial-crc32-instruction-paper + * + * Input data is divided into three equal-sized blocks + * Three parallel blocks (crc0, crc1, crc2) for 1024 Bytes + * One Block: 42(BLK_LENGTH) * 8(step length: crc32c_u64) bytes + */ +#define BLK_LENGTH 42 + while (length >= 1024) { + uint64_t t0, t1; + uint32_t crc0 = 0, crc1 = 0, crc2 = 0; + + /* Parallel Param: + * k0 = CRC32(x ^ (42 * 8 * 8 * 2 - 1)); + * k1 = CRC32(x ^ (42 * 8 * 8 - 1)); + */ + uint32_t k0 = 0xe417f38a, k1 = 0x8f158014; + + /* Prefetch data for following block to avoid cache miss */ + PREF1KL1((uint8_t *)buf64, 1024); + + /* First 8 byte for better pipelining */ + crc0 = crc32c_u64(crc, *buf64++); + + /* 3 blocks crc32c parallel computation + * Macro unfolding to compute parallelly + * 168 * 6 = 1008 (bytes) + */ + CRC32C7X24BYTES(0); + CRC32C7X24BYTES(1); + CRC32C7X24BYTES(2); + CRC32C7X24BYTES(3); + CRC32C7X24BYTES(4); + CRC32C7X24BYTES(5); + buf64 += (BLK_LENGTH * 3); + + /* Last 8 bytes */ + crc = crc32c_u64(crc2, *buf64++); + + t0 = (uint64_t)vmull_p64(crc0, k0); + t1 = (uint64_t)vmull_p64(crc1, k1); + + /* Merge (crc0, crc1, crc2) -> crc */ + crc1 = crc32c_u64(0, t1); + crc ^= crc1; + crc0 = crc32c_u64(0, t0); + crc ^= crc0; + + length -= 1024; + } + + if (length == 0) return crc ^ (0xffffffffU); +#endif + } // if Pmull runtime check here + + buf8 = (const uint8_t *)buf64; + while (length >= 8) { + crc = crc32c_u64(crc, *(const uint64_t *)buf8); + buf8 += 8; + length -= 8; + } + + /* The following is more efficient than the straight loop */ + if (length >= 4) { + crc = crc32c_u32(crc, *(const uint32_t *)buf8); + buf8 += 4; + length -= 4; + } + + if (length >= 2) { + crc = crc32c_u16(crc, *(const uint16_t *)buf8); + buf8 += 2; + length -= 2; + } + + if (length >= 1) crc = crc32c_u8(crc, *buf8); + + crc ^= 0xffffffff; + return crc; +} + +#endif diff --git a/src/rocksdb/util/crc32c_arm64.h b/src/rocksdb/util/crc32c_arm64.h new file mode 100644 index 000000000..4b27fe871 --- /dev/null +++ b/src/rocksdb/util/crc32c_arm64.h @@ -0,0 +1,52 @@ +// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
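The runtime checks implemented above are what crc32c.cc consults before routing Extend() to crc32c_arm64(). A minimal sketch of that dispatch, using only the symbols this header declares below; the wrapper name ChecksumWithRuntimeDispatch is hypothetical and the non-accelerated fallback is left as a placeholder.

    #include <cstddef>
    #include <cstdint>

    extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const* data,
                                 size_t len);
    extern uint32_t crc32c_runtime_check(void);
    extern bool crc32c_pmull_runtime_check(void);
    extern bool pmull_runtime_flag;

    inline uint32_t ChecksumWithRuntimeDispatch(const unsigned char* data,
                                                size_t len) {
      static const bool have_crc = crc32c_runtime_check() != 0;
      if (have_crc) {
        // Lets crc32c_arm64() take its 1024-byte three-block pmull path when
        // the crypto extension is present.
        pmull_runtime_flag = crc32c_pmull_runtime_check();
        return crc32c_arm64(0, data, len);  // 0 seed, same convention as Value()
      }
      // Otherwise fall back to the portable table-driven ExtendImpl path.
      return 0;  // placeholder for the fallback in this sketch
    }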
+ +#ifndef UTIL_CRC32C_ARM64_H +#define UTIL_CRC32C_ARM64_H + +#include +#include + +#if defined(__aarch64__) || defined(__AARCH64__) + +#ifdef __ARM_FEATURE_CRC32 +#define HAVE_ARM64_CRC +#include +#define crc32c_u8(crc, v) __crc32cb(crc, v) +#define crc32c_u16(crc, v) __crc32ch(crc, v) +#define crc32c_u32(crc, v) __crc32cw(crc, v) +#define crc32c_u64(crc, v) __crc32cd(crc, v) +// clang-format off +#define PREF4X64L1(buffer, PREF_OFFSET, ITR) \ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer), \ + [c] "I"((PREF_OFFSET) + ((ITR) + 0) * 64)); \ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer), \ + [c] "I"((PREF_OFFSET) + ((ITR) + 1) * 64)); \ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer), \ + [c] "I"((PREF_OFFSET) + ((ITR) + 2) * 64)); \ + __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]" ::[v] "r"(buffer), \ + [c] "I"((PREF_OFFSET) + ((ITR) + 3) * 64)); +// clang-format on + +#define PREF1KL1(buffer, PREF_OFFSET) \ + PREF4X64L1(buffer, (PREF_OFFSET), 0) \ + PREF4X64L1(buffer, (PREF_OFFSET), 4) \ + PREF4X64L1(buffer, (PREF_OFFSET), 8) \ + PREF4X64L1(buffer, (PREF_OFFSET), 12) + +extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, + size_t len); +extern uint32_t crc32c_runtime_check(void); +extern bool crc32c_pmull_runtime_check(void); + +#ifdef __ARM_FEATURE_CRYPTO +#define HAVE_ARM64_CRYPTO +#include +#endif // __ARM_FEATURE_CRYPTO +#endif // __ARM_FEATURE_CRC32 + +#endif // defined(__aarch64__) || defined(__AARCH64__) + +#endif diff --git a/src/rocksdb/util/crc32c_ppc.c b/src/rocksdb/util/crc32c_ppc.c new file mode 100644 index 000000000..b37dfb158 --- /dev/null +++ b/src/rocksdb/util/crc32c_ppc.c @@ -0,0 +1,94 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2017 International Business Machines Corp. +// All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#define CRC_TABLE +#include +#include +#include +#include "util/crc32c_ppc_constants.h" + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN - 1) + +#ifdef REFLECT +static unsigned int crc32_align(unsigned int crc, unsigned char const *p, + unsigned long len) { + while (len--) crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); + return crc; +} +#endif + +#ifdef HAVE_POWER8 +unsigned int __crc32_vpmsum(unsigned int crc, unsigned char const *p, + unsigned long len); + +static uint32_t crc32_vpmsum(uint32_t crc, unsigned char const *data, + size_t len) { + unsigned int prealign; + unsigned int tail; + +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif + + if (len < VMX_ALIGN + VMX_ALIGN_MASK) { + crc = crc32_align(crc, data, (unsigned long)len); + goto out; + } + + if ((unsigned long)data & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)data & VMX_ALIGN_MASK); + crc = crc32_align(crc, data, prealign); + len -= prealign; + data += prealign; + } + + crc = __crc32_vpmsum(crc, data, (unsigned long)len & ~VMX_ALIGN_MASK); + + tail = len & VMX_ALIGN_MASK; + if (tail) { + data += len & ~VMX_ALIGN_MASK; + crc = crc32_align(crc, data, tail); + } + +out: +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif + + return crc; +} + +/* This wrapper function works around the fact that crc32_vpmsum + * does not gracefully handle the case where the data pointer is NULL. There + * may be room for performance improvement here. 
+ */ +uint32_t crc32c_ppc(uint32_t crc, unsigned char const *data, size_t len) { + unsigned char *buf2; + + if (!data) { + buf2 = (unsigned char *)malloc(len); + bzero(buf2, len); + crc = crc32_vpmsum(crc, buf2, len); + free(buf2); + } else { + crc = crc32_vpmsum(crc, data, (unsigned long)len); + } + return crc; +} + +#else /* HAVE_POWER8 */ + +/* This symbol has to exist on non-ppc architectures (and on legacy + * ppc systems using power7 or below) in order to compile properly + * there, even though it won't be called. + */ +uint32_t crc32c_ppc(uint32_t crc, unsigned char const *data, size_t len) { + return 0; +} + +#endif /* HAVE_POWER8 */ diff --git a/src/rocksdb/util/crc32c_ppc.h b/src/rocksdb/util/crc32c_ppc.h new file mode 100644 index 000000000..f0b0b66d5 --- /dev/null +++ b/src/rocksdb/util/crc32c_ppc.h @@ -0,0 +1,22 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2017 International Business Machines Corp. +// All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer, + size_t len); + +#ifdef __cplusplus +} +#endif diff --git a/src/rocksdb/util/crc32c_ppc_asm.S b/src/rocksdb/util/crc32c_ppc_asm.S new file mode 100644 index 000000000..6959ba839 --- /dev/null +++ b/src/rocksdb/util/crc32c_ppc_asm.S @@ -0,0 +1,756 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2015 Anton Blanchard , IBM +// Copyright (c) 2017 International Business Machines Corp. +// All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
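crc32_vpmsum() above follows a common shape for vectorized checksums: a scalar routine for the unaligned head and the short tail, with the wide kernel reserved for the 16-byte-aligned middle. A generic sketch of that split is shown here; ScalarCrc/VectorCrc-style callbacks are stand-ins for crc32_align() and __crc32_vpmsum(), not real symbols.

    #include <cstddef>
    #include <cstdint>

    using CrcFn = uint32_t (*)(uint32_t, const unsigned char*, size_t);

    inline uint32_t AlignedSplitCrc(uint32_t crc, const unsigned char* p,
                                    size_t len, CrcFn scalar, CrcFn vector16) {
      constexpr size_t kAlign = 16;  // VMX_ALIGN in crc32c_ppc.c
      size_t head =
          (kAlign - reinterpret_cast<uintptr_t>(p) % kAlign) % kAlign;
      if (head > len) head = len;
      crc = scalar(crc, p, head);  // unaligned prefix, handled byte at a time
      p += head;
      len -= head;
      size_t bulk = len & ~(kAlign - 1);  // largest 16-byte multiple remaining
      if (bulk) crc = vector16(crc, p, bulk);  // the vpmsum-style kernel
      return scalar(crc, p + bulk, len - bulk);  // sub-16-byte tail
    }

The crc32c_ppc() wrapper above layers one more concern on top of this split: a zero-filled scratch buffer when the caller passes a null data pointer.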
+ +#if defined (__clang__) +#include "third-party/gcc/ppc-asm.h" +#else +#include +#endif +#include "ppc-opcode.h" + +#undef toc + +#ifndef r1 +#define r1 1 +#endif + +#ifndef r2 +#define r2 2 +#endif + + .section .rodata +.balign 16 + +.byteswap_constant: + /* byte reverse permute constant */ + .octa 0x0F0E0D0C0B0A09080706050403020100 + +#define __ASSEMBLY__ +#include "crc32c_ppc_constants.h" + + .text + +#if defined(__BIG_ENDIAN__) && defined(REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#else +#undef BYTESWAP_DATA +#endif + +#define off16 r25 +#define off32 r26 +#define off48 r27 +#define off64 r28 +#define off80 r29 +#define off96 r30 +#define off112 r31 + +#define const1 v24 +#define const2 v25 + +#define byteswap v26 +#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 + +#ifdef BYTESWAP_DATA +#define VPERM(A, B, C, D) vperm A, B, C, D +#else +#define VPERM(A, B, C, D) +#endif + +/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */ +FUNC_START(__crc32_vpmsum) + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + li off16,16 + li off32,32 + li off48,48 + li off64,64 + li off80,80 + li off96,96 + li off112,112 + li r0,0 + + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + + mr r10,r3 + + vxor zeroes,zeroes,zeroes + vspltisw v0,-1 + + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 + + /* Get the initial value into v8 */ + vxor v8,v8,v8 + MTVRD(v8, r3) +#ifdef REFLECT + vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */ +#else + vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */ +#endif + +#ifdef BYTESWAP_DATA + addis r3,r2,.byteswap_constant@toc@ha + addi r3,r3,.byteswap_constant@toc@l + + lvx byteswap,0,r3 + addi r3,r3,16 +#endif + + cmpdi r5,256 + blt .Lshort + + rldicr r6,r5,0,56 + + /* Checksum in blocks of MAX_SIZE */ +1: lis r7,MAX_SIZE@h + ori r7,r7,MAX_SIZE@l + mr r9,r7 + cmpd r6,r7 + bgt 2f + mr r7,r6 +2: subf r6,r7,r6 + + /* our main loop does 128 bytes at a time */ + srdi r7,r7,7 + + /* + * Work out the offset into the constants table to start at. Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + sldi r8,r7,4 + srdi r9,r9,3 + subf r8,r8,r9 + + /* We reduce our final 128 bytes in a separate step */ + addi r7,r7,-1 + mtctr r7 + + addis r3,r2,.constants@toc@ha + addi r3,r3,.constants@toc@l + + /* Find the start of our constants */ + add r3,r3,r8 + + /* zero v0-v7 which will contain our checksums */ + vxor v0,v0,v0 + vxor v1,v1,v1 + vxor v2,v2,v2 + vxor v3,v3,v3 + vxor v4,v4,v4 + vxor v5,v5,v5 + vxor v6,v6,v6 + vxor v7,v7,v7 + + lvx const1,0,r3 + + /* + * If we are looping back to consume more data we use the values + * already in v16-v23. 
+ */ + cmpdi r0,1 + beq 2f + + /* First warm up pass */ + lvx v16,0,r4 + lvx v17,off16,r4 + VPERM(v16,v16,v16,byteswap) + VPERM(v17,v17,v17,byteswap) + lvx v18,off32,r4 + lvx v19,off48,r4 + VPERM(v18,v18,v18,byteswap) + VPERM(v19,v19,v19,byteswap) + lvx v20,off64,r4 + lvx v21,off80,r4 + VPERM(v20,v20,v20,byteswap) + VPERM(v21,v21,v21,byteswap) + lvx v22,off96,r4 + lvx v23,off112,r4 + VPERM(v22,v22,v22,byteswap) + VPERM(v23,v23,v23,byteswap) + addi r4,r4,8*16 + + /* xor in initial value */ + vxor v16,v16,v8 + +2: bdz .Lfirst_warm_up_done + + addi r3,r3,16 + lvx const2,0,r3 + + /* Second warm up pass */ + VPMSUMD(v8,v16,const1) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + VPMSUMD(v9,v17,const1) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + VPMSUMD(v10,v18,const1) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + VPMSUMD(v11,v19,const1) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + ori r2,r2,0 + + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdz .Lfirst_cool_down + + /* + * main loop. We modulo schedule it such that it takes three iterations + * to complete - first iteration load, second iteration vpmsum, third + * iteration xor. + */ + .balign 16 +4: lvx const1,0,r3 + addi r3,r3,16 + ori r2,r2,0 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const2) + lvx v16,0,r4 + VPERM(v16,v16,v16,byteswap) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const2) + lvx v17,off16,r4 + VPERM(v17,v17,v17,byteswap) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const2) + lvx v18,off32,r4 + VPERM(v18,v18,v18,byteswap) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const2) + lvx v19,off48,r4 + VPERM(v19,v19,v19,byteswap) + lvx const2,0,r3 + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + lvx v20,off64,r4 + VPERM(v20,v20,v20,byteswap) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + lvx v21,off80,r4 + VPERM(v21,v21,v21,byteswap) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + lvx v22,off96,r4 + VPERM(v22,v22,v22,byteswap) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + lvx v23,off112,r4 + VPERM(v23,v23,v23,byteswap) + + addi r4,r4,8*16 + + bdnz 4b + +.Lfirst_cool_down: + /* First cool down pass */ + lvx const1,0,r3 + addi r3,r3,16 + + vxor v0,v0,v8 + VPMSUMD(v8,v16,const1) + ori r2,r2,0 + + vxor v1,v1,v9 + VPMSUMD(v9,v17,const1) + ori r2,r2,0 + + vxor v2,v2,v10 + VPMSUMD(v10,v18,const1) + ori r2,r2,0 + + vxor v3,v3,v11 + VPMSUMD(v11,v19,const1) + ori r2,r2,0 + + vxor v4,v4,v12 + VPMSUMD(v12,v20,const1) + ori r2,r2,0 + + vxor v5,v5,v13 + VPMSUMD(v13,v21,const1) + ori r2,r2,0 + + vxor v6,v6,v14 + VPMSUMD(v14,v22,const1) + ori r2,r2,0 + + vxor v7,v7,v15 + VPMSUMD(v15,v23,const1) + ori r2,r2,0 + +.Lsecond_cool_down: + /* Second cool down pass */ + vxor v0,v0,v8 + vxor v1,v1,v9 + vxor v2,v2,v10 + vxor v3,v3,v11 + vxor v4,v4,v12 + vxor v5,v5,v13 + vxor v6,v6,v14 + vxor v7,v7,v15 + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. 
+ */ + vsldoi v0,v0,zeroes,4 + vsldoi v1,v1,zeroes,4 + vsldoi v2,v2,zeroes,4 + vsldoi v3,v3,zeroes,4 + vsldoi v4,v4,zeroes,4 + vsldoi v5,v5,zeroes,4 + vsldoi v6,v6,zeroes,4 + vsldoi v7,v7,zeroes,4 +#endif + + /* xor with last 1024 bits */ + lvx v8,0,r4 + lvx v9,off16,r4 + VPERM(v8,v8,v8,byteswap) + VPERM(v9,v9,v9,byteswap) + lvx v10,off32,r4 + lvx v11,off48,r4 + VPERM(v10,v10,v10,byteswap) + VPERM(v11,v11,v11,byteswap) + lvx v12,off64,r4 + lvx v13,off80,r4 + VPERM(v12,v12,v12,byteswap) + VPERM(v13,v13,v13,byteswap) + lvx v14,off96,r4 + lvx v15,off112,r4 + VPERM(v14,v14,v14,byteswap) + VPERM(v15,v15,v15,byteswap) + + addi r4,r4,8*16 + + vxor v16,v0,v8 + vxor v17,v1,v9 + vxor v18,v2,v10 + vxor v19,v3,v11 + vxor v20,v4,v12 + vxor v21,v5,v13 + vxor v22,v6,v14 + vxor v23,v7,v15 + + li r0,1 + cmpdi r6,0 + addi r6,r6,128 + bne 1b + + /* Work out how many bytes we have left */ + andi. r5,r5,127 + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,128 + add r3,r3,r6 + + /* How many 16 byte chunks are in the tail */ + srdi r7,r5,4 + mtctr r7 + + /* + * Reduce the previously calculated 1024 bits to 64 bits, shifting + * 32 bits to include the trailing 32 bits of zeros + */ + lvx v0,0,r3 + lvx v1,off16,r3 + lvx v2,off32,r3 + lvx v3,off48,r3 + lvx v4,off64,r3 + lvx v5,off80,r3 + lvx v6,off96,r3 + lvx v7,off112,r3 + addi r3,r3,8*16 + + VPMSUMW(v0,v16,v0) + VPMSUMW(v1,v17,v1) + VPMSUMW(v2,v18,v2) + VPMSUMW(v3,v19,v3) + VPMSUMW(v4,v20,v4) + VPMSUMW(v5,v21,v5) + VPMSUMW(v6,v22,v6) + VPMSUMW(v7,v23,v7) + + /* Now reduce the tail (0 - 112 bytes) */ + cmpdi r7,0 + beq 1f + + lvx v16,0,r4 + lvx v17,0,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off16,r4 + lvx v17,off16,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off32,r4 + lvx v17,off32,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off48,r4 + lvx v17,off48,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off64,r4 + lvx v17,off64,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off80,r4 + lvx v17,off80,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + bdz 1f + + lvx v16,off96,r4 + lvx v17,off96,r3 + VPERM(v16,v16,v16,byteswap) + VPMSUMW(v16,v16,v17) + vxor v0,v0,v16 + + /* Now xor all the parallel chunks together */ +1: vxor v0,v0,v1 + vxor v2,v2,v3 + vxor v4,v4,v5 + vxor v6,v6,v7 + + vxor v0,v0,v2 + vxor v4,v4,v6 + + vxor v0,v0,v4 + +.Lbarrett_reduction: + /* Barrett constants */ + addis r3,r2,.barrett_constants@toc@ha + addi r3,r3,.barrett_constants@toc@l + + lvx const1,0,r3 + lvx const2,off16,r3 + + vsldoi v1,v0,v0,8 + vxor v0,v0,v1 /* xor two 64 bit results together */ + +#ifdef REFLECT + /* shift left one bit */ + vspltisb v1,1 + vsl v0,v0,v1 +#endif + + vand v0,v0,mask_64bit + +#ifndef REFLECT + /* + * Now for the Barrett reduction algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + VPMSUMD(v1,v0,const1) /* ma */ + vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Get the result into r3. 
We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */ +#else + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. + */ + vand v1,v0,mask_32bit /* bottom 32 bits of a */ + VPMSUMD(v1,v1,const1) /* ma */ + vand v1,v1,mask_32bit /* bottom 32bits of ma */ + VPMSUMD(v1,v1,const2) /* qn */ + vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */ + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ +#endif + + /* Get it into r3 */ + MFVRD(r3, v0) + +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + lvx v29,off16,r7 + + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + ld r26,-48(r1) + ld r25,-56(r1) + + blr + +.Lfirst_warm_up_done: + lvx const1,0,r3 + addi r3,r3,16 + + VPMSUMD(v8,v16,const1) + VPMSUMD(v9,v17,const1) + VPMSUMD(v10,v18,const1) + VPMSUMD(v11,v19,const1) + VPMSUMD(v12,v20,const1) + VPMSUMD(v13,v21,const1) + VPMSUMD(v14,v22,const1) + VPMSUMD(v15,v23,const1) + + b .Lsecond_cool_down + +.Lshort: + cmpdi r5,0 + beq .Lzero + + addis r3,r2,.short_constants@toc@ha + addi r3,r3,.short_constants@toc@l + + /* Calculate where in the constant table we need to start */ + subfic r6,r5,256 + add r3,r3,r6 + + /* How many 16 byte chunks? 
*/ + srdi r7,r5,4 + mtctr r7 + + vxor v19,v19,v19 + vxor v20,v20,v20 + + lvx v0,0,r4 + lvx v16,0,r3 + VPERM(v0,v0,v16,byteswap) + vxor v0,v0,v8 /* xor in initial value */ + VPMSUMW(v0,v0,v16) + bdz .Lv0 + + lvx v1,off16,r4 + lvx v17,off16,r3 + VPERM(v1,v1,v17,byteswap) + VPMSUMW(v1,v1,v17) + bdz .Lv1 + + lvx v2,off32,r4 + lvx v16,off32,r3 + VPERM(v2,v2,v16,byteswap) + VPMSUMW(v2,v2,v16) + bdz .Lv2 + + lvx v3,off48,r4 + lvx v17,off48,r3 + VPERM(v3,v3,v17,byteswap) + VPMSUMW(v3,v3,v17) + bdz .Lv3 + + lvx v4,off64,r4 + lvx v16,off64,r3 + VPERM(v4,v4,v16,byteswap) + VPMSUMW(v4,v4,v16) + bdz .Lv4 + + lvx v5,off80,r4 + lvx v17,off80,r3 + VPERM(v5,v5,v17,byteswap) + VPMSUMW(v5,v5,v17) + bdz .Lv5 + + lvx v6,off96,r4 + lvx v16,off96,r3 + VPERM(v6,v6,v16,byteswap) + VPMSUMW(v6,v6,v16) + bdz .Lv6 + + lvx v7,off112,r4 + lvx v17,off112,r3 + VPERM(v7,v7,v17,byteswap) + VPMSUMW(v7,v7,v17) + bdz .Lv7 + + addi r3,r3,128 + addi r4,r4,128 + + lvx v8,0,r4 + lvx v16,0,r3 + VPERM(v8,v8,v16,byteswap) + VPMSUMW(v8,v8,v16) + bdz .Lv8 + + lvx v9,off16,r4 + lvx v17,off16,r3 + VPERM(v9,v9,v17,byteswap) + VPMSUMW(v9,v9,v17) + bdz .Lv9 + + lvx v10,off32,r4 + lvx v16,off32,r3 + VPERM(v10,v10,v16,byteswap) + VPMSUMW(v10,v10,v16) + bdz .Lv10 + + lvx v11,off48,r4 + lvx v17,off48,r3 + VPERM(v11,v11,v17,byteswap) + VPMSUMW(v11,v11,v17) + bdz .Lv11 + + lvx v12,off64,r4 + lvx v16,off64,r3 + VPERM(v12,v12,v16,byteswap) + VPMSUMW(v12,v12,v16) + bdz .Lv12 + + lvx v13,off80,r4 + lvx v17,off80,r3 + VPERM(v13,v13,v17,byteswap) + VPMSUMW(v13,v13,v17) + bdz .Lv13 + + lvx v14,off96,r4 + lvx v16,off96,r3 + VPERM(v14,v14,v16,byteswap) + VPMSUMW(v14,v14,v16) + bdz .Lv14 + + lvx v15,off112,r4 + lvx v17,off112,r3 + VPERM(v15,v15,v17,byteswap) + VPMSUMW(v15,v15,v17) + +.Lv15: vxor v19,v19,v15 +.Lv14: vxor v20,v20,v14 +.Lv13: vxor v19,v19,v13 +.Lv12: vxor v20,v20,v12 +.Lv11: vxor v19,v19,v11 +.Lv10: vxor v20,v20,v10 +.Lv9: vxor v19,v19,v9 +.Lv8: vxor v20,v20,v8 +.Lv7: vxor v19,v19,v7 +.Lv6: vxor v20,v20,v6 +.Lv5: vxor v19,v19,v5 +.Lv4: vxor v20,v20,v4 +.Lv3: vxor v19,v19,v3 +.Lv2: vxor v20,v20,v2 +.Lv1: vxor v19,v19,v1 +.Lv0: vxor v20,v20,v0 + + vxor v0,v19,v20 + + b .Lbarrett_reduction + +.Lzero: + mr r3,r10 + b .Lout + +FUNC_END(__crc32_vpmsum) diff --git a/src/rocksdb/util/crc32c_ppc_constants.h b/src/rocksdb/util/crc32c_ppc_constants.h new file mode 100644 index 000000000..f6494cd01 --- /dev/null +++ b/src/rocksdb/util/crc32c_ppc_constants.h @@ -0,0 +1,900 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (C) 2015, 2017 International Business Machines Corp. +// All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
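An aside on the Barrett reduction at the tail of __crc32_vpmsum above (this only restates the code comments, using the m/n names of the .barrett_constants defined at the end of this header): with carry-less multiplication in GF(2), the folded 64-bit remainder a is reduced to a 32-bit CRC by

q = \left\lfloor \frac{m \cdot a}{2^{64}} \right\rfloor, \qquad r = a \oplus (q \cdot n)

where n is the CRC-32C polynomial and m = \lfloor x^{64} / n(x) \rfloor; subtraction in GF(2) is XOR, which is why the final step in the code is a plain vxor.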
+ +#pragma once + +#define CRC 0x1edc6f41 +#define REFLECT +#define CRC_XOR + +#ifndef __ASSEMBLY__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, + 0x26a1e7e8, 0xd4ca64eb, 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 0x105ec76f, 0xe235446c, + 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, + 0xbc267848, 0x4e4dfb4b, 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 0xaa64d611, 0x580f5512, + 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, + 0x1642ae59, 0xe4292d5a, 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 0x417b1dbc, 0xb3109ebf, + 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, + 0xed03a29b, 0x1f682198, 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 0xdbfc821c, 0x2997011f, + 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, + 0x4767748a, 0xb50cf789, 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 0x7198540d, 0x83f3d70e, + 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, + 0xdde0eb2a, 0x2f8b6829, 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 0x082f63b7, 0xfa44e0b4, + 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, + 0xb4091bff, 0x466298fc, 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 0xa24bb5a6, 0x502036a5, + 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, + 0x0e330a81, 0xfc588982, 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 0x38cc2a06, 0xcaa7a905, + 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, + 0xe52cc12c, 0x1747422f, 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 0xd3d3e1ab, 0x21b862a8, + 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, + 0x7fab5e8c, 0x8dc0dd8f, 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 0x69e9f0d5, 0x9b8273d6, + 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, + 0xd5cf889d, 0x27a40b9e, 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, +}; + +#endif + +#else +#define MAX_SIZE 32768 +.constants : + + /* Reduce 262144 kbits to 1024 bits */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + .octa 0x00000000b6ca9e20000000009c37c408 + + /* x^260096 mod p(x)` << 1, 
x^260160 mod p(x)` << 1 */ + .octa 0x00000000350249a800000001b51df26c + + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + .octa 0x00000001862dac54000000000724b9d0 + + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + .octa 0x00000001d87fb48c00000001c00532fe + + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + .octa 0x00000001f39b699e00000000f05a9362 + + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + .octa 0x0000000101da11b400000001e1007970 + + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + .octa 0x00000001cab571e000000000a57366ee + + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + .octa 0x00000000c7020cfe0000000192011284 + + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + .octa 0x00000000cdaed1ae0000000162716d9a + + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + .octa 0x00000001e804effc00000000cd97ecde + + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + .octa 0x0000000077c3ea3a0000000058812bc0 + + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + .octa 0x0000000068df31b40000000088b8c12e + + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + .octa 0x00000000b059b6c200000001230b234c + + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + .octa 0x0000000145fb8ed800000001120b416e + + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + .octa 0x00000000cbc0916800000001974aecb0 + + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + .octa 0x000000005ceeedc2000000008ee3f226 + + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + .octa 0x0000000047d74e8600000001089aba9a + + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + .octa 0x00000001407e9e220000000065113872 + + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + .octa 0x00000001da967bda000000005c07ec10 + + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + .octa 0x000000006c8983680000000187590924 + + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + .octa 0x00000000f2d14c9800000000e35da7c6 + + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + .octa 0x00000001993c6ad4000000000415855a + + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + .octa 0x000000014683d1ac0000000073617758 + + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + .octa 0x00000001a7c93e6c0000000176021d28 + + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + .octa 0x000000010211e90a00000001c358fd0a + + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + .octa 0x000000001119403e00000001ff7a2c18 + + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + .octa 0x000000001c3261aa00000000f2d9f7e4 + + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + .octa 0x000000014e37a634000000016cf1f9c8 + + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + .octa 0x0000000073786c0c000000010af9279a + + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + .octa 0x000000011dc037f80000000004f101e8 + + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + .octa 0x0000000031433dfc0000000070bcf184 + + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + .octa 0x000000009cde8348000000000a8de642 + + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + .octa 0x0000000038d3c2a60000000062ea130c + + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + .octa 0x000000011b25f26000000001eb31cbb2 + + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + .octa 0x000000001629e6f00000000170783448 + + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + .octa 0x0000000160838b4c00000001a684b4c6 + + /* x^224256 
mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + .octa 0x000000007a44011c00000000253ca5b4 + + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + .octa 0x00000000226f417a0000000057b4b1e2 + + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + .octa 0x0000000045eb2eb400000000b6bd084c + + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + .octa 0x000000014459d70c0000000123c2d592 + + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + .octa 0x00000001d406ed8200000000159dafce + + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + .octa 0x0000000160c8e1a80000000127e1a64e + + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + .octa 0x0000000027ba80980000000056860754 + + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + .octa 0x000000006d92d01800000001e661aae8 + + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + .octa 0x000000012ed7e3f200000000f82c6166 + + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + .octa 0x000000002dc8778800000000c4f9c7ae + + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + .octa 0x0000000018240bb80000000074203d20 + + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + .octa 0x000000001ad381580000000198173052 + + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + .octa 0x00000001396b78f200000001ce8aba54 + + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + .octa 0x000000011a68133400000001850d5d94 + + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + .octa 0x000000012104732e00000001d609239c + + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + .octa 0x00000000a140d90c000000001595f048 + + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + .octa 0x00000001b7215eda0000000042ccee08 + + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + .octa 0x00000001aaf1df3c000000010a389d74 + + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + .octa 0x0000000029d15b8a000000012a840da6 + + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + .octa 0x00000000f1a96922000000001d181c0c + + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + .octa 0x00000001ac80d03c0000000068b7d1f6 + + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + .octa 0x000000000f11d56a000000005b0f14fc + + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + .octa 0x00000001f1c022a20000000179e9e730 + + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + .octa 0x0000000173d00ae200000001ce1368d6 + + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + .octa 0x00000001d4ffe4ac0000000112c3a84c + + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + .octa 0x000000016edc5ae400000000de940fee + + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + .octa 0x00000001f1a0214000000000fe896b7e + + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + .octa 0x00000000ca0b28a000000001f797431c + + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + .octa 0x00000001928e30a20000000053e989ba + + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + .octa 0x0000000097b1b002000000003920cd16 + + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + .octa 0x00000000b15bf90600000001e6f579b8 + + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + .octa 0x00000000411c5d52000000007493cb0a + + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + .octa 0x00000001c36f330000000001bdd376d8 + + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + .octa 0x00000001119227e0000000016badfee6 + + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + .octa 0x00000000114d47020000000071de5c58 
+ + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + .octa 0x00000000458b5b9800000000453f317c + + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + .octa 0x000000012e31fb8e0000000121675cce + + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + .octa 0x000000005cf619d800000001f409ee92 + + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + .octa 0x0000000063f4d8b200000000f36b9c88 + + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + .octa 0x000000004138dc8a0000000036b398f4 + + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + .octa 0x00000001d29ee8e000000001748f9adc + + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + .octa 0x000000006a08ace800000001be94ec00 + + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + .octa 0x0000000127d4201000000000b74370d6 + + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + .octa 0x0000000019d76b6200000001174d0b98 + + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + .octa 0x00000001b1471f6e00000000befc06a4 + + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + .octa 0x00000001f64c19cc00000001ae125288 + + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + .octa 0x00000000003c0ea00000000095c19b34 + + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + .octa 0x000000014d73abf600000001a78496f2 + + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + .octa 0x00000001620eb84400000001ac5390a0 + + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + .octa 0x0000000147655048000000002a80ed6e + + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + .octa 0x0000000067b5077e00000001fa9b0128 + + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + .octa 0x0000000010ffe20600000001ea94929e + + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + .octa 0x000000000fee8f1e0000000125f4305c + + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + .octa 0x00000001da26fbae00000001471e2002 + + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + .octa 0x00000001b3a8bd880000000132d2253a + + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + .octa 0x00000000e8f3898e00000000f26b3592 + + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + .octa 0x00000000b0d0d28c00000000bc8b67b0 + + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + .octa 0x0000000030f2a798000000013a826ef2 + + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + .octa 0x000000000fba10020000000081482c84 + + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + .octa 0x00000000bdb9bd7200000000e77307c2 + + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + .octa 0x0000000075d3bf5a00000000d4a07ec8 + + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + .octa 0x00000000ef1f98a00000000017102100 + + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + .octa 0x00000000689c760200000000db406486 + + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + .octa 0x000000016d5fa5fe0000000192db7f88 + + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + .octa 0x00000001d0d2b9ca000000018bf67b1e + + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + .octa 0x0000000041e7b470000000007c09163e + + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + .octa 0x00000001cbb6495e000000000adac060 + + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + .octa 0x000000010052a0b000000000bd8316ae + + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + .octa 0x00000001d8effb5c000000019f09ab54 + + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + .octa 
0x00000001d969853c0000000125155542 + + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + .octa 0x00000000523ccce2000000018fdb5882 + + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + .octa 0x000000001e2436bc00000000e794b3f4 + + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + .octa 0x00000000ddd1c3a2000000016f9bb022 + + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + .octa 0x0000000019fcfe3800000000290c9978 + + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + .octa 0x00000001ce95db640000000083c0f350 + + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + .octa 0x00000000af5828060000000173ea6628 + + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + .octa 0x00000001006388f600000001c8b4e00a + + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + .octa 0x0000000179eca00a00000000de95d6aa + + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + .octa 0x0000000122410a6a000000010b7f7248 + + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + .octa 0x000000004288e87c00000001326e3a06 + + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + .octa 0x000000016c5490da00000000bb62c2e6 + + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + .octa 0x00000000d1c71f6e0000000156a4b2c2 + + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + .octa 0x00000001b4ce08a6000000011dfe763a + + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + .octa 0x00000001466ba60c000000007bcca8e2 + + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + .octa 0x00000001f6c488a40000000186118faa + + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + .octa 0x000000013bfb06820000000111a65a88 + + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + .octa 0x00000000690e9e54000000003565e1c4 + + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + .octa 0x00000000281346b6000000012ed02a82 + + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + .octa 0x000000015646402400000000c486ecfc + + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + .octa 0x000000016063a8dc0000000001b951b2 + + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + .octa 0x0000000116a663620000000048143916 + + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + .octa 0x000000017e8aa4d200000001dc2ae124 + + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + .octa 0x00000001728eb10c00000001416c58d6 + + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + .octa 0x00000001b08fd7fa00000000a479744a + + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + .octa 0x00000001092a16e80000000096ca3a26 + + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + .octa 0x00000000a505637c00000000ff223d4e + + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + .octa 0x00000000d94869b2000000010e84da42 + + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + .octa 0x00000001c8b203ae00000001b61ba3d0 + + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + .octa 0x000000005704aea000000000680f2de8 + + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + .octa 0x000000012e295fa2000000008772a9a8 + + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + .octa 0x000000011d0908bc0000000155f295bc + + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + .octa 0x0000000193ed97ea00000000595f9282 + + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + .octa 0x000000013a0f1c520000000164b1c25a + + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + .octa 0x000000010c2c40c000000000fbd67c50 + + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` 
<< 1 */ + .octa 0x00000000ff6fac3e0000000096076268 + + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + .octa 0x000000017b3609c000000001d288e4cc + + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + .octa 0x0000000088c8c92200000001eaac1bdc + + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + .octa 0x00000001751baae600000001f1ea39e2 + + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + .octa 0x000000010795297200000001eb6506fc + + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + .octa 0x0000000162b00abe000000010f806ffe + + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + .octa 0x000000000d7b404c000000010408481e + + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + .octa 0x00000000763b13d40000000188260534 + + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + .octa 0x00000000f6dc22d80000000058fc73e0 + + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + .octa 0x000000007daae06000000000391c59b8 + + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + .octa 0x000000013359ab7c000000018b638400 + + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + .octa 0x000000008add438a000000011738f5c4 + + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + .octa 0x00000001edbefdea000000008cf7c6da + + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + .octa 0x000000004104e0f800000001ef97fb16 + + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + .octa 0x00000000b48a82220000000102130e20 + + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + .octa 0x00000001bcb4684400000000db968898 + + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + .octa 0x000000013293ce0a00000000b5047b5e + + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + .octa 0x00000001710d0844000000010b90fdb2 + + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + .octa 0x0000000117907f6e000000004834a32e + + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + .octa 0x0000000087ddf93e0000000059c8f2b0 + + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + .octa 0x000000005970e9b00000000122cec508 + + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + .octa 0x0000000185b2b7d0000000000a330cda + + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + .octa 0x00000001dcee0efc000000014a47148c + + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + .octa 0x0000000030da27220000000042c61cb8 + + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + .octa 0x000000012f925a180000000012fe6960 + + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + .octa 0x00000000dd2e357c00000000dbda2c20 + + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + .octa 0x00000000071c80de000000011122410c + + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + .octa 0x000000011513140a00000000977b2070 + + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + .octa 0x00000001df876e8e000000014050438e + + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + .octa 0x000000015f81d6ce0000000147c840e8 + + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + .octa 0x000000019dd94dbe00000001cc7c88ce + + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + .octa 0x00000001373d206e00000001476b35a4 + + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + .octa 0x00000000668ccade000000013d52d508 + + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + .octa 0x00000001b192d268000000008e4be32e + + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + .octa 0x00000000e30f3a7800000000024120fe + + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + .octa 
0x000000010ef1f7bc00000000ddecddb4 + + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + .octa 0x00000001f5ac738000000000d4d403bc + + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + .octa 0x000000011822ea7000000001734b89aa + + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + .octa 0x00000000c3a33848000000010e7a58d6 + + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + .octa 0x00000001bd151c2400000001f9f04e9c + + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + .octa 0x0000000056002d7600000000b692225e + + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + .octa 0x000000014657c4f4000000019b8d3f3e + + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + .octa 0x0000000113742d7c00000001a874f11e + + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + .octa 0x000000019c5920ba000000010d5a4254 + + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + .octa 0x000000005216d2d600000000bbb2f5d6 + + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + .octa 0x0000000136f5ad8a0000000179cc0e36 + + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + .octa 0x000000018b07beb600000001dca1da4a + + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + .octa 0x00000000db1e93b000000000feb1a192 + + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + .octa 0x000000000b96fa3a00000000d1eeedd6 + + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + .octa 0x00000001d9968af0000000008fad9bb4 + + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + .octa 0x000000000e4a77a200000001884938e4 + + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + .octa 0x00000000508c2ac800000001bc2e9bc0 + + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + .octa 0x0000000021572a8000000001f9658a68 + + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + .octa 0x00000001b859daf2000000001b9224fc + + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + .octa 0x000000016f7884740000000055b2fb84 + + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + .octa 0x00000001b438810e000000018b090348 + + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + .octa 0x0000000095ddc6f2000000011ccbd5ea + + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + .octa 0x00000001d977c20c0000000007ae47f8 + + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + .octa 0x00000000ebedb99a0000000172acbec0 + + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + .octa 0x00000001df9e9e9200000001c6e3ff20 + + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + .octa 0x00000001a4a3f95200000000e1b38744 + + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + .octa 0x00000000e2f5122000000000791585b2 + + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + .octa 0x000000004aa01f3e00000000ac53b894 + + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + .octa 0x00000000b3e90a5800000001ed5f2cf4 + + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + .octa 0x000000000c9ca2aa00000001df48b2e0 + + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + .octa 0x000000015168231600000000049c1c62 + + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + .octa 0x0000000036fce78c000000017c460c12 + + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + .octa 0x000000009037dc10000000015be4da7e + + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + .octa 0x00000000d3298582000000010f38f668 + + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + .octa 0x00000001b42e8ad60000000039f40a00 + + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + .octa 0x00000000142a983800000000bd4c10c4 + + /* x^45056 mod 
p(x)` << 1, x^45120 mod p(x)` << 1 */ + .octa 0x0000000109c7f1900000000042db1d98 + + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + .octa 0x0000000056ff931000000001c905bae6 + + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + .octa 0x00000001594513aa00000000069d40ea + + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + .octa 0x00000001e3b5b1e8000000008e4fbad0 + + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + .octa 0x000000011dd5fc080000000047bedd46 + + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + .octa 0x00000001675f0cc20000000026396bf8 + + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + .octa 0x00000000d1c8dd4400000000379beb92 + + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + .octa 0x0000000115ebd3d8000000000abae54a + + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + .octa 0x00000001ecbd0dac0000000007e6a128 + + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + .octa 0x00000000cdf67af2000000000ade29d2 + + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + .octa 0x000000004c01ff4c00000000f974c45c + + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + .octa 0x00000000f2d8657e00000000e77ac60a + + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + .octa 0x000000006bae74c40000000145895816 + + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + .octa 0x0000000152af8aa00000000038e362be + + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + .octa 0x0000000004663802000000007f991a64 + + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + .octa 0x00000001ab2f5afc00000000fa366d3a + + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + .octa 0x0000000074a4ebd400000001a2bb34f0 + + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + .octa 0x00000001d7ab3a4c0000000028a9981e + + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + .octa 0x00000001a8da60c600000001dbc672be + + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + .octa 0x000000013cf6382000000000b04d77f6 + + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + .octa 0x00000000bec12e1e0000000124400d96 + + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + .octa 0x00000001c6368010000000014ca4b414 + + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + .octa 0x00000001e6e78758000000012fe2c938 + + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + .octa 0x000000008d7f2b3c00000001faed01e6 + + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + .octa 0x000000016b4a156e000000007e80ecfe + + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + .octa 0x00000001c63cfeb60000000098daee94 + + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + .octa 0x000000015f902670000000010a04edea + + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + .octa 0x00000001cd5de11e00000001c00b4524 + + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + .octa 0x000000001acaec540000000170296550 + + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + .octa 0x000000002bd0ca780000000181afaa48 + + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + .octa 0x0000000032d63d5c0000000185a31ffa + + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + .octa 0x000000001c6d4e4c000000002469f608 + + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + .octa 0x0000000106a60b92000000006980102a + + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + .octa 0x00000000d3855e120000000111ea9ca8 + + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + .octa 0x00000000e312563600000001bd1d29ce + + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + .octa 
0x000000009e8f7ea400000001b34b9580 + + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + .octa 0x00000001c82e562c000000003076054e + + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + .octa 0x00000000ca9f09ce000000012a608ea4 + + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + .octa 0x00000000c63764e600000000784d05fe + + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + .octa 0x0000000168d2e49e000000016ef0d82a + + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + .octa 0x00000000e986c1480000000075bda454 + + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + .octa 0x00000000cfb65894000000003dc0a1c4 + + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + .octa 0x0000000111cadee400000000e9a5d8be + + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + .octa 0x0000000171fb63ce00000001609bc4b4 + + .short_constants : + + /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include + the trailing 32 bits of zeros */ + /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod + p(x)` */ + .octa 0x7fec2963e5bf80485cf015c388e56f72 + + /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod + p(x)` */ + .octa 0x38e888d4844752a9963a18920246e2e6 + + /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod + p(x)` */ + .octa 0x42316c00730206ad419a441956993a31 + + /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod + p(x)` */ + .octa 0x543d5c543e65ddf9924752ba2b830011 + + /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod + p(x)` */ + .octa 0x78e87aaf56767c9255bd7f9518e4a304 + + /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod + p(x)` */ + .octa 0x8f68fcec1903da7f6d76739fe0553f1e + + /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod + p(x)` */ + .octa 0x3f4840246791d588c133722b1fe0b5c3 + + /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod + p(x)` */ + .octa 0x34c96751b04de25a64b67ee0e55ef1f3 + + /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` + */ + .octa 0x156c8e180b4a395b069db049b8fdb1e7 + + /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */ + .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e + + /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */ + .octa 0x041d37768cd75659817cdc5119b29a35 + + /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */ + .octa 0x3a0777818cfaa9651ce9d94b36c41f1c + + /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */ + .octa 0x0e148e8252377a554f256efcb82be955 + + /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */ + .octa 0x9c25531d19e65ddeec1631edb2dea967 + + /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */ + .octa 0x790606ff9957c0a65d27e147510ac59a + + /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */ + .octa 0x82f63b786ea2d55ca66805eb18b8ea18 + + .barrett_constants : + /* 33 bit reflected Barrett constant m - (4^32)/n */ + .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */ + /* 33 bit reflected Barrett constant n */ + .octa 0x00000000000000000000000105ec76f1 +#endif diff --git a/src/rocksdb/util/crc32c_test.cc b/src/rocksdb/util/crc32c_test.cc new file mode 100644 index 000000000..715d63e2d --- /dev/null +++ b/src/rocksdb/util/crc32c_test.cc @@ -0,0 +1,213 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "util/crc32c.h"
+
+#include "test_util/testharness.h"
+#include "util/coding.h"
+#include "util/random.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace crc32c {
+
+class CRC {};
+
+// Tests for 3-way crc32c algorithm. We need these tests because it uses
+// different lookup tables than the original Fast_CRC32
+const unsigned int BUFFER_SIZE = 512 * 1024 * sizeof(uint64_t);
+char buffer[BUFFER_SIZE];
+
+struct ExpectedResult {
+  size_t offset;
+  size_t length;
+  uint32_t crc32c;
+};
+
+ExpectedResult expectedResults[] = {
+    // Zero-byte input
+    {0, 0, ~0U},
+    // Small aligned inputs to test special cases in SIMD implementations
+    {8, 1, 1543413366},
+    {8, 2, 523493126},
+    {8, 3, 1560427360},
+    {8, 4, 3422504776},
+    {8, 5, 447841138},
+    {8, 6, 3910050499},
+    {8, 7, 3346241981},
+    // Small unaligned inputs
+    {9, 1, 3855826643},
+    {10, 2, 560880875},
+    {11, 3, 1479707779},
+    {12, 4, 2237687071},
+    {13, 5, 4063855784},
+    {14, 6, 2553454047},
+    {15, 7, 1349220140},
+    // Larger inputs to test leftover chunks at the end of aligned blocks
+    {8, 8, 627613930},
+    {8, 9, 2105929409},
+    {8, 10, 2447068514},
+    {8, 11, 863807079},
+    {8, 12, 292050879},
+    {8, 13, 1411837737},
+    {8, 14, 2614515001},
+    {8, 15, 3579076296},
+    {8, 16, 2897079161},
+    {8, 17, 675168386},
+    // Much larger inputs
+    {0, BUFFER_SIZE, 2096790750},
+    {1, BUFFER_SIZE / 2, 3854797577},
+
+};
+
+TEST(CRC, StandardResults) {
+  // Original Fast_CRC32 tests.
+  // From rfc3720 section B.4.
+  char buf[32];
+
+  memset(buf, 0, sizeof(buf));
+  ASSERT_EQ(0x8a9136aaU, Value(buf, sizeof(buf)));
+
+  memset(buf, 0xff, sizeof(buf));
+  ASSERT_EQ(0x62a8ab43U, Value(buf, sizeof(buf)));
+
+  for (int i = 0; i < 32; i++) {
+    buf[i] = static_cast<char>(i);
+  }
+  ASSERT_EQ(0x46dd794eU, Value(buf, sizeof(buf)));
+
+  for (int i = 0; i < 32; i++) {
+    buf[i] = static_cast<char>(31 - i);
+  }
+  ASSERT_EQ(0x113fdb5cU, Value(buf, sizeof(buf)));
+
+  unsigned char data[48] = {
+      0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+      0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  };
+  ASSERT_EQ(0xd9963a56,
+            Value(reinterpret_cast<const char*>(data), sizeof(data)));
+
+  // 3-Way Crc32c tests ported from folly.
+  // Test 1: single computation
+  for (auto expected : expectedResults) {
+    uint32_t result = Value(buffer + expected.offset, expected.length);
+    EXPECT_EQ(~expected.crc32c, result);
+  }
+
+  // Test 2: stitching two computations
+  for (auto expected : expectedResults) {
+    size_t partialLength = expected.length / 2;
+    uint32_t partialChecksum = Value(buffer + expected.offset, partialLength);
+    uint32_t result =
+        Extend(partialChecksum, buffer + expected.offset + partialLength,
+               expected.length - partialLength);
+    EXPECT_EQ(~expected.crc32c, result);
+  }
+}
+
+TEST(CRC, Values) { ASSERT_NE(Value("a", 1), Value("foo", 3)); }
+
+TEST(CRC, Extend) {
+  ASSERT_EQ(Value("hello world", 11), Extend(Value("hello ", 6), "world", 5));
+}
+
+TEST(CRC, Mask) {
+  uint32_t crc = Value("foo", 3);
+  ASSERT_NE(crc, Mask(crc));
+  ASSERT_NE(crc, Mask(Mask(crc)));
+  ASSERT_EQ(crc, Unmask(Mask(crc)));
+  ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc)))));
+}
+
+TEST(CRC, Crc32cCombineBasicTest) {
+  uint32_t crc1 = Value("hello ", 6);
+  uint32_t crc2 = Value("world", 5);
+  uint32_t crc3 = Value("hello world", 11);
+  uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, 5);
+  ASSERT_EQ(crc3, crc1_2_combine);
+}
+
+TEST(CRC, Crc32cCombineOrderMattersTest) {
+  uint32_t crc1 = Value("hello ", 6);
+  uint32_t crc2 = Value("world", 5);
+  uint32_t crc3 = Value("hello world", 11);
+  uint32_t crc2_1_combine = Crc32cCombine(crc2, crc1, 6);
+  ASSERT_NE(crc3, crc2_1_combine);
+}
+
+TEST(CRC, Crc32cCombineFullCoverTest) {
+  int scale = 4 * 1024;
+  Random rnd(test::RandomSeed());
+  int size_1 = 1024 * 1024;
+  std::string s1 = rnd.RandomBinaryString(size_1);
+  uint32_t crc1 = Value(s1.data(), size_1);
+  for (int i = 0; i < scale; i++) {
+    int size_2 = i;
+    std::string s2 = rnd.RandomBinaryString(size_2);
+    uint32_t crc2 = Value(s2.data(), s2.size());
+    uint32_t crc1_2 = Extend(crc1, s2.data(), s2.size());
+    uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, size_2);
+    ASSERT_EQ(crc1_2, crc1_2_combine);
+  }
+}
+
+TEST(CRC, Crc32cCombineBigSizeTest) {
+  Random rnd(test::RandomSeed());
+  int size_1 = 1024 * 1024;
+  std::string s1 = rnd.RandomBinaryString(size_1);
+  uint32_t crc1 = Value(s1.data(), size_1);
+  int size_2 = 16 * 1024 * 1024 - 1;
+  std::string s2 = rnd.RandomBinaryString(size_2);
+  uint32_t crc2 = Value(s2.data(), s2.size());
+  uint32_t crc1_2 = Extend(crc1, s2.data(), s2.size());
+  uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, size_2);
+  ASSERT_EQ(crc1_2, crc1_2_combine);
+}
+
+}  // namespace crc32c
+}  // namespace ROCKSDB_NAMESPACE
+
+// copied from folly
+const uint64_t FNV_64_HASH_START = 14695981039346656037ULL;
+inline uint64_t fnv64_buf(const void* buf, size_t n,
+                          uint64_t hash = FNV_64_HASH_START) {
+  // forcing signed char, since other platforms can use unsigned
+  const signed char* char_buf = reinterpret_cast<const signed char*>(buf);
+
+  for (size_t i = 0; i < n; ++i) {
+    hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
+            (hash << 8) + (hash << 40);
+    hash ^= char_buf[i];
+  }
+  return hash;
+}
+
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+
+  // Populate a buffer with a deterministic pattern
+  // on which to compute checksums
+
+  const uint8_t* src = (uint8_t*)ROCKSDB_NAMESPACE::crc32c::buffer;
+  uint64_t* dst = (uint64_t*)ROCKSDB_NAMESPACE::crc32c::buffer;
+  const uint64_t* end =
+      (const uint64_t*)(ROCKSDB_NAMESPACE::crc32c::buffer +
+                        ROCKSDB_NAMESPACE::crc32c::BUFFER_SIZE);
+  *dst++ = 0;
+  while (dst < end) {
+    ROCKSDB_NAMESPACE::EncodeFixed64(
+        reinterpret_cast<char*>(dst),
+        fnv64_buf((const char*)src, sizeof(uint64_t)));
+    dst++;
+    src += sizeof(uint64_t);
+  }
+
+  return RUN_ALL_TESTS();
+}
diff --git a/src/rocksdb/util/defer.h b/src/rocksdb/util/defer.h
new file mode 100644
index 000000000..f71e67ba9
--- /dev/null
+++ b/src/rocksdb/util/defer.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <functional>
+
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Defers the execution of the provided function until the Defer
+// object goes out of scope.
+//
+// Usage example:
+//
+// Status DeferTest() {
+//   Status s;
+//   Defer defer([&s]() {
+//     if (!s.ok()) {
+//       // do cleanups ...
+//     }
+//   });
+//   // do something ...
+//   if (!s.ok()) return;
+//   // do some other things ...
+//   return s;
+// }
+//
+// The above code ensures that cleanups will always happen on returning.
+//
+// Without the help of Defer, you can
+// 1. every time when !s.ok(), do the cleanup;
+// 2. instead of returning when !s.ok(), continue the work only when s.ok(),
+//    but sometimes, this might lead to nested blocks of "if (s.ok()) {...}".
+//
+// With the help of Defer, you can centralize the cleanup logic inside the
+// lambda passed to Defer, and you can return immediately on failure when
+// necessary.
+class Defer final {
+ public:
+  explicit Defer(std::function<void()>&& fn) : fn_(std::move(fn)) {}
+  ~Defer() { fn_(); }
+
+  // Disallow copy.
+  Defer(const Defer&) = delete;
+  Defer& operator=(const Defer&) = delete;
+
+ private:
+  std::function<void()> fn_;
+};
+
+// An RAII utility object that saves the current value of an object so that
+// it can be overwritten, and restores it to the saved value when the
+// SaveAndRestore object goes out of scope.
+template <typename T>
+class SaveAndRestore {
+ public:
+  // obj is non-null pointer to value to be saved and later restored.
+  explicit SaveAndRestore(T* obj) : obj_(obj), saved_(*obj) {}
+  // new_value is stored in *obj
+  SaveAndRestore(T* obj, const T& new_value)
+      : obj_(obj), saved_(std::move(*obj)) {
+    *obj = new_value;
+  }
+  SaveAndRestore(T* obj, T&& new_value) : obj_(obj), saved_(std::move(*obj)) {
+    *obj = std::move(new_value);
+  }
+  ~SaveAndRestore() { *obj_ = std::move(saved_); }
+
+  // No copies
+  SaveAndRestore(const SaveAndRestore&) = delete;
+  SaveAndRestore& operator=(const SaveAndRestore&) = delete;
+
+ private:
+  T* const obj_;
+  T saved_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/src/rocksdb/util/defer_test.cc b/src/rocksdb/util/defer_test.cc
new file mode 100644
index 000000000..0e98f68b6
--- /dev/null
+++ b/src/rocksdb/util/defer_test.cc
@@ -0,0 +1,51 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
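As a quick illustration before the tests (not part of the patch), the SaveAndRestore overload that installs a replacement value behaves like this; the snippet assumes only the header added above:

#include <cassert>

#include "util/defer.h"

void SaveAndRestoreExample() {
  int level = 3;
  {
    // Temporarily override `level`; the saved value (3) is moved back into
    // place when `guard` goes out of scope.
    ROCKSDB_NAMESPACE::SaveAndRestore<int> guard(&level, /*new_value=*/7);
    assert(level == 7);
  }
  assert(level == 3);
}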
+ +#include "util/defer.h" + +#include "port/port.h" +#include "port/stack_trace.h" +#include "test_util/testharness.h" + +namespace ROCKSDB_NAMESPACE { + +class DeferTest {}; + +TEST(DeferTest, BlockScope) { + int v = 1; + { + Defer defer([&v]() { v *= 2; }); + } + ASSERT_EQ(2, v); +} + +TEST(DeferTest, FunctionScope) { + int v = 1; + auto f = [&v]() { + Defer defer([&v]() { v *= 2; }); + v = 2; + }; + f(); + ASSERT_EQ(4, v); +} + +TEST(SaveAndRestoreTest, BlockScope) { + int v = 1; + { + SaveAndRestore sr(&v); + ASSERT_EQ(v, 1); + v = 2; + ASSERT_EQ(v, 2); + } + ASSERT_EQ(v, 1); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/distributed_mutex.h b/src/rocksdb/util/distributed_mutex.h new file mode 100644 index 000000000..9675a1e2d --- /dev/null +++ b/src/rocksdb/util/distributed_mutex.h @@ -0,0 +1,48 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +// This file declares a wrapper around the efficient folly DistributedMutex +// that falls back on a standard mutex when not available. See +// https://github.com/facebook/folly/blob/main/folly/synchronization/DistributedMutex.h +// for benefits and limitations. + +// At the moment, only scoped locking is supported using DMutexLock +// RAII wrapper, because lock/unlock APIs will vary. + +#ifdef USE_FOLLY + +#include + +namespace ROCKSDB_NAMESPACE { + +class DMutex : public folly::DistributedMutex { + public: + static const char* kName() { return "folly::DistributedMutex"; } + + explicit DMutex(bool IGNORED_adaptive = false) { (void)IGNORED_adaptive; } + + // currently no-op + void AssertHeld() {} +}; +using DMutexLock = std::lock_guard; + +} // namespace ROCKSDB_NAMESPACE + +#else + +#include "port/port.h" + +namespace ROCKSDB_NAMESPACE { + +using DMutex = port::Mutex; +using DMutexLock = std::lock_guard; + +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/src/rocksdb/util/duplicate_detector.h b/src/rocksdb/util/duplicate_detector.h new file mode 100644 index 000000000..d778622db --- /dev/null +++ b/src/rocksdb/util/duplicate_detector.h @@ -0,0 +1,71 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "db/db_impl/db_impl.h" +#include "logging/logging.h" +#include "util/set_comparator.h" + +namespace ROCKSDB_NAMESPACE { +// During recovery if the memtable is flushed we cannot rely on its help on +// duplicate key detection and as key insert will not be attempted. This class +// will be used as a emulator of memtable to tell if insertion of a key/seq +// would have resulted in duplication. 
+class DuplicateDetector {
+ public:
+  explicit DuplicateDetector(DBImpl* db) : db_(db) {}
+  bool IsDuplicateKeySeq(uint32_t cf, const Slice& key, SequenceNumber seq) {
+    assert(seq >= batch_seq_);
+    if (batch_seq_ != seq) {  // it is a new batch
+      keys_.clear();
+    }
+    batch_seq_ = seq;
+    CFKeys& cf_keys = keys_[cf];
+    if (cf_keys.size() == 0) {  // just inserted
+      InitWithComp(cf);
+    }
+    auto it = cf_keys.insert(key);
+    if (it.second == false) {  // second is false if an element already existed.
+      keys_.clear();
+      InitWithComp(cf);
+      keys_[cf].insert(key);
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  SequenceNumber batch_seq_ = 0;
+  DBImpl* db_;
+  using CFKeys = std::set<Slice, SetComparator>;
+  std::map<uint32_t, CFKeys> keys_;
+  void InitWithComp(const uint32_t cf) {
+    auto h = db_->GetColumnFamilyHandle(cf);
+    if (!h) {
+      // TODO(myabandeh): This is not a concern in MyRocks as drop cf is not
+      // implemented yet. When it is, we should return a proper error instead
+      // of throwing an exception.
+      ROCKS_LOG_FATAL(
+          db_->immutable_db_options().info_log,
+          "Recovering an entry from the dropped column family %" PRIu32
+          ". WAL must have been emptied before dropping the column "
+          "family",
+          cf);
+#ifndef ROCKSDB_LITE
+      throw std::runtime_error(
+          "Recovering an entry from a dropped column family. "
+          "WAL must have been flushed before dropping the column "
+          "family");
+#endif
+      return;
+    }
+    auto cmp = h->GetComparator();
+    keys_[cf] = CFKeys(SetComparator(cmp));
+  }
+};
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/src/rocksdb/util/dynamic_bloom.cc b/src/rocksdb/util/dynamic_bloom.cc
new file mode 100644
index 000000000..0ff3b4a75
--- /dev/null
+++ b/src/rocksdb/util/dynamic_bloom.cc
@@ -0,0 +1,70 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "dynamic_bloom.h"
+
+#include <algorithm>
+
+#include "memory/allocator.h"
+#include "port/port.h"
+#include "rocksdb/slice.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+uint32_t roundUpToPow2(uint32_t x) {
+  uint32_t rv = 1;
+  while (rv < x) {
+    rv <<= 1;
+  }
+  return rv;
+}
+}  // namespace
+
+DynamicBloom::DynamicBloom(Allocator* allocator, uint32_t total_bits,
+                           uint32_t num_probes, size_t huge_page_tlb_size,
+                           Logger* logger)
+    // Round down, except round up with 1
+    : kNumDoubleProbes((num_probes + (num_probes == 1)) / 2) {
+  assert(num_probes % 2 == 0);  // limitation of current implementation
+  assert(num_probes <= 10);     // limitation of current implementation
+  assert(kNumDoubleProbes > 0);
+
+  // Determine how much to round off + align by so that x ^ i (that's xor) is
+  // a valid u64 index if x is a valid u64 index and 0 <= i < kNumDoubleProbes.
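+  // For example, with the default num_probes == 6, kNumDoubleProbes == 3,
+  // roundUpToPow2(3) == 4, block_bytes == 32 and block_bits == 256, so
+  // total_bits is effectively rounded up to a multiple of 256.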
+ uint32_t block_bytes = /*bytes/u64*/ 8 * + /*u64s*/ std::max(1U, roundUpToPow2(kNumDoubleProbes)); + uint32_t block_bits = block_bytes * 8; + uint32_t blocks = (total_bits + block_bits - 1) / block_bits; + uint32_t sz = blocks * block_bytes; + kLen = sz / /*bytes/u64*/ 8; + assert(kLen > 0); +#ifndef NDEBUG + for (uint32_t i = 0; i < kNumDoubleProbes; ++i) { + // Ensure probes starting at last word are in range + assert(((kLen - 1) ^ i) < kLen); + } +#endif + + // Padding to correct for allocation not originally aligned on block_bytes + // boundary + sz += block_bytes - 1; + assert(allocator); + + char* raw = allocator->AllocateAligned(sz, huge_page_tlb_size, logger); + memset(raw, 0, sz); + auto block_offset = reinterpret_cast(raw) % block_bytes; + if (block_offset > 0) { + // Align on block_bytes boundary + raw += block_bytes - block_offset; + } + static_assert(sizeof(std::atomic) == sizeof(uint64_t), + "Expecting zero-space-overhead atomic"); + data_ = reinterpret_cast*>(raw); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/dynamic_bloom.h b/src/rocksdb/util/dynamic_bloom.h new file mode 100644 index 000000000..40cd29404 --- /dev/null +++ b/src/rocksdb/util/dynamic_bloom.h @@ -0,0 +1,214 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "port/port.h" +#include "rocksdb/slice.h" +#include "table/multiget_context.h" +#include "util/hash.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; +class Allocator; +class Logger; + +// A Bloom filter intended only to be used in memory, never serialized in a way +// that could lead to schema incompatibility. Supports opt-in lock-free +// concurrent access. +// +// This implementation is also intended for applications generally preferring +// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate. +// For 1% FP rate, that means that the latency of a look-up triggered by an FP +// should be less than roughly 100x the cost of a Bloom filter op. +// +// For simplicity and performance, the current implementation requires +// num_probes to be a multiple of two and <= 10. +// +class DynamicBloom { + public: + // allocator: pass allocator to bloom filter, hence trace the usage of memory + // total_bits: fixed total bits for the bloom + // num_probes: number of hash probes for a single key + // hash_func: customized hash function + // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB + // within this page size. Need to reserve huge pages for + // it to be allocated, like: + // sysctl -w vm.nr_hugepages=20 + // See linux doc Documentation/vm/hugetlbpage.txt + explicit DynamicBloom(Allocator* allocator, uint32_t total_bits, + uint32_t num_probes = 6, size_t huge_page_tlb_size = 0, + Logger* logger = nullptr); + + ~DynamicBloom() {} + + // Assuming single threaded access to this function. + void Add(const Slice& key); + + // Like Add, but may be called concurrent with other functions. + void AddConcurrently(const Slice& key); + + // Assuming single threaded access to this function. + void AddHash(uint32_t hash); + + // Like AddHash, but may be called concurrent with other functions. 
+ void AddHashConcurrently(uint32_t hash); + + // Multithreaded access to this function is OK + bool MayContain(const Slice& key) const; + + void MayContain(int num_keys, Slice* keys, bool* may_match) const; + + // Multithreaded access to this function is OK + bool MayContainHash(uint32_t hash) const; + + void Prefetch(uint32_t h); + + private: + // Length of the structure, in 64-bit words. For this structure, "word" + // will always refer to 64-bit words. + uint32_t kLen; + // We make the k probes in pairs, two for each 64-bit read/write. Thus, + // this stores k/2, the number of words to double-probe. + const uint32_t kNumDoubleProbes; + + std::atomic* data_; + + // or_func(ptr, mask) should effect *ptr |= mask with the appropriate + // concurrency safety, working with bytes. + template + void AddHash(uint32_t hash, const OrFunc& or_func); + + bool DoubleProbe(uint32_t h32, size_t a) const; +}; + +inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); } + +inline void DynamicBloom::AddConcurrently(const Slice& key) { + AddHashConcurrently(BloomHash(key)); +} + +inline void DynamicBloom::AddHash(uint32_t hash) { + AddHash(hash, [](std::atomic* ptr, uint64_t mask) { + ptr->store(ptr->load(std::memory_order_relaxed) | mask, + std::memory_order_relaxed); + }); +} + +inline void DynamicBloom::AddHashConcurrently(uint32_t hash) { + AddHash(hash, [](std::atomic* ptr, uint64_t mask) { + // Happens-before between AddHash and MaybeContains is handled by + // access to versions_->LastSequence(), so all we have to do here is + // avoid races (so we don't give the compiler a license to mess up + // our code) and not lose bits. std::memory_order_relaxed is enough + // for that. + if ((mask & ptr->load(std::memory_order_relaxed)) != mask) { + ptr->fetch_or(mask, std::memory_order_relaxed); + } + }); +} + +inline bool DynamicBloom::MayContain(const Slice& key) const { + return (MayContainHash(BloomHash(key))); +} + +inline void DynamicBloom::MayContain(int num_keys, Slice* keys, + bool* may_match) const { + std::array hashes; + std::array byte_offsets; + for (int i = 0; i < num_keys; ++i) { + hashes[i] = BloomHash(keys[i]); + size_t a = FastRange32(kLen, hashes[i]); + PREFETCH(data_ + a, 0, 3); + byte_offsets[i] = a; + } + + for (int i = 0; i < num_keys; i++) { + may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]); + } +} + +#if defined(_MSC_VER) +#pragma warning(push) +// local variable is initialized but not referenced +#pragma warning(disable : 4189) +#endif +inline void DynamicBloom::Prefetch(uint32_t h32) { + size_t a = FastRange32(kLen, h32); + PREFETCH(data_ + a, 0, 3); +} +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +// Speed hacks in this implementation: +// * Uses fastrange instead of % +// * Minimum logic to determine first (and all) probed memory addresses. +// (Uses constant bit-xor offsets from the starting probe address.) +// * (Major) Two probes per 64-bit memory fetch/write. +// Code simplification / optimization: only allow even number of probes. +// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At +// least on recent CPUs, integer multiplication is very cheap. Each 64-bit +// remix provides five pairs of bit addresses within a uint64_t.) +// Code simplification / optimization: only allow up to 10 probes, from a +// single 64-bit remix. +// +// The FP rate penalty for this implementation, vs. standard Bloom filter, is +// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom. 
+// This implementation does not explicitly use the cache line size, but is +// effectively cache-local (up to 16 probes) because of the bit-xor offsetting. +// +// NB: could easily be upgraded to support a 64-bit hash and +// total_bits > 2^32 (512MB). (The latter is a bad idea without the former, +// because of false positives.) + +inline bool DynamicBloom::MayContainHash(uint32_t h32) const { + size_t a = FastRange32(kLen, h32); + PREFETCH(data_ + a, 0, 3); + return DoubleProbe(h32, a); +} + +inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const { + // Expand/remix with 64-bit golden ratio + uint64_t h = 0x9e3779b97f4a7c13ULL * h32; + for (unsigned i = 0;; ++i) { + // Two bit probes per uint64_t probe + uint64_t mask = + ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63)); + uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed); + if (i + 1 >= kNumDoubleProbes) { + return (val & mask) == mask; + } else if ((val & mask) != mask) { + return false; + } + h = (h >> 12) | (h << 52); + } +} + +template +inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) { + size_t a = FastRange32(kLen, h32); + PREFETCH(data_ + a, 0, 3); + // Expand/remix with 64-bit golden ratio + uint64_t h = 0x9e3779b97f4a7c13ULL * h32; + for (unsigned i = 0;; ++i) { + // Two bit probes per uint64_t probe + uint64_t mask = + ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63)); + or_func(&data_[a ^ i], mask); + if (i + 1 >= kNumDoubleProbes) { + return; + } + h = (h >> 12) | (h << 52); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/dynamic_bloom_test.cc b/src/rocksdb/util/dynamic_bloom_test.cc new file mode 100644 index 000000000..925c5479a --- /dev/null +++ b/src/rocksdb/util/dynamic_bloom_test.cc @@ -0,0 +1,325 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef GFLAGS +#include +int main() { + fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); + return 0; +} +#else + +#include +#include +#include +#include +#include +#include +#include + +#include "dynamic_bloom.h" +#include "memory/arena.h" +#include "port/port.h" +#include "rocksdb/system_clock.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/gflags_compat.h" +#include "util/stop_watch.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; + +DEFINE_int32(bits_per_key, 10, ""); +DEFINE_int32(num_probes, 6, ""); +DEFINE_bool(enable_perf, false, ""); + +namespace ROCKSDB_NAMESPACE { + +struct KeyMaker { + uint64_t a; + uint64_t b; + + // Sequential, within a hash function block + inline Slice Seq(uint64_t i) { + a = i; + return Slice(reinterpret_cast(&a), sizeof(a)); + } + // Not quite sequential, varies across hash function blocks + inline Slice Nonseq(uint64_t i) { + a = i; + b = i * 123; + return Slice(reinterpret_cast(this), sizeof(*this)); + } + inline Slice Key(uint64_t i, bool nonseq) { + return nonseq ? 
Nonseq(i) : Seq(i); + } +}; + +class DynamicBloomTest : public testing::Test {}; + +TEST_F(DynamicBloomTest, EmptyFilter) { + Arena arena; + DynamicBloom bloom1(&arena, 100, 2); + ASSERT_TRUE(!bloom1.MayContain("hello")); + ASSERT_TRUE(!bloom1.MayContain("world")); + + DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2); + ASSERT_TRUE(!bloom2.MayContain("hello")); + ASSERT_TRUE(!bloom2.MayContain("world")); +} + +TEST_F(DynamicBloomTest, Small) { + Arena arena; + DynamicBloom bloom1(&arena, 100, 2); + bloom1.Add("hello"); + bloom1.Add("world"); + ASSERT_TRUE(bloom1.MayContain("hello")); + ASSERT_TRUE(bloom1.MayContain("world")); + ASSERT_TRUE(!bloom1.MayContain("x")); + ASSERT_TRUE(!bloom1.MayContain("foo")); + + DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2); + bloom2.Add("hello"); + bloom2.Add("world"); + ASSERT_TRUE(bloom2.MayContain("hello")); + ASSERT_TRUE(bloom2.MayContain("world")); + ASSERT_TRUE(!bloom2.MayContain("x")); + ASSERT_TRUE(!bloom2.MayContain("foo")); +} + +TEST_F(DynamicBloomTest, SmallConcurrentAdd) { + Arena arena; + DynamicBloom bloom1(&arena, 100, 2); + bloom1.AddConcurrently("hello"); + bloom1.AddConcurrently("world"); + ASSERT_TRUE(bloom1.MayContain("hello")); + ASSERT_TRUE(bloom1.MayContain("world")); + ASSERT_TRUE(!bloom1.MayContain("x")); + ASSERT_TRUE(!bloom1.MayContain("foo")); + + DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 2); + bloom2.AddConcurrently("hello"); + bloom2.AddConcurrently("world"); + ASSERT_TRUE(bloom2.MayContain("hello")); + ASSERT_TRUE(bloom2.MayContain("world")); + ASSERT_TRUE(!bloom2.MayContain("x")); + ASSERT_TRUE(!bloom2.MayContain("foo")); +} + +static uint32_t NextNum(uint32_t num) { + if (num < 10) { + num += 1; + } else if (num < 100) { + num += 10; + } else if (num < 1000) { + num += 100; + } else { + num = num * 26 / 10; + } + return num; +} + +TEST_F(DynamicBloomTest, VaryingLengths) { + KeyMaker km; + + // Count number of filters that significantly exceed the false positive rate + int mediocre_filters = 0; + int good_filters = 0; + uint32_t num_probes = static_cast(FLAGS_num_probes); + + fprintf(stderr, "bits_per_key: %d num_probes: %d\n", FLAGS_bits_per_key, + num_probes); + + // NB: FP rate impact of 32-bit hash is noticeable starting around 10M keys. + // But that effect is hidden if using sequential keys (unique hashes). + for (bool nonseq : {false, true}) { + const uint32_t max_num = FLAGS_enable_perf ? 40000000 : 400000; + for (uint32_t num = 1; num <= max_num; num = NextNum(num)) { + uint32_t bloom_bits = 0; + Arena arena; + bloom_bits = num * FLAGS_bits_per_key; + DynamicBloom bloom(&arena, bloom_bits, num_probes); + for (uint64_t i = 0; i < num; i++) { + bloom.Add(km.Key(i, nonseq)); + ASSERT_TRUE(bloom.MayContain(km.Key(i, nonseq))); + } + + // All added keys must match + for (uint64_t i = 0; i < num; i++) { + ASSERT_TRUE(bloom.MayContain(km.Key(i, nonseq))); + } + + // Check false positive rate + int result = 0; + for (uint64_t i = 0; i < 30000; i++) { + if (bloom.MayContain(km.Key(i + 1000000000, nonseq))) { + result++; + } + } + double rate = result / 30000.0; + + fprintf(stderr, + "False positives (%s keys): " + "%5.2f%% @ num = %6u, bloom_bits = %6u\n", + nonseq ? 
"nonseq" : "seq", rate * 100.0, num, bloom_bits); + + if (rate > 0.0125) + mediocre_filters++; // Allowed, but not too often + else + good_filters++; + } + } + + fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters, + mediocre_filters); + ASSERT_LE(mediocre_filters, good_filters / 25); +} + +TEST_F(DynamicBloomTest, perf) { + KeyMaker km; + StopWatchNano timer(SystemClock::Default().get()); + uint32_t num_probes = static_cast(FLAGS_num_probes); + + if (!FLAGS_enable_perf) { + return; + } + + for (uint32_t m = 1; m <= 8; ++m) { + Arena arena; + const uint32_t num_keys = m * 8 * 1024 * 1024; + fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8); + + DynamicBloom std_bloom(&arena, num_keys * 10, num_probes); + + timer.Start(); + for (uint64_t i = 1; i <= num_keys; ++i) { + std_bloom.Add(km.Seq(i)); + } + + uint64_t elapsed = timer.ElapsedNanos(); + fprintf(stderr, "dynamic bloom, avg add latency %3g\n", + static_cast(elapsed) / num_keys); + + uint32_t count = 0; + timer.Start(); + for (uint64_t i = 1; i <= num_keys; ++i) { + if (std_bloom.MayContain(km.Seq(i))) { + ++count; + } + } + ASSERT_EQ(count, num_keys); + elapsed = timer.ElapsedNanos(); + assert(count > 0); + fprintf(stderr, "dynamic bloom, avg query latency %3g\n", + static_cast(elapsed) / count); + } +} + +TEST_F(DynamicBloomTest, concurrent_with_perf) { + uint32_t num_probes = static_cast(FLAGS_num_probes); + + uint32_t m_limit = FLAGS_enable_perf ? 8 : 1; + + uint32_t num_threads = 4; + std::vector threads; + + // NB: Uses sequential keys for speed, but that hides the FP rate + // impact of 32-bit hash, which is noticeable starting around 10M keys + // when they vary across hashing blocks. + for (uint32_t m = 1; m <= m_limit; ++m) { + Arena arena; + const uint32_t num_keys = m * 8 * 1024 * 1024; + fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8); + + DynamicBloom std_bloom(&arena, num_keys * 10, num_probes); + + std::atomic elapsed(0); + + std::function adder([&](size_t t) { + KeyMaker km; + StopWatchNano timer(SystemClock::Default().get()); + timer.Start(); + for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) { + std_bloom.AddConcurrently(km.Seq(i)); + } + elapsed += timer.ElapsedNanos(); + }); + for (size_t t = 0; t < num_threads; ++t) { + threads.emplace_back(adder, t); + } + while (threads.size() > 0) { + threads.back().join(); + threads.pop_back(); + } + + fprintf(stderr, + "dynamic bloom, avg parallel add latency %3g" + " nanos/key\n", + static_cast(elapsed) / num_threads / num_keys); + + elapsed = 0; + std::function hitter([&](size_t t) { + KeyMaker km; + StopWatchNano timer(SystemClock::Default().get()); + timer.Start(); + for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) { + bool f = std_bloom.MayContain(km.Seq(i)); + ASSERT_TRUE(f); + } + elapsed += timer.ElapsedNanos(); + }); + for (size_t t = 0; t < num_threads; ++t) { + threads.emplace_back(hitter, t); + } + while (threads.size() > 0) { + threads.back().join(); + threads.pop_back(); + } + + fprintf(stderr, + "dynamic bloom, avg parallel hit latency %3g" + " nanos/key\n", + static_cast(elapsed) / num_threads / num_keys); + + elapsed = 0; + std::atomic false_positives(0); + std::function misser([&](size_t t) { + KeyMaker km; + StopWatchNano timer(SystemClock::Default().get()); + timer.Start(); + for (uint64_t i = num_keys + 1 + t; i <= 2 * num_keys; i += num_threads) { + bool f = std_bloom.MayContain(km.Seq(i)); + if (f) { + ++false_positives; + } + } + elapsed += timer.ElapsedNanos(); + }); + for (size_t t = 0; t < num_threads; ++t) { + 
threads.emplace_back(misser, t); + } + while (threads.size() > 0) { + threads.back().join(); + threads.pop_back(); + } + + fprintf(stderr, + "dynamic bloom, avg parallel miss latency %3g" + " nanos/key, %f%% false positive rate\n", + static_cast(elapsed) / num_threads / num_keys, + false_positives.load() * 100.0 / num_keys); + } +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char **argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} + +#endif // GFLAGS diff --git a/src/rocksdb/util/fastrange.h b/src/rocksdb/util/fastrange.h new file mode 100644 index 000000000..a70a980f6 --- /dev/null +++ b/src/rocksdb/util/fastrange.h @@ -0,0 +1,114 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// fastrange/FastRange: A faster alternative to % for mapping a hash value +// to an arbitrary range. See https://github.com/lemire/fastrange +// +// Generally recommended are FastRange32 for mapping results of 32-bit +// hash functions and FastRange64 for mapping results of 64-bit hash +// functions. FastRange is less forgiving than % if the input hashes are +// not well distributed over the full range of the type (32 or 64 bits). +// +// Also included is a templated implementation FastRangeGeneric for use +// in generic algorithms, but not otherwise recommended because of +// potential ambiguity. Unlike with %, it is critical to use the right +// FastRange variant for the output size of your hash function. + +#pragma once + +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +#ifdef TEST_UINT128_COMPAT +#undef HAVE_UINT128_EXTENSION +#endif + +namespace ROCKSDB_NAMESPACE { + +namespace detail { + +// Using a class template to support partial specialization +template +struct FastRangeGenericImpl { + // only reach this on no supported specialization +}; + +template +struct FastRangeGenericImpl { + static inline Range Fn(uint32_t hash, Range range) { + static_assert(std::is_unsigned::value, "must be unsigned"); + static_assert(sizeof(Range) <= sizeof(uint32_t), + "cannot be larger than hash (32 bits)"); + + uint64_t product = uint64_t{range} * hash; + return static_cast(product >> 32); + } +}; + +template +struct FastRangeGenericImpl { + static inline Range Fn(uint64_t hash, Range range) { + static_assert(std::is_unsigned::value, "must be unsigned"); + static_assert(sizeof(Range) <= sizeof(uint64_t), + "cannot be larger than hash (64 bits)"); + +#ifdef HAVE_UINT128_EXTENSION + // Can use compiler's 128-bit type. Trust it to do the right thing. + __uint128_t wide = __uint128_t{range} * hash; + return static_cast(wide >> 64); +#else + // Fall back: full decomposition. 
+ // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit + // -> 128-bit multiplication and optimize it appropriately + uint64_t range64 = range; // ok to shift by 32, even if Range is 32-bit + uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF}; + tmp >>= 32; + tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32}; + // Avoid overflow: first add lower 32 of tmp2, and later upper 32 + uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF}; + tmp += static_cast(tmp2); + tmp >>= 32; + tmp += (tmp2 >> 32); + tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32}; + return static_cast(tmp); +#endif + } +}; + +} // namespace detail + +// Now an omnibus templated function (yay parameter inference). +// +// NOTICE: +// This templated version is not recommended for typical use because +// of the potential to mix a 64-bit FastRange with a 32-bit bit hash, +// most likely because you put your 32-bit hash in an "unsigned long" +// which is 64 bits on some platforms. That doesn't really matter for +// an operation like %, but 64-bit FastRange gives extremely bad results, +// mostly zero, on 32-bit hash values. And because good hashing is not +// generally required for correctness, this kind of mistake could go +// unnoticed with just unit tests. Plus it could vary by platform. +template +inline Range FastRangeGeneric(Hash hash, Range range) { + return detail::FastRangeGenericImpl::Fn(hash, range); +} + +// The most popular / convenient / recommended variants: + +// Map a quality 64-bit hash value down to an arbitrary size_t range. +// (size_t is standard for mapping to things in memory.) +inline size_t FastRange64(uint64_t hash, size_t range) { + return FastRangeGeneric(hash, range); +} + +// Map a quality 32-bit hash value down to an arbitrary uint32_t range. +inline uint32_t FastRange32(uint32_t hash, uint32_t range) { + return FastRangeGeneric(hash, range); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/file_checksum_helper.cc b/src/rocksdb/util/file_checksum_helper.cc new file mode 100644 index 000000000..a73920352 --- /dev/null +++ b/src/rocksdb/util/file_checksum_helper.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
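+// Illustrative usage sketch of the helpers defined in this file (the MANIFEST
+// path below is a placeholder, not a real file):
+//
+//   std::unique_ptr<FileChecksumList> list(NewFileChecksumList());
+//   Status s = GetFileChecksumsFromManifest(
+//       Env::Default(), "/path/to/MANIFEST-000001",
+//       std::numeric_limits<uint64_t>::max() /* use the whole file */,
+//       list.get());
+//   // On success, list holds one (file number, checksum, func name) entry
+//   // per tracked SST file.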
+ +#include "util/file_checksum_helper.h" + +#include + +#include "db/log_reader.h" +#include "db/version_edit.h" +#include "db/version_edit_handler.h" +#include "file/sequence_file_reader.h" +#include "rocksdb/utilities/customizable_util.h" + +namespace ROCKSDB_NAMESPACE { + +void FileChecksumListImpl::reset() { checksum_map_.clear(); } + +size_t FileChecksumListImpl::size() const { return checksum_map_.size(); } + +Status FileChecksumListImpl::GetAllFileChecksums( + std::vector* file_numbers, std::vector* checksums, + std::vector* checksum_func_names) { + if (file_numbers == nullptr || checksums == nullptr || + checksum_func_names == nullptr) { + return Status::InvalidArgument("Pointer has not been initiated"); + } + + for (auto i : checksum_map_) { + file_numbers->push_back(i.first); + checksums->push_back(i.second.first); + checksum_func_names->push_back(i.second.second); + } + return Status::OK(); +} + +Status FileChecksumListImpl::SearchOneFileChecksum( + uint64_t file_number, std::string* checksum, + std::string* checksum_func_name) { + if (checksum == nullptr || checksum_func_name == nullptr) { + return Status::InvalidArgument("Pointer has not been initiated"); + } + + auto it = checksum_map_.find(file_number); + if (it == checksum_map_.end()) { + return Status::NotFound(); + } else { + *checksum = it->second.first; + *checksum_func_name = it->second.second; + } + return Status::OK(); +} + +Status FileChecksumListImpl::InsertOneFileChecksum( + uint64_t file_number, const std::string& checksum, + const std::string& checksum_func_name) { + auto it = checksum_map_.find(file_number); + if (it == checksum_map_.end()) { + checksum_map_.insert(std::make_pair( + file_number, std::make_pair(checksum, checksum_func_name))); + } else { + it->second.first = checksum; + it->second.second = checksum_func_name; + } + return Status::OK(); +} + +Status FileChecksumListImpl::RemoveOneFileChecksum(uint64_t file_number) { + auto it = checksum_map_.find(file_number); + if (it == checksum_map_.end()) { + return Status::NotFound(); + } else { + checksum_map_.erase(it); + } + return Status::OK(); +} + +FileChecksumList* NewFileChecksumList() { + FileChecksumListImpl* checksum_list = new FileChecksumListImpl(); + return checksum_list; +} + +std::shared_ptr GetFileChecksumGenCrc32cFactory() { + static std::shared_ptr default_crc32c_gen_factory( + new FileChecksumGenCrc32cFactory()); + return default_crc32c_gen_factory; +} + +Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path, + uint64_t manifest_file_size, + FileChecksumList* checksum_list) { + if (checksum_list == nullptr) { + return Status::InvalidArgument("checksum_list is nullptr"); + } + assert(checksum_list); + checksum_list->reset(); + Status s; + + std::unique_ptr file_reader; + { + std::unique_ptr file; + const std::shared_ptr& fs = src_env->GetFileSystem(); + s = fs->NewSequentialFile(abs_path, + fs->OptimizeForManifestRead(FileOptions()), &file, + nullptr /* dbg */); + if (!s.ok()) { + return s; + } + file_reader.reset(new SequentialFileReader(std::move(file), abs_path)); + } + + struct LogReporter : public log::Reader::Reporter { + Status* status_ptr; + virtual void Corruption(size_t /*bytes*/, const Status& st) override { + if (status_ptr->ok()) { + *status_ptr = st; + } + } + } reporter; + reporter.status_ptr = &s; + log::Reader reader(nullptr, std::move(file_reader), &reporter, + true /* checksum */, 0 /* log_number */); + FileChecksumRetriever retriever(manifest_file_size, *checksum_list); + 
retriever.Iterate(reader, &s); + assert(!retriever.status().ok() || + manifest_file_size == std::numeric_limits::max() || + reader.LastRecordEnd() == manifest_file_size); + + return retriever.status(); +} + +#ifndef ROCKSDB_LITE +namespace { +static int RegisterFileChecksumGenFactories(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory( + FileChecksumGenCrc32cFactory::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* guard, + std::string* /* errmsg */) { + guard->reset(new FileChecksumGenCrc32cFactory()); + return guard->get(); + }); + return 1; +} +} // namespace +#endif // !ROCKSDB_LITE + +Status FileChecksumGenFactory::CreateFromString( + const ConfigOptions& options, const std::string& value, + std::shared_ptr* result) { +#ifndef ROCKSDB_LITE + static std::once_flag once; + std::call_once(once, [&]() { + RegisterFileChecksumGenFactories(*(ObjectLibrary::Default().get()), ""); + }); +#endif // ROCKSDB_LITE + if (value == FileChecksumGenCrc32cFactory::kClassName()) { + *result = GetFileChecksumGenCrc32cFactory(); + return Status::OK(); + } else { + Status s = LoadSharedObject(options, value, nullptr, + result); + return s; + } +} +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/file_checksum_helper.h b/src/rocksdb/util/file_checksum_helper.h new file mode 100644 index 000000000..d622e9bba --- /dev/null +++ b/src/rocksdb/util/file_checksum_helper.h @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include +#include + +#include "port/port.h" +#include "rocksdb/file_checksum.h" +#include "rocksdb/status.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/math.h" + +namespace ROCKSDB_NAMESPACE { + +// This is the class to generate the file checksum based on Crc32. 
It +// will be used as the default checksum method for SST file checksum +class FileChecksumGenCrc32c : public FileChecksumGenerator { + public: + FileChecksumGenCrc32c(const FileChecksumGenContext& /*context*/) { + checksum_ = 0; + } + + void Update(const char* data, size_t n) override { + checksum_ = crc32c::Extend(checksum_, data, n); + } + + void Finalize() override { + assert(checksum_str_.empty()); + // Store as big endian raw bytes + PutFixed32(&checksum_str_, EndianSwapValue(checksum_)); + } + + std::string GetChecksum() const override { + assert(!checksum_str_.empty()); + return checksum_str_; + } + + const char* Name() const override { return "FileChecksumCrc32c"; } + + private: + uint32_t checksum_; + std::string checksum_str_; +}; + +class FileChecksumGenCrc32cFactory : public FileChecksumGenFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + if (context.requested_checksum_func_name.empty() || + context.requested_checksum_func_name == "FileChecksumCrc32c") { + return std::unique_ptr( + new FileChecksumGenCrc32c(context)); + } else { + return nullptr; + } + } + + static const char* kClassName() { return "FileChecksumGenCrc32cFactory"; } + const char* Name() const override { return kClassName(); } +}; + +// The default implementaion of FileChecksumList +class FileChecksumListImpl : public FileChecksumList { + public: + FileChecksumListImpl() {} + void reset() override; + + size_t size() const override; + + Status GetAllFileChecksums( + std::vector* file_numbers, std::vector* checksums, + std::vector* checksum_func_names) override; + + Status SearchOneFileChecksum(uint64_t file_number, std::string* checksum, + std::string* checksum_func_name) override; + + Status InsertOneFileChecksum(uint64_t file_number, + const std::string& checksum, + const std::string& checksum_func_name) override; + + Status RemoveOneFileChecksum(uint64_t file_number) override; + + private: + // Key is the file number, the first portion of the value is checksum, the + // second portion of the value is checksum function name. + std::unordered_map> + checksum_map_; +}; + +// If manifest_file_size < std::numeric_limits::max(), only use +// that length prefix of the manifest file. +Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path, + uint64_t manifest_file_size, + FileChecksumList* checksum_list); + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/file_reader_writer_test.cc b/src/rocksdb/util/file_reader_writer_test.cc new file mode 100644 index 000000000..e778efc3c --- /dev/null +++ b/src/rocksdb/util/file_reader_writer_test.cc @@ -0,0 +1,1066 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
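+// Illustrative sketch of the typical writer lifecycle exercised by the tests
+// below (file name and payload are placeholders):
+//
+//   std::unique_ptr<WritableFileWriter> writer;
+//   Status s = WritableFileWriter::Create(FileSystem::Default(), "/tmp/f",
+//                                         FileOptions(), &writer,
+//                                         /*dbg=*/nullptr);
+//   if (s.ok()) s = writer->Append(Slice("payload"));
+//   if (s.ok()) s = writer->Flush();
+//   if (s.ok()) s = writer->Close();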
+// +#include +#include + +#include "db/db_test_util.h" +#include "env/mock_env.h" +#include "file/line_file_reader.h" +#include "file/random_access_file_reader.h" +#include "file/read_write_util.h" +#include "file/readahead_raf.h" +#include "file/sequence_file_reader.h" +#include "file/writable_file_writer.h" +#include "rocksdb/file_system.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/crc32c.h" +#include "util/random.h" +#include "utilities/fault_injection_fs.h" + +namespace ROCKSDB_NAMESPACE { + +class WritableFileWriterTest : public testing::Test {}; + +constexpr uint32_t kMb = static_cast(1) << 20; + +TEST_F(WritableFileWriterTest, RangeSync) { + class FakeWF : public FSWritableFile { + public: + explicit FakeWF() : size_(0), last_synced_(0) {} + ~FakeWF() override {} + + using FSWritableFile::Append; + IOStatus Append(const Slice& data, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + size_ += data.size(); + return IOStatus::OK(); + } + IOStatus Truncate(uint64_t /*size*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + EXPECT_GE(size_, last_synced_ + kMb); + EXPECT_LT(size_, last_synced_ + 2 * kMb); + // Make sure random writes generated enough writes. + EXPECT_GT(size_, 10 * kMb); + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + void SetIOPriority(Env::IOPriority /*pri*/) override {} + uint64_t GetFileSize(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return size_; + } + void GetPreallocationStatus(size_t* /*block_size*/, + size_t* /*last_allocated_block*/) override {} + size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { + return 0; + } + IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override { + return IOStatus::OK(); + } + + protected: + IOStatus Allocate(uint64_t /*offset*/, uint64_t /*len*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus RangeSync(uint64_t offset, uint64_t nbytes, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(offset % 4096, 0u); + EXPECT_EQ(nbytes % 4096, 0u); + + EXPECT_EQ(offset, last_synced_); + last_synced_ = offset + nbytes; + EXPECT_GE(size_, last_synced_ + kMb); + if (size_ > 2 * kMb) { + EXPECT_LT(size_, last_synced_ + 2 * kMb); + } + return IOStatus::OK(); + } + + uint64_t size_; + uint64_t last_synced_; + }; + + EnvOptions env_options; + env_options.bytes_per_sync = kMb; + std::unique_ptr wf(new FakeWF); + std::unique_ptr writer( + new WritableFileWriter(std::move(wf), "" /* don't care */, env_options)); + Random r(301); + Status s; + std::unique_ptr large_buf(new char[10 * kMb]); + for (int i = 0; i < 1000; i++) { + int skew_limit = (i < 700) ? 10 : 15; + uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100); + s = writer->Append(Slice(large_buf.get(), num)); + ASSERT_OK(s); + + // Flush in a chance of 1/10. 
+ if (r.Uniform(10) == 0) { + s = writer->Flush(); + ASSERT_OK(s); + } + } + s = writer->Close(); + ASSERT_OK(s); +} + +TEST_F(WritableFileWriterTest, IncrementalBuffer) { + class FakeWF : public FSWritableFile { + public: + explicit FakeWF(std::string* _file_data, bool _use_direct_io, + bool _no_flush) + : file_data_(_file_data), + use_direct_io_(_use_direct_io), + no_flush_(_no_flush) {} + ~FakeWF() override {} + + using FSWritableFile::Append; + IOStatus Append(const Slice& data, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + file_data_->append(data.data(), data.size()); + size_ += data.size(); + return IOStatus::OK(); + } + using FSWritableFile::PositionedAppend; + IOStatus PositionedAppend(const Slice& data, uint64_t pos, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + EXPECT_TRUE(pos % 512 == 0); + EXPECT_TRUE(data.size() % 512 == 0); + file_data_->resize(pos); + file_data_->append(data.data(), data.size()); + size_ += data.size(); + return IOStatus::OK(); + } + + IOStatus Truncate(uint64_t size, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + file_data_->resize(size); + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + void SetIOPriority(Env::IOPriority /*pri*/) override {} + uint64_t GetFileSize(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return size_; + } + void GetPreallocationStatus(size_t* /*block_size*/, + size_t* /*last_allocated_block*/) override {} + size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { + return 0; + } + IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override { + return IOStatus::OK(); + } + bool use_direct_io() const override { return use_direct_io_; } + + std::string* file_data_; + bool use_direct_io_; + bool no_flush_; + size_t size_ = 0; + }; + + Random r(301); + const int kNumAttempts = 50; + for (int attempt = 0; attempt < kNumAttempts; attempt++) { + bool no_flush = (attempt % 3 == 0); + EnvOptions env_options; + env_options.writable_file_max_buffer_size = + (attempt < kNumAttempts / 2) ? 512 * 1024 : 700 * 1024; + std::string actual; + std::unique_ptr wf(new FakeWF(&actual, +#ifndef ROCKSDB_LITE + attempt % 2 == 1, +#else + false, +#endif + no_flush)); + std::unique_ptr writer(new WritableFileWriter( + std::move(wf), "" /* don't care */, env_options)); + + std::string target; + for (int i = 0; i < 20; i++) { + uint32_t num = r.Skewed(16) * 100 + r.Uniform(100); + std::string random_string = r.RandomString(num); + ASSERT_OK(writer->Append(Slice(random_string.c_str(), num))); + target.append(random_string.c_str(), num); + + // In some attempts, flush in a chance of 1/10. 
+ if (!no_flush && r.Uniform(10) == 0) { + ASSERT_OK(writer->Flush()); + } + } + ASSERT_OK(writer->Flush()); + ASSERT_OK(writer->Close()); + ASSERT_EQ(target.size(), actual.size()); + ASSERT_EQ(target, actual); + } +} + +TEST_F(WritableFileWriterTest, BufferWithZeroCapacityDirectIO) { + EnvOptions env_opts; + env_opts.use_direct_writes = true; + env_opts.writable_file_max_buffer_size = 0; + { + std::unique_ptr writer; + const Status s = + WritableFileWriter::Create(FileSystem::Default(), /*fname=*/"dont_care", + FileOptions(env_opts), &writer, + /*dbg=*/nullptr); + ASSERT_TRUE(s.IsInvalidArgument()); + } +} + +class DBWritableFileWriterTest : public DBTestBase { + public: + DBWritableFileWriterTest() + : DBTestBase("db_secondary_cache_test", /*env_do_fsync=*/true) { + fault_fs_.reset(new FaultInjectionTestFS(env_->GetFileSystem())); + fault_env_.reset(new CompositeEnvWrapper(env_, fault_fs_)); + } + + std::shared_ptr fault_fs_; + std::unique_ptr fault_env_; +}; + +TEST_F(DBWritableFileWriterTest, AppendWithChecksum) { + FileOptions file_options = FileOptions(); + Options options = GetDefaultOptions(); + options.create_if_missing = true; + DestroyAndReopen(options); + std::string fname = dbname_ + "/test_file"; + std::unique_ptr writable_file_ptr; + ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options, &writable_file_ptr, + /*dbg*/ nullptr)); + std::unique_ptr file; + file.reset(new TestFSWritableFile( + fname, file_options, std::move(writable_file_ptr), fault_fs_.get())); + std::unique_ptr file_writer; + ImmutableOptions ioptions(options); + file_writer.reset(new WritableFileWriter( + std::move(file), fname, file_options, SystemClock::Default().get(), + nullptr, ioptions.stats, ioptions.listeners, + ioptions.file_checksum_gen_factory.get(), true, true)); + + Random rnd(301); + std::string data = rnd.RandomString(1000); + uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size()); + fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + + ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + ASSERT_OK(file_writer->Flush()); + Random size_r(47); + for (int i = 0; i < 2000; i++) { + data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); + data_crc32c = crc32c::Value(data.c_str(), data.size()); + ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + + data = rnd.RandomString((static_cast(size_r.Next()) % 97)); + ASSERT_OK(file_writer->Append(Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush()); + } + ASSERT_OK(file_writer->Close()); + Destroy(options); +} + +TEST_F(DBWritableFileWriterTest, AppendVerifyNoChecksum) { + FileOptions file_options = FileOptions(); + Options options = GetDefaultOptions(); + options.create_if_missing = true; + DestroyAndReopen(options); + std::string fname = dbname_ + "/test_file"; + std::unique_ptr writable_file_ptr; + ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options, &writable_file_ptr, + /*dbg*/ nullptr)); + std::unique_ptr file; + file.reset(new TestFSWritableFile( + fname, file_options, std::move(writable_file_ptr), fault_fs_.get())); + std::unique_ptr file_writer; + ImmutableOptions ioptions(options); + // Enable checksum handoff for this file, but do not enable buffer checksum. 
+ // So Append with checksum logic will not be triggered + file_writer.reset(new WritableFileWriter( + std::move(file), fname, file_options, SystemClock::Default().get(), + nullptr, ioptions.stats, ioptions.listeners, + ioptions.file_checksum_gen_factory.get(), true, false)); + + Random rnd(301); + std::string data = rnd.RandomString(1000); + uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size()); + fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + + ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + ASSERT_OK(file_writer->Flush()); + Random size_r(47); + for (int i = 0; i < 1000; i++) { + data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); + data_crc32c = crc32c::Value(data.c_str(), data.size()); + ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + + data = rnd.RandomString((static_cast(size_r.Next()) % 97)); + ASSERT_OK(file_writer->Append(Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush()); + } + ASSERT_OK(file_writer->Close()); + Destroy(options); +} + +TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) { + FileOptions file_options = FileOptions(); + file_options.rate_limiter = nullptr; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + DestroyAndReopen(options); + std::string fname = dbname_ + "/test_file"; + std::unique_ptr writable_file_ptr; + ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options, &writable_file_ptr, + /*dbg*/ nullptr)); + std::unique_ptr file; + file.reset(new TestFSWritableFile( + fname, file_options, std::move(writable_file_ptr), fault_fs_.get())); + std::unique_ptr file_writer; + ImmutableOptions ioptions(options); + // Enable checksum handoff for this file, but do not enable buffer checksum. + // So Append with checksum logic will not be triggered + file_writer.reset(new WritableFileWriter( + std::move(file), fname, file_options, SystemClock::Default().get(), + nullptr, ioptions.stats, ioptions.listeners, + ioptions.file_checksum_gen_factory.get(), true, true)); + fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + + Random rnd(301); + std::string data; + uint32_t data_crc32c; + uint64_t start = fault_env_->NowMicros(); + Random size_r(47); + uint64_t bytes_written = 0; + for (int i = 0; i < 100; i++) { + data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); + data_crc32c = crc32c::Value(data.c_str(), data.size()); + ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + bytes_written += static_cast(data.size()); + + data = rnd.RandomString((static_cast(size_r.Next()) % 97)); + ASSERT_OK(file_writer->Append(Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush()); + bytes_written += static_cast(data.size()); + } + uint64_t elapsed = fault_env_->NowMicros() - start; + double raw_rate = bytes_written * 1000000.0 / elapsed; + ASSERT_OK(file_writer->Close()); + + // Set the rate-limiter + FileOptions file_options1 = FileOptions(); + file_options1.rate_limiter = + NewGenericRateLimiter(static_cast(0.5 * raw_rate)); + fname = dbname_ + "/test_file_1"; + std::unique_ptr writable_file_ptr1; + ASSERT_OK(fault_fs_->NewWritableFile(fname, file_options1, + &writable_file_ptr1, + /*dbg*/ nullptr)); + file.reset(new TestFSWritableFile( + fname, file_options1, std::move(writable_file_ptr1), fault_fs_.get())); + // Enable checksum handoff for this file, but do not enable buffer checksum. 
+ // So Append with checksum logic will not be triggered + file_writer.reset(new WritableFileWriter( + std::move(file), fname, file_options1, SystemClock::Default().get(), + nullptr, ioptions.stats, ioptions.listeners, + ioptions.file_checksum_gen_factory.get(), true, true)); + + for (int i = 0; i < 1000; i++) { + data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); + data_crc32c = crc32c::Value(data.c_str(), data.size()); + ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + + data = rnd.RandomString((static_cast(size_r.Next()) % 97)); + ASSERT_OK(file_writer->Append(Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush()); + } + ASSERT_OK(file_writer->Close()); + if (file_options1.rate_limiter != nullptr) { + delete file_options1.rate_limiter; + } + + Destroy(options); +} + +#ifndef ROCKSDB_LITE +TEST_F(WritableFileWriterTest, AppendStatusReturn) { + class FakeWF : public FSWritableFile { + public: + explicit FakeWF() : use_direct_io_(false), io_error_(false) {} + + bool use_direct_io() const override { return use_direct_io_; } + + using FSWritableFile::Append; + IOStatus Append(const Slice& /*data*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + if (io_error_) { + return IOStatus::IOError("Fake IO error"); + } + return IOStatus::OK(); + } + using FSWritableFile::PositionedAppend; + IOStatus PositionedAppend(const Slice& /*data*/, uint64_t, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + if (io_error_) { + return IOStatus::IOError("Fake IO error"); + } + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + void Setuse_direct_io(bool val) { use_direct_io_ = val; } + void SetIOError(bool val) { io_error_ = val; } + + protected: + bool use_direct_io_; + bool io_error_; + }; + std::unique_ptr wf(new FakeWF()); + wf->Setuse_direct_io(true); + std::unique_ptr writer( + new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions())); + + ASSERT_OK(writer->Append(std::string(2 * kMb, 'a'))); + + // Next call to WritableFile::Append() should fail + FakeWF* fwf = static_cast(writer->writable_file()); + fwf->SetIOError(true); + ASSERT_NOK(writer->Append(std::string(2 * kMb, 'b'))); +} +#endif + +class ReadaheadRandomAccessFileTest + : public testing::Test, + public testing::WithParamInterface { + public: + static std::vector GetReadaheadSizeList() { + return {1lu << 12, 1lu << 16}; + } + void SetUp() override { + readahead_size_ = GetParam(); + scratch_.reset(new char[2 * readahead_size_]); + ResetSourceStr(); + } + ReadaheadRandomAccessFileTest() : control_contents_() {} + std::string Read(uint64_t offset, size_t n) { + Slice result; + Status s = test_read_holder_->Read(offset, n, IOOptions(), &result, + scratch_.get(), nullptr); + EXPECT_TRUE(s.ok() || s.IsInvalidArgument()); + return std::string(result.data(), result.size()); + } + void ResetSourceStr(const std::string& str = "") { + std::unique_ptr sink( + new test::StringSink(&control_contents_)); + std::unique_ptr write_holder(new WritableFileWriter( + std::move(sink), "" /* don't care */, FileOptions())); + Status s = write_holder->Append(Slice(str)); + EXPECT_OK(s); + s = write_holder->Flush(); + EXPECT_OK(s); + std::unique_ptr read_holder( + new 
test::StringSource(control_contents_)); + test_read_holder_ = + NewReadaheadRandomAccessFile(std::move(read_holder), readahead_size_); + } + size_t GetReadaheadSize() const { return readahead_size_; } + + private: + size_t readahead_size_; + Slice control_contents_; + std::unique_ptr test_read_holder_; + std::unique_ptr scratch_; +}; + +TEST_P(ReadaheadRandomAccessFileTest, EmptySourceStr) { + ASSERT_EQ("", Read(0, 1)); + ASSERT_EQ("", Read(0, 0)); + ASSERT_EQ("", Read(13, 13)); +} + +TEST_P(ReadaheadRandomAccessFileTest, SourceStrLenLessThanReadaheadSize) { + std::string str = "abcdefghijklmnopqrs"; + ResetSourceStr(str); + ASSERT_EQ(str.substr(3, 4), Read(3, 4)); + ASSERT_EQ(str.substr(0, 3), Read(0, 3)); + ASSERT_EQ(str, Read(0, str.size())); + ASSERT_EQ(str.substr(7, std::min(static_cast(str.size()) - 7, 30)), + Read(7, 30)); + ASSERT_EQ("", Read(100, 100)); +} + +TEST_P(ReadaheadRandomAccessFileTest, SourceStrLenGreaterThanReadaheadSize) { + Random rng(42); + for (int k = 0; k < 100; ++k) { + size_t strLen = k * GetReadaheadSize() + + rng.Uniform(static_cast(GetReadaheadSize())); + std::string str = rng.HumanReadableString(static_cast(strLen)); + ResetSourceStr(str); + for (int test = 1; test <= 100; ++test) { + size_t offset = rng.Uniform(static_cast(strLen)); + size_t n = rng.Uniform(static_cast(GetReadaheadSize())); + ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), + Read(offset, n)); + } + } +} + +TEST_P(ReadaheadRandomAccessFileTest, ReadExceedsReadaheadSize) { + Random rng(7); + size_t strLen = 4 * GetReadaheadSize() + + rng.Uniform(static_cast(GetReadaheadSize())); + std::string str = rng.HumanReadableString(static_cast(strLen)); + ResetSourceStr(str); + for (int test = 1; test <= 100; ++test) { + size_t offset = rng.Uniform(static_cast(strLen)); + size_t n = + GetReadaheadSize() + rng.Uniform(static_cast(GetReadaheadSize())); + ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), + Read(offset, n)); + } +} + +INSTANTIATE_TEST_CASE_P( + EmptySourceStr, ReadaheadRandomAccessFileTest, + ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); +INSTANTIATE_TEST_CASE_P( + SourceStrLenLessThanReadaheadSize, ReadaheadRandomAccessFileTest, + ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); +INSTANTIATE_TEST_CASE_P( + SourceStrLenGreaterThanReadaheadSize, ReadaheadRandomAccessFileTest, + ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); +INSTANTIATE_TEST_CASE_P( + ReadExceedsReadaheadSize, ReadaheadRandomAccessFileTest, + ::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList())); + +class ReadaheadSequentialFileTest : public testing::Test, + public testing::WithParamInterface { + public: + static std::vector GetReadaheadSizeList() { + return {1lu << 8, 1lu << 12, 1lu << 16, 1lu << 18}; + } + void SetUp() override { + readahead_size_ = GetParam(); + scratch_.reset(new char[2 * readahead_size_]); + ResetSourceStr(); + } + ReadaheadSequentialFileTest() {} + std::string Read(size_t n) { + Slice result; + Status s = test_read_holder_->Read( + n, &result, scratch_.get(), Env::IO_TOTAL /* rate_limiter_priority*/); + EXPECT_TRUE(s.ok() || s.IsInvalidArgument()); + return std::string(result.data(), result.size()); + } + void Skip(size_t n) { test_read_holder_->Skip(n); } + void ResetSourceStr(const std::string& str = "") { + auto read_holder = std::unique_ptr( + new test::SeqStringSource(str, &seq_read_count_)); + test_read_holder_.reset(new 
SequentialFileReader(std::move(read_holder), + "test", readahead_size_)); + } + size_t GetReadaheadSize() const { return readahead_size_; } + + private: + size_t readahead_size_; + std::unique_ptr test_read_holder_; + std::unique_ptr scratch_; + std::atomic seq_read_count_; +}; + +TEST_P(ReadaheadSequentialFileTest, EmptySourceStr) { + ASSERT_EQ("", Read(0)); + ASSERT_EQ("", Read(1)); + ASSERT_EQ("", Read(13)); +} + +TEST_P(ReadaheadSequentialFileTest, SourceStrLenLessThanReadaheadSize) { + std::string str = "abcdefghijklmnopqrs"; + ResetSourceStr(str); + ASSERT_EQ(str.substr(0, 3), Read(3)); + ASSERT_EQ(str.substr(3, 1), Read(1)); + ASSERT_EQ(str.substr(4), Read(str.size())); + ASSERT_EQ("", Read(100)); +} + +TEST_P(ReadaheadSequentialFileTest, SourceStrLenGreaterThanReadaheadSize) { + Random rng(42); + for (int s = 0; s < 1; ++s) { + for (int k = 0; k < 100; ++k) { + size_t strLen = k * GetReadaheadSize() + + rng.Uniform(static_cast(GetReadaheadSize())); + std::string str = rng.HumanReadableString(static_cast(strLen)); + ResetSourceStr(str); + size_t offset = 0; + for (int test = 1; test <= 100; ++test) { + size_t n = rng.Uniform(static_cast(GetReadaheadSize())); + if (s && test % 2) { + Skip(n); + } else { + ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), Read(n)); + } + offset = std::min(offset + n, strLen); + } + } + } +} + +TEST_P(ReadaheadSequentialFileTest, ReadExceedsReadaheadSize) { + Random rng(42); + for (int s = 0; s < 1; ++s) { + for (int k = 0; k < 100; ++k) { + size_t strLen = k * GetReadaheadSize() + + rng.Uniform(static_cast(GetReadaheadSize())); + std::string str = rng.HumanReadableString(static_cast(strLen)); + ResetSourceStr(str); + size_t offset = 0; + for (int test = 1; test <= 100; ++test) { + size_t n = GetReadaheadSize() + + rng.Uniform(static_cast(GetReadaheadSize())); + if (s && test % 2) { + Skip(n); + } else { + ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), Read(n)); + } + offset = std::min(offset + n, strLen); + } + } + } +} + +INSTANTIATE_TEST_CASE_P( + EmptySourceStr, ReadaheadSequentialFileTest, + ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); +INSTANTIATE_TEST_CASE_P( + SourceStrLenLessThanReadaheadSize, ReadaheadSequentialFileTest, + ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); +INSTANTIATE_TEST_CASE_P( + SourceStrLenGreaterThanReadaheadSize, ReadaheadSequentialFileTest, + ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); +INSTANTIATE_TEST_CASE_P( + ReadExceedsReadaheadSize, ReadaheadSequentialFileTest, + ::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList())); + +namespace { +std::string GenerateLine(int n) { + std::string rv; + // Multiples of 17 characters per line, for likely bad buffer alignment + for (int i = 0; i < n; ++i) { + rv.push_back(static_cast('0' + (i % 10))); + rv.append("xxxxxxxxxxxxxxxx"); + } + return rv; +} +} // namespace + +TEST(LineFileReaderTest, LineFileReaderTest) { + const int nlines = 1000; + + std::unique_ptr mem_env(MockEnv::Create(Env::Default())); + std::shared_ptr fs = mem_env->GetFileSystem(); + // Create an input file + { + std::unique_ptr file; + ASSERT_OK( + fs->NewWritableFile("testfile", FileOptions(), &file, /*dbg*/ nullptr)); + + for (int i = 0; i < nlines; ++i) { + std::string line = GenerateLine(i); + line.push_back('\n'); + ASSERT_OK(file->Append(line, IOOptions(), /*dbg*/ nullptr)); + } + } + + // Verify with no I/O errors + { + std::unique_ptr reader; + 
ASSERT_OK(LineFileReader::Create(fs, "testfile", FileOptions(), &reader, + nullptr /* dbg */, + nullptr /* rate_limiter */)); + std::string line; + int count = 0; + while (reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) { + ASSERT_EQ(line, GenerateLine(count)); + ++count; + ASSERT_EQ(static_cast(reader->GetLineNumber()), count); + } + ASSERT_OK(reader->GetStatus()); + ASSERT_EQ(count, nlines); + ASSERT_EQ(static_cast(reader->GetLineNumber()), count); + // And still + ASSERT_FALSE( + reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)); + ASSERT_OK(reader->GetStatus()); + ASSERT_EQ(static_cast(reader->GetLineNumber()), count); + } + + // Verify with injected I/O error + { + std::unique_ptr reader; + ASSERT_OK(LineFileReader::Create(fs, "testfile", FileOptions(), &reader, + nullptr /* dbg */, + nullptr /* rate_limiter */)); + std::string line; + int count = 0; + // Read part way through the file + while (count < nlines / 4) { + ASSERT_TRUE( + reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)); + ASSERT_EQ(line, GenerateLine(count)); + ++count; + ASSERT_EQ(static_cast(reader->GetLineNumber()), count); + } + ASSERT_OK(reader->GetStatus()); + + // Inject error + int callback_count = 0; + SyncPoint::GetInstance()->SetCallBack( + "MemFile::Read:IOStatus", [&](void* arg) { + IOStatus* status = static_cast(arg); + *status = IOStatus::Corruption("test"); + ++callback_count; + }); + SyncPoint::GetInstance()->EnableProcessing(); + + while (reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)) { + ASSERT_EQ(line, GenerateLine(count)); + ++count; + ASSERT_EQ(static_cast(reader->GetLineNumber()), count); + } + ASSERT_TRUE(reader->GetStatus().IsCorruption()); + ASSERT_LT(count, nlines / 2); + ASSERT_EQ(callback_count, 1); + + // Still get error & no retry + ASSERT_FALSE( + reader->ReadLine(&line, Env::IO_TOTAL /* rate_limiter_priority */)); + ASSERT_TRUE(reader->GetStatus().IsCorruption()); + ASSERT_EQ(callback_count, 1); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + } +} + +#ifndef ROCKSDB_LITE +class IOErrorEventListener : public EventListener { + public: + IOErrorEventListener() { notify_error_.store(0); } + + void OnIOError(const IOErrorInfo& io_error_info) override { + notify_error_++; + EXPECT_FALSE(io_error_info.file_path.empty()); + EXPECT_FALSE(io_error_info.io_status.ok()); + } + + size_t NotifyErrorCount() { return notify_error_; } + + bool ShouldBeNotifiedOnFileIO() override { return true; } + + private: + std::atomic notify_error_; +}; + +TEST_F(DBWritableFileWriterTest, IOErrorNotification) { + class FakeWF : public FSWritableFile { + public: + explicit FakeWF() : io_error_(false) { + file_append_errors_.store(0); + file_flush_errors_.store(0); + } + + using FSWritableFile::Append; + IOStatus Append(const Slice& /*data*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + if (io_error_) { + file_append_errors_++; + return IOStatus::IOError("Fake IO error"); + } + return IOStatus::OK(); + } + + using FSWritableFile::PositionedAppend; + IOStatus PositionedAppend(const Slice& /*data*/, uint64_t, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + if (io_error_) { + return IOStatus::IOError("Fake IO error"); + } + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + 
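+      // Mirror Append above: count the simulated failure and return a fake
+      // I/O error so the writer's listener-notification path is exercised.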
if (io_error_) { + file_flush_errors_++; + return IOStatus::IOError("Fake IO error"); + } + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return IOStatus::OK(); + } + + void SetIOError(bool val) { io_error_ = val; } + + void CheckCounters(int file_append_errors, int file_flush_errors) { + ASSERT_EQ(file_append_errors, file_append_errors_); + ASSERT_EQ(file_flush_errors_, file_flush_errors); + } + + protected: + bool io_error_; + std::atomic file_append_errors_; + std::atomic file_flush_errors_; + }; + + FileOptions file_options = FileOptions(); + Options options = GetDefaultOptions(); + options.create_if_missing = true; + IOErrorEventListener* listener = new IOErrorEventListener(); + options.listeners.emplace_back(listener); + + DestroyAndReopen(options); + ImmutableOptions ioptions(options); + + std::string fname = dbname_ + "/test_file"; + std::unique_ptr writable_file_ptr(new FakeWF); + + std::unique_ptr file_writer; + writable_file_ptr->SetIOError(true); + + file_writer.reset(new WritableFileWriter( + std::move(writable_file_ptr), fname, file_options, + SystemClock::Default().get(), nullptr, ioptions.stats, ioptions.listeners, + ioptions.file_checksum_gen_factory.get(), true, true)); + + FakeWF* fwf = static_cast(file_writer->writable_file()); + + fwf->SetIOError(true); + ASSERT_NOK(file_writer->Append(std::string(2 * kMb, 'a'))); + fwf->CheckCounters(1, 0); + ASSERT_EQ(listener->NotifyErrorCount(), 1); + + file_writer->reset_seen_error(); + fwf->SetIOError(true); + ASSERT_NOK(file_writer->Flush()); + fwf->CheckCounters(1, 1); + ASSERT_EQ(listener->NotifyErrorCount(), 2); + + /* No error generation */ + file_writer->reset_seen_error(); + fwf->SetIOError(false); + ASSERT_OK(file_writer->Append(std::string(2 * kMb, 'b'))); + ASSERT_EQ(listener->NotifyErrorCount(), 2); + fwf->CheckCounters(1, 1); +} +#endif // ROCKSDB_LITE + +class WritableFileWriterIOPriorityTest : public testing::Test { + protected: + // This test is to check whether the rate limiter priority can be passed + // correctly from WritableFileWriter functions to FSWritableFile functions. + + void SetUp() override { + // When op_rate_limiter_priority parameter in WritableFileWriter functions + // is the default (Env::IO_TOTAL). 
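+    // Because FakeWF below is constructed with Env::IO_HIGH, every call that
+    // reaches FSWritableFile should see IOOptions::rate_limiter_priority ==
+    // Env::IO_HIGH even though the writer-level default stays Env::IO_TOTAL;
+    // FakeWF's overrides assert exactly that.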
+ std::unique_ptr wf{new FakeWF(Env::IO_HIGH)}; + FileOptions file_options; + writer_.reset(new WritableFileWriter(std::move(wf), "" /* don't care */, + file_options)); + } + + class FakeWF : public FSWritableFile { + public: + explicit FakeWF(Env::IOPriority io_priority) { SetIOPriority(io_priority); } + ~FakeWF() override {} + + IOStatus Append(const Slice& /*data*/, const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus Append(const Slice& data, const IOOptions& options, + const DataVerificationInfo& /* verification_info */, + IODebugContext* dbg) override { + return Append(data, options, dbg); + } + IOStatus PositionedAppend(const Slice& /*data*/, uint64_t /*offset*/, + const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus PositionedAppend( + const Slice& /* data */, uint64_t /* offset */, + const IOOptions& options, + const DataVerificationInfo& /* verification_info */, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus Truncate(uint64_t /*size*/, const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus Close(const IOOptions& options, IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus Flush(const IOOptions& options, IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus Sync(const IOOptions& options, IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus Fsync(const IOOptions& options, IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + uint64_t GetFileSize(const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return 0; + } + void GetPreallocationStatus(size_t* /*block_size*/, + size_t* /*last_allocated_block*/) override {} + size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { + return 0; + } + IOStatus InvalidateCache(size_t /*offset*/, size_t /*length*/) override { + return IOStatus::OK(); + } + + IOStatus Allocate(uint64_t /*offset*/, uint64_t /*len*/, + const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + IOStatus RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/, + const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + return IOStatus::OK(); + } + + void PrepareWrite(size_t /*offset*/, size_t /*len*/, + const IOOptions& options, + IODebugContext* /*dbg*/) override { + EXPECT_EQ(options.rate_limiter_priority, io_priority_); + } + + bool IsSyncThreadSafe() const override { return true; } + }; + + std::unique_ptr writer_; +}; + +TEST_F(WritableFileWriterIOPriorityTest, Append) { + ASSERT_OK(writer_->Append(Slice("abc"))); +} + +TEST_F(WritableFileWriterIOPriorityTest, Pad) { ASSERT_OK(writer_->Pad(500)); } + +TEST_F(WritableFileWriterIOPriorityTest, Flush) { ASSERT_OK(writer_->Flush()); } + 
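For readers skimming these tests, the pattern they verify can be reduced to a small self-contained sketch. The enum, struct, and EffectivePriority helper below are hypothetical stand-ins invented for illustration, not RocksDB's actual WritableFileWriter internals: when no per-operation priority is specified, the priority configured on the file itself is what ends up in the options passed down to the file system.

    #include <cassert>

    // Hypothetical stand-ins for Env::IOPriority and IOOptions.
    enum class IOPriority { kTotal /* "unspecified" */, kLow, kMid, kHigh };

    struct IOOptionsSketch {
      IOPriority rate_limiter_priority = IOPriority::kTotal;
    };

    // If the caller did not pick a per-operation priority (kTotal), fall back
    // to the priority configured on the file itself.
    IOPriority EffectivePriority(IOPriority file_priority,
                                 IOPriority op_priority) {
      return op_priority == IOPriority::kTotal ? file_priority : op_priority;
    }

    int main() {
      IOOptionsSketch opts;
      opts.rate_limiter_priority =
          EffectivePriority(/*file_priority=*/IOPriority::kHigh,
                            /*op_priority=*/IOPriority::kTotal);
      // Mirrors what the FakeWF EXPECT_EQ checks above assert: the file's
      // Env::IO_HIGH wins when the writer is called with the default priority.
      assert(opts.rate_limiter_priority == IOPriority::kHigh);
      return 0;
    }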
+TEST_F(WritableFileWriterIOPriorityTest, Close) { ASSERT_OK(writer_->Close()); } + +TEST_F(WritableFileWriterIOPriorityTest, Sync) { + ASSERT_OK(writer_->Sync(false)); + ASSERT_OK(writer_->Sync(true)); +} + +TEST_F(WritableFileWriterIOPriorityTest, SyncWithoutFlush) { + ASSERT_OK(writer_->SyncWithoutFlush(false)); + ASSERT_OK(writer_->SyncWithoutFlush(true)); +} + +TEST_F(WritableFileWriterIOPriorityTest, BasicOp) { + EnvOptions env_options; + env_options.bytes_per_sync = kMb; + std::unique_ptr wf(new FakeWF(Env::IO_HIGH)); + std::unique_ptr writer( + new WritableFileWriter(std::move(wf), "" /* don't care */, env_options)); + Random r(301); + Status s; + std::unique_ptr large_buf(new char[10 * kMb]); + for (int i = 0; i < 1000; i++) { + int skew_limit = (i < 700) ? 10 : 15; + uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100); + s = writer->Append(Slice(large_buf.get(), num)); + ASSERT_OK(s); + + // Flush in a chance of 1/10. + if (r.Uniform(10) == 0) { + s = writer->Flush(); + ASSERT_OK(s); + } + } + s = writer->Close(); + ASSERT_OK(s); +} +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/filelock_test.cc b/src/rocksdb/util/filelock_test.cc new file mode 100644 index 000000000..69947a732 --- /dev/null +++ b/src/rocksdb/util/filelock_test.cc @@ -0,0 +1,148 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include + +#include "rocksdb/env.h" +#include "rocksdb/status.h" +#ifdef __FreeBSD__ +#include +#include +#endif +#include + +#include "test_util/testharness.h" +#include "util/coding.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +class LockTest : public testing::Test { + public: + static LockTest* current_; + std::string file_; + ROCKSDB_NAMESPACE::Env* env_; + + LockTest() + : file_(test::PerThreadDBPath("db_testlock_file")), + env_(ROCKSDB_NAMESPACE::Env::Default()) { + current_ = this; + } + + ~LockTest() override {} + + Status LockFile(FileLock** db_lock) { return env_->LockFile(file_, db_lock); } + + Status UnlockFile(FileLock* db_lock) { return env_->UnlockFile(db_lock); } + + bool AssertFileIsLocked() { + return CheckFileLock(/* lock_expected = */ true); + } + + bool AssertFileIsNotLocked() { + return CheckFileLock(/* lock_expected = */ false); + } + + bool CheckFileLock(bool lock_expected) { + // We need to fork to check the fcntl lock as we need + // to open and close the file from a different process + // to avoid either releasing the lock on close, or not + // contending for it when requesting a lock. + +#ifdef OS_WIN + + // WaitForSingleObject and GetExitCodeProcess can do what waitpid does. 
+ // TODO - implement on Windows + return true; + +#else + + pid_t pid = fork(); + if (0 == pid) { + // child process + int exit_val = EXIT_FAILURE; + int fd = open(file_.c_str(), O_RDWR | O_CREAT, 0644); + if (fd < 0) { + // could not open file, could not check if it was locked + fprintf(stderr, "Open on on file %s failed.\n", file_.c_str()); + exit(exit_val); + } + + struct flock f; + memset(&f, 0, sizeof(f)); + f.l_type = (F_WRLCK); + f.l_whence = SEEK_SET; + f.l_start = 0; + f.l_len = 0; // Lock/unlock entire file + int value = fcntl(fd, F_SETLK, &f); + if (value == -1) { + if (lock_expected) { + exit_val = EXIT_SUCCESS; + } + } else { + if (!lock_expected) { + exit_val = EXIT_SUCCESS; + } + } + close(fd); // lock is released for child process + exit(exit_val); + } else if (pid > 0) { + // parent process + int status; + while (-1 == waitpid(pid, &status, 0)) + ; + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + // child process exited with non success status + return false; + } else { + return true; + } + } else { + fprintf(stderr, "Fork failed\n"); + return false; + } + return false; + +#endif + } +}; +LockTest* LockTest::current_; + +TEST_F(LockTest, LockBySameThread) { + FileLock* lock1; + FileLock* lock2; + + // acquire a lock on a file + ASSERT_OK(LockFile(&lock1)); + + // check the file is locked + ASSERT_TRUE(AssertFileIsLocked()); + + // re-acquire the lock on the same file. This should fail. + Status s = LockFile(&lock2); + ASSERT_TRUE(s.IsIOError()); +#ifndef OS_WIN + // Validate that error message contains current thread ID. + ASSERT_TRUE(s.ToString().find(std::to_string( + Env::Default()->GetThreadID())) != std::string::npos); +#endif + + // check the file is locked + ASSERT_TRUE(AssertFileIsLocked()); + + // release the lock + ASSERT_OK(UnlockFile(lock1)); + + // check the file is not locked + ASSERT_TRUE(AssertFileIsNotLocked()); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/filter_bench.cc b/src/rocksdb/util/filter_bench.cc new file mode 100644 index 000000000..93186cd08 --- /dev/null +++ b/src/rocksdb/util/filter_bench.cc @@ -0,0 +1,840 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
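Stepping back to LockTest::CheckFileLock above: the POSIX branch has to fork because fcntl() record locks are held per process, so a probe run inside the test process (which may already own the lock) would always succeed. A standalone sketch of that technique follows; the helper name is made up for illustration and a POSIX environment is assumed.

    #include <fcntl.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #include <cstring>

    // Probe whether another process holds an fcntl write lock on `path`.
    bool HeldByAnotherProcess(const char* path) {
      pid_t pid = fork();
      if (pid < 0) {
        return false;  // fork failed; cannot tell
      }
      if (pid == 0) {
        // Child: try to take a whole-file write lock, report via exit status.
        int fd = open(path, O_RDWR | O_CREAT, 0644);
        if (fd < 0) {
          _exit(2);  // could not even open the file
        }
        struct flock f;
        memset(&f, 0, sizeof(f));
        f.l_type = F_WRLCK;
        f.l_whence = SEEK_SET;  // l_start == l_len == 0 covers the whole file
        _exit(fcntl(fd, F_SETLK, &f) == -1 ? 1 : 0);  // 1 => someone holds it
      }
      int status = 0;
      while (waitpid(pid, &status, 0) == -1) {
      }
      return WIFEXITED(status) && WEXITSTATUS(status) == 1;
    }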
+ +#if !defined(GFLAGS) || defined(ROCKSDB_LITE) +#include +int main() { + fprintf(stderr, "filter_bench requires gflags and !ROCKSDB_LITE\n"); + return 1; +} +#else + +#include +#include +#include +#include + +#include "memory/arena.h" +#include "port/port.h" +#include "port/stack_trace.h" +#include "rocksdb/cache.h" +#include "rocksdb/env.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/table.h" +#include "table/block_based/filter_policy_internal.h" +#include "table/block_based/full_filter_block.h" +#include "table/block_based/mock_block_based_table.h" +#include "table/plain/plain_table_bloom.h" +#include "util/cast_util.h" +#include "util/gflags_compat.h" +#include "util/hash.h" +#include "util/random.h" +#include "util/stderr_logger.h" +#include "util/stop_watch.h" +#include "util/string_util.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; +using GFLAGS_NAMESPACE::RegisterFlagValidator; +using GFLAGS_NAMESPACE::SetUsageMessage; + +DEFINE_uint32(seed, 0, "Seed for random number generators"); + +DEFINE_double(working_mem_size_mb, 200, + "MB of memory to get up to among all filters, unless " + "m_keys_total_max is specified."); + +DEFINE_uint32(average_keys_per_filter, 10000, + "Average number of keys per filter"); + +DEFINE_double(vary_key_count_ratio, 0.4, + "Vary number of keys by up to +/- vary_key_count_ratio * " + "average_keys_per_filter."); + +DEFINE_uint32(key_size, 24, "Average number of bytes for each key"); + +DEFINE_bool(vary_key_alignment, true, + "Whether to vary key alignment (default: at least 32-bit " + "alignment)"); + +DEFINE_uint32(vary_key_size_log2_interval, 5, + "Use same key size 2^n times, then change. Key size varies from " + "-2 to +2 bytes vs. average, unless n>=30 to fix key size."); + +DEFINE_uint32(batch_size, 8, "Number of keys to group in each batch"); + +DEFINE_double(bits_per_key, 10.0, "Bits per key setting for filters"); + +DEFINE_double(m_queries, 200, "Millions of queries for each test mode"); + +DEFINE_double(m_keys_total_max, 0, + "Maximum total keys added to filters, in millions. " + "0 (default) disables. Non-zero overrides working_mem_size_mb " + "option."); + +DEFINE_bool(use_full_block_reader, false, + "Use FullFilterBlockReader interface rather than FilterBitsReader"); + +DEFINE_bool(use_plain_table_bloom, false, + "Use PlainTableBloom structure and interface rather than " + "FilterBitsReader/FullFilterBlockReader"); + +DEFINE_bool(new_builder, false, + "Whether to create a new builder for each new filter"); + +DEFINE_uint32(impl, 0, + "Select filter implementation. Without -use_plain_table_bloom:" + "0 = legacy full Bloom filter, " + "1 = format_version 5 Bloom filter, 2 = Ribbon128 filter. With " + "-use_plain_table_bloom: 0 = no locality, 1 = locality."); + +DEFINE_bool(net_includes_hashing, false, + "Whether query net ns/op times should include hashing. 
" + "(if not, dry run will include hashing) " + "(build times always include hashing)"); + +DEFINE_bool(optimize_filters_for_memory, false, + "Setting for BlockBasedTableOptions::optimize_filters_for_memory"); + +DEFINE_bool(detect_filter_construct_corruption, false, + "Setting for " + "BlockBasedTableOptions::detect_filter_construct_corruption"); + +DEFINE_uint32(block_cache_capacity_MB, 8, + "Setting for " + "LRUCacheOptions::capacity"); + +DEFINE_bool(charge_filter_construction, false, + "Setting for " + "CacheEntryRoleOptions::charged of" + "CacheEntryRole::kFilterConstruction"); + +DEFINE_bool(strict_capacity_limit, false, + "Setting for " + "LRUCacheOptions::strict_capacity_limit"); + +DEFINE_bool(quick, false, "Run more limited set of tests, fewer queries"); + +DEFINE_bool(best_case, false, "Run limited tests only for best-case"); + +DEFINE_bool(allow_bad_fp_rate, false, "Continue even if FP rate is bad"); + +DEFINE_bool(legend, false, + "Print more information about interpreting results instead of " + "running tests"); + +DEFINE_uint32(runs, 1, "Number of times to rebuild and run benchmark tests"); + +void _always_assert_fail(int line, const char *file, const char *expr) { + fprintf(stderr, "%s: %d: Assertion %s failed\n", file, line, expr); + abort(); +} + +#define ALWAYS_ASSERT(cond) \ + ((cond) ? (void)0 : ::_always_assert_fail(__LINE__, __FILE__, #cond)) + +#ifndef NDEBUG +// This could affect build times enough that we should not include it for +// accurate speed tests +#define PREDICT_FP_RATE +#endif + +using ROCKSDB_NAMESPACE::Arena; +using ROCKSDB_NAMESPACE::BlockContents; +using ROCKSDB_NAMESPACE::BloomFilterPolicy; +using ROCKSDB_NAMESPACE::BloomHash; +using ROCKSDB_NAMESPACE::BloomLikeFilterPolicy; +using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder; +using ROCKSDB_NAMESPACE::CachableEntry; +using ROCKSDB_NAMESPACE::Cache; +using ROCKSDB_NAMESPACE::CacheEntryRole; +using ROCKSDB_NAMESPACE::CacheEntryRoleOptions; +using ROCKSDB_NAMESPACE::EncodeFixed32; +using ROCKSDB_NAMESPACE::Env; +using ROCKSDB_NAMESPACE::FastRange32; +using ROCKSDB_NAMESPACE::FilterBitsReader; +using ROCKSDB_NAMESPACE::FilterBuildingContext; +using ROCKSDB_NAMESPACE::FilterPolicy; +using ROCKSDB_NAMESPACE::FullFilterBlockReader; +using ROCKSDB_NAMESPACE::GetSliceHash; +using ROCKSDB_NAMESPACE::GetSliceHash64; +using ROCKSDB_NAMESPACE::Lower32of64; +using ROCKSDB_NAMESPACE::LRUCacheOptions; +using ROCKSDB_NAMESPACE::ParsedFullFilterBlock; +using ROCKSDB_NAMESPACE::PlainTableBloomV1; +using ROCKSDB_NAMESPACE::Random32; +using ROCKSDB_NAMESPACE::Slice; +using ROCKSDB_NAMESPACE::static_cast_with_check; +using ROCKSDB_NAMESPACE::Status; +using ROCKSDB_NAMESPACE::StderrLogger; +using ROCKSDB_NAMESPACE::mock::MockBlockBasedTableTester; + +struct KeyMaker { + KeyMaker(size_t avg_size) + : smallest_size_(avg_size - + (FLAGS_vary_key_size_log2_interval >= 30 ? 2 : 0)), + buf_size_(avg_size + 11), // pad to vary key size and alignment + buf_(new char[buf_size_]) { + memset(buf_.get(), 0, buf_size_); + assert(smallest_size_ > 8); + } + size_t smallest_size_; + size_t buf_size_; + std::unique_ptr buf_; + + // Returns a unique(-ish) key based on the given parameter values. Each + // call returns a Slice from the same buffer so previously returned + // Slices should be considered invalidated. + Slice Get(uint32_t filter_num, uint32_t val_num) { + size_t start = FLAGS_vary_key_alignment ? 
val_num % 4 : 0; + size_t len = smallest_size_; + if (FLAGS_vary_key_size_log2_interval < 30) { + // To get range [avg_size - 2, avg_size + 2] + // use range [smallest_size, smallest_size + 4] + len += FastRange32( + (val_num >> FLAGS_vary_key_size_log2_interval) * 1234567891, 5); + } + char *data = buf_.get() + start; + // Populate key data such that all data makes it into a key of at + // least 8 bytes. We also don't want all the within-filter key + // variance confined to a contiguous 32 bits, because then a 32 bit + // hash function can "cheat" the false positive rate by + // approximating a perfect hash. + EncodeFixed32(data, val_num); + EncodeFixed32(data + 4, filter_num + val_num); + // ensure clearing leftovers from different alignment + EncodeFixed32(data + 8, 0); + return Slice(data, len); + } +}; + +void PrintWarnings() { +#if defined(__GNUC__) && !defined(__OPTIMIZE__) + fprintf(stdout, + "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); +#endif +#ifndef NDEBUG + fprintf(stdout, + "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); +#endif +} + +void PrintError(const char *error) { fprintf(stderr, "ERROR: %s\n", error); } + +struct FilterInfo { + uint32_t filter_id_ = 0; + std::unique_ptr owner_; + Slice filter_; + Status filter_construction_status = Status::OK(); + uint32_t keys_added_ = 0; + std::unique_ptr reader_; + std::unique_ptr full_block_reader_; + std::unique_ptr plain_table_bloom_; + uint64_t outside_queries_ = 0; + uint64_t false_positives_ = 0; +}; + +enum TestMode { + kSingleFilter, + kBatchPrepared, + kBatchUnprepared, + kFiftyOneFilter, + kEightyTwentyFilter, + kRandomFilter, +}; + +static const std::vector allTestModes = { + kSingleFilter, kBatchPrepared, kBatchUnprepared, + kFiftyOneFilter, kEightyTwentyFilter, kRandomFilter, +}; + +static const std::vector quickTestModes = { + kSingleFilter, + kRandomFilter, +}; + +static const std::vector bestCaseTestModes = { + kSingleFilter, +}; + +const char *TestModeToString(TestMode tm) { + switch (tm) { + case kSingleFilter: + return "Single filter"; + case kBatchPrepared: + return "Batched, prepared"; + case kBatchUnprepared: + return "Batched, unprepared"; + case kFiftyOneFilter: + return "Skewed 50% in 1%"; + case kEightyTwentyFilter: + return "Skewed 80% in 20%"; + case kRandomFilter: + return "Random filter"; + } + return "Bad TestMode"; +} + +// Do just enough to keep some data dependence for the +// compiler / CPU +static uint32_t DryRunNoHash(Slice &s) { + uint32_t sz = static_cast(s.size()); + if (sz >= 4) { + return sz + s.data()[3]; + } else { + return sz; + } +} + +static uint32_t DryRunHash32(Slice &s) { + // Same perf characteristics as GetSliceHash() + return BloomHash(s); +} + +static uint32_t DryRunHash64(Slice &s) { + return Lower32of64(GetSliceHash64(s)); +} + +const std::shared_ptr &GetPolicy() { + static std::shared_ptr policy; + if (!policy) { + policy = BloomLikeFilterPolicy::Create( + BloomLikeFilterPolicy::GetAllFixedImpls().at(FLAGS_impl), + FLAGS_bits_per_key); + } + return policy; +} + +struct FilterBench : public MockBlockBasedTableTester { + std::vector kms_; + std::vector infos_; + Random32 random_; + std::ostringstream fp_rate_report_; + Arena arena_; + double m_queries_; + StderrLogger stderr_logger_; + + FilterBench() + : MockBlockBasedTableTester(GetPolicy()), + random_(FLAGS_seed), + m_queries_(0) { + for (uint32_t i = 0; i < FLAGS_batch_size; ++i) { + kms_.emplace_back(FLAGS_key_size < 8 ? 
8 : FLAGS_key_size); + } + ioptions_.logger = &stderr_logger_; + table_options_.optimize_filters_for_memory = + FLAGS_optimize_filters_for_memory; + table_options_.detect_filter_construct_corruption = + FLAGS_detect_filter_construct_corruption; + table_options_.cache_usage_options.options_overrides.insert( + {CacheEntryRole::kFilterConstruction, + {/*.charged = */ FLAGS_charge_filter_construction + ? CacheEntryRoleOptions::Decision::kEnabled + : CacheEntryRoleOptions::Decision::kDisabled}}); + if (FLAGS_charge_filter_construction) { + table_options_.no_block_cache = false; + LRUCacheOptions lo; + lo.capacity = FLAGS_block_cache_capacity_MB * 1024 * 1024; + lo.num_shard_bits = 0; // 2^0 shard + lo.strict_capacity_limit = FLAGS_strict_capacity_limit; + std::shared_ptr cache(NewLRUCache(lo)); + table_options_.block_cache = cache; + } + } + + void Go(); + + double RandomQueryTest(uint32_t inside_threshold, bool dry_run, + TestMode mode); +}; + +void FilterBench::Go() { + if (FLAGS_use_plain_table_bloom && FLAGS_use_full_block_reader) { + throw std::runtime_error( + "Can't combine -use_plain_table_bloom and -use_full_block_reader"); + } + if (FLAGS_use_plain_table_bloom) { + if (FLAGS_impl > 1) { + throw std::runtime_error( + "-impl must currently be >= 0 and <= 1 for Plain table"); + } + } else { + if (FLAGS_impl > 2) { + throw std::runtime_error( + "-impl must currently be >= 0 and <= 2 for Block-based table"); + } + } + + if (FLAGS_vary_key_count_ratio < 0.0 || FLAGS_vary_key_count_ratio > 1.0) { + throw std::runtime_error("-vary_key_count_ratio must be >= 0.0 and <= 1.0"); + } + + // For example, average_keys_per_filter = 100, vary_key_count_ratio = 0.1. + // Varys up to +/- 10 keys. variance_range = 21 (generating value 0..20). + // variance_offset = 10, so value - offset average value is always 0. + const uint32_t variance_range = + 1 + 2 * static_cast(FLAGS_vary_key_count_ratio * + FLAGS_average_keys_per_filter); + const uint32_t variance_offset = variance_range / 2; + + const std::vector &testModes = FLAGS_best_case ? bestCaseTestModes + : FLAGS_quick ? quickTestModes + : allTestModes; + + m_queries_ = FLAGS_m_queries; + double working_mem_size_mb = FLAGS_working_mem_size_mb; + if (FLAGS_quick) { + m_queries_ /= 7.0; + } else if (FLAGS_best_case) { + m_queries_ /= 3.0; + working_mem_size_mb /= 10.0; + } + + std::cout << "Building..." 
<< std::endl; + + std::unique_ptr builder; + + size_t total_memory_used = 0; + size_t total_size = 0; + size_t total_keys_added = 0; +#ifdef PREDICT_FP_RATE + double weighted_predicted_fp_rate = 0.0; +#endif + size_t max_total_keys; + size_t max_mem; + if (FLAGS_m_keys_total_max > 0) { + max_total_keys = static_cast(1000000 * FLAGS_m_keys_total_max); + max_mem = SIZE_MAX; + } else { + max_total_keys = SIZE_MAX; + max_mem = static_cast(1024 * 1024 * working_mem_size_mb); + } + + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); + + infos_.clear(); + while ((working_mem_size_mb == 0 || total_size < max_mem) && + total_keys_added < max_total_keys) { + uint32_t filter_id = random_.Next(); + uint32_t keys_to_add = FLAGS_average_keys_per_filter + + FastRange32(random_.Next(), variance_range) - + variance_offset; + if (max_total_keys - total_keys_added < keys_to_add) { + keys_to_add = static_cast(max_total_keys - total_keys_added); + } + infos_.emplace_back(); + FilterInfo &info = infos_.back(); + info.filter_id_ = filter_id; + info.keys_added_ = keys_to_add; + if (FLAGS_use_plain_table_bloom) { + info.plain_table_bloom_.reset(new PlainTableBloomV1()); + info.plain_table_bloom_->SetTotalBits( + &arena_, static_cast(keys_to_add * FLAGS_bits_per_key), + FLAGS_impl, 0 /*huge_page*/, nullptr /*logger*/); + for (uint32_t i = 0; i < keys_to_add; ++i) { + uint32_t hash = GetSliceHash(kms_[0].Get(filter_id, i)); + info.plain_table_bloom_->AddHash(hash); + } + info.filter_ = info.plain_table_bloom_->GetRawData(); + } else { + if (!builder) { + builder.reset( + static_cast_with_check(GetBuilder())); + } + for (uint32_t i = 0; i < keys_to_add; ++i) { + builder->AddKey(kms_[0].Get(filter_id, i)); + } + info.filter_ = + builder->Finish(&info.owner_, &info.filter_construction_status); + if (info.filter_construction_status.ok()) { + info.filter_construction_status = + builder->MaybePostVerify(info.filter_); + } + if (!info.filter_construction_status.ok()) { + PrintError(info.filter_construction_status.ToString().c_str()); + } +#ifdef PREDICT_FP_RATE + weighted_predicted_fp_rate += + keys_to_add * + builder->EstimatedFpRate(keys_to_add, info.filter_.size()); +#endif + if (FLAGS_new_builder) { + builder.reset(); + } + info.reader_.reset( + table_options_.filter_policy->GetFilterBitsReader(info.filter_)); + CachableEntry block( + new ParsedFullFilterBlock(table_options_.filter_policy.get(), + BlockContents(info.filter_)), + nullptr /* cache */, nullptr /* cache_handle */, + true /* own_value */); + info.full_block_reader_.reset( + new FullFilterBlockReader(table_.get(), std::move(block))); + } + total_size += info.filter_.size(); +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + total_memory_used += + malloc_usable_size(const_cast(info.filter_.data())); +#endif // ROCKSDB_MALLOC_USABLE_SIZE + total_keys_added += keys_to_add; + } + + uint64_t elapsed_nanos = timer.ElapsedNanos(); + double ns = double(elapsed_nanos) / total_keys_added; + std::cout << "Build avg ns/key: " << ns << std::endl; + std::cout << "Number of filters: " << infos_.size() << std::endl; + std::cout << "Total size (MB): " << total_size / 1024.0 / 1024.0 << std::endl; + if (total_memory_used > 0) { + std::cout << "Reported total allocated memory (MB): " + << total_memory_used / 1024.0 / 1024.0 << std::endl; + std::cout << "Reported internal fragmentation: " + << (total_memory_used - total_size) * 100.0 / total_size << "%" + << std::endl; + } + + double bpk = total_size * 8.0 / total_keys_added; + std::cout << 
"Bits/key stored: " << bpk << std::endl; +#ifdef PREDICT_FP_RATE + std::cout << "Predicted FP rate %: " + << 100.0 * (weighted_predicted_fp_rate / total_keys_added) + << std::endl; +#endif + if (!FLAGS_quick && !FLAGS_best_case) { + double tolerable_rate = std::pow(2.0, -(bpk - 1.0) / (1.4 + bpk / 50.0)); + std::cout << "Best possible FP rate %: " << 100.0 * std::pow(2.0, -bpk) + << std::endl; + std::cout << "Tolerable FP rate %: " << 100.0 * tolerable_rate << std::endl; + + std::cout << "----------------------------" << std::endl; + std::cout << "Verifying..." << std::endl; + + uint32_t outside_q_per_f = + static_cast(m_queries_ * 1000000 / infos_.size()); + uint64_t fps = 0; + for (uint32_t i = 0; i < infos_.size(); ++i) { + FilterInfo &info = infos_[i]; + for (uint32_t j = 0; j < info.keys_added_; ++j) { + if (FLAGS_use_plain_table_bloom) { + uint32_t hash = GetSliceHash(kms_[0].Get(info.filter_id_, j)); + ALWAYS_ASSERT(info.plain_table_bloom_->MayContainHash(hash)); + } else { + ALWAYS_ASSERT( + info.reader_->MayMatch(kms_[0].Get(info.filter_id_, j))); + } + } + for (uint32_t j = 0; j < outside_q_per_f; ++j) { + if (FLAGS_use_plain_table_bloom) { + uint32_t hash = + GetSliceHash(kms_[0].Get(info.filter_id_, j | 0x80000000)); + fps += info.plain_table_bloom_->MayContainHash(hash); + } else { + fps += info.reader_->MayMatch( + kms_[0].Get(info.filter_id_, j | 0x80000000)); + } + } + } + std::cout << " No FNs :)" << std::endl; + double prelim_rate = double(fps) / outside_q_per_f / infos_.size(); + std::cout << " Prelim FP rate %: " << (100.0 * prelim_rate) << std::endl; + + if (!FLAGS_allow_bad_fp_rate) { + ALWAYS_ASSERT(prelim_rate < tolerable_rate); + } + } + + std::cout << "----------------------------" << std::endl; + std::cout << "Mixed inside/outside queries..." << std::endl; + // 50% each inside and outside + uint32_t inside_threshold = UINT32_MAX / 2; + for (TestMode tm : testModes) { + random_.Seed(FLAGS_seed + 1); + double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm); + random_.Seed(FLAGS_seed + 1); + double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm); + std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d) + << std::endl; + } + + if (!FLAGS_quick) { + std::cout << "----------------------------" << std::endl; + std::cout << "Inside queries (mostly)..." << std::endl; + // Do about 95% inside queries rather than 100% so that branch predictor + // can't give itself an artifically crazy advantage. + inside_threshold = UINT32_MAX / 20 * 19; + for (TestMode tm : testModes) { + random_.Seed(FLAGS_seed + 1); + double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm); + random_.Seed(FLAGS_seed + 1); + double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm); + std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d) + << std::endl; + } + + std::cout << "----------------------------" << std::endl; + std::cout << "Outside queries (mostly)..." << std::endl; + // Do about 95% outside queries rather than 100% so that branch predictor + // can't give itself an artifically crazy advantage. 
+ inside_threshold = UINT32_MAX / 20; + for (TestMode tm : testModes) { + random_.Seed(FLAGS_seed + 2); + double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm); + random_.Seed(FLAGS_seed + 2); + double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm); + std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d) + << std::endl; + } + } + std::cout << fp_rate_report_.str(); + + std::cout << "----------------------------" << std::endl; + std::cout << "Done. (For more info, run with -legend or -help.)" << std::endl; +} + +double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run, + TestMode mode) { + for (auto &info : infos_) { + info.outside_queries_ = 0; + info.false_positives_ = 0; + } + + auto dry_run_hash_fn = DryRunNoHash; + if (!FLAGS_net_includes_hashing) { + if (FLAGS_impl == 0 || FLAGS_use_plain_table_bloom) { + dry_run_hash_fn = DryRunHash32; + } else { + dry_run_hash_fn = DryRunHash64; + } + } + + uint32_t num_infos = static_cast(infos_.size()); + uint32_t dry_run_hash = 0; + uint64_t max_queries = static_cast(m_queries_ * 1000000 + 0.50); + // Some filters may be considered secondary in order to implement skewed + // queries. num_primary_filters is the number that are to be treated as + // equal, and any remainder will be treated as secondary. + uint32_t num_primary_filters = num_infos; + // The proportion (when divided by 2^32 - 1) of filter queries going to + // the primary filters (default = all). The remainder of queries are + // against secondary filters. + uint32_t primary_filter_threshold = 0xffffffff; + if (mode == kSingleFilter) { + // 100% of queries to 1 filter + num_primary_filters = 1; + } else if (mode == kFiftyOneFilter) { + if (num_infos < 50) { + return 0.0; // skip + } + // 50% of queries + primary_filter_threshold /= 2; + // to 1% of filters + num_primary_filters = (num_primary_filters + 99) / 100; + } else if (mode == kEightyTwentyFilter) { + if (num_infos < 5) { + return 0.0; // skip + } + // 80% of queries + primary_filter_threshold = primary_filter_threshold / 5 * 4; + // to 20% of filters + num_primary_filters = (num_primary_filters + 4) / 5; + } else if (mode == kRandomFilter) { + if (num_infos == 1) { + return 0.0; // skip + } + } + uint32_t batch_size = 1; + std::unique_ptr batch_slices; + std::unique_ptr batch_slice_ptrs; + std::unique_ptr batch_results; + if (mode == kBatchPrepared || mode == kBatchUnprepared) { + batch_size = static_cast(kms_.size()); + } + + batch_slices.reset(new Slice[batch_size]); + batch_slice_ptrs.reset(new Slice *[batch_size]); + batch_results.reset(new bool[batch_size]); + for (uint32_t i = 0; i < batch_size; ++i) { + batch_results[i] = false; + batch_slice_ptrs[i] = &batch_slices[i]; + } + + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); + + for (uint64_t q = 0; q < max_queries; q += batch_size) { + bool inside_this_time = random_.Next() <= inside_threshold; + + uint32_t filter_index; + if (random_.Next() <= primary_filter_threshold) { + filter_index = random_.Uniformish(num_primary_filters); + } else { + // secondary + filter_index = num_primary_filters + + random_.Uniformish(num_infos - num_primary_filters); + } + FilterInfo &info = infos_[filter_index]; + for (uint32_t i = 0; i < batch_size; ++i) { + if (inside_this_time) { + batch_slices[i] = + kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_)); + } else { + batch_slices[i] = + kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_) | + 
uint32_t{0x80000000}); + info.outside_queries_++; + } + } + // TODO: implement batched interface to full block reader + // TODO: implement batched interface to plain table bloom + if (mode == kBatchPrepared && !FLAGS_use_full_block_reader && + !FLAGS_use_plain_table_bloom) { + for (uint32_t i = 0; i < batch_size; ++i) { + batch_results[i] = false; + } + if (dry_run) { + for (uint32_t i = 0; i < batch_size; ++i) { + batch_results[i] = true; + dry_run_hash += dry_run_hash_fn(batch_slices[i]); + } + } else { + info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(), + batch_results.get()); + } + for (uint32_t i = 0; i < batch_size; ++i) { + if (inside_this_time) { + ALWAYS_ASSERT(batch_results[i]); + } else { + info.false_positives_ += batch_results[i]; + } + } + } else { + for (uint32_t i = 0; i < batch_size; ++i) { + bool may_match; + if (FLAGS_use_plain_table_bloom) { + if (dry_run) { + dry_run_hash += dry_run_hash_fn(batch_slices[i]); + may_match = true; + } else { + uint32_t hash = GetSliceHash(batch_slices[i]); + may_match = info.plain_table_bloom_->MayContainHash(hash); + } + } else if (FLAGS_use_full_block_reader) { + if (dry_run) { + dry_run_hash += dry_run_hash_fn(batch_slices[i]); + may_match = true; + } else { + may_match = info.full_block_reader_->KeyMayMatch( + batch_slices[i], + /*no_io=*/false, /*const_ikey_ptr=*/nullptr, + /*get_context=*/nullptr, + /*lookup_context=*/nullptr, Env::IO_TOTAL); + } + } else { + if (dry_run) { + dry_run_hash += dry_run_hash_fn(batch_slices[i]); + may_match = true; + } else { + may_match = info.reader_->MayMatch(batch_slices[i]); + } + } + if (inside_this_time) { + ALWAYS_ASSERT(may_match); + } else { + info.false_positives_ += may_match; + } + } + } + } + + uint64_t elapsed_nanos = timer.ElapsedNanos(); + double ns = double(elapsed_nanos) / max_queries; + + if (!FLAGS_quick) { + if (dry_run) { + // Printing part of hash prevents dry run components from being optimized + // away by compiler + std::cout << " Dry run (" << std::hex << (dry_run_hash & 0xfffff) + << std::dec << ") "; + } else { + std::cout << " Gross filter "; + } + std::cout << "ns/op: " << ns << std::endl; + } + + if (!dry_run) { + fp_rate_report_.str(""); + uint64_t q = 0; + uint64_t fp = 0; + double worst_fp_rate = 0.0; + double best_fp_rate = 1.0; + for (auto &info : infos_) { + q += info.outside_queries_; + fp += info.false_positives_; + if (info.outside_queries_ > 0) { + double fp_rate = double(info.false_positives_) / info.outside_queries_; + worst_fp_rate = std::max(worst_fp_rate, fp_rate); + best_fp_rate = std::min(best_fp_rate, fp_rate); + } + } + fp_rate_report_ << " Average FP rate %: " << 100.0 * fp / q << std::endl; + if (!FLAGS_quick && !FLAGS_best_case) { + fp_rate_report_ << " Worst FP rate %: " << 100.0 * worst_fp_rate + << std::endl; + fp_rate_report_ << " Best FP rate %: " << 100.0 * best_fp_rate + << std::endl; + fp_rate_report_ << " Best possible bits/key: " + << -std::log(double(fp) / q) / std::log(2.0) << std::endl; + } + } + return ns; +} + +int main(int argc, char **argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + + " [-quick] [OTHER OPTIONS]..."); + ParseCommandLineFlags(&argc, &argv, true); + + PrintWarnings(); + + if (FLAGS_legend) { + std::cout + << "Legend:" << std::endl + << " \"Inside\" - key that was added to filter" << std::endl + << " \"Outside\" - key that was not added to filter" << std::endl + << " \"FN\" - false negative query (must not happen)" << std::endl + 
<< " \"FP\" - false positive query (OK at low rate)" << std::endl + << " \"Dry run\" - cost of testing and hashing overhead." << std::endl + << " \"Gross filter\" - cost of filter queries including testing " + << "\n and hashing overhead." << std::endl + << " \"net\" - best estimate of time in filter operation, without " + << "\n testing and hashing overhead (gross filter - dry run)" + << std::endl + << " \"ns/op\" - nanoseconds per operation (key query or add)" + << std::endl + << " \"Single filter\" - essentially minimum cost, assuming filter" + << "\n fits easily in L1 CPU cache." << std::endl + << " \"Batched, prepared\" - several queries at once against a" + << "\n randomly chosen filter, using multi-query interface." + << std::endl + << " \"Batched, unprepared\" - similar, but using serial calls" + << "\n to single query interface." << std::endl + << " \"Random filter\" - a filter is chosen at random as target" + << "\n of each query." << std::endl + << " \"Skewed X% in Y%\" - like \"Random filter\" except Y% of" + << "\n the filters are designated as \"hot\" and receive X%" + << "\n of queries." << std::endl; + } else { + FilterBench b; + for (uint32_t i = 0; i < FLAGS_runs; ++i) { + b.Go(); + FLAGS_seed += 100; + b.random_.Seed(FLAGS_seed); + } + } + + return 0; +} + +#endif // !defined(GFLAGS) || defined(ROCKSDB_LITE) diff --git a/src/rocksdb/util/gflags_compat.h b/src/rocksdb/util/gflags_compat.h new file mode 100644 index 000000000..b6f88a5bc --- /dev/null +++ b/src/rocksdb/util/gflags_compat.h @@ -0,0 +1,30 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include + +#include + +#ifndef GFLAGS_NAMESPACE +// in case it's not defined in old versions, that's probably because it was +// still google by default. +#define GFLAGS_NAMESPACE google +#endif + +#ifndef DEFINE_uint32 +// DEFINE_uint32 does not appear in older versions of gflags. This should be +// a sane definition for those versions. +#include +#define DEFINE_uint32(name, val, txt) \ + namespace gflags_compat { \ + DEFINE_int32(name, val, txt); \ + } \ + std::reference_wrapper FLAGS_##name = \ + std::ref(*reinterpret_cast(&gflags_compat::FLAGS_##name)); + +#define DECLARE_uint32(name) \ + extern std::reference_wrapper FLAGS_##name; +#endif // !DEFINE_uint32 diff --git a/src/rocksdb/util/hash.cc b/src/rocksdb/util/hash.cc new file mode 100644 index 000000000..0f7f2edc1 --- /dev/null +++ b/src/rocksdb/util/hash.cc @@ -0,0 +1,201 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
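Regarding the gflags_compat.h shim above: on a gflags release that lacks DEFINE_uint32, the macro defines an int32 flag in a side namespace and re-exposes it as a std::reference_wrapper<uint32_t>, so call sites can keep treating the flag as a uint32_t. A minimal usage sketch, with a made-up flag name purely for illustration:

    #include "util/gflags_compat.h"

    // Hypothetical flag; real tools such as filter_bench define their own.
    DEFINE_uint32(example_budget, 64, "Illustrative flag, not part of RocksDB");

    int main(int argc, char** argv) {
      GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);
      // Reads as a plain uint32_t whether the shim or a native DEFINE_uint32
      // was used.
      uint32_t budget = FLAGS_example_budget;
      return budget > 0 ? 0 : 1;
    }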
+ +#include "util/hash.h" + +#include + +#include "port/lang.h" +#include "util/coding.h" +#include "util/hash128.h" +#include "util/math128.h" +#include "util/xxhash.h" +#include "util/xxph3.h" + +namespace ROCKSDB_NAMESPACE { + +uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&) = &GetSliceHash64; + +uint32_t Hash(const char* data, size_t n, uint32_t seed) { + // MurmurHash1 - fast but mediocre quality + // https://github.com/aappleby/smhasher/wiki/MurmurHash1 + // + const uint32_t m = 0xc6a4a793; + const uint32_t r = 24; + const char* limit = data + n; + uint32_t h = static_cast(seed ^ (n * m)); + + // Pick up four bytes at a time + while (data + 4 <= limit) { + uint32_t w = DecodeFixed32(data); + data += 4; + h += w; + h *= m; + h ^= (h >> 16); + } + + // Pick up remaining bytes + switch (limit - data) { + // Note: The original hash implementation used data[i] << shift, which + // promotes the char to int and then performs the shift. If the char is + // negative, the shift is undefined behavior in C++. The hash algorithm is + // part of the format definition, so we cannot change it; to obtain the same + // behavior in a legal way we just cast to uint32_t, which will do + // sign-extension. To guarantee compatibility with architectures where chars + // are unsigned we first cast the char to int8_t. + case 3: + h += static_cast(static_cast(data[2])) << 16; + FALLTHROUGH_INTENDED; + case 2: + h += static_cast(static_cast(data[1])) << 8; + FALLTHROUGH_INTENDED; + case 1: + h += static_cast(static_cast(data[0])); + h *= m; + h ^= (h >> r); + break; + } + return h; +} + +// We are standardizing on a preview release of XXH3, because that's +// the best available at time of standardizing. +// +// In testing (mostly Intel Skylake), this hash function is much more +// thorough than Hash32 and is almost universally faster. Hash() only +// seems faster when passing runtime-sized keys of the same small size +// (less than about 24 bytes) thousands of times in a row; this seems +// to allow the branch predictor to work some magic. XXH3's speed is +// much less dependent on branch prediction. +// +// Hashing with a prefix extractor is potentially a common case of +// hashing objects of small, predictable size. We could consider +// bundling hash functions specialized for particular lengths with +// the prefix extractors. +uint64_t Hash64(const char* data, size_t n, uint64_t seed) { + return XXPH3_64bits_withSeed(data, n, seed); +} + +uint64_t Hash64(const char* data, size_t n) { + // Same as seed = 0 + return XXPH3_64bits(data, n); +} + +uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed) { + // TODO(ajkr): use XXH3 streaming APIs to avoid the copy/allocation. 
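+  // Until then: materialize all parts into one contiguous buffer and hash the
+  // concatenation with NPHash64, so the result matches hashing the whole key.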
+ size_t concat_len = 0; + for (int i = 0; i < data.num_parts; ++i) { + concat_len += data.parts[i].size(); + } + std::string concat_data; + concat_data.reserve(concat_len); + for (int i = 0; i < data.num_parts; ++i) { + concat_data.append(data.parts[i].data(), data.parts[i].size()); + } + assert(concat_data.size() == concat_len); + return NPHash64(concat_data.data(), concat_len, seed); +} + +Unsigned128 Hash128(const char* data, size_t n, uint64_t seed) { + auto h = XXH3_128bits_withSeed(data, n, seed); + return (Unsigned128{h.high64} << 64) | (h.low64); +} + +Unsigned128 Hash128(const char* data, size_t n) { + // Same as seed = 0 + auto h = XXH3_128bits(data, n); + return (Unsigned128{h.high64} << 64) | (h.low64); +} + +void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64) { + // Same as seed = 0 + auto h = XXH3_128bits(data, n); + *high64 = h.high64; + *low64 = h.low64; +} + +void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64, + uint64_t* low64) { + auto h = XXH3_128bits_withSeed(data, n, seed); + *high64 = h.high64; + *low64 = h.low64; +} + +namespace { + +inline uint64_t XXH3_avalanche(uint64_t h64) { + h64 ^= h64 >> 37; + h64 *= 0x165667919E3779F9U; + h64 ^= h64 >> 32; + return h64; +} + +inline uint64_t XXH3_unavalanche(uint64_t h64) { + h64 ^= h64 >> 32; + h64 *= 0x8da8ee41d6df849U; // inverse of 0x165667919E3779F9U + h64 ^= h64 >> 37; + return h64; +} + +} // namespace + +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64) { + // Adapted from XXH3_len_9to16_128b + const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed; + const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed; + Unsigned128 tmp128 = + Multiply64to128(in_low64 ^ in_high64 ^ bitflipl, 0x9E3779B185EBCA87U); + uint64_t lo = Lower64of128(tmp128); + uint64_t hi = Upper64of128(tmp128); + lo += 0x3c0000000000000U; // (len - 1) << 54 + in_high64 ^= bitfliph; + hi += in_high64 + (Lower32of64(in_high64) * uint64_t{0x85EBCA76}); + lo ^= EndianSwapValue(hi); + tmp128 = Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU); + lo = Lower64of128(tmp128); + hi = Upper64of128(tmp128) + (hi * 0xC2B2AE3D27D4EB4FU); + *out_low64 = XXH3_avalanche(lo); + *out_high64 = XXH3_avalanche(hi); +} + +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64) { + // Inverted above (also consulting XXH3_len_9to16_128b) + const uint64_t bitflipl = /*secret part*/ 0x59973f0033362349U - seed; + const uint64_t bitfliph = /*secret part*/ 0xc202797692d63d58U + seed; + uint64_t lo = XXH3_unavalanche(in_low64); + uint64_t hi = XXH3_unavalanche(in_high64); + lo *= 0xba79078168d4baf; // inverse of 0xC2B2AE3D27D4EB4FU + hi -= Upper64of128(Multiply64to128(lo, 0xC2B2AE3D27D4EB4FU)); + hi *= 0xba79078168d4baf; // inverse of 0xC2B2AE3D27D4EB4FU + lo ^= EndianSwapValue(hi); + lo -= 0x3c0000000000000U; + lo *= 0x887493432badb37U; // inverse of 0x9E3779B185EBCA87U + hi -= Upper64of128(Multiply64to128(lo, 0x9E3779B185EBCA87U)); + uint32_t tmp32 = Lower32of64(hi) * 0xb6c92f47; // inverse of 0x85EBCA77 + hi -= tmp32; + hi = (hi & 0xFFFFFFFF00000000U) - + ((tmp32 * uint64_t{0x85EBCA76}) & 0xFFFFFFFF00000000U) + tmp32; + hi ^= bitfliph; + lo ^= hi ^ bitflipl; + *out_high64 = hi; + *out_low64 = lo; +} + +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64) { + BijectiveHash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, 
out_low64); +} + +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64) { + BijectiveUnhash2x64(in_high64, in_low64, /*seed*/ 0, out_high64, out_low64); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/hash.h b/src/rocksdb/util/hash.h new file mode 100644 index 000000000..eafa47f34 --- /dev/null +++ b/src/rocksdb/util/hash.h @@ -0,0 +1,137 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Common hash functions with convenient interfaces. If hashing a +// statically-sized input in a performance-critical context, consider +// calling a specific hash implementation directly, such as +// XXH3_64bits from xxhash.h. +// +// Since this is a very common header, implementation details are kept +// out-of-line. Out-of-lining also aids in tracking the time spent in +// hashing functions. Inlining is of limited benefit for runtime-sized +// hash inputs. + +#pragma once + +#include +#include + +#include "rocksdb/slice.h" +#include "util/fastrange.h" + +namespace ROCKSDB_NAMESPACE { + +// Stable/persistent 64-bit hash. Higher quality and generally faster than +// Hash(), especially for inputs > 24 bytes. +// KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent +// results from previous seed. Recommend incrementing by a large odd number. +extern uint64_t Hash64(const char* data, size_t n, uint64_t seed); + +// Specific optimization without seed (same as seed = 0) +extern uint64_t Hash64(const char* data, size_t n); + +// Non-persistent hash. Must only used for in-memory data structures. +// The hash results are thus subject to change between releases, +// architectures, build configuration, etc. (Thus, it rarely makes sense +// to specify a seed for this function, except for a "rolling" hash.) +// KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent +// results from previous seed. Recommend incrementing by a large odd number. 
+inline uint64_t NPHash64(const char* data, size_t n, uint64_t seed) { +#ifdef ROCKSDB_MODIFY_NPHASH + // For testing "subject to change" + return Hash64(data, n, seed + 123456789); +#else + // Currently same as Hash64 + return Hash64(data, n, seed); +#endif +} + +// Specific optimization without seed (same as seed = 0) +inline uint64_t NPHash64(const char* data, size_t n) { +#ifdef ROCKSDB_MODIFY_NPHASH + // For testing "subject to change" + return Hash64(data, n, 123456789); +#else + // Currently same as Hash64 + return Hash64(data, n); +#endif +} + +// Convenient and equivalent version of Hash128 without depending on 128-bit +// scalars +void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64); +void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64, + uint64_t* low64); + +// Hash 128 bits to 128 bits, guaranteed not to lose data (equivalent to +// Hash2x64 on 16 bytes little endian) +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64); +void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64); + +// Inverse of above (mostly for testing) +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, + uint64_t* out_high64, uint64_t* out_low64); +void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed, + uint64_t* out_high64, uint64_t* out_low64); + +// Stable/persistent 32-bit hash. Moderate quality and high speed on +// small inputs. +// TODO: consider rename to Hash32 +// KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent +// results from previous seed. Recommend pseudorandom or hashed seeds. +extern uint32_t Hash(const char* data, size_t n, uint32_t seed); + +// TODO: consider rename to LegacyBloomHash32 +inline uint32_t BloomHash(const Slice& key) { + return Hash(key.data(), key.size(), 0xbc9f1d34); +} + +inline uint64_t GetSliceHash64(const Slice& key) { + return Hash64(key.data(), key.size()); +} +// Provided for convenience for use with template argument deduction, where a +// specific overload needs to be used. +extern uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&); + +inline uint64_t GetSliceNPHash64(const Slice& s) { + return NPHash64(s.data(), s.size()); +} + +inline uint64_t GetSliceNPHash64(const Slice& s, uint64_t seed) { + return NPHash64(s.data(), s.size(), seed); +} + +// Similar to `GetSliceNPHash64()` with `seed`, but input comes from +// concatenation of `Slice`s in `data`. +extern uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed); + +inline size_t GetSliceRangedNPHash(const Slice& s, size_t range) { + return FastRange64(NPHash64(s.data(), s.size()), range); +} + +// TODO: consider rename to GetSliceHash32 +inline uint32_t GetSliceHash(const Slice& s) { + return Hash(s.data(), s.size(), 397); +} + +// Useful for splitting up a 64-bit hash +inline uint32_t Upper32of64(uint64_t v) { + return static_cast(v >> 32); +} +inline uint32_t Lower32of64(uint64_t v) { return static_cast(v); } + +// std::hash compatible interface. +// TODO: consider rename to SliceHasher32 +struct SliceHasher { + uint32_t operator()(const Slice& s) const { return GetSliceHash(s); } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/hash128.h b/src/rocksdb/util/hash128.h new file mode 100644 index 000000000..305caa14a --- /dev/null +++ b/src/rocksdb/util/hash128.h @@ -0,0 +1,26 @@ +// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +// 128-bit hash gets it own header so that more popular hash.h doesn't +// depend on math128.h + +#include "rocksdb/slice.h" +#include "util/math128.h" + +namespace ROCKSDB_NAMESPACE { + +// Stable/persistent 128-bit hash for non-cryptographic applications. +Unsigned128 Hash128(const char* data, size_t n, uint64_t seed); + +// Specific optimization without seed (same as seed = 0) +Unsigned128 Hash128(const char* data, size_t n); + +inline Unsigned128 GetSliceHash128(const Slice& key) { + return Hash128(key.data(), key.size()); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/hash_containers.h b/src/rocksdb/util/hash_containers.h new file mode 100644 index 000000000..52be3718c --- /dev/null +++ b/src/rocksdb/util/hash_containers.h @@ -0,0 +1,51 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// This header establishes compile-time pluggable implementations of hashed +// container structures, so that deployments have the option of minimal +// dependencies with ok performance (e.g. std::unordered_map) or more +// dependencies with optimized performance (e.g. folly::F14FastMap). + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +#ifdef USE_FOLLY + +#include +#include + +namespace ROCKSDB_NAMESPACE { + +template +using UnorderedMap = folly::F14FastMap; + +template +using UnorderedMapH = folly::F14FastMap; + +template +using UnorderedSet = folly::F14FastSet; + +} // namespace ROCKSDB_NAMESPACE + +#else + +#include +#include + +namespace ROCKSDB_NAMESPACE { + +template +using UnorderedMap = std::unordered_map; + +template +using UnorderedMapH = std::unordered_map; + +template +using UnorderedSet = std::unordered_set; + +} // namespace ROCKSDB_NAMESPACE + +#endif diff --git a/src/rocksdb/util/hash_map.h b/src/rocksdb/util/hash_map.h new file mode 100644 index 000000000..e3ad2584f --- /dev/null +++ b/src/rocksdb/util/hash_map.h @@ -0,0 +1,67 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#pragma once + +#include +#include +#include + +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +// This is similar to std::unordered_map, except that it tries to avoid +// allocating or deallocating memory as much as possible. With +// std::unordered_map, an allocation/deallocation is made for every insertion +// or deletion because of the requirement that iterators remain valid even +// with insertions or deletions. This means that the hash chains will be +// implemented as linked lists. +// +// This implementation uses autovector as hash chains insteads. 
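+// A minimal usage sketch (assuming the template parameters are key type,
+// value type, and bucket count; the values here are made up):
+//
+//   HashMap<uint64_t, std::string, 16> m;
+//   m.Insert(42, "forty-two");
+//   if (m.Contains(42)) { m.Get(42) += "!"; }
+//   m.Delete(42);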
+// +template +class HashMap { + std::array, 1>, size> table_; + + public: + bool Contains(K key) { + auto& bucket = table_[key % size]; + auto it = std::find_if( + bucket.begin(), bucket.end(), + [key](const std::pair& p) { return p.first == key; }); + return it != bucket.end(); + } + + void Insert(K key, const V& value) { + auto& bucket = table_[key % size]; + bucket.push_back({key, value}); + } + + void Delete(K key) { + auto& bucket = table_[key % size]; + auto it = std::find_if( + bucket.begin(), bucket.end(), + [key](const std::pair& p) { return p.first == key; }); + if (it != bucket.end()) { + auto last = bucket.end() - 1; + if (it != last) { + *it = *last; + } + bucket.pop_back(); + } + } + + V& Get(K key) { + auto& bucket = table_[key % size]; + auto it = std::find_if( + bucket.begin(), bucket.end(), + [key](const std::pair& p) { return p.first == key; }); + return it->second; + } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/hash_test.cc b/src/rocksdb/util/hash_test.cc new file mode 100644 index 000000000..72112b044 --- /dev/null +++ b/src/rocksdb/util/hash_test.cc @@ -0,0 +1,853 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/hash.h" + +#include +#include +#include + +#include "test_util/testharness.h" +#include "util/coding.h" +#include "util/coding_lean.h" +#include "util/hash128.h" +#include "util/math.h" +#include "util/math128.h" + +using ROCKSDB_NAMESPACE::BijectiveHash2x64; +using ROCKSDB_NAMESPACE::BijectiveUnhash2x64; +using ROCKSDB_NAMESPACE::DecodeFixed64; +using ROCKSDB_NAMESPACE::EncodeFixed32; +using ROCKSDB_NAMESPACE::EndianSwapValue; +using ROCKSDB_NAMESPACE::GetSliceHash64; +using ROCKSDB_NAMESPACE::Hash; +using ROCKSDB_NAMESPACE::Hash128; +using ROCKSDB_NAMESPACE::Hash2x64; +using ROCKSDB_NAMESPACE::Hash64; +using ROCKSDB_NAMESPACE::Lower32of64; +using ROCKSDB_NAMESPACE::Lower64of128; +using ROCKSDB_NAMESPACE::ReverseBits; +using ROCKSDB_NAMESPACE::Slice; +using ROCKSDB_NAMESPACE::Unsigned128; +using ROCKSDB_NAMESPACE::Upper32of64; +using ROCKSDB_NAMESPACE::Upper64of128; + +// The hash algorithm is part of the file format, for example for the Bloom +// filters. Test that the hash values are stable for a set of random strings of +// varying lengths. +TEST(HashTest, Values) { + constexpr uint32_t kSeed = 0xbc9f1d34; // Same as BloomHash. 
+ + EXPECT_EQ(Hash("", 0, kSeed), 3164544308u); + EXPECT_EQ(Hash("\x08", 1, kSeed), 422599524u); + EXPECT_EQ(Hash("\x17", 1, kSeed), 3168152998u); + EXPECT_EQ(Hash("\x9a", 1, kSeed), 3195034349u); + EXPECT_EQ(Hash("\x1c", 1, kSeed), 2651681383u); + EXPECT_EQ(Hash("\x4d\x76", 2, kSeed), 2447836956u); + EXPECT_EQ(Hash("\x52\xd5", 2, kSeed), 3854228105u); + EXPECT_EQ(Hash("\x91\xf7", 2, kSeed), 31066776u); + EXPECT_EQ(Hash("\xd6\x27", 2, kSeed), 1806091603u); + EXPECT_EQ(Hash("\x30\x46\x0b", 3, kSeed), 3808221797u); + EXPECT_EQ(Hash("\x56\xdc\xd6", 3, kSeed), 2157698265u); + EXPECT_EQ(Hash("\xd4\x52\x33", 3, kSeed), 1721992661u); + EXPECT_EQ(Hash("\x6a\xb5\xf4", 3, kSeed), 2469105222u); + EXPECT_EQ(Hash("\x67\x53\x81\x1c", 4, kSeed), 118283265u); + EXPECT_EQ(Hash("\x69\xb8\xc0\x88", 4, kSeed), 3416318611u); + EXPECT_EQ(Hash("\x1e\x84\xaf\x2d", 4, kSeed), 3315003572u); + EXPECT_EQ(Hash("\x46\xdc\x54\xbe", 4, kSeed), 447346355u); + EXPECT_EQ(Hash("\xd0\x7a\x6e\xea\x56", 5, kSeed), 4255445370u); + EXPECT_EQ(Hash("\x86\x83\xd5\xa4\xd8", 5, kSeed), 2390603402u); + EXPECT_EQ(Hash("\xb7\x46\xbb\x77\xce", 5, kSeed), 2048907743u); + EXPECT_EQ(Hash("\x6c\xa8\xbc\xe5\x99", 5, kSeed), 2177978500u); + EXPECT_EQ(Hash("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed), 1036846008u); + EXPECT_EQ(Hash("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed), 229980482u); + EXPECT_EQ(Hash("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed), 3655585422u); + EXPECT_EQ(Hash("\x73\xe1\xff\x56\x9c\xce", 6, kSeed), 3502708029u); + EXPECT_EQ(Hash("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed), 815120748u); + EXPECT_EQ(Hash("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed), 3056033698u); + EXPECT_EQ(Hash("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed), 587205227u); + EXPECT_EQ(Hash("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed), 2030937252u); + EXPECT_EQ(Hash("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed), 469635402u); + EXPECT_EQ(Hash("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed), 3530274698u); + EXPECT_EQ(Hash("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed), 1974545809u); + EXPECT_EQ(Hash("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed), 3563570120u); + EXPECT_EQ(Hash("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed), + 2706087434u); + EXPECT_EQ(Hash("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed), + 1534654151u); + EXPECT_EQ(Hash("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed), + 2355554696u); + EXPECT_EQ(Hash("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed), + 1400800912u); + EXPECT_EQ(Hash("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed), + 3420325137u); + EXPECT_EQ(Hash("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed), + 3427803584u); + EXPECT_EQ(Hash("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed), + 1152407945u); + EXPECT_EQ(Hash("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed), + 3382479516u); +} + +// The hash algorithm is part of the file format, for example for the Bloom +// filters. 
+TEST(HashTest, Hash64Misc) { + constexpr uint32_t kSeed = 0; // Same as GetSliceHash64 + + for (char fill : {'\0', 'a', '1', '\xff'}) { + const size_t max_size = 1000; + const std::string str(max_size, fill); + + for (size_t size = 0; size <= max_size; ++size) { + uint64_t here = Hash64(str.data(), size, kSeed); + + // Must be same as unseeded Hash64 and GetSliceHash64 + EXPECT_EQ(here, Hash64(str.data(), size)); + EXPECT_EQ(here, GetSliceHash64(Slice(str.data(), size))); + + // Upper and Lower must reconstruct hash + EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) | Lower32of64(here)); + EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) + Lower32of64(here)); + EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) ^ Lower32of64(here)); + + // Seed changes hash value (with high probability) + for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) { + EXPECT_NE(here, Hash64(str.data(), size, var_seed)); + } + + // Size changes hash value (with high probability) + size_t max_smaller_by = std::min(size_t{30}, size); + for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) { + EXPECT_NE(here, Hash64(str.data(), size - smaller_by, kSeed)); + } + } + } +} + +// Test that hash values are "non-trivial" for "trivial" inputs +TEST(HashTest, Hash64Trivial) { + // Thorough test too slow for regression testing + constexpr bool thorough = false; + + // For various seeds, make sure hash of empty string is not zero. + constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000; + for (uint64_t seed = 0; seed < max_seed; ++seed) { + uint64_t here = Hash64("", 0, seed); + EXPECT_NE(Lower32of64(here), 0u); + EXPECT_NE(Upper32of64(here), 0u); + } + + // For standard seed, make sure hash of small strings are not zero + constexpr uint32_t kSeed = 0; // Same as GetSliceHash64 + char input[4]; + constexpr int max_len = thorough ? 3 : 2; + for (int len = 1; len <= max_len; ++len) { + for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) { + EncodeFixed32(input, i); + uint64_t here = Hash64(input, len, kSeed); + EXPECT_NE(Lower32of64(here), 0u); + EXPECT_NE(Upper32of64(here), 0u); + } + } +} + +// Test that the hash values are stable for a set of random strings of +// varying small lengths. 
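// A self-contained sketch of the split/reconstruct identity exercised by
// Hash64Misc above. The local helpers mirror what Upper32of64/Lower32of64 are
// expected to do (take the upper/lower 32 bits); they are illustrative
// stand-ins, not the library definitions:
#include <cassert>
#include <cstdint>

namespace {
inline uint32_t SketchLower32(uint64_t v) { return static_cast<uint32_t>(v); }
inline uint32_t SketchUpper32(uint64_t v) {
  return static_cast<uint32_t>(v >> 32);
}

void SplitReconstructSketch(uint64_t h) {
  // The halves occupy disjoint bit ranges, so OR, ADD and XOR all reassemble
  // the original value, exactly as the EXPECT_EQs above assert.
  const uint64_t hi = uint64_t{SketchUpper32(h)} << 32;
  const uint64_t lo = SketchLower32(h);
  assert(h == (hi | lo));
  assert(h == (hi + lo));
  assert(h == (hi ^ lo));
}
}  // namespace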
+TEST(HashTest, Hash64SmallValueSchema) { + constexpr uint32_t kSeed = 0; // Same as GetSliceHash64 + + EXPECT_EQ(Hash64("", 0, kSeed), uint64_t{5999572062939766020u}); + EXPECT_EQ(Hash64("\x08", 1, kSeed), uint64_t{583283813901344696u}); + EXPECT_EQ(Hash64("\x17", 1, kSeed), uint64_t{16175549975585474943u}); + EXPECT_EQ(Hash64("\x9a", 1, kSeed), uint64_t{16322991629225003903u}); + EXPECT_EQ(Hash64("\x1c", 1, kSeed), uint64_t{13269285487706833447u}); + EXPECT_EQ(Hash64("\x4d\x76", 2, kSeed), uint64_t{6859542833406258115u}); + EXPECT_EQ(Hash64("\x52\xd5", 2, kSeed), uint64_t{4919611532550636959u}); + EXPECT_EQ(Hash64("\x91\xf7", 2, kSeed), uint64_t{14199427467559720719u}); + EXPECT_EQ(Hash64("\xd6\x27", 2, kSeed), uint64_t{12292689282614532691u}); + EXPECT_EQ(Hash64("\x30\x46\x0b", 3, kSeed), uint64_t{11404699285340020889u}); + EXPECT_EQ(Hash64("\x56\xdc\xd6", 3, kSeed), uint64_t{12404347133785524237u}); + EXPECT_EQ(Hash64("\xd4\x52\x33", 3, kSeed), uint64_t{15853805298481534034u}); + EXPECT_EQ(Hash64("\x6a\xb5\xf4", 3, kSeed), uint64_t{16863488758399383382u}); + EXPECT_EQ(Hash64("\x67\x53\x81\x1c", 4, kSeed), + uint64_t{9010661983527562386u}); + EXPECT_EQ(Hash64("\x69\xb8\xc0\x88", 4, kSeed), + uint64_t{6611781377647041447u}); + EXPECT_EQ(Hash64("\x1e\x84\xaf\x2d", 4, kSeed), + uint64_t{15290969111616346501u}); + EXPECT_EQ(Hash64("\x46\xdc\x54\xbe", 4, kSeed), + uint64_t{7063754590279313623u}); + EXPECT_EQ(Hash64("\xd0\x7a\x6e\xea\x56", 5, kSeed), + uint64_t{6384167718754869899u}); + EXPECT_EQ(Hash64("\x86\x83\xd5\xa4\xd8", 5, kSeed), + uint64_t{16874407254108011067u}); + EXPECT_EQ(Hash64("\xb7\x46\xbb\x77\xce", 5, kSeed), + uint64_t{16809880630149135206u}); + EXPECT_EQ(Hash64("\x6c\xa8\xbc\xe5\x99", 5, kSeed), + uint64_t{1249038833153141148u}); + EXPECT_EQ(Hash64("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed), + uint64_t{17358142495308219330u}); + EXPECT_EQ(Hash64("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed), + uint64_t{4237646583134806322u}); + EXPECT_EQ(Hash64("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed), + uint64_t{4373664924115234051u}); + EXPECT_EQ(Hash64("\x73\xe1\xff\x56\x9c\xce", 6, kSeed), + uint64_t{12012981210634596029u}); + EXPECT_EQ(Hash64("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed), + uint64_t{5716522398211028826u}); + EXPECT_EQ(Hash64("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed), + uint64_t{15604531309862565013u}); + EXPECT_EQ(Hash64("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed), + uint64_t{8601330687345614172u}); + EXPECT_EQ(Hash64("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed), + uint64_t{8088079329364056942u}); + EXPECT_EQ(Hash64("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed), + uint64_t{9844314944338447628u}); + EXPECT_EQ(Hash64("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed), + uint64_t{10973293517982163143u}); + EXPECT_EQ(Hash64("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed), + uint64_t{9986007080564743219u}); + EXPECT_EQ(Hash64("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed), + uint64_t{1729303145008254458u}); + EXPECT_EQ(Hash64("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed), + uint64_t{13253403748084181481u}); + EXPECT_EQ(Hash64("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed), + uint64_t{7768754303876232188u}); + EXPECT_EQ(Hash64("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed), + uint64_t{12439346786701492u}); + EXPECT_EQ(Hash64("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed), + uint64_t{10841838338450144690u}); + EXPECT_EQ(Hash64("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed), + uint64_t{12883919702069153152u}); + EXPECT_EQ(Hash64("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed), + 
uint64_t{12692903507676842188u}); + EXPECT_EQ(Hash64("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed), + uint64_t{6540985900674032620u}); + EXPECT_EQ(Hash64("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed), + uint64_t{10551812464348219044u}); +} + +std::string Hash64TestDescriptor(const char *repeat, size_t limit) { + const char *mod61_encode = + "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + std::string input; + while (input.size() < limit) { + input.append(repeat); + } + std::string rv; + for (size_t i = 0; i < limit; ++i) { + uint64_t h = GetSliceHash64(Slice(input.data(), i)); + rv.append(1, mod61_encode[static_cast(h % 61)]); + } + return rv; +} + +// XXPH3 changes its algorithm for various sizes up through 250 bytes, so +// we need to check the stability of larger sizes also. +TEST(HashTest, Hash64LargeValueSchema) { + // Each of these derives a "descriptor" from the hash values for all + // lengths up to 430. + // Note that "c" is common for the zero-length string. + EXPECT_EQ( + Hash64TestDescriptor("foo", 430), + "cRhyWsY67B6klRA1udmOuiYuX7IthyGBKqbeosz2hzVglWCmQx8nEdnpkvPfYX56Up2OWOTV" + "lTzfAoYwvtqKzjD8E9xttR2unelbXbIV67NUe6bOO23BxaSFRcA3njGu5cUWfgwOqNoTsszp" + "uPvKRP6qaUR5VdoBkJUCFIefd7edlNK5mv6JYWaGdwxehg65hTkTmjZoPKxTZo4PLyzbL9U4" + "xt12ITSfeP2MfBHuLI2z2pDlBb44UQKVMx27LEoAHsdLp3WfWfgH3sdRBRCHm33UxCM4QmE2" + "xJ7gqSvNwTeH7v9GlC8zWbGroyD3UVNeShMLx29O7tH1biemLULwAHyIw8zdtLMDpEJ8m2ic" + "l6Lb4fDuuFNAs1GCVUthjK8CV8SWI8Rsz5THSwn5CGhpqUwSZcFknjwWIl5rNCvDxXJqYr"); + // Note that "1EeRk" is common for "Rocks" + EXPECT_EQ( + Hash64TestDescriptor("Rocks", 430), + "c1EeRkrzgOYWLA8PuhJrwTePJewoB44WdXYDfhbk3ZxTqqg25WlPExDl7IKIQLJvnA6gJxxn" + "9TCSLkFGfJeXehaSS1GBqWSzfhEH4VXiXIUCuxJXxtKXcSC6FrNIQGTZbYDiUOLD6Y5inzrF" + "9etwQhXUBanw55xAUdNMFQAm2GjJ6UDWp2mISLiMMkLjANWMKLaZMqaFLX37qB4MRO1ooVRv" + "zSvaNRSCLxlggQCasQq8icWjzf3HjBlZtU6pd4rkaUxSzHqmo9oM5MghbU5Rtxg8wEfO7lVN" + "5wdMONYecslQTwjZUpO1K3LDf3K3XK6sUXM6ShQQ3RHmMn2acB4YtTZ3QQcHYJSOHn2DuWpa" + "Q8RqzX5lab92YmOLaCdOHq1BPsM7SIBzMdLgePNsJ1vvMALxAaoDUHPxoFLO2wx18IXnyX"); + EXPECT_EQ( + Hash64TestDescriptor("RocksDB", 430), + "c1EeRkukbkb28wLTahwD2sfUhZzaBEnF8SVrxnPVB6A7b8CaAl3UKsDZISF92GSq2wDCukOq" + "Jgrsp7A3KZhDiLW8dFXp8UPqPxMCRlMdZeVeJ2dJxrmA6cyt99zkQFj7ELbut6jAeVqARFnw" + "fnWVXOsaLrq7bDCbMcns2DKvTaaqTCLMYxI7nhtLpFN1jR755FRQFcOzrrDbh7QhypjdvlYw" + "cdAMSZgp9JMHxbM23wPSuH6BOFgxejz35PScZfhDPvTOxIy1jc3MZsWrMC3P324zNolO7JdW" + "CX2I5UDKjjaEJfxbgVgJIXxtQGlmj2xkO5sPpjULQV4X2HlY7FQleJ4QRaJIB4buhCA4vUTF" + "eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm"); +} + +TEST(HashTest, Hash128Misc) { + constexpr uint32_t kSeed = 0; // Same as GetSliceHash128 + + for (char fill : {'\0', 'a', '1', '\xff', 'e'}) { + const size_t max_size = 1000; + std::string str(max_size, fill); + + if (fill == 'e') { + // Use different characters to check endianness handling + for (size_t i = 0; i < str.size(); ++i) { + str[i] += static_cast(i); + } + } + + for (size_t size = 0; size <= max_size; ++size) { + Unsigned128 here = Hash128(str.data(), size, kSeed); + + // Must be same as unseeded Hash128 and GetSliceHash128 + EXPECT_EQ(here, Hash128(str.data(), size)); + EXPECT_EQ(here, GetSliceHash128(Slice(str.data(), size))); + { + uint64_t hi, lo; + Hash2x64(str.data(), size, &hi, &lo); + EXPECT_EQ(Lower64of128(here), lo); + EXPECT_EQ(Upper64of128(here), hi); + } + if (size == 16) { + const uint64_t in_hi = DecodeFixed64(str.data() + 8); + const uint64_t in_lo = DecodeFixed64(str.data()); + uint64_t hi, lo; + 
BijectiveHash2x64(in_hi, in_lo, &hi, &lo); + EXPECT_EQ(Lower64of128(here), lo); + EXPECT_EQ(Upper64of128(here), hi); + uint64_t un_hi, un_lo; + BijectiveUnhash2x64(hi, lo, &un_hi, &un_lo); + EXPECT_EQ(in_lo, un_lo); + EXPECT_EQ(in_hi, un_hi); + } + + // Upper and Lower must reconstruct hash + EXPECT_EQ(here, + (Unsigned128{Upper64of128(here)} << 64) | Lower64of128(here)); + EXPECT_EQ(here, + (Unsigned128{Upper64of128(here)} << 64) ^ Lower64of128(here)); + + // Seed changes hash value (with high probability) + for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) { + Unsigned128 seeded = Hash128(str.data(), size, var_seed); + EXPECT_NE(here, seeded); + // Must match seeded Hash2x64 + { + uint64_t hi, lo; + Hash2x64(str.data(), size, var_seed, &hi, &lo); + EXPECT_EQ(Lower64of128(seeded), lo); + EXPECT_EQ(Upper64of128(seeded), hi); + } + if (size == 16) { + const uint64_t in_hi = DecodeFixed64(str.data() + 8); + const uint64_t in_lo = DecodeFixed64(str.data()); + uint64_t hi, lo; + BijectiveHash2x64(in_hi, in_lo, var_seed, &hi, &lo); + EXPECT_EQ(Lower64of128(seeded), lo); + EXPECT_EQ(Upper64of128(seeded), hi); + uint64_t un_hi, un_lo; + BijectiveUnhash2x64(hi, lo, var_seed, &un_hi, &un_lo); + EXPECT_EQ(in_lo, un_lo); + EXPECT_EQ(in_hi, un_hi); + } + } + + // Size changes hash value (with high probability) + size_t max_smaller_by = std::min(size_t{30}, size); + for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) { + EXPECT_NE(here, Hash128(str.data(), size - smaller_by, kSeed)); + } + } + } +} + +// Test that hash values are "non-trivial" for "trivial" inputs +TEST(HashTest, Hash128Trivial) { + // Thorough test too slow for regression testing + constexpr bool thorough = false; + + // For various seeds, make sure hash of empty string is not zero. + constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000; + for (uint64_t seed = 0; seed < max_seed; ++seed) { + Unsigned128 here = Hash128("", 0, seed); + EXPECT_NE(Lower64of128(here), 0u); + EXPECT_NE(Upper64of128(here), 0u); + } + + // For standard seed, make sure hash of small strings are not zero + constexpr uint32_t kSeed = 0; // Same as GetSliceHash128 + char input[4]; + constexpr int max_len = thorough ? 3 : 2; + for (int len = 1; len <= max_len; ++len) { + for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) { + EncodeFixed32(input, i); + Unsigned128 here = Hash128(input, len, kSeed); + EXPECT_NE(Lower64of128(here), 0u); + EXPECT_NE(Upper64of128(here), 0u); + } + } +} + +std::string Hash128TestDescriptor(const char *repeat, size_t limit) { + const char *mod61_encode = + "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + std::string input; + while (input.size() < limit) { + input.append(repeat); + } + std::string rv; + for (size_t i = 0; i < limit; ++i) { + auto h = GetSliceHash128(Slice(input.data(), i)); + uint64_t h2 = Upper64of128(h) + Lower64of128(h); + rv.append(1, mod61_encode[static_cast(h2 % 61)]); + } + return rv; +} + +// XXH3 changes its algorithm for various sizes up through 250 bytes, so +// we need to check the stability of larger sizes also. +TEST(HashTest, Hash128ValueSchema) { + // Each of these derives a "descriptor" from the hash values for all + // lengths up to 430. + // Note that "b" is common for the zero-length string. 
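// A sketch of the "descriptor" technique used by Hash64TestDescriptor and
// Hash128TestDescriptor above, re-expressed generically. `hash_fn` is a
// placeholder for any 64-bit hash of (data, length); the 61-character
// alphabet matches the tests:
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>

std::string DescriptorSketch(
    const std::string& repeat, size_t limit,
    const std::function<uint64_t(const char*, size_t)>& hash_fn) {
  static const char* kMod61 =
      "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  std::string input;
  while (input.size() < limit) {
    input.append(repeat);
  }
  std::string rv;
  for (size_t i = 0; i < limit; ++i) {
    // One character per prefix length: any change to the hash algorithm for
    // any input length shows up as a diff in the descriptor string.
    uint64_t h = hash_fn(input.data(), i);
    rv.push_back(kMod61[static_cast<size_t>(h % 61)]);
  }
  return rv;
}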
+ EXPECT_EQ( + Hash128TestDescriptor("foo", 430), + "bUMA3As8n9I4vNGhThXlEevxZlyMcbb6TYAlIKJ2f5ponsv99q962rYclQ7u3gfnRdCDQ5JI" + "2LrGUaCycbXrvLFe4SjgRb9RQwCfrnmNQ7VSEwSKMnkGCK3bDbXSrnIh5qLXdtvIZklbJpGH" + "Dqr93BlqF9ubTnOSYkSdx89XvQqflMIW8bjfQp9BPjQejWOeEQspnN1D3sfgVdFhpaQdHYA5" + "pI2XcPlCMFPxvrFuRr7joaDvjNe9IUZaunLPMewuXmC3EL95h52Ju3D7y9RNKhgYxMTrA84B" + "yJrMvyjdm3vlBxet4EN7v2GEyjbGuaZW9UL6lrX6PghJDg7ACfLGdxNbH3qXM4zaiG2RKnL5" + "S3WXKR78RBB5fRFQ8KDIEQjHFvSNsc3GrAEi6W8P2lv8JMTzjBODO2uN4wadVQFT9wpGfV"); + // Note that "35D2v" is common for "Rocks" + EXPECT_EQ( + Hash128TestDescriptor("Rocks", 430), + "b35D2vzvklFVDqJmyLRXyApwGGO3EAT3swhe8XJAN3mY2UVPglzdmydxcba6JI2tSvwO6zSu" + "ANpjSM7tc9G5iMhsa7R8GfyCXRO1TnLg7HvdWNdgGGBirxZR68BgT7TQsYJt6zyEyISeXI1n" + "MXA48Xo7dWfJeYN6Z4KWlqZY7TgFXGbks9AX4ehZNSGtIhdO5i58qlgVX1bEejeOVaCcjC79" + "67DrMfOKds7rUQzjBa77sMPcoPW1vu6ljGJPZH3XkRyDMZ1twxXKkNxN3tE8nR7JHwyqBAxE" + "fTcjbOWrLZ1irWxRSombD8sGDEmclgF11IxqEhe3Rt7gyofO3nExGckKkS9KfRqsCHbiUyva" + "JGkJwUHRXaZnh58b4i1Ei9aQKZjXlvIVDixoZrjcNaH5XJIJlRZce9Z9t82wYapTpckYSg"); + EXPECT_EQ( + Hash128TestDescriptor("RocksDB", 430), + "b35D2vFUst3XDZCRlSrhmYYakmqImV97LbBsV6EZlOEQpUPH1d1sD3xMKAPlA5UErHehg5O7" + "n966fZqhAf3hRc24kGCLfNAWjyUa7vSNOx3IcPoTyVRFZeFlcCtfl7t1QJumHOCpS33EBmBF" + "hvK13QjBbDWYWeHQhJhgV9Mqbx17TIcvUkEnYZxb8IzWNmjVsJG44Z7v52DjGj1ZzS62S2Vv" + "qWcDO7apvH5VHg68E9Wl6nXP21vlmUqEH9GeWRehfWVvY7mUpsAg5drHHQyDSdiMceiUuUxJ" + "XJqHFcDdzbbPk7xDvbLgWCKvH8k3MpQNWOmbSSRDdAP6nGlDjoTToYkcqVREHJzztSWAAq5h" + "GHSUNJ6OxsMHhf8EhXfHtKyUzRmPtjYyeckQcGmrQfFFLidc6cjMDKCdBG6c6HVBrS7H2R"); +} + +TEST(FastRange32Test, Values) { + using ROCKSDB_NAMESPACE::FastRange32; + // Zero range + EXPECT_EQ(FastRange32(0, 0), 0U); + EXPECT_EQ(FastRange32(123, 0), 0U); + EXPECT_EQ(FastRange32(0xffffffff, 0), 0U); + + // One range + EXPECT_EQ(FastRange32(0, 1), 0U); + EXPECT_EQ(FastRange32(123, 1), 0U); + EXPECT_EQ(FastRange32(0xffffffff, 1), 0U); + + // Two range + EXPECT_EQ(FastRange32(0, 2), 0U); + EXPECT_EQ(FastRange32(123, 2), 0U); + EXPECT_EQ(FastRange32(0x7fffffff, 2), 0U); + EXPECT_EQ(FastRange32(0x80000000, 2), 1U); + EXPECT_EQ(FastRange32(0xffffffff, 2), 1U); + + // Seven range + EXPECT_EQ(FastRange32(0, 7), 0U); + EXPECT_EQ(FastRange32(123, 7), 0U); + EXPECT_EQ(FastRange32(613566756, 7), 0U); + EXPECT_EQ(FastRange32(613566757, 7), 1U); + EXPECT_EQ(FastRange32(1227133513, 7), 1U); + EXPECT_EQ(FastRange32(1227133514, 7), 2U); + // etc. 
+ EXPECT_EQ(FastRange32(0xffffffff, 7), 6U); + + // Big + EXPECT_EQ(FastRange32(1, 0x80000000), 0U); + EXPECT_EQ(FastRange32(2, 0x80000000), 1U); + EXPECT_EQ(FastRange32(4, 0x7fffffff), 1U); + EXPECT_EQ(FastRange32(4, 0x80000000), 2U); + EXPECT_EQ(FastRange32(0xffffffff, 0x7fffffff), 0x7ffffffeU); + EXPECT_EQ(FastRange32(0xffffffff, 0x80000000), 0x7fffffffU); +} + +TEST(FastRange64Test, Values) { + using ROCKSDB_NAMESPACE::FastRange64; + // Zero range + EXPECT_EQ(FastRange64(0, 0), 0U); + EXPECT_EQ(FastRange64(123, 0), 0U); + EXPECT_EQ(FastRange64(0xffffFFFF, 0), 0U); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0), 0U); + + // One range + EXPECT_EQ(FastRange64(0, 1), 0U); + EXPECT_EQ(FastRange64(123, 1), 0U); + EXPECT_EQ(FastRange64(0xffffFFFF, 1), 0U); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 1), 0U); + + // Two range + EXPECT_EQ(FastRange64(0, 2), 0U); + EXPECT_EQ(FastRange64(123, 2), 0U); + EXPECT_EQ(FastRange64(0xffffFFFF, 2), 0U); + EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 2), 0U); + EXPECT_EQ(FastRange64(0x8000000000000000, 2), 1U); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 2), 1U); + + // Seven range + EXPECT_EQ(FastRange64(0, 7), 0U); + EXPECT_EQ(FastRange64(123, 7), 0U); + EXPECT_EQ(FastRange64(0xffffFFFF, 7), 0U); + EXPECT_EQ(FastRange64(2635249153387078802, 7), 0U); + EXPECT_EQ(FastRange64(2635249153387078803, 7), 1U); + EXPECT_EQ(FastRange64(5270498306774157604, 7), 1U); + EXPECT_EQ(FastRange64(5270498306774157605, 7), 2U); + EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 7), 3U); + EXPECT_EQ(FastRange64(0x8000000000000000, 7), 3U); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 7), 6U); + + // Big but 32-bit range + EXPECT_EQ(FastRange64(0x100000000, 0x80000000), 0U); + EXPECT_EQ(FastRange64(0x200000000, 0x80000000), 1U); + EXPECT_EQ(FastRange64(0x400000000, 0x7fffFFFF), 1U); + EXPECT_EQ(FastRange64(0x400000000, 0x80000000), 2U); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x7fffFFFF), 0x7fffFFFEU); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x80000000), 0x7fffFFFFU); + + // Big, > 32-bit range +#if SIZE_MAX == UINT64_MAX + EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 0x4200000002), 0x2100000000U); + EXPECT_EQ(FastRange64(0x8000000000000000, 0x4200000002), 0x2100000001U); + + EXPECT_EQ(FastRange64(0x0000000000000000, 420000000002), 0U); + EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 420000000002), 210000000000U); + EXPECT_EQ(FastRange64(0x8000000000000000, 420000000002), 210000000001U); + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 420000000002), 420000000001U); + + EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0xffffFFFFffffFFFF), + 0xffffFFFFffffFFFEU); +#endif +} + +TEST(FastRangeGenericTest, Values) { + using ROCKSDB_NAMESPACE::FastRangeGeneric; + // Generic (including big and small) + // Note that FastRangeGeneric is also tested indirectly above via + // FastRange32 and FastRange64. 
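// The expectations above are consistent with the classic "fastrange"
// multiply-shift mapping, shown here as an illustrative standalone sketch
// (not a copy of util/fastrange.h): a uniformly distributed 32-bit hash is
// mapped onto [0, range) without a division, and FastRange64 is the
// analogous 64x64->128-bit version.
#include <cstdint>

inline uint32_t FastRange32Sketch(uint32_t hash, uint32_t range) {
  // e.g. FastRange32Sketch(0x80000000, 2) == (0x80000000ULL * 2) >> 32 == 1
  return static_cast<uint32_t>((uint64_t{hash} * uint64_t{range}) >> 32);
}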
+ EXPECT_EQ( + FastRangeGeneric(uint64_t{0x8000000000000000}, uint64_t{420000000002}), + uint64_t{210000000001}); + EXPECT_EQ(FastRangeGeneric(uint64_t{0x8000000000000000}, uint16_t{12468}), + uint16_t{6234}); + EXPECT_EQ(FastRangeGeneric(uint32_t{0x80000000}, uint16_t{12468}), + uint16_t{6234}); + // Not recommended for typical use because for example this could fail on + // some platforms and pass on others: + // EXPECT_EQ(FastRangeGeneric(static_cast(0x80000000), + // uint16_t{12468}), + // uint16_t{6234}); +} + +// for inspection of disassembly +uint32_t FastRange32(uint32_t hash, uint32_t range) { + return ROCKSDB_NAMESPACE::FastRange32(hash, range); +} + +// for inspection of disassembly +size_t FastRange64(uint64_t hash, size_t range) { + return ROCKSDB_NAMESPACE::FastRange64(hash, range); +} + +// Tests for math.h / math128.h (not worth a separate test binary) +using ROCKSDB_NAMESPACE::BitParity; +using ROCKSDB_NAMESPACE::BitsSetToOne; +using ROCKSDB_NAMESPACE::ConstexprFloorLog2; +using ROCKSDB_NAMESPACE::CountTrailingZeroBits; +using ROCKSDB_NAMESPACE::DecodeFixed128; +using ROCKSDB_NAMESPACE::DecodeFixedGeneric; +using ROCKSDB_NAMESPACE::DownwardInvolution; +using ROCKSDB_NAMESPACE::EncodeFixed128; +using ROCKSDB_NAMESPACE::EncodeFixedGeneric; +using ROCKSDB_NAMESPACE::FloorLog2; +using ROCKSDB_NAMESPACE::Lower64of128; +using ROCKSDB_NAMESPACE::Multiply64to128; +using ROCKSDB_NAMESPACE::Unsigned128; +using ROCKSDB_NAMESPACE::Upper64of128; + +int blah(int x) { return DownwardInvolution(x); } + +template +static void test_BitOps() { + // This complex code is to generalize to 128-bit values. Otherwise + // we could just use = static_cast(0x5555555555555555ULL); + T everyOtherBit = 0; + for (unsigned i = 0; i < sizeof(T); ++i) { + everyOtherBit = (everyOtherBit << 8) | T{0x55}; + } + + // This one built using bit operations, as our 128-bit layer + // might not implement arithmetic such as subtraction. 
+ T vm1 = 0; // "v minus one" + + for (int i = 0; i < int{8 * sizeof(T)}; ++i) { + T v = T{1} << i; + // If we could directly use arithmetic: + // T vm1 = static_cast(v - 1); + + // FloorLog2 + if (v > 0) { + EXPECT_EQ(FloorLog2(v), i); + EXPECT_EQ(ConstexprFloorLog2(v), i); + } + if (vm1 > 0) { + EXPECT_EQ(FloorLog2(vm1), i - 1); + EXPECT_EQ(ConstexprFloorLog2(vm1), i - 1); + EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1); + EXPECT_EQ(ConstexprFloorLog2(everyOtherBit & vm1), (i - 1) & ~1); + } + + // CountTrailingZeroBits + if (v != 0) { + EXPECT_EQ(CountTrailingZeroBits(v), i); + } + if (vm1 != 0) { + EXPECT_EQ(CountTrailingZeroBits(vm1), 0); + } + if (i < int{8 * sizeof(T)} - 1) { + EXPECT_EQ(CountTrailingZeroBits(~vm1 & everyOtherBit), (i + 1) & ~1); + } + + // BitsSetToOne + EXPECT_EQ(BitsSetToOne(v), 1); + EXPECT_EQ(BitsSetToOne(vm1), i); + EXPECT_EQ(BitsSetToOne(vm1 & everyOtherBit), (i + 1) / 2); + + // BitParity + EXPECT_EQ(BitParity(v), 1); + EXPECT_EQ(BitParity(vm1), i & 1); + EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1); + + // EndianSwapValue + T ev = T{1} << (((sizeof(T) - 1 - (i / 8)) * 8) + i % 8); + EXPECT_EQ(EndianSwapValue(v), ev); + + // ReverseBits + EXPECT_EQ(ReverseBits(v), static_cast(T{1} << (8 * sizeof(T) - 1 - i))); +#ifdef HAVE_UINT128_EXTENSION // Uses multiplication + if (std::is_unsigned::value) { // Technical UB on signed type + T rv = T{1} << (8 * sizeof(T) - 1 - i); + EXPECT_EQ(ReverseBits(vm1), static_cast(rv * ~T{1})); + } +#endif + + // DownwardInvolution + { + T misc = static_cast(/*random*/ 0xc682cd153d0e3279U + + i * /*random*/ 0x9b3972f3bea0baa3U); + if constexpr (sizeof(T) > 8) { + misc = (misc << 64) | (/*random*/ 0x52af031a38ced62dU + + i * /*random*/ 0x936f803d9752ddc3U); + } + T misc_masked = misc & vm1; + EXPECT_LE(misc_masked, vm1); + T di_misc_masked = DownwardInvolution(misc_masked); + EXPECT_LE(di_misc_masked, vm1); + if (misc_masked > 0) { + // Highest-order 1 in same position + EXPECT_EQ(FloorLog2(misc_masked), FloorLog2(di_misc_masked)); + } + // Validate involution property on short value + EXPECT_EQ(DownwardInvolution(di_misc_masked), misc_masked); + + // Validate involution property on large value + T di_misc = DownwardInvolution(misc); + EXPECT_EQ(DownwardInvolution(di_misc), misc); + // Highest-order 1 in same position + if (misc > 0) { + EXPECT_EQ(FloorLog2(misc), FloorLog2(di_misc)); + } + + // Validate distributes over xor. + // static_casts to avoid numerical promotion effects. 
+ EXPECT_EQ(DownwardInvolution(static_cast(misc_masked ^ vm1)), + static_cast(di_misc_masked ^ DownwardInvolution(vm1))); + T misc2 = static_cast(misc >> 1); + EXPECT_EQ(DownwardInvolution(static_cast(misc ^ misc2)), + static_cast(di_misc ^ DownwardInvolution(misc2))); + + // Choose some small number of bits to pull off to test combined + // uniqueness guarantee + int in_bits = i % 7; + unsigned in_mask = (unsigned{1} << in_bits) - 1U; + // IMPLICIT: int out_bits = 8 - in_bits; + std::vector seen(256, false); + for (int j = 0; j < 255; ++j) { + T t_in = misc ^ static_cast(j); + unsigned in = static_cast(t_in); + unsigned out = static_cast(DownwardInvolution(t_in)); + unsigned val = ((out << in_bits) | (in & in_mask)) & 255U; + EXPECT_FALSE(seen[val]); + seen[val] = true; + } + + if (i + 8 < int{8 * sizeof(T)}) { + // Also test manipulating bits in the middle of input is + // bijective in bottom of output + seen = std::vector(256, false); + for (int j = 0; j < 255; ++j) { + T in = misc ^ (static_cast(j) << i); + unsigned val = static_cast(DownwardInvolution(in)) & 255U; + EXPECT_FALSE(seen[val]); + seen[val] = true; + } + } + } + + vm1 = (vm1 << 1) | 1; + } + + EXPECT_EQ(ConstexprFloorLog2(T{1}), 0); + EXPECT_EQ(ConstexprFloorLog2(T{2}), 1); + EXPECT_EQ(ConstexprFloorLog2(T{3}), 1); + EXPECT_EQ(ConstexprFloorLog2(T{42}), 5); +} + +TEST(MathTest, BitOps) { + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); + test_BitOps(); +} + +TEST(MathTest, BitOps128) { test_BitOps(); } + +TEST(MathTest, Math128) { + const Unsigned128 sixteenHexOnes = 0x1111111111111111U; + const Unsigned128 thirtyHexOnes = (sixteenHexOnes << 56) | sixteenHexOnes; + const Unsigned128 sixteenHexTwos = 0x2222222222222222U; + const Unsigned128 thirtyHexTwos = (sixteenHexTwos << 56) | sixteenHexTwos; + + // v will slide from all hex ones to all hex twos + Unsigned128 v = thirtyHexOnes; + for (int i = 0; i <= 30; ++i) { + // Test bitwise operations + EXPECT_EQ(BitsSetToOne(v), 30); + EXPECT_EQ(BitsSetToOne(~v), 128 - 30); + EXPECT_EQ(BitsSetToOne(v & thirtyHexOnes), 30 - i); + EXPECT_EQ(BitsSetToOne(v | thirtyHexOnes), 30 + i); + EXPECT_EQ(BitsSetToOne(v ^ thirtyHexOnes), 2 * i); + EXPECT_EQ(BitsSetToOne(v & thirtyHexTwos), i); + EXPECT_EQ(BitsSetToOne(v | thirtyHexTwos), 60 - i); + EXPECT_EQ(BitsSetToOne(v ^ thirtyHexTwos), 60 - 2 * i); + + // Test comparisons + EXPECT_EQ(v == thirtyHexOnes, i == 0); + EXPECT_EQ(v == thirtyHexTwos, i == 30); + EXPECT_EQ(v > thirtyHexOnes, i > 0); + EXPECT_EQ(v > thirtyHexTwos, false); + EXPECT_EQ(v >= thirtyHexOnes, true); + EXPECT_EQ(v >= thirtyHexTwos, i == 30); + EXPECT_EQ(v < thirtyHexOnes, false); + EXPECT_EQ(v < thirtyHexTwos, i < 30); + EXPECT_EQ(v <= thirtyHexOnes, i == 0); + EXPECT_EQ(v <= thirtyHexTwos, true); + + // Update v, clearing upper-most byte + v = ((v << 12) >> 8) | 0x2; + } + + for (int i = 0; i < 128; ++i) { + // Test shifts + Unsigned128 sl = thirtyHexOnes << i; + Unsigned128 sr = thirtyHexOnes >> i; + EXPECT_EQ(BitsSetToOne(sl), std::min(30, 32 - i / 4)); + EXPECT_EQ(BitsSetToOne(sr), std::max(0, 30 - (i + 3) / 4)); + EXPECT_EQ(BitsSetToOne(sl & sr), i % 2 ? 
0 : std::max(0, 30 - i / 2)); + } + + // Test 64x64->128 multiply + Unsigned128 product = + Multiply64to128(0x1111111111111111U, 0x2222222222222222U); + EXPECT_EQ(Lower64of128(product), 2295594818061633090U); + EXPECT_EQ(Upper64of128(product), 163971058432973792U); +} + +TEST(MathTest, Coding128) { + const char *in = "_1234567890123456"; + // Note: in + 1 is likely unaligned + Unsigned128 decoded = DecodeFixed128(in + 1); + EXPECT_EQ(Lower64of128(decoded), 0x3837363534333231U); + EXPECT_EQ(Upper64of128(decoded), 0x3635343332313039U); + char out[18]; + out[0] = '_'; + EncodeFixed128(out + 1, decoded); + out[17] = '\0'; + EXPECT_EQ(std::string(in), std::string(out)); +} + +TEST(MathTest, CodingGeneric) { + const char *in = "_1234567890123456"; + // Decode + // Note: in + 1 is likely unaligned + Unsigned128 decoded128 = DecodeFixedGeneric(in + 1); + EXPECT_EQ(Lower64of128(decoded128), 0x3837363534333231U); + EXPECT_EQ(Upper64of128(decoded128), 0x3635343332313039U); + + uint64_t decoded64 = DecodeFixedGeneric(in + 1); + EXPECT_EQ(decoded64, 0x3837363534333231U); + + uint32_t decoded32 = DecodeFixedGeneric(in + 1); + EXPECT_EQ(decoded32, 0x34333231U); + + uint16_t decoded16 = DecodeFixedGeneric(in + 1); + EXPECT_EQ(decoded16, 0x3231U); + + // Encode + char out[18]; + out[0] = '_'; + memset(out + 1, '\0', 17); + EncodeFixedGeneric(out + 1, decoded128); + EXPECT_EQ(std::string(in), std::string(out)); + + memset(out + 1, '\0', 9); + EncodeFixedGeneric(out + 1, decoded64); + EXPECT_EQ(std::string("_12345678"), std::string(out)); + + memset(out + 1, '\0', 5); + EncodeFixedGeneric(out + 1, decoded32); + EXPECT_EQ(std::string("_1234"), std::string(out)); + + memset(out + 1, '\0', 3); + EncodeFixedGeneric(out + 1, decoded16); + EXPECT_EQ(std::string("_12"), std::string(out)); +} + +int main(int argc, char **argv) { + fprintf(stderr, "NPHash64 id: %x\n", + static_cast(ROCKSDB_NAMESPACE::GetSliceNPHash64("RocksDB"))); + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/heap.h b/src/rocksdb/util/heap.h new file mode 100644 index 000000000..f221fc732 --- /dev/null +++ b/src/rocksdb/util/heap.h @@ -0,0 +1,174 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include + +#include "port/port.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +// Binary heap implementation optimized for use in multi-way merge sort. +// Comparison to std::priority_queue: +// - In libstdc++, std::priority_queue::pop() usually performs just over logN +// comparisons but never fewer. +// - std::priority_queue does not have a replace-top operation, requiring a +// pop+push. If the replacement element is the new top, this requires +// around 2logN comparisons. +// - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN +// comparisons. +// - This heap provides a replace_top() operation which requires [1, 2logN] +// comparisons. When the replacement element is also the new top, this +// takes just 1 or 2 comparisons. +// +// The last property can yield an order-of-magnitude performance improvement +// when merge-sorting real-world non-random data. 
If the merge operation is +// likely to take chunks of elements from the same input stream, only 1 +// comparison per element is needed. In RocksDB-land, this happens when +// compacting a database where keys are not randomly distributed across L0 +// files but nearby keys are likely to be in the same L0 file. +// +// The container uses the same counterintuitive ordering as +// std::priority_queue: the comparison operator is expected to provide the +// less-than relation, but top() will return the maximum. + +template > +class BinaryHeap { + public: + BinaryHeap() {} + explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) {} + + void push(const T& value) { + data_.push_back(value); + upheap(data_.size() - 1); + } + + void push(T&& value) { + data_.push_back(std::move(value)); + upheap(data_.size() - 1); + } + + const T& top() const { + assert(!empty()); + return data_.front(); + } + + void replace_top(const T& value) { + assert(!empty()); + data_.front() = value; + downheap(get_root()); + } + + void replace_top(T&& value) { + assert(!empty()); + data_.front() = std::move(value); + downheap(get_root()); + } + + void pop() { + assert(!empty()); + if (data_.size() > 1) { + // Avoid self-move-assign, because it could cause problems with + // classes which are not prepared for this and it trips up the + // STL debugger when activated. + data_.front() = std::move(data_.back()); + } + data_.pop_back(); + if (!empty()) { + downheap(get_root()); + } else { + reset_root_cmp_cache(); + } + } + + void swap(BinaryHeap& other) { + std::swap(cmp_, other.cmp_); + data_.swap(other.data_); + std::swap(root_cmp_cache_, other.root_cmp_cache_); + } + + void clear() { + data_.clear(); + reset_root_cmp_cache(); + } + + bool empty() const { return data_.empty(); } + + size_t size() const { return data_.size(); } + + void reset_root_cmp_cache() { + root_cmp_cache_ = std::numeric_limits::max(); + } + + private: + static inline size_t get_root() { return 0; } + static inline size_t get_parent(size_t index) { return (index - 1) / 2; } + static inline size_t get_left(size_t index) { return 2 * index + 1; } + static inline size_t get_right(size_t index) { return 2 * index + 2; } + + void upheap(size_t index) { + T v = std::move(data_[index]); + while (index > get_root()) { + const size_t parent = get_parent(index); + if (!cmp_(data_[parent], v)) { + break; + } + data_[index] = std::move(data_[parent]); + index = parent; + } + data_[index] = std::move(v); + reset_root_cmp_cache(); + } + + void downheap(size_t index) { + T v = std::move(data_[index]); + + size_t picked_child = std::numeric_limits::max(); + while (1) { + const size_t left_child = get_left(index); + if (get_left(index) >= data_.size()) { + break; + } + const size_t right_child = left_child + 1; + assert(right_child == get_right(index)); + picked_child = left_child; + if (index == 0 && root_cmp_cache_ < data_.size()) { + picked_child = root_cmp_cache_; + } else if (right_child < data_.size() && + cmp_(data_[left_child], data_[right_child])) { + picked_child = right_child; + } + if (!cmp_(v, data_[picked_child])) { + break; + } + data_[index] = std::move(data_[picked_child]); + index = picked_child; + } + + if (index == 0) { + // We did not change anything in the tree except for the value + // of the root node, left and right child did not change, we can + // cache that `picked_child` is the smallest child + // so next time we compare againist it directly + root_cmp_cache_ = picked_child; + } else { + // the tree changed, reset cache + 
reset_root_cmp_cache(); + } + + data_[index] = std::move(v); + } + + Compare cmp_; + autovector data_; + // Used to reduce number of cmp_ calls in downheap() + size_t root_cmp_cache_ = std::numeric_limits::max(); +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/heap_test.cc b/src/rocksdb/util/heap_test.cc new file mode 100644 index 000000000..bbb93324f --- /dev/null +++ b/src/rocksdb/util/heap_test.cc @@ -0,0 +1,131 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/heap.h" + +#include + +#include +#include +#include +#include + +#include "port/stack_trace.h" + +#ifndef GFLAGS +const int64_t FLAGS_iters = 100000; +#else +#include "util/gflags_compat.h" +DEFINE_int64(iters, 100000, "number of pseudo-random operations in each test"); +#endif // GFLAGS + +/* + * Compares the custom heap implementation in util/heap.h against + * std::priority_queue on a pseudo-random sequence of operations. + */ + +namespace ROCKSDB_NAMESPACE { + +using HeapTestValue = uint64_t; +using Params = std::tuple; + +class HeapTest : public ::testing::TestWithParam {}; + +TEST_P(HeapTest, Test) { + // This test performs the same pseudorandom sequence of operations on a + // BinaryHeap and an std::priority_queue, comparing output. The three + // possible operations are insert, replace top and pop. + // + // Insert is chosen slightly more often than the others so that the size of + // the heap slowly grows. Once the size heats the MAX_HEAP_SIZE limit, we + // disallow inserting until the heap becomes empty, testing the "draining" + // scenario. 
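// A sketch of the k-way merge pattern that replace_top() in util/heap.h is
// designed for (assuming the usual <element, comparator> template parameters,
// as with std::priority_queue). When the next element from the same input run
// is still the smallest, replace_top() costs only 1-2 comparisons instead of
// a pop() plus push():
#include <cstddef>
#include <utility>
#include <vector>

#include "util/heap.h"

std::vector<int> MergeSortedRunsSketch(
    const std::vector<std::vector<int>>& runs) {
  using Entry = std::pair<int, size_t>;  // (value, index of source run)
  struct GreaterEntry {
    bool operator()(const Entry& a, const Entry& b) const {
      return a.first > b.first;  // inverted so top() yields the minimum
    }
  };
  ROCKSDB_NAMESPACE::BinaryHeap<Entry, GreaterEntry> heap;
  std::vector<size_t> pos(runs.size(), 0);
  for (size_t i = 0; i < runs.size(); ++i) {
    if (!runs[i].empty()) {
      heap.push(Entry(runs[i][0], i));
      pos[i] = 1;
    }
  }
  std::vector<int> out;
  while (!heap.empty()) {
    Entry top = heap.top();
    out.push_back(top.first);
    size_t run = top.second;
    if (pos[run] < runs[run].size()) {
      // Same run still has input: replace in place, often the cheap case.
      heap.replace_top(Entry(runs[run][pos[run]++], run));
    } else {
      heap.pop();
    }
  }
  return out;
}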
+ + const auto MAX_HEAP_SIZE = std::get<0>(GetParam()); + const auto MAX_VALUE = std::get<1>(GetParam()); + const auto RNG_SEED = std::get<2>(GetParam()); + + BinaryHeap heap; + std::priority_queue ref; + + std::mt19937 rng(static_cast(RNG_SEED)); + std::uniform_int_distribution value_dist(0, MAX_VALUE); + int ndrains = 0; + bool draining = false; // hit max size, draining until we empty the heap + size_t size = 0; + for (int64_t i = 0; i < FLAGS_iters; ++i) { + if (size == 0) { + draining = false; + } + + if (!draining && (size == 0 || std::bernoulli_distribution(0.4)(rng))) { + // insert + HeapTestValue val = value_dist(rng); + heap.push(val); + ref.push(val); + ++size; + if (size == MAX_HEAP_SIZE) { + draining = true; + ++ndrains; + } + } else if (std::bernoulli_distribution(0.5)(rng)) { + // replace top + HeapTestValue val = value_dist(rng); + heap.replace_top(val); + ref.pop(); + ref.push(val); + } else { + // pop + assert(size > 0); + heap.pop(); + ref.pop(); + --size; + } + + // After every operation, check that the public methods give the same + // results + assert((size == 0) == ref.empty()); + ASSERT_EQ(size == 0, heap.empty()); + if (size > 0) { + ASSERT_EQ(ref.top(), heap.top()); + } + } + + // Probabilities should be set up to occasionally hit the max heap size and + // drain it + assert(ndrains > 0); + + heap.clear(); + ASSERT_TRUE(heap.empty()); +} + +// Basic test, MAX_VALUE = 3*MAX_HEAP_SIZE (occasional duplicates) +INSTANTIATE_TEST_CASE_P(Basic, HeapTest, + ::testing::Values(Params(1000, 3000, + 0x1b575cf05b708945))); +// Mid-size heap with small values (many duplicates) +INSTANTIATE_TEST_CASE_P(SmallValues, HeapTest, + ::testing::Values(Params(100, 10, 0x5ae213f7bd5dccd0))); +// Small heap, large value range (no duplicates) +INSTANTIATE_TEST_CASE_P(SmallHeap, HeapTest, + ::testing::Values(Params(10, ULLONG_MAX, + 0x3e1fa8f4d01707cf))); +// Two-element heap +INSTANTIATE_TEST_CASE_P(TwoElementHeap, HeapTest, + ::testing::Values(Params(2, 5, 0x4b5e13ea988c6abc))); +// One-element heap +INSTANTIATE_TEST_CASE_P(OneElementHeap, HeapTest, + ::testing::Values(Params(1, 3, 0x176a1019ab0b612e))); + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); +#ifdef GFLAGS + GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true); +#endif // GFLAGS + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/kv_map.h b/src/rocksdb/util/kv_map.h new file mode 100644 index 000000000..62be6d18e --- /dev/null +++ b/src/rocksdb/util/kv_map.h @@ -0,0 +1,33 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+#pragma once + +#include +#include + +#include "rocksdb/comparator.h" +#include "rocksdb/slice.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { +namespace stl_wrappers { + +struct LessOfComparator { + explicit LessOfComparator(const Comparator* c = BytewiseComparator()) + : cmp(c) {} + + bool operator()(const std::string& a, const std::string& b) const { + return cmp->Compare(Slice(a), Slice(b)) < 0; + } + bool operator()(const Slice& a, const Slice& b) const { + return cmp->Compare(a, b) < 0; + } + + const Comparator* cmp; +}; + +using KVMap = std::map; +} // namespace stl_wrappers +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/log_write_bench.cc b/src/rocksdb/util/log_write_bench.cc new file mode 100644 index 000000000..c1637db15 --- /dev/null +++ b/src/rocksdb/util/log_write_bench.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef GFLAGS +#include +int main() { + fprintf(stderr, "Please install gflags to run rocksdb tools\n"); + return 1; +} +#else + +#include "file/writable_file_writer.h" +#include "monitoring/histogram.h" +#include "rocksdb/env.h" +#include "rocksdb/system_clock.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/gflags_compat.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; +using GFLAGS_NAMESPACE::SetUsageMessage; + +// A simple benchmark to simulate transactional logs + +DEFINE_int32(num_records, 6000, "Number of records."); +DEFINE_int32(record_size, 249, "Size of each record."); +DEFINE_int32(record_interval, 10000, "Interval between records (microSec)"); +DEFINE_int32(bytes_per_sync, 0, "bytes_per_sync parameter in EnvOptions"); +DEFINE_bool(enable_sync, false, "sync after each write."); + +namespace ROCKSDB_NAMESPACE { +void RunBenchmark() { + std::string file_name = test::PerThreadDBPath("log_write_benchmark.log"); + DBOptions options; + Env* env = Env::Default(); + const auto& clock = env->GetSystemClock(); + EnvOptions env_options = env->OptimizeForLogWrite(EnvOptions(), options); + env_options.bytes_per_sync = FLAGS_bytes_per_sync; + std::unique_ptr file; + env->NewWritableFile(file_name, &file, env_options); + std::unique_ptr writer; + writer.reset(new WritableFileWriter(std::move(file), file_name, env_options, + clock, nullptr /* stats */, + options.listeners)); + + std::string record; + record.assign(FLAGS_record_size, 'X'); + + HistogramImpl hist; + + uint64_t start_time = clock->NowMicros(); + for (int i = 0; i < FLAGS_num_records; i++) { + uint64_t start_nanos = clock->NowNanos(); + writer->Append(record); + writer->Flush(); + if (FLAGS_enable_sync) { + writer->Sync(false); + } + hist.Add(clock->NowNanos() - start_nanos); + + if (i % 1000 == 1) { + fprintf(stderr, "Wrote %d records...\n", i); + } + + int time_to_sleep = + (i + 1) * FLAGS_record_interval - (clock->NowMicros() - start_time); + if (time_to_sleep > 0) { + clock->SleepForMicroseconds(time_to_sleep); + } + } + + fprintf(stderr, "Distribution of latency of append+flush: \n%s", + hist.ToString().c_str()); +} +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) + + " [OPTIONS]..."); + ParseCommandLineFlags(&argc, &argv, true); + + ROCKSDB_NAMESPACE::RunBenchmark(); + return 0; +} + +#endif // GFLAGS diff 
--git a/src/rocksdb/util/math.h b/src/rocksdb/util/math.h new file mode 100644 index 000000000..da31b43ec --- /dev/null +++ b/src/rocksdb/util/math.h @@ -0,0 +1,294 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#ifdef _MSC_VER +#include +#endif + +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Fast implementation of floor(log2(v)). Undefined for 0 or negative +// numbers (in case of signed type). +template +inline int FloorLog2(T v) { + static_assert(std::is_integral::value, "non-integral type"); + assert(v > 0); +#ifdef _MSC_VER + static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); + unsigned long idx = 0; + if (sizeof(T) <= sizeof(uint32_t)) { + _BitScanReverse(&idx, static_cast(v)); + } else { +#if defined(_M_X64) || defined(_M_ARM64) + _BitScanReverse64(&idx, static_cast(v)); +#else + const auto vh = static_cast(static_cast(v) >> 32); + if (vh != 0) { + _BitScanReverse(&idx, static_cast(vh)); + idx += 32; + } else { + _BitScanReverse(&idx, static_cast(v)); + } +#endif + } + return idx; +#else + static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big"); + if (sizeof(T) <= sizeof(unsigned int)) { + int lz = __builtin_clz(static_cast(v)); + return int{sizeof(unsigned int)} * 8 - 1 - lz; + } else if (sizeof(T) <= sizeof(unsigned long)) { + int lz = __builtin_clzl(static_cast(v)); + return int{sizeof(unsigned long)} * 8 - 1 - lz; + } else { + int lz = __builtin_clzll(static_cast(v)); + return int{sizeof(unsigned long long)} * 8 - 1 - lz; + } +#endif +} + +// Constexpr version of FloorLog2 +template +constexpr int ConstexprFloorLog2(T v) { + int rv = 0; + while (v > T{1}) { + ++rv; + v >>= 1; + } + return rv; +} + +// Number of low-order zero bits before the first 1 bit. Undefined for 0. +template +inline int CountTrailingZeroBits(T v) { + static_assert(std::is_integral::value, "non-integral type"); + assert(v != 0); +#ifdef _MSC_VER + static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); + unsigned long tz = 0; + if (sizeof(T) <= sizeof(uint32_t)) { + _BitScanForward(&tz, static_cast(v)); + } else { +#if defined(_M_X64) || defined(_M_ARM64) + _BitScanForward64(&tz, static_cast(v)); +#else + _BitScanForward(&tz, static_cast(v)); + if (tz == 0) { + _BitScanForward(&tz, + static_cast(static_cast(v) >> 32)); + tz += 32; + } +#endif + } + return static_cast(tz); +#else + static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big"); + if (sizeof(T) <= sizeof(unsigned int)) { + return __builtin_ctz(static_cast(v)); + } else if (sizeof(T) <= sizeof(unsigned long)) { + return __builtin_ctzl(static_cast(v)); + } else { + return __builtin_ctzll(static_cast(v)); + } +#endif +} + +// Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include +// the following code at coarse granularity for simpler macros. It's important +// to exclude at least so our non-MSVC unit test coverage tool doesn't see it. +#ifdef _MSC_VER + +namespace detail { + +template +int BitsSetToOneFallback(T v) { + const int kBits = static_cast(sizeof(T)) * 8; + static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits"); + // we static_cast these bit patterns in order to truncate them to the correct + // size. 
Warning C4309 dislikes this technique, so disable it here. +#pragma warning(disable : 4309) + v = static_cast(v - ((v >> 1) & static_cast(0x5555555555555555ull))); + v = static_cast((v & static_cast(0x3333333333333333ull)) + + ((v >> 2) & static_cast(0x3333333333333333ull))); + v = static_cast((v + (v >> 4)) & static_cast(0x0F0F0F0F0F0F0F0Full)); +#pragma warning(default : 4309) + for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) { + v += static_cast(v >> shift_bits); + } + // we want the bottom "slot" that's big enough to represent a value up to + // (and including) kBits. + return static_cast(v & static_cast(kBits | (kBits - 1))); +} + +} // namespace detail + +#endif // _MSC_VER + +// Number of bits set to 1. Also known as "population count". +template +inline int BitsSetToOne(T v) { + static_assert(std::is_integral::value, "non-integral type"); +#ifdef _MSC_VER + static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); + if (sizeof(T) < sizeof(uint32_t)) { + // This bit mask is to avoid a compiler warning on unused path + constexpr auto mm = 8 * sizeof(uint32_t) - 1; + // The bit mask is to neutralize sign extension on small signed types + constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1; +#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86)) + return static_cast(__popcnt(static_cast(v) & m)); +#else + return static_cast(detail::BitsSetToOneFallback(v) & m); +#endif + } else if (sizeof(T) == sizeof(uint32_t)) { +#if defined(HAVE_SSE42) && (defined(_M_X64) || defined(_M_IX86)) + return static_cast(__popcnt(static_cast(v))); +#else + return detail::BitsSetToOneFallback(static_cast(v)); +#endif + } else { +#if defined(HAVE_SSE42) && defined(_M_X64) + return static_cast(__popcnt64(static_cast(v))); +#elif defined(HAVE_SSE42) && defined(_M_IX86) + return static_cast( + __popcnt(static_cast(static_cast(v) >> 32) + + __popcnt(static_cast(v)))); +#else + return detail::BitsSetToOneFallback(static_cast(v)); +#endif + } +#else + static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big"); + if (sizeof(T) < sizeof(unsigned int)) { + // This bit mask is to avoid a compiler warning on unused path + constexpr auto mm = 8 * sizeof(unsigned int) - 1; + // This bit mask is to neutralize sign extension on small signed types + constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1; + return __builtin_popcount(static_cast(v) & m); + } else if (sizeof(T) == sizeof(unsigned int)) { + return __builtin_popcount(static_cast(v)); + } else if (sizeof(T) <= sizeof(unsigned long)) { + return __builtin_popcountl(static_cast(v)); + } else { + return __builtin_popcountll(static_cast(v)); + } +#endif +} + +template +inline int BitParity(T v) { + static_assert(std::is_integral::value, "non-integral type"); +#ifdef _MSC_VER + // bit parity == oddness of popcount + return BitsSetToOne(v) & 1; +#else + static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big"); + if (sizeof(T) <= sizeof(unsigned int)) { + // On any sane systen, potential sign extension here won't change parity + return __builtin_parity(static_cast(v)); + } else if (sizeof(T) <= sizeof(unsigned long)) { + return __builtin_parityl(static_cast(v)); + } else { + return __builtin_parityll(static_cast(v)); + } +#endif +} + +// Swaps between big and little endian. Can be used in combination with the +// little-endian encoding/decoding functions in coding_lean.h and coding.h to +// encode/decode big endian. 
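// A minimal sketch of the combination described above (helper names are
// hypothetical): EncodeFixed32 from coding_lean.h writes little-endian, so
// swapping the value first yields a big-endian encoding, and the inverse
// order decodes it.
#include <cstdint>

#include "util/coding_lean.h"
#include "util/math.h"

inline void EncodeBigEndian32Sketch(char* dst, uint32_t v) {
  ROCKSDB_NAMESPACE::EncodeFixed32(dst, ROCKSDB_NAMESPACE::EndianSwapValue(v));
}

inline uint32_t DecodeBigEndian32Sketch(const char* src) {
  return ROCKSDB_NAMESPACE::EndianSwapValue(
      ROCKSDB_NAMESPACE::DecodeFixed32(src));
}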
+template +inline T EndianSwapValue(T v) { + static_assert(std::is_integral::value, "non-integral type"); + +#ifdef _MSC_VER + if (sizeof(T) == 2) { + return static_cast(_byteswap_ushort(static_cast(v))); + } else if (sizeof(T) == 4) { + return static_cast(_byteswap_ulong(static_cast(v))); + } else if (sizeof(T) == 8) { + return static_cast(_byteswap_uint64(static_cast(v))); + } +#else + if (sizeof(T) == 2) { + return static_cast(__builtin_bswap16(static_cast(v))); + } else if (sizeof(T) == 4) { + return static_cast(__builtin_bswap32(static_cast(v))); + } else if (sizeof(T) == 8) { + return static_cast(__builtin_bswap64(static_cast(v))); + } +#endif + // Recognized by clang as bswap, but not by gcc :( + T ret_val = 0; + for (std::size_t i = 0; i < sizeof(T); ++i) { + ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i)); + } + return ret_val; +} + +// Reverses the order of bits in an integral value +template +inline T ReverseBits(T v) { + T r = EndianSwapValue(v); + const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8); + const T kEveryByte = kHighestByte | (kHighestByte / 255); + + r = ((r & (kEveryByte * 0x0f)) << 4) | ((r >> 4) & (kEveryByte * 0x0f)); + r = ((r & (kEveryByte * 0x33)) << 2) | ((r >> 2) & (kEveryByte * 0x33)); + r = ((r & (kEveryByte * 0x55)) << 1) | ((r >> 1) & (kEveryByte * 0x55)); + + return r; +} + +// Every output bit depends on many input bits in the same and higher +// positions, but not lower positions. Specifically, this function +// * Output highest bit set to 1 is same as input (same FloorLog2, or +// equivalently, same number of leading zeros) +// * Is its own inverse (an involution) +// * Guarantees that b bottom bits of v and c bottom bits of +// DownwardInvolution(v) uniquely identify b + c bottom bits of v +// (which is all of v if v < 2**(b + c)). +// ** A notable special case is that modifying c adjacent bits at +// some chosen position in the input is bijective with the bottom c +// output bits. +// * Distributes over xor, as in DI(a ^ b) == DI(a) ^ DI(b) +// +// This transformation is equivalent to a matrix*vector multiplication in +// GF(2) where the matrix is recursively defined by the pattern matrix +// P = | 1 1 | +// | 0 1 | +// and replacing 1's with P and 0's with 2x2 zero matices to some depth, +// e.g. depth of 6 for 64-bit T. An essential feature of this matrix +// is that all square sub-matrices that include the top row are invertible. +template +inline T DownwardInvolution(T v) { + static_assert(std::is_integral::value, "non-integral type"); + static_assert(sizeof(T) <= 8, "only supported up to 64 bits"); + + uint64_t r = static_cast(v); + if constexpr (sizeof(T) > 4) { + r ^= r >> 32; + } + if constexpr (sizeof(T) > 2) { + r ^= (r & 0xffff0000ffff0000U) >> 16; + } + if constexpr (sizeof(T) > 1) { + r ^= (r & 0xff00ff00ff00ff00U) >> 8; + } + r ^= (r & 0xf0f0f0f0f0f0f0f0U) >> 4; + r ^= (r & 0xccccccccccccccccU) >> 2; + r ^= (r & 0xaaaaaaaaaaaaaaaaU) >> 1; + return static_cast(r); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/math128.h b/src/rocksdb/util/math128.h new file mode 100644 index 000000000..ae490051a --- /dev/null +++ b/src/rocksdb/util/math128.h @@ -0,0 +1,316 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
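// Quick property check for DownwardInvolution() as documented above: it is
// its own inverse, it distributes over xor, and it preserves the position of
// the highest set bit (the input values here are arbitrary):
#include <cassert>
#include <cstdint>

#include "util/math.h"

void DownwardInvolutionSketch() {
  using ROCKSDB_NAMESPACE::DownwardInvolution;
  using ROCKSDB_NAMESPACE::FloorLog2;
  const uint64_t a = 0x9e3779b97f4a7c15U;
  const uint64_t b = 0xc2b2ae3d27d4eb4fU;
  assert(DownwardInvolution(DownwardInvolution(a)) == a);    // involution
  assert(DownwardInvolution(a ^ b) ==
         (DownwardInvolution(a) ^ DownwardInvolution(b)));   // xor-linear
  assert(FloorLog2(DownwardInvolution(a)) == FloorLog2(a));  // top bit kept
}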
+ +#pragma once + +#include "util/coding_lean.h" +#include "util/math.h" + +#ifdef TEST_UINT128_COMPAT +#undef HAVE_UINT128_EXTENSION +#endif + +namespace ROCKSDB_NAMESPACE { + +// Unsigned128 is a 128 bit value supporting (at least) bitwise operators, +// shifts, and comparisons. __uint128_t is not always available. + +#ifdef HAVE_UINT128_EXTENSION +using Unsigned128 = __uint128_t; +#else +struct Unsigned128 { + uint64_t lo; + uint64_t hi; + + inline Unsigned128() { + static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t), + "unexpected overhead in representation"); + lo = 0; + hi = 0; + } + + inline Unsigned128(uint64_t lower) { + lo = lower; + hi = 0; + } + + inline Unsigned128(uint64_t lower, uint64_t upper) { + lo = lower; + hi = upper; + } + + explicit operator uint64_t() { return lo; } + + explicit operator uint32_t() { return static_cast(lo); } + + explicit operator uint16_t() { return static_cast(lo); } + + explicit operator uint8_t() { return static_cast(lo); } +}; + +inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) { + shift &= 127; + Unsigned128 rv; + if (shift >= 64) { + rv.lo = 0; + rv.hi = lhs.lo << (shift & 63); + } else { + uint64_t tmp = lhs.lo; + rv.lo = tmp << shift; + // Ensure shift==0 shifts away everything. (This avoids another + // conditional branch on shift == 0.) + tmp = tmp >> 1 >> (63 - shift); + rv.hi = tmp | (lhs.hi << shift); + } + return rv; +} + +inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) { + lhs = lhs << shift; + return lhs; +} + +inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) { + shift &= 127; + Unsigned128 rv; + if (shift >= 64) { + rv.hi = 0; + rv.lo = lhs.hi >> (shift & 63); + } else { + uint64_t tmp = lhs.hi; + rv.hi = tmp >> shift; + // Ensure shift==0 shifts away everything + tmp = tmp << 1 << (63 - shift); + rv.lo = tmp | (lhs.lo >> shift); + } + return rv; +} + +inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) { + lhs = lhs >> shift; + return lhs; +} + +inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) { + return Unsigned128(lhs.lo & rhs.lo, lhs.hi & rhs.hi); +} + +inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) { + lhs = lhs & rhs; + return lhs; +} + +inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) { + return Unsigned128(lhs.lo | rhs.lo, lhs.hi | rhs.hi); +} + +inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) { + lhs = lhs | rhs; + return lhs; +} + +inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) { + return Unsigned128(lhs.lo ^ rhs.lo, lhs.hi ^ rhs.hi); +} + +inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) { + lhs = lhs ^ rhs; + return lhs; +} + +inline Unsigned128 operator~(const Unsigned128& v) { + return Unsigned128(~v.lo, ~v.hi); +} + +inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) { + return lhs.lo == rhs.lo && lhs.hi == rhs.hi; +} + +inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) { + return lhs.lo != rhs.lo || lhs.hi != rhs.hi; +} + +inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) { + return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo > rhs.lo); +} + +inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) { + return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo < rhs.lo); +} + +inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) { + return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo >= 
rhs.lo); +} + +inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) { + return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo <= rhs.lo); +} +#endif + +inline uint64_t Lower64of128(Unsigned128 v) { +#ifdef HAVE_UINT128_EXTENSION + return static_cast(v); +#else + return v.lo; +#endif +} + +inline uint64_t Upper64of128(Unsigned128 v) { +#ifdef HAVE_UINT128_EXTENSION + return static_cast(v >> 64); +#else + return v.hi; +#endif +} + +// This generally compiles down to a single fast instruction on 64-bit. +// This doesn't really make sense as operator* because it's not a +// general 128x128 multiply and provides more output than 64x64 multiply. +inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) { +#ifdef HAVE_UINT128_EXTENSION + return Unsigned128{a} * Unsigned128{b}; +#else + // Full decomposition + // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit + // -> 128-bit multiplication and optimize it appropriately. + uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF}; + uint64_t lower = tmp & 0xffffFFFF; + tmp >>= 32; + tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32}; + // Avoid overflow: first add lower 32 of tmp2, and later upper 32 + uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF}; + tmp += tmp2 & 0xffffFFFF; + lower |= tmp << 32; + tmp >>= 32; + tmp += tmp2 >> 32; + tmp += uint64_t{b >> 32} * uint64_t{a >> 32}; + return Unsigned128(lower, tmp); +#endif +} + +template <> +inline int FloorLog2(Unsigned128 v) { + if (Upper64of128(v) == 0) { + return FloorLog2(Lower64of128(v)); + } else { + return FloorLog2(Upper64of128(v)) + 64; + } +} + +template <> +inline int CountTrailingZeroBits(Unsigned128 v) { + if (Lower64of128(v) != 0) { + return CountTrailingZeroBits(Lower64of128(v)); + } else { + return CountTrailingZeroBits(Upper64of128(v)) + 64; + } +} + +template <> +inline int BitsSetToOne(Unsigned128 v) { + return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v)); +} + +template <> +inline int BitParity(Unsigned128 v) { + return BitParity(Lower64of128(v) ^ Upper64of128(v)); +} + +template <> +inline Unsigned128 EndianSwapValue(Unsigned128 v) { + return (Unsigned128{EndianSwapValue(Lower64of128(v))} << 64) | + EndianSwapValue(Upper64of128(v)); +} + +template <> +inline Unsigned128 ReverseBits(Unsigned128 v) { + return (Unsigned128{ReverseBits(Lower64of128(v))} << 64) | + ReverseBits(Upper64of128(v)); +} + +template <> +inline Unsigned128 DownwardInvolution(Unsigned128 v) { + return (Unsigned128{DownwardInvolution(Upper64of128(v))} << 64) | + DownwardInvolution(Upper64of128(v) ^ Lower64of128(v)); +} + +template +struct IsUnsignedUpTo128 + : std::integral_constant::value || + std::is_same::value> {}; + +inline void EncodeFixed128(char* dst, Unsigned128 value) { + EncodeFixed64(dst, Lower64of128(value)); + EncodeFixed64(dst + 8, Upper64of128(value)); +} + +inline Unsigned128 DecodeFixed128(const char* ptr) { + Unsigned128 rv = DecodeFixed64(ptr + 8); + return (rv << 64) | DecodeFixed64(ptr); +} + +// A version of EncodeFixed* for generic algorithms. Likely to be used +// with Unsigned128, so lives here for now. 
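A brief caller-side sketch of the 128-bit helpers defined above (illustrative only; it assumes a build inside the RocksDB tree, where util/math128.h works with or without the __uint128_t extension):

#include <cassert>
#include <cstdint>

#include "util/math128.h"

int main() {
  using namespace ROCKSDB_NAMESPACE;

  // Full 64x64 -> 128-bit product; the upper half is what a plain uint64_t
  // multiplication would discard.
  Unsigned128 p = Multiply64to128(0xffffffffffffffffU, 0xffffffffffffffffU);
  assert(Upper64of128(p) == 0xfffffffffffffffeU);  // (2^64-1)^2 = 2^128 - 2^65 + 1
  assert(Lower64of128(p) == 1);

  // Round trip through the fixed-width little-endian encoding.
  char buf[16];
  EncodeFixed128(buf, p);
  assert(DecodeFixed128(buf) == p);
  return 0;
}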
+template +inline void EncodeFixedGeneric(char* /*dst*/, T /*value*/) { + // Unfortunately, GCC does not appear to optimize this simple code down + // to a trivial load on Intel: + // + // T ret_val = 0; + // for (size_t i = 0; i < sizeof(T); ++i) { + // ret_val |= (static_cast(static_cast(ptr[i])) << (8 * + // i)); + // } + // return ret_val; + // + // But does unroll the loop, and does optimize manually unrolled version + // for specific sizes down to a trivial load. I have no idea why it doesn't + // do both on this code. + + // So instead, we rely on specializations + static_assert(sizeof(T) == 0, "No specialization provided for this type"); +} + +template <> +inline void EncodeFixedGeneric(char* dst, uint16_t value) { + return EncodeFixed16(dst, value); +} +template <> +inline void EncodeFixedGeneric(char* dst, uint32_t value) { + return EncodeFixed32(dst, value); +} +template <> +inline void EncodeFixedGeneric(char* dst, uint64_t value) { + return EncodeFixed64(dst, value); +} +template <> +inline void EncodeFixedGeneric(char* dst, Unsigned128 value) { + return EncodeFixed128(dst, value); +} + +// A version of EncodeFixed* for generic algorithms. +template +inline T DecodeFixedGeneric(const char* /*dst*/) { + static_assert(sizeof(T) == 0, "No specialization provided for this type"); +} + +template <> +inline uint16_t DecodeFixedGeneric(const char* dst) { + return DecodeFixed16(dst); +} +template <> +inline uint32_t DecodeFixedGeneric(const char* dst) { + return DecodeFixed32(dst); +} +template <> +inline uint64_t DecodeFixedGeneric(const char* dst) { + return DecodeFixed64(dst); +} +template <> +inline Unsigned128 DecodeFixedGeneric(const char* dst) { + return DecodeFixed128(dst); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/murmurhash.cc b/src/rocksdb/util/murmurhash.cc new file mode 100644 index 000000000..a69f3918a --- /dev/null +++ b/src/rocksdb/util/murmurhash.cc @@ -0,0 +1,196 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +/* + Murmurhash from http://sites.google.com/site/murmurhash/ + + All code is released to the public domain. For business purposes, Murmurhash + is under the MIT license. +*/ +#include "murmurhash.h" + +#include "port/lang.h" + +#if defined(__x86_64__) + +// ------------------------------------------------------------------- +// +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. 
+// +// 64-bit hash for 64-bit platforms + +#ifdef ROCKSDB_UBSAN_RUN +#if defined(__clang__) +__attribute__((__no_sanitize__("alignment"))) +#elif defined(__GNUC__) +__attribute__((__no_sanitize_undefined__)) +#endif +#endif +// clang-format off +uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ) +{ + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + + while(data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch(len & 7) + { + case 7: h ^= ((uint64_t)data2[6]) << 48; FALLTHROUGH_INTENDED; + case 6: h ^= ((uint64_t)data2[5]) << 40; FALLTHROUGH_INTENDED; + case 5: h ^= ((uint64_t)data2[4]) << 32; FALLTHROUGH_INTENDED; + case 4: h ^= ((uint64_t)data2[3]) << 24; FALLTHROUGH_INTENDED; + case 3: h ^= ((uint64_t)data2[2]) << 16; FALLTHROUGH_INTENDED; + case 2: h ^= ((uint64_t)data2[1]) << 8; FALLTHROUGH_INTENDED; + case 1: h ^= ((uint64_t)data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} +// clang-format on + +#elif defined(__i386__) + +// ------------------------------------------------------------------- +// +// Note - This code makes a few assumptions about how your machine behaves - +// +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 +// +// And it has a few limitations - +// +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. +// clang-format off +unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + unsigned int k = *(unsigned int *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; + case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} +// clang-format on + +#else + +// ------------------------------------------------------------------- +// +// Same as MurmurHash2, but endian- and alignment-neutral. +// Half the speed though, alas. 
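As a usage sketch for the 64-bit variant above (it assumes an x86-64 build, where util/murmurhash.h declares MurmurHash64A and aliases MURMUR_HASH/MurmurHash to it):

#include <cstdint>
#include <string>

#include "util/murmurhash.h"

uint64_t HashKey(const std::string& key) {
  // Same calling convention that ROCKSDB_NAMESPACE::murmur_hash uses for a
  // Slice: (data pointer, length as int, seed).
  return MurmurHash64A(key.data(), static_cast<int>(key.size()), /*seed=*/0);
}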
+// clang-format off +unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed ) +{ + const unsigned int m = 0x5bd1e995; + const int r = 24; + + unsigned int h = seed ^ len; + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + unsigned int k; + + k = data[0]; + k |= data[1] << 8; + k |= data[2] << 16; + k |= data[3] << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch(len) + { + case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; + case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} +// clang-format on + +#endif diff --git a/src/rocksdb/util/murmurhash.h b/src/rocksdb/util/murmurhash.h new file mode 100644 index 000000000..7ef4cbbec --- /dev/null +++ b/src/rocksdb/util/murmurhash.h @@ -0,0 +1,43 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +/* + Murmurhash from http://sites.google.com/site/murmurhash/ + + All code is released to the public domain. For business purposes, Murmurhash + is under the MIT license. +*/ +#pragma once +#include + +#include "rocksdb/slice.h" + +#if defined(__x86_64__) +#define MURMUR_HASH MurmurHash64A +uint64_t MurmurHash64A(const void* key, int len, unsigned int seed); +#define MurmurHash MurmurHash64A +using murmur_t = uint64_t; + +#elif defined(__i386__) +#define MURMUR_HASH MurmurHash2 +unsigned int MurmurHash2(const void* key, int len, unsigned int seed); +#define MurmurHash MurmurHash2 +using murmur_t = unsigned int; + +#else +#define MURMUR_HASH MurmurHashNeutral2 +unsigned int MurmurHashNeutral2(const void* key, int len, unsigned int seed); +#define MurmurHash MurmurHashNeutral2 +using murmur_t = unsigned int; +#endif + +// Allow slice to be hashable by murmur hash. +namespace ROCKSDB_NAMESPACE { +struct murmur_hash { + size_t operator()(const Slice& slice) const { + return MurmurHash(slice.data(), static_cast(slice.size()), 0); + } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/mutexlock.h b/src/rocksdb/util/mutexlock.h new file mode 100644 index 000000000..94066b29e --- /dev/null +++ b/src/rocksdb/util/mutexlock.h @@ -0,0 +1,180 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include +#include +#include + +#include "port/port.h" + +namespace ROCKSDB_NAMESPACE { + +// Helper class that locks a mutex on construction and unlocks the mutex when +// the destructor of the MutexLock object is invoked. +// +// Typical usage: +// +// void MyClass::MyMethod() { +// MutexLock l(&mu_); // mu_ is an instance variable +// ... some complex code, possibly with multiple return paths ... 
+// } + +class MutexLock { + public: + explicit MutexLock(port::Mutex *mu) : mu_(mu) { this->mu_->Lock(); } + // No copying allowed + MutexLock(const MutexLock &) = delete; + void operator=(const MutexLock &) = delete; + + ~MutexLock() { this->mu_->Unlock(); } + + private: + port::Mutex *const mu_; +}; + +// +// Acquire a ReadLock on the specified RWMutex. +// The Lock will be automatically released when the +// object goes out of scope. +// +class ReadLock { + public: + explicit ReadLock(port::RWMutex *mu) : mu_(mu) { this->mu_->ReadLock(); } + // No copying allowed + ReadLock(const ReadLock &) = delete; + void operator=(const ReadLock &) = delete; + + ~ReadLock() { this->mu_->ReadUnlock(); } + + private: + port::RWMutex *const mu_; +}; + +// +// Automatically unlock a locked mutex when the object is destroyed +// +class ReadUnlock { + public: + explicit ReadUnlock(port::RWMutex *mu) : mu_(mu) { mu->AssertHeld(); } + // No copying allowed + ReadUnlock(const ReadUnlock &) = delete; + ReadUnlock &operator=(const ReadUnlock &) = delete; + + ~ReadUnlock() { mu_->ReadUnlock(); } + + private: + port::RWMutex *const mu_; +}; + +// +// Acquire a WriteLock on the specified RWMutex. +// The Lock will be automatically released then the +// object goes out of scope. +// +class WriteLock { + public: + explicit WriteLock(port::RWMutex *mu) : mu_(mu) { this->mu_->WriteLock(); } + // No copying allowed + WriteLock(const WriteLock &) = delete; + void operator=(const WriteLock &) = delete; + + ~WriteLock() { this->mu_->WriteUnlock(); } + + private: + port::RWMutex *const mu_; +}; + +// +// SpinMutex has very low overhead for low-contention cases. Method names +// are chosen so you can use std::unique_lock or std::lock_guard with it. +// +class SpinMutex { + public: + SpinMutex() : locked_(false) {} + + bool try_lock() { + auto currently_locked = locked_.load(std::memory_order_relaxed); + return !currently_locked && + locked_.compare_exchange_weak(currently_locked, true, + std::memory_order_acquire, + std::memory_order_relaxed); + } + + void lock() { + for (size_t tries = 0;; ++tries) { + if (try_lock()) { + // success + break; + } + port::AsmVolatilePause(); + if (tries > 100) { + std::this_thread::yield(); + } + } + } + + void unlock() { locked_.store(false, std::memory_order_release); } + + private: + std::atomic locked_; +}; + +// We want to prevent false sharing +template +struct ALIGN_AS(CACHE_LINE_SIZE) LockData { + T lock_; +}; + +// +// Inspired by Guava: https://github.com/google/guava/wiki/StripedExplained +// A striped Lock. This offers the underlying lock striping similar +// to that of ConcurrentHashMap in a reusable form, and extends it for +// semaphores and read-write locks. Conceptually, lock striping is the technique +// of dividing a lock into many stripes, increasing the granularity of a +// single lock and allowing independent operations to lock different stripes and +// proceed concurrently, instead of creating contention for a single lock. 
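A minimal usage sketch for the helpers above (Counter and its members are hypothetical; the point is that SpinMutex's lock()/try_lock()/unlock() names let it work with the standard RAII wrappers):

#include <cstdint>
#include <mutex>  // std::lock_guard

#include "util/mutexlock.h"

class Counter {
 public:
  void Add(uint64_t n) {
    // std::lock_guard works because SpinMutex follows the standard Lockable
    // naming; port::Mutex users would use MutexLock instead.
    std::lock_guard<ROCKSDB_NAMESPACE::SpinMutex> guard(mu_);
    total_ += n;
  }

  uint64_t Total() {
    std::lock_guard<ROCKSDB_NAMESPACE::SpinMutex> guard(mu_);
    return total_;
  }

 private:
  ROCKSDB_NAMESPACE::SpinMutex mu_;
  uint64_t total_ = 0;
};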
+// +template +class Striped { + public: + Striped(size_t stripes, std::function hash) + : stripes_(stripes), hash_(hash) { + locks_ = reinterpret_cast *>( + port::cacheline_aligned_alloc(sizeof(LockData) * stripes)); + for (size_t i = 0; i < stripes; i++) { + new (&locks_[i]) LockData(); + } + } + + virtual ~Striped() { + if (locks_ != nullptr) { + assert(stripes_ > 0); + for (size_t i = 0; i < stripes_; i++) { + locks_[i].~LockData(); + } + port::cacheline_aligned_free(locks_); + } + } + + T *get(const P &key) { + uint64_t h = hash_(key); + size_t index = h % stripes_; + return &reinterpret_cast *>(&locks_[index])->lock_; + } + + private: + size_t stripes_; + LockData *locks_; + std::function hash_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/ppc-opcode.h b/src/rocksdb/util/ppc-opcode.h new file mode 100644 index 000000000..5cc5af0e3 --- /dev/null +++ b/src/rocksdb/util/ppc-opcode.h @@ -0,0 +1,27 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// Copyright (c) 2017 International Business Machines Corp. +// All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#define __PPC_RA(a) (((a)&0x1f) << 16) +#define __PPC_RB(b) (((b)&0x1f) << 11) +#define __PPC_XA(a) ((((a)&0x1f) << 16) | (((a)&0x20) >> 3)) +#define __PPC_XB(b) ((((b)&0x1f) << 11) | (((b)&0x20) >> 4)) +#define __PPC_XS(s) ((((s)&0x1f) << 21) | (((s)&0x20) >> 5)) +#define __PPC_XT(s) __PPC_XS(s) +#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) +#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) + +#define PPC_INST_VPMSUMW 0x10000488 +#define PPC_INST_VPMSUMD 0x100004c8 +#define PPC_INST_MFVSRD 0x7c000066 +#define PPC_INST_MTVSRD 0x7c000166 + +#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b) +#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b) +#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t) + 32, a, 0) +#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t) + 32, a, 0) diff --git a/src/rocksdb/util/random.cc b/src/rocksdb/util/random.cc new file mode 100644 index 000000000..c94c28dfb --- /dev/null +++ b/src/rocksdb/util/random.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "util/random.h" + +#include +#include + +#include +#include + +#include "port/likely.h" +#include "util/thread_local.h" + +#define STORAGE_DECL static thread_local + +namespace ROCKSDB_NAMESPACE { + +Random* Random::GetTLSInstance() { + STORAGE_DECL Random* tls_instance; + STORAGE_DECL std::aligned_storage::type tls_instance_bytes; + + auto rv = tls_instance; + if (UNLIKELY(rv == nullptr)) { + size_t seed = std::hash()(std::this_thread::get_id()); + rv = new (&tls_instance_bytes) Random((uint32_t)seed); + tls_instance = rv; + } + return rv; +} + +std::string Random::HumanReadableString(int len) { + std::string ret; + ret.resize(len); + for (int i = 0; i < len; ++i) { + ret[i] = static_cast('a' + Uniform(26)); + } + return ret; +} + +std::string Random::RandomString(int len) { + std::string ret; + ret.resize(len); + for (int i = 0; i < len; i++) { + ret[i] = static_cast(' ' + Uniform(95)); // ' ' .. 
'~' + } + return ret; +} + +std::string Random::RandomBinaryString(int len) { + std::string ret; + ret.resize(len); + for (int i = 0; i < len; i++) { + ret[i] = static_cast(Uniform(CHAR_MAX)); + } + return ret; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/random.h b/src/rocksdb/util/random.h new file mode 100644 index 000000000..8923bdc4f --- /dev/null +++ b/src/rocksdb/util/random.h @@ -0,0 +1,190 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// A very simple random number generator. Not especially good at +// generating truly random bits, but good enough for our needs in this +// package. +class Random { + private: + enum : uint32_t { + M = 2147483647L // 2^31-1 + }; + enum : uint64_t { + A = 16807 // bits 14, 8, 7, 5, 2, 1, 0 + }; + + uint32_t seed_; + + static uint32_t GoodSeed(uint32_t s) { return (s & M) != 0 ? (s & M) : 1; } + + public: + // This is the largest value that can be returned from Next() + enum : uint32_t { kMaxNext = M }; + + explicit Random(uint32_t s) : seed_(GoodSeed(s)) {} + + void Reset(uint32_t s) { seed_ = GoodSeed(s); } + + uint32_t Next() { + // We are computing + // seed_ = (seed_ * A) % M, where M = 2^31-1 + // + // seed_ must not be zero or M, or else all subsequent computed values + // will be zero or M respectively. For all other values, seed_ will end + // up cycling through every number in [1,M-1] + uint64_t product = seed_ * A; + + // Compute (product % M) using the fact that ((x << 31) % M) == x. + seed_ = static_cast((product >> 31) + (product & M)); + // The first reduction may overflow by 1 bit, so we may need to + // repeat. mod == M is not possible; using > allows the faster + // sign-bit-based test. + if (seed_ > M) { + seed_ -= M; + } + return seed_; + } + + uint64_t Next64() { return (uint64_t{Next()} << 32) | Next(); } + + // Returns a uniformly distributed value in the range [0..n-1] + // REQUIRES: n > 0 + uint32_t Uniform(int n) { return Next() % n; } + + // Randomly returns true ~"1/n" of the time, and false otherwise. + // REQUIRES: n > 0 + bool OneIn(int n) { return Uniform(n) == 0; } + + // "Optional" one-in-n, where 0 or negative always returns false + // (may or may not consume a random value) + bool OneInOpt(int n) { return n > 0 && OneIn(n); } + + // Returns random bool that is true for the given percentage of + // calls on average. Zero or less is always false and 100 or more + // is always true (may or may not consume a random value) + bool PercentTrue(int percentage) { + return static_cast(Uniform(100)) < percentage; + } + + // Skewed: pick "base" uniformly from range [0,max_log] and then + // return "base" random bits. The effect is to pick a number in the + // range [0,2^max_log-1] with exponential bias towards smaller numbers. 
+ uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); } + + // Returns a random string of length "len" + std::string RandomString(int len); + + // Generates a random string of len bytes using human-readable characters + std::string HumanReadableString(int len); + + // Generates a random binary data + std::string RandomBinaryString(int len); + + // Returns a Random instance for use by the current thread without + // additional locking + static Random* GetTLSInstance(); +}; + +// A good 32-bit random number generator based on std::mt19937. +// This exists in part to avoid compiler variance in warning about coercing +// uint_fast32_t from mt19937 to uint32_t. +class Random32 { + private: + std::mt19937 generator_; + + public: + explicit Random32(uint32_t s) : generator_(s) {} + + // Generates the next random number + uint32_t Next() { return static_cast(generator_()); } + + // Returns a uniformly distributed value in the range [0..n-1] + // REQUIRES: n > 0 + uint32_t Uniform(uint32_t n) { + return static_cast( + std::uniform_int_distribution( + 0, n - 1)(generator_)); + } + + // Returns an *almost* uniformly distributed value in the range [0..n-1]. + // Much faster than Uniform(). + // REQUIRES: n > 0 + uint32_t Uniformish(uint32_t n) { + // fastrange (without the header) + return static_cast((uint64_t(generator_()) * uint64_t(n)) >> 32); + } + + // Randomly returns true ~"1/n" of the time, and false otherwise. + // REQUIRES: n > 0 + bool OneIn(uint32_t n) { return Uniform(n) == 0; } + + // Skewed: pick "base" uniformly from range [0,max_log] and then + // return "base" random bits. The effect is to pick a number in the + // range [0,2^max_log-1] with exponential bias towards smaller numbers. + uint32_t Skewed(int max_log) { + return Uniform(uint32_t{1} << Uniform(max_log + 1)); + } + + // Reset the seed of the generator to the given value + void Seed(uint32_t new_seed) { generator_.seed(new_seed); } +}; + +// A good 64-bit random number generator based on std::mt19937_64 +class Random64 { + private: + std::mt19937_64 generator_; + + public: + explicit Random64(uint64_t s) : generator_(s) {} + + // Generates the next random number + uint64_t Next() { return generator_(); } + + // Returns a uniformly distributed value in the range [0..n-1] + // REQUIRES: n > 0 + uint64_t Uniform(uint64_t n) { + return std::uniform_int_distribution(0, n - 1)(generator_); + } + + // Randomly returns true ~"1/n" of the time, and false otherwise. + // REQUIRES: n > 0 + bool OneIn(uint64_t n) { return Uniform(n) == 0; } + + // Skewed: pick "base" uniformly from range [0,max_log] and then + // return "base" random bits. The effect is to pick a number in the + // range [0,2^max_log-1] with exponential bias towards smaller numbers. + uint64_t Skewed(int max_log) { + return Uniform(uint64_t(1) << Uniform(max_log + 1)); + } +}; + +// A seeded replacement for removed std::random_shuffle +template +void RandomShuffle(RandomIt first, RandomIt last, uint32_t seed) { + std::mt19937 rng(seed); + std::shuffle(first, last, rng); +} + +// A replacement for removed std::random_shuffle +template +void RandomShuffle(RandomIt first, RandomIt last) { + RandomShuffle(first, last, std::random_device{}()); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/random_test.cc b/src/rocksdb/util/random_test.cc new file mode 100644 index 000000000..1aa62c5da --- /dev/null +++ b/src/rocksdb/util/random_test.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
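A short caller-side sketch of the generators above (seeds and ranges are arbitrary):

#include <cstdint>
#include <string>

#include "util/random.h"

void Example() {
  ROCKSDB_NAMESPACE::Random rnd(301);
  uint32_t die = rnd.Uniform(6);               // uniform in [0..5]
  bool rare = rnd.OneIn(1000);                 // true ~0.1% of the time
  uint32_t skewed = rnd.Skewed(10);            // < 2^10, biased toward small values
  std::string name = rnd.HumanReadableString(8);

  // Random64 covers full 64-bit ranges.
  ROCKSDB_NAMESPACE::Random64 rnd64(42);
  uint64_t big = rnd64.Uniform(uint64_t{1} << 40);

  (void)die; (void)rare; (void)skewed; (void)name; (void)big;
}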
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/random.h" + +#include +#include + +#include "test_util/testharness.h" + +using ROCKSDB_NAMESPACE::Random; + +TEST(RandomTest, Uniform) { + const int average = 20; + for (uint32_t seed : {0, 1, 2, 37, 4096}) { + Random r(seed); + for (int range : {1, 2, 8, 12, 100}) { + std::vector counts(range, 0); + + for (int i = 0; i < range * average; ++i) { + ++counts.at(r.Uniform(range)); + } + int max_variance = static_cast(std::sqrt(range) * 2 + 4); + for (int i = 0; i < range; ++i) { + EXPECT_GE(counts[i], std::max(1, average - max_variance)); + EXPECT_LE(counts[i], average + max_variance + 1); + } + } + } +} + +TEST(RandomTest, OneIn) { + Random r(42); + for (int range : {1, 2, 8, 12, 100, 1234}) { + const int average = 100; + int count = 0; + for (int i = 0; i < average * range; ++i) { + if (r.OneIn(range)) { + ++count; + } + } + if (range == 1) { + EXPECT_EQ(count, average); + } else { + int max_variance = static_cast(std::sqrt(average) * 1.5); + EXPECT_GE(count, average - max_variance); + EXPECT_LE(count, average + max_variance); + } + } +} + +TEST(RandomTest, OneInOpt) { + Random r(42); + for (int range : {-12, 0, 1, 2, 8, 12, 100, 1234}) { + const int average = 100; + int count = 0; + for (int i = 0; i < average * range; ++i) { + if (r.OneInOpt(range)) { + ++count; + } + } + if (range < 1) { + EXPECT_EQ(count, 0); + } else if (range == 1) { + EXPECT_EQ(count, average); + } else { + int max_variance = static_cast(std::sqrt(average) * 1.5); + EXPECT_GE(count, average - max_variance); + EXPECT_LE(count, average + max_variance); + } + } +} + +TEST(RandomTest, PercentTrue) { + Random r(42); + for (int pct : {-12, 0, 1, 2, 10, 50, 90, 98, 99, 100, 1234}) { + const int samples = 10000; + + int count = 0; + for (int i = 0; i < samples; ++i) { + if (r.PercentTrue(pct)) { + ++count; + } + } + if (pct <= 0) { + EXPECT_EQ(count, 0); + } else if (pct >= 100) { + EXPECT_EQ(count, samples); + } else { + int est = (count * 100 + (samples / 2)) / samples; + EXPECT_EQ(est, pct); + } + } +} + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/rate_limiter.cc b/src/rocksdb/util/rate_limiter.cc new file mode 100644 index 000000000..6bbcabfae --- /dev/null +++ b/src/rocksdb/util/rate_limiter.cc @@ -0,0 +1,378 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
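A note on the tolerances used in the random tests above: for OneIn(range) sampled average * range times, the hit count is Binomial(N = average * range, p = 1/range), with mean average and standard deviation roughly sqrt(average), so the +-1.5 * sqrt(average) window is about a 1.5-sigma band, loose enough to pass with the fixed seeds used while still catching gross bias.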
+ +#include "util/rate_limiter.h" + +#include + +#include "monitoring/statistics.h" +#include "port/port.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "util/aligned_buffer.h" + +namespace ROCKSDB_NAMESPACE { +size_t RateLimiter::RequestToken(size_t bytes, size_t alignment, + Env::IOPriority io_priority, Statistics* stats, + RateLimiter::OpType op_type) { + if (io_priority < Env::IO_TOTAL && IsRateLimited(op_type)) { + bytes = std::min(bytes, static_cast(GetSingleBurstBytes())); + + if (alignment > 0) { + // Here we may actually require more than burst and block + // as we can not write/read less than one page at a time on direct I/O + // thus we do not want to be strictly constrained by burst + bytes = std::max(alignment, TruncateToPageBoundary(alignment, bytes)); + } + Request(bytes, io_priority, stats, op_type); + } + return bytes; +} + +// Pending request +struct GenericRateLimiter::Req { + explicit Req(int64_t _bytes, port::Mutex* _mu) + : request_bytes(_bytes), bytes(_bytes), cv(_mu), granted(false) {} + int64_t request_bytes; + int64_t bytes; + port::CondVar cv; + bool granted; +}; + +GenericRateLimiter::GenericRateLimiter( + int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness, + RateLimiter::Mode mode, const std::shared_ptr& clock, + bool auto_tuned) + : RateLimiter(mode), + refill_period_us_(refill_period_us), + rate_bytes_per_sec_(auto_tuned ? rate_bytes_per_sec / 2 + : rate_bytes_per_sec), + refill_bytes_per_period_( + CalculateRefillBytesPerPeriodLocked(rate_bytes_per_sec_)), + clock_(clock), + stop_(false), + exit_cv_(&request_mutex_), + requests_to_wait_(0), + available_bytes_(0), + next_refill_us_(NowMicrosMonotonicLocked()), + fairness_(fairness > 100 ? 100 : fairness), + rnd_((uint32_t)time(nullptr)), + wait_until_refill_pending_(false), + auto_tuned_(auto_tuned), + num_drains_(0), + max_bytes_per_sec_(rate_bytes_per_sec), + tuned_time_(NowMicrosMonotonicLocked()) { + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + total_requests_[i] = 0; + total_bytes_through_[i] = 0; + } +} + +GenericRateLimiter::~GenericRateLimiter() { + MutexLock g(&request_mutex_); + stop_ = true; + std::deque::size_type queues_size_sum = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + queues_size_sum += queue_[i].size(); + } + requests_to_wait_ = static_cast(queues_size_sum); + + for (int i = Env::IO_TOTAL - 1; i >= Env::IO_LOW; --i) { + std::deque queue = queue_[i]; + for (auto& r : queue) { + r->cv.Signal(); + } + } + + while (requests_to_wait_ > 0) { + exit_cv_.Wait(); + } +} + +// This API allows user to dynamically change rate limiter's bytes per second. 
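A worked check of the alignment clamp in RequestToken above (a sketch; it assumes 4 KiB alignment and that TruncateToPageBoundary(align, n), from util/aligned_buffer.h, rounds n down to a multiple of align):

#include <algorithm>
#include <cassert>
#include <cstddef>

#include "util/aligned_buffer.h"

int main() {
  using ROCKSDB_NAMESPACE::TruncateToPageBoundary;
  const size_t kAlign = 4096;
  // A 10000-byte, burst-limited request is trimmed down to two whole pages.
  assert(std::max(kAlign, TruncateToPageBoundary(kAlign, 10000)) == 8192);
  // A sub-page request is bumped up to one full page, which is why the result
  // may exceed GetSingleBurstBytes() under direct I/O.
  assert(std::max(kAlign, TruncateToPageBoundary(kAlign, 1000)) == 4096);
  return 0;
}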
+void GenericRateLimiter::SetBytesPerSecond(int64_t bytes_per_second) { + MutexLock g(&request_mutex_); + SetBytesPerSecondLocked(bytes_per_second); +} + +void GenericRateLimiter::SetBytesPerSecondLocked(int64_t bytes_per_second) { + assert(bytes_per_second > 0); + rate_bytes_per_sec_.store(bytes_per_second, std::memory_order_relaxed); + refill_bytes_per_period_.store( + CalculateRefillBytesPerPeriodLocked(bytes_per_second), + std::memory_order_relaxed); +} + +void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri, + Statistics* stats) { + assert(bytes <= refill_bytes_per_period_.load(std::memory_order_relaxed)); + bytes = std::max(static_cast(0), bytes); + TEST_SYNC_POINT("GenericRateLimiter::Request"); + TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:1", + &rate_bytes_per_sec_); + MutexLock g(&request_mutex_); + + if (auto_tuned_) { + static const int kRefillsPerTune = 100; + std::chrono::microseconds now(NowMicrosMonotonicLocked()); + if (now - tuned_time_ >= + kRefillsPerTune * std::chrono::microseconds(refill_period_us_)) { + Status s = TuneLocked(); + s.PermitUncheckedError(); //**TODO: What to do on error? + } + } + + if (stop_) { + // It is now in the clean-up of ~GenericRateLimiter(). + // Therefore any new incoming request will exit from here + // and not get satiesfied. + return; + } + + ++total_requests_[pri]; + + if (available_bytes_ >= bytes) { + // Refill thread assigns quota and notifies requests waiting on + // the queue under mutex. So if we get here, that means nobody + // is waiting? + available_bytes_ -= bytes; + total_bytes_through_[pri] += bytes; + return; + } + + // Request cannot be satisfied at this moment, enqueue + Req r(bytes, &request_mutex_); + queue_[pri].push_back(&r); + TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:PostEnqueueRequest", + &request_mutex_); + // A thread representing a queued request coordinates with other such threads. + // There are two main duties. + // + // (1) Waiting for the next refill time. + // (2) Refilling the bytes and granting requests. + do { + int64_t time_until_refill_us = next_refill_us_ - NowMicrosMonotonicLocked(); + if (time_until_refill_us > 0) { + if (wait_until_refill_pending_) { + // Somebody is performing (1). Trust we'll be woken up when our request + // is granted or we are needed for future duties. + r.cv.Wait(); + } else { + // Whichever thread reaches here first performs duty (1) as described + // above. + int64_t wait_until = clock_->NowMicros() + time_until_refill_us; + RecordTick(stats, NUMBER_RATE_LIMITER_DRAINS); + ++num_drains_; + wait_until_refill_pending_ = true; + r.cv.TimedWait(wait_until); + TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:PostTimedWait", + &time_until_refill_us); + wait_until_refill_pending_ = false; + } + } else { + // Whichever thread reaches here first performs duty (2) as described + // above. + RefillBytesAndGrantRequestsLocked(); + if (r.granted) { + // If there is any remaining requests, make sure there exists at least + // one candidate is awake for future duties by signaling a front request + // of a queue. + for (int i = Env::IO_TOTAL - 1; i >= Env::IO_LOW; --i) { + std::deque queue = queue_[i]; + if (!queue.empty()) { + queue.front()->cv.Signal(); + break; + } + } + } + } + // Invariant: non-granted request is always in one queue, and granted + // request is always in zero queues. 
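For context, a caller-side sketch of the public API whose internals are implemented above (rates and sizes are arbitrary; it uses only the factory and methods declared in rocksdb/rate_limiter.h, in the same way the tests further below do):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>

#include "rocksdb/env.h"
#include "rocksdb/rate_limiter.h"

void ThrottledWrite(const char* /*data*/, size_t len) {
  using ROCKSDB_NAMESPACE::Env;
  using ROCKSDB_NAMESPACE::RateLimiter;
  std::unique_ptr<RateLimiter> limiter(
      ROCKSDB_NAMESPACE::NewGenericRateLimiter(1 << 20 /* 1 MiB/s */));
  size_t written = 0;
  while (written < len) {
    // Never request more than one burst at a time; Request() blocks until the
    // bytes have been granted by the refill logic above.
    size_t chunk = std::min<size_t>(
        len - written, static_cast<size_t>(limiter->GetSingleBurstBytes()));
    limiter->Request(static_cast<int64_t>(chunk), Env::IO_HIGH,
                     nullptr /* stats */, RateLimiter::OpType::kWrite);
    // ... issue the actual chunk-sized write here ...
    written += chunk;
  }
}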
+#ifndef NDEBUG + int num_found = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + if (std::find(queue_[i].begin(), queue_[i].end(), &r) != + queue_[i].end()) { + ++num_found; + } + } + if (r.granted) { + assert(num_found == 0); + } else { + assert(num_found == 1); + } +#endif // NDEBUG + } while (!stop_ && !r.granted); + + if (stop_) { + // It is now in the clean-up of ~GenericRateLimiter(). + // Therefore any woken-up request will have come out of the loop and then + // exit here. It might or might not have been satisfied. + --requests_to_wait_; + exit_cv_.Signal(); + } +} + +std::vector +GenericRateLimiter::GeneratePriorityIterationOrderLocked() { + std::vector pri_iteration_order(Env::IO_TOTAL /* 4 */); + // We make Env::IO_USER a superior priority by always iterating its queue + // first + pri_iteration_order[0] = Env::IO_USER; + + bool high_pri_iterated_after_mid_low_pri = rnd_.OneIn(fairness_); + TEST_SYNC_POINT_CALLBACK( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PostRandomOneInFairnessForHighPri", + &high_pri_iterated_after_mid_low_pri); + bool mid_pri_itereated_after_low_pri = rnd_.OneIn(fairness_); + TEST_SYNC_POINT_CALLBACK( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PostRandomOneInFairnessForMidPri", + &mid_pri_itereated_after_low_pri); + + if (high_pri_iterated_after_mid_low_pri) { + pri_iteration_order[3] = Env::IO_HIGH; + pri_iteration_order[2] = + mid_pri_itereated_after_low_pri ? Env::IO_MID : Env::IO_LOW; + pri_iteration_order[1] = + (pri_iteration_order[2] == Env::IO_MID) ? Env::IO_LOW : Env::IO_MID; + } else { + pri_iteration_order[1] = Env::IO_HIGH; + pri_iteration_order[3] = + mid_pri_itereated_after_low_pri ? Env::IO_MID : Env::IO_LOW; + pri_iteration_order[2] = + (pri_iteration_order[3] == Env::IO_MID) ? 
Env::IO_LOW : Env::IO_MID; + } + + TEST_SYNC_POINT_CALLBACK( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PreReturnPriIterationOrder", + &pri_iteration_order); + return pri_iteration_order; +} + +void GenericRateLimiter::RefillBytesAndGrantRequestsLocked() { + TEST_SYNC_POINT_CALLBACK( + "GenericRateLimiter::RefillBytesAndGrantRequestsLocked", &request_mutex_); + next_refill_us_ = NowMicrosMonotonicLocked() + refill_period_us_; + // Carry over the left over quota from the last period + auto refill_bytes_per_period = + refill_bytes_per_period_.load(std::memory_order_relaxed); + if (available_bytes_ < refill_bytes_per_period) { + available_bytes_ += refill_bytes_per_period; + } + + std::vector pri_iteration_order = + GeneratePriorityIterationOrderLocked(); + + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + assert(!pri_iteration_order.empty()); + Env::IOPriority current_pri = pri_iteration_order[i]; + auto* queue = &queue_[current_pri]; + while (!queue->empty()) { + auto* next_req = queue->front(); + if (available_bytes_ < next_req->request_bytes) { + // Grant partial request_bytes to avoid starvation of requests + // that become asking for more bytes than available_bytes_ + // due to dynamically reduced rate limiter's bytes_per_second that + // leads to reduced refill_bytes_per_period hence available_bytes_ + next_req->request_bytes -= available_bytes_; + available_bytes_ = 0; + break; + } + available_bytes_ -= next_req->request_bytes; + next_req->request_bytes = 0; + total_bytes_through_[current_pri] += next_req->bytes; + queue->pop_front(); + + next_req->granted = true; + // Quota granted, signal the thread to exit + next_req->cv.Signal(); + } + } +} + +int64_t GenericRateLimiter::CalculateRefillBytesPerPeriodLocked( + int64_t rate_bytes_per_sec) { + if (std::numeric_limits::max() / rate_bytes_per_sec < + refill_period_us_) { + // Avoid unexpected result in the overflow case. The result now is still + // inaccurate but is a number that is large enough. + return std::numeric_limits::max() / 1000000; + } else { + return rate_bytes_per_sec * refill_period_us_ / 1000000; + } +} + +Status GenericRateLimiter::TuneLocked() { + const int kLowWatermarkPct = 50; + const int kHighWatermarkPct = 90; + const int kAdjustFactorPct = 5; + // computed rate limit will be in + // `[max_bytes_per_sec_ / kAllowedRangeFactor, max_bytes_per_sec_]`. + const int kAllowedRangeFactor = 20; + + std::chrono::microseconds prev_tuned_time = tuned_time_; + tuned_time_ = std::chrono::microseconds(NowMicrosMonotonicLocked()); + + int64_t elapsed_intervals = (tuned_time_ - prev_tuned_time + + std::chrono::microseconds(refill_period_us_) - + std::chrono::microseconds(1)) / + std::chrono::microseconds(refill_period_us_); + // We tune every kRefillsPerTune intervals, so the overflow and division-by- + // zero conditions should never happen. 
+ assert(num_drains_ <= std::numeric_limits::max() / 100); + assert(elapsed_intervals > 0); + int64_t drained_pct = num_drains_ * 100 / elapsed_intervals; + + int64_t prev_bytes_per_sec = GetBytesPerSecond(); + int64_t new_bytes_per_sec; + if (drained_pct == 0) { + new_bytes_per_sec = max_bytes_per_sec_ / kAllowedRangeFactor; + } else if (drained_pct < kLowWatermarkPct) { + // sanitize to prevent overflow + int64_t sanitized_prev_bytes_per_sec = + std::min(prev_bytes_per_sec, std::numeric_limits::max() / 100); + new_bytes_per_sec = + std::max(max_bytes_per_sec_ / kAllowedRangeFactor, + sanitized_prev_bytes_per_sec * 100 / (100 + kAdjustFactorPct)); + } else if (drained_pct > kHighWatermarkPct) { + // sanitize to prevent overflow + int64_t sanitized_prev_bytes_per_sec = + std::min(prev_bytes_per_sec, std::numeric_limits::max() / + (100 + kAdjustFactorPct)); + new_bytes_per_sec = + std::min(max_bytes_per_sec_, + sanitized_prev_bytes_per_sec * (100 + kAdjustFactorPct) / 100); + } else { + new_bytes_per_sec = prev_bytes_per_sec; + } + if (new_bytes_per_sec != prev_bytes_per_sec) { + SetBytesPerSecondLocked(new_bytes_per_sec); + } + num_drains_ = 0; + return Status::OK(); +} + +RateLimiter* NewGenericRateLimiter( + int64_t rate_bytes_per_sec, int64_t refill_period_us /* = 100 * 1000 */, + int32_t fairness /* = 10 */, + RateLimiter::Mode mode /* = RateLimiter::Mode::kWritesOnly */, + bool auto_tuned /* = false */) { + assert(rate_bytes_per_sec > 0); + assert(refill_period_us > 0); + assert(fairness > 0); + std::unique_ptr limiter( + new GenericRateLimiter(rate_bytes_per_sec, refill_period_us, fairness, + mode, SystemClock::Default(), auto_tuned)); + return limiter.release(); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/rate_limiter.h b/src/rocksdb/util/rate_limiter.h new file mode 100644 index 000000000..4c078f5a0 --- /dev/null +++ b/src/rocksdb/util/rate_limiter.h @@ -0,0 +1,146 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include +#include +#include +#include + +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/status.h" +#include "rocksdb/system_clock.h" +#include "util/mutexlock.h" +#include "util/random.h" + +namespace ROCKSDB_NAMESPACE { + +class GenericRateLimiter : public RateLimiter { + public: + GenericRateLimiter(int64_t refill_bytes, int64_t refill_period_us, + int32_t fairness, RateLimiter::Mode mode, + const std::shared_ptr& clock, + bool auto_tuned); + + virtual ~GenericRateLimiter(); + + // This API allows user to dynamically change rate limiter's bytes per second. + virtual void SetBytesPerSecond(int64_t bytes_per_second) override; + + // Request for token to write bytes. If this request can not be satisfied, + // the call is blocked. Caller is responsible to make sure + // bytes <= GetSingleBurstBytes() and bytes >= 0. Negative bytes + // passed in will be rounded up to 0. 
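To make the tuning rule above concrete, a worked example with max_bytes_per_sec_ = 100 MB/s, a previous rate of 40 MB/s, and the constants in TuneLocked (kAllowedRangeFactor = 20, kAdjustFactorPct = 5, watermarks 50/90):

  drained_pct == 0          -> new rate = 100 / 20                  = 5 MB/s
  drained_pct == 30 (< 50)  -> new rate = max(5, 40 * 100 / 105)    ~= 38.1 MB/s
  50 <= drained_pct <= 90   -> new rate = 40 MB/s (unchanged)
  drained_pct == 95 (> 90)  -> new rate = min(100, 40 * 105 / 100)  = 42 MB/s

The numbers are illustrative only; the ratios are the same at any scale.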
+ using RateLimiter::Request; + virtual void Request(const int64_t bytes, const Env::IOPriority pri, + Statistics* stats) override; + + virtual int64_t GetSingleBurstBytes() const override { + return refill_bytes_per_period_.load(std::memory_order_relaxed); + } + + virtual int64_t GetTotalBytesThrough( + const Env::IOPriority pri = Env::IO_TOTAL) const override { + MutexLock g(&request_mutex_); + if (pri == Env::IO_TOTAL) { + int64_t total_bytes_through_sum = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + total_bytes_through_sum += total_bytes_through_[i]; + } + return total_bytes_through_sum; + } + return total_bytes_through_[pri]; + } + + virtual int64_t GetTotalRequests( + const Env::IOPriority pri = Env::IO_TOTAL) const override { + MutexLock g(&request_mutex_); + if (pri == Env::IO_TOTAL) { + int64_t total_requests_sum = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + total_requests_sum += total_requests_[i]; + } + return total_requests_sum; + } + return total_requests_[pri]; + } + + virtual Status GetTotalPendingRequests( + int64_t* total_pending_requests, + const Env::IOPriority pri = Env::IO_TOTAL) const override { + assert(total_pending_requests != nullptr); + MutexLock g(&request_mutex_); + if (pri == Env::IO_TOTAL) { + int64_t total_pending_requests_sum = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + total_pending_requests_sum += static_cast(queue_[i].size()); + } + *total_pending_requests = total_pending_requests_sum; + } else { + *total_pending_requests = static_cast(queue_[pri].size()); + } + return Status::OK(); + } + + virtual int64_t GetBytesPerSecond() const override { + return rate_bytes_per_sec_.load(std::memory_order_relaxed); + } + + virtual void TEST_SetClock(std::shared_ptr clock) { + MutexLock g(&request_mutex_); + clock_ = std::move(clock); + next_refill_us_ = NowMicrosMonotonicLocked(); + } + + private: + void RefillBytesAndGrantRequestsLocked(); + std::vector GeneratePriorityIterationOrderLocked(); + int64_t CalculateRefillBytesPerPeriodLocked(int64_t rate_bytes_per_sec); + Status TuneLocked(); + void SetBytesPerSecondLocked(int64_t bytes_per_second); + + uint64_t NowMicrosMonotonicLocked() { + return clock_->NowNanos() / std::milli::den; + } + + // This mutex guard all internal states + mutable port::Mutex request_mutex_; + + const int64_t refill_period_us_; + + std::atomic rate_bytes_per_sec_; + std::atomic refill_bytes_per_period_; + std::shared_ptr clock_; + + bool stop_; + port::CondVar exit_cv_; + int32_t requests_to_wait_; + + int64_t total_requests_[Env::IO_TOTAL]; + int64_t total_bytes_through_[Env::IO_TOTAL]; + int64_t available_bytes_; + int64_t next_refill_us_; + + int32_t fairness_; + Random rnd_; + + struct Req; + std::deque queue_[Env::IO_TOTAL]; + bool wait_until_refill_pending_; + + bool auto_tuned_; + int64_t num_drains_; + const int64_t max_bytes_per_sec_; + std::chrono::microseconds tuned_time_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/rate_limiter_test.cc b/src/rocksdb/util/rate_limiter_test.cc new file mode 100644 index 000000000..cda134867 --- /dev/null +++ b/src/rocksdb/util/rate_limiter_test.cc @@ -0,0 +1,476 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/rate_limiter.h" + +#include +#include +#include +#include + +#include "db/db_test_util.h" +#include "port/port.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "util/random.h" + +namespace ROCKSDB_NAMESPACE { + +// TODO(yhchiang): the rate will not be accurate when we run test in parallel. +class RateLimiterTest : public testing::Test { + protected: + ~RateLimiterTest() override { + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + } +}; + +TEST_F(RateLimiterTest, OverflowRate) { + GenericRateLimiter limiter(std::numeric_limits::max(), 1000, 10, + RateLimiter::Mode::kWritesOnly, + SystemClock::Default(), false /* auto_tuned */); + ASSERT_GT(limiter.GetSingleBurstBytes(), 1000000000ll); +} + +TEST_F(RateLimiterTest, StartStop) { + std::unique_ptr limiter(NewGenericRateLimiter(100, 100, 10)); +} + +TEST_F(RateLimiterTest, GetTotalBytesThrough) { + std::unique_ptr limiter(NewGenericRateLimiter( + 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, + 10 /* fairness */)); + for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { + ASSERT_EQ(limiter->GetTotalBytesThrough(static_cast(i)), + 0); + } + + std::int64_t request_byte = 200; + std::int64_t request_byte_sum = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + limiter->Request(request_byte, static_cast(i), + nullptr /* stats */, RateLimiter::OpType::kWrite); + request_byte_sum += request_byte; + } + + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + EXPECT_EQ(limiter->GetTotalBytesThrough(static_cast(i)), + request_byte) + << "Failed to track total_bytes_through_ correctly when IOPriority = " + << static_cast(i); + } + EXPECT_EQ(limiter->GetTotalBytesThrough(Env::IO_TOTAL), request_byte_sum) + << "Failed to track total_bytes_through_ correctly when IOPriority = " + "Env::IO_TOTAL"; +} + +TEST_F(RateLimiterTest, GetTotalRequests) { + std::unique_ptr limiter(NewGenericRateLimiter( + 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, + 10 /* fairness */)); + for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { + ASSERT_EQ(limiter->GetTotalRequests(static_cast(i)), 0); + } + + std::int64_t total_requests_sum = 0; + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + limiter->Request(200, static_cast(i), nullptr /* stats */, + RateLimiter::OpType::kWrite); + total_requests_sum += 1; + } + + for (int i = Env::IO_LOW; i < Env::IO_TOTAL; ++i) { + EXPECT_EQ(limiter->GetTotalRequests(static_cast(i)), 1) + << "Failed to track total_requests_ correctly when IOPriority = " + << static_cast(i); + } + EXPECT_EQ(limiter->GetTotalRequests(Env::IO_TOTAL), total_requests_sum) + << "Failed to track total_requests_ correctly when IOPriority = " + "Env::IO_TOTAL"; +} + +TEST_F(RateLimiterTest, GetTotalPendingRequests) { + std::unique_ptr limiter(NewGenericRateLimiter( + 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, + 10 /* fairness */)); + int64_t total_pending_requests = 0; + for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { + ASSERT_OK(limiter->GetTotalPendingRequests( + &total_pending_requests, static_cast(i))); + ASSERT_EQ(total_pending_requests, 0); + } + // This is a variable for making sure the following callback is called + // and the assertions in it are indeed excuted + bool 
nonzero_pending_requests_verified = false; + SyncPoint::GetInstance()->SetCallBack( + "GenericRateLimiter::Request:PostEnqueueRequest", [&](void* arg) { + port::Mutex* request_mutex = (port::Mutex*)arg; + // We temporarily unlock the mutex so that the following + // GetTotalPendingRequests() can acquire it + request_mutex->Unlock(); + for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { + EXPECT_OK(limiter->GetTotalPendingRequests( + &total_pending_requests, static_cast(i))) + << "Failed to return total pending requests for priority level = " + << static_cast(i); + if (i == Env::IO_USER || i == Env::IO_TOTAL) { + EXPECT_EQ(total_pending_requests, 1) + << "Failed to correctly return total pending requests for " + "priority level = " + << static_cast(i); + } else { + EXPECT_EQ(total_pending_requests, 0) + << "Failed to correctly return total pending requests for " + "priority level = " + << static_cast(i); + } + } + // We lock the mutex again so that the request thread can resume running + // with the mutex locked + request_mutex->Lock(); + nonzero_pending_requests_verified = true; + }); + + SyncPoint::GetInstance()->EnableProcessing(); + limiter->Request(200, Env::IO_USER, nullptr /* stats */, + RateLimiter::OpType::kWrite); + ASSERT_EQ(nonzero_pending_requests_verified, true); + for (int i = Env::IO_LOW; i <= Env::IO_TOTAL; ++i) { + EXPECT_OK(limiter->GetTotalPendingRequests(&total_pending_requests, + static_cast(i))) + << "Failed to return total pending requests for priority level = " + << static_cast(i); + EXPECT_EQ(total_pending_requests, 0) + << "Failed to correctly return total pending requests for priority " + "level = " + << static_cast(i); + } + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearCallBack( + "GenericRateLimiter::Request:PostEnqueueRequest"); +} + +TEST_F(RateLimiterTest, Modes) { + for (auto mode : {RateLimiter::Mode::kWritesOnly, + RateLimiter::Mode::kReadsOnly, RateLimiter::Mode::kAllIo}) { + GenericRateLimiter limiter(2000 /* rate_bytes_per_sec */, + 1000 * 1000 /* refill_period_us */, + 10 /* fairness */, mode, SystemClock::Default(), + false /* auto_tuned */); + limiter.Request(1000 /* bytes */, Env::IO_HIGH, nullptr /* stats */, + RateLimiter::OpType::kRead); + if (mode == RateLimiter::Mode::kWritesOnly) { + ASSERT_EQ(0, limiter.GetTotalBytesThrough(Env::IO_HIGH)); + } else { + ASSERT_EQ(1000, limiter.GetTotalBytesThrough(Env::IO_HIGH)); + } + + limiter.Request(1000 /* bytes */, Env::IO_HIGH, nullptr /* stats */, + RateLimiter::OpType::kWrite); + if (mode == RateLimiter::Mode::kAllIo) { + ASSERT_EQ(2000, limiter.GetTotalBytesThrough(Env::IO_HIGH)); + } else { + ASSERT_EQ(1000, limiter.GetTotalBytesThrough(Env::IO_HIGH)); + } + } +} + +TEST_F(RateLimiterTest, GeneratePriorityIterationOrder) { + std::unique_ptr limiter(NewGenericRateLimiter( + 200 /* rate_bytes_per_sec */, 1000 * 1000 /* refill_period_us */, + 10 /* fairness */)); + + bool possible_random_one_in_fairness_results_for_high_mid_pri[4][2] = { + {false, false}, {false, true}, {true, false}, {true, true}}; + std::vector possible_priority_iteration_orders[4] = { + {Env::IO_USER, Env::IO_HIGH, Env::IO_MID, Env::IO_LOW}, + {Env::IO_USER, Env::IO_HIGH, Env::IO_LOW, Env::IO_MID}, + {Env::IO_USER, Env::IO_MID, Env::IO_LOW, Env::IO_HIGH}, + {Env::IO_USER, Env::IO_LOW, Env::IO_MID, Env::IO_HIGH}}; + + for (int i = 0; i < 4; ++i) { + // These are variables for making sure the following callbacks are called + // and the assertion in the last callback is indeed excuted + bool 
high_pri_iterated_after_mid_low_pri_set = false; + bool mid_pri_itereated_after_low_pri_set = false; + bool pri_iteration_order_verified = false; + SyncPoint::GetInstance()->SetCallBack( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PostRandomOneInFairnessForHighPri", + [&](void* arg) { + bool* high_pri_iterated_after_mid_low_pri = (bool*)arg; + *high_pri_iterated_after_mid_low_pri = + possible_random_one_in_fairness_results_for_high_mid_pri[i][0]; + high_pri_iterated_after_mid_low_pri_set = true; + }); + + SyncPoint::GetInstance()->SetCallBack( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PostRandomOneInFairnessForMidPri", + [&](void* arg) { + bool* mid_pri_itereated_after_low_pri = (bool*)arg; + *mid_pri_itereated_after_low_pri = + possible_random_one_in_fairness_results_for_high_mid_pri[i][1]; + mid_pri_itereated_after_low_pri_set = true; + }); + + SyncPoint::GetInstance()->SetCallBack( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PreReturnPriIterationOrder", + [&](void* arg) { + std::vector* pri_iteration_order = + (std::vector*)arg; + EXPECT_EQ(*pri_iteration_order, possible_priority_iteration_orders[i]) + << "Failed to generate priority iteration order correctly when " + "high_pri_iterated_after_mid_low_pri = " + << possible_random_one_in_fairness_results_for_high_mid_pri[i][0] + << ", mid_pri_itereated_after_low_pri = " + << possible_random_one_in_fairness_results_for_high_mid_pri[i][1] + << std::endl; + pri_iteration_order_verified = true; + }); + + SyncPoint::GetInstance()->EnableProcessing(); + limiter->Request(200 /* request max bytes to drain so that refill and order + generation will be triggered every time + GenericRateLimiter::Request() is called */ + , + Env::IO_USER, nullptr /* stats */, + RateLimiter::OpType::kWrite); + ASSERT_EQ(high_pri_iterated_after_mid_low_pri_set, true); + ASSERT_EQ(mid_pri_itereated_after_low_pri_set, true); + ASSERT_EQ(pri_iteration_order_verified, true); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearCallBack( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PreReturnPriIterationOrder"); + SyncPoint::GetInstance()->ClearCallBack( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PostRandomOneInFairnessForMidPri"); + SyncPoint::GetInstance()->ClearCallBack( + "GenericRateLimiter::GeneratePriorityIterationOrderLocked::" + "PostRandomOneInFairnessForHighPri"); + } +} + +TEST_F(RateLimiterTest, Rate) { + auto* env = Env::Default(); + struct Arg { + Arg(int32_t _target_rate, int _burst) + : limiter(NewGenericRateLimiter(_target_rate /* rate_bytes_per_sec */, + 100 * 1000 /* refill_period_us */, + 10 /* fairness */)), + request_size(_target_rate / + 10 /* refill period here is 1/10 second */), + burst(_burst) {} + std::unique_ptr limiter; + int32_t request_size; + int burst; + }; + + auto writer = [](void* p) { + const auto& thread_clock = SystemClock::Default(); + auto* arg = static_cast(p); + // Test for 2 seconds + auto until = thread_clock->NowMicros() + 2 * 1000000; + Random r((uint32_t)(thread_clock->NowNanos() % + std::numeric_limits::max())); + while (thread_clock->NowMicros() < until) { + for (int i = 0; i < static_cast(r.Skewed(arg->burst * 2) + 1); ++i) { + arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, + Env::IO_USER, nullptr /* stats */, + RateLimiter::OpType::kWrite); + } + + for (int i = 0; i < static_cast(r.Skewed(arg->burst) + 1); ++i) { + arg->limiter->Request(r.Uniform(arg->request_size - 1) + 
1, + Env::IO_HIGH, nullptr /* stats */, + RateLimiter::OpType::kWrite); + } + + for (int i = 0; i < static_cast(r.Skewed(arg->burst / 2 + 1) + 1); + ++i) { + arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, Env::IO_MID, + nullptr /* stats */, RateLimiter::OpType::kWrite); + } + + arg->limiter->Request(r.Uniform(arg->request_size - 1) + 1, Env::IO_LOW, + nullptr /* stats */, RateLimiter::OpType::kWrite); + } + }; + + int samples = 0; + int samples_at_minimum = 0; + + for (int i = 1; i <= 16; i *= 2) { + int32_t target = i * 1024 * 10; + Arg arg(target, i / 4 + 1); + int64_t old_total_bytes_through = 0; + for (int iter = 1; iter <= 2; ++iter) { + // second iteration changes the target dynamically + if (iter == 2) { + target *= 2; + arg.limiter->SetBytesPerSecond(target); + } + auto start = env->NowMicros(); + for (int t = 0; t < i; ++t) { + env->StartThread(writer, &arg); + } + env->WaitForJoin(); + + auto elapsed = env->NowMicros() - start; + double rate = + (arg.limiter->GetTotalBytesThrough() - old_total_bytes_through) * + 1000000.0 / elapsed; + old_total_bytes_through = arg.limiter->GetTotalBytesThrough(); + fprintf(stderr, + "request size [1 - %" PRIi32 "], limit %" PRIi32 + " KB/sec, actual rate: %lf KB/sec, elapsed %.2lf seconds\n", + arg.request_size - 1, target / 1024, rate / 1024, + elapsed / 1000000.0); + + ++samples; + if (rate / target >= 0.80) { + ++samples_at_minimum; + } + ASSERT_LE(rate / target, 1.25); + } + } + + // This can fail due to slow execution speed, like when using valgrind or in + // heavily loaded CI environments + bool skip_minimum_rate_check = +#if (defined(CIRCLECI) && defined(OS_MACOSX)) || defined(ROCKSDB_VALGRIND_RUN) + true; +#else + getenv("SANDCASTLE"); +#endif + if (skip_minimum_rate_check) { + fprintf(stderr, "Skipped minimum rate check (%d / %d passed)\n", + samples_at_minimum, samples); + } else { + ASSERT_EQ(samples_at_minimum, samples); + } +} + +TEST_F(RateLimiterTest, LimitChangeTest) { + // starvation test when limit changes to a smaller value + int64_t refill_period = 1000 * 1000; + auto* env = Env::Default(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + struct Arg { + Arg(int32_t _request_size, Env::IOPriority _pri, + std::shared_ptr _limiter) + : request_size(_request_size), pri(_pri), limiter(_limiter) {} + int32_t request_size; + Env::IOPriority pri; + std::shared_ptr limiter; + }; + + auto writer = [](void* p) { + auto* arg = static_cast(p); + arg->limiter->Request(arg->request_size, arg->pri, nullptr /* stats */, + RateLimiter::OpType::kWrite); + }; + + for (uint32_t i = 1; i <= 16; i <<= 1) { + int32_t target = i * 1024 * 10; + // refill per second + for (int iter = 0; iter < 2; iter++) { + std::shared_ptr limiter = + std::make_shared( + target, refill_period, 10, RateLimiter::Mode::kWritesOnly, + SystemClock::Default(), false /* auto_tuned */); + // After "GenericRateLimiter::Request:1" the mutex is held until the bytes + // are refilled. This test could be improved to change the limit when lock + // is released in `TimedWait()`. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"GenericRateLimiter::Request", + "RateLimiterTest::LimitChangeTest:changeLimitStart"}, + {"RateLimiterTest::LimitChangeTest:changeLimitEnd", + "GenericRateLimiter::Request:1"}}); + Arg arg(target, Env::IO_HIGH, limiter); + // The idea behind is to start a request first, then before it refills, + // update limit to a different value (2X/0.5X). 
No starvation should + // be guaranteed under any situation + // TODO(lightmark): more test cases are welcome. + env->StartThread(writer, &arg); + int32_t new_limit = (target << 1) >> (iter << 1); + TEST_SYNC_POINT("RateLimiterTest::LimitChangeTest:changeLimitStart"); + arg.limiter->SetBytesPerSecond(new_limit); + TEST_SYNC_POINT("RateLimiterTest::LimitChangeTest:changeLimitEnd"); + env->WaitForJoin(); + fprintf(stderr, + "[COMPLETE] request size %" PRIi32 " KB, new limit %" PRIi32 + "KB/sec, refill period %" PRIi64 " ms\n", + target / 1024, new_limit / 1024, refill_period / 1000); + } + } + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); +} + +TEST_F(RateLimiterTest, AutoTuneIncreaseWhenFull) { + const std::chrono::seconds kTimePerRefill(1); + const int kRefillsPerTune = 100; // needs to match util/rate_limiter.cc + + SpecialEnv special_env(Env::Default(), /*time_elapse_only_sleep*/ true); + + auto stats = CreateDBStatistics(); + std::unique_ptr rate_limiter(new GenericRateLimiter( + 1000 /* rate_bytes_per_sec */, + std::chrono::microseconds(kTimePerRefill).count(), 10 /* fairness */, + RateLimiter::Mode::kWritesOnly, special_env.GetSystemClock(), + true /* auto_tuned */)); + + // Rate limiter uses `CondVar::TimedWait()`, which does not have access to the + // `Env` to advance its time according to the fake wait duration. The + // workaround is to install a callback that advance the `Env`'s mock time. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "GenericRateLimiter::Request:PostTimedWait", [&](void* arg) { + int64_t time_waited_us = *static_cast(arg); + special_env.SleepForMicroseconds(static_cast(time_waited_us)); + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // verify rate limit increases after a sequence of periods where rate limiter + // is always drained + int64_t orig_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); + rate_limiter->Request(orig_bytes_per_sec, Env::IO_HIGH, stats.get(), + RateLimiter::OpType::kWrite); + while (std::chrono::microseconds(special_env.NowMicros()) <= + kRefillsPerTune * kTimePerRefill) { + rate_limiter->Request(orig_bytes_per_sec, Env::IO_HIGH, stats.get(), + RateLimiter::OpType::kWrite); + } + int64_t new_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); + ASSERT_GT(new_bytes_per_sec, orig_bytes_per_sec); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( + "GenericRateLimiter::Request:PostTimedWait"); + + // decreases after a sequence of periods where rate limiter is not drained + orig_bytes_per_sec = new_bytes_per_sec; + special_env.SleepForMicroseconds(static_cast( + kRefillsPerTune * std::chrono::microseconds(kTimePerRefill).count())); + // make a request so tuner can be triggered + rate_limiter->Request(1 /* bytes */, Env::IO_HIGH, stats.get(), + RateLimiter::OpType::kWrite); + new_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); + ASSERT_LT(new_bytes_per_sec, orig_bytes_per_sec); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/repeatable_thread.h b/src/rocksdb/util/repeatable_thread.h new file mode 100644 index 000000000..c75ad7c49 --- /dev/null +++ b/src/rocksdb/util/repeatable_thread.h @@ -0,0 +1,149 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "monitoring/instrumented_mutex.h" +#include "port/port.h" +#include "rocksdb/system_clock.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +// Simple wrapper around port::Thread that supports calling a callback every +// X seconds. If you pass in 0, then it will call your callback repeatedly +// without delay. +class RepeatableThread { + public: + RepeatableThread(std::function function, + const std::string& thread_name, SystemClock* clock, + uint64_t delay_us, uint64_t initial_delay_us = 0) + : function_(function), + thread_name_("rocksdb:" + thread_name), + clock_(clock), + delay_us_(delay_us), + initial_delay_us_(initial_delay_us), + mutex_(clock), + cond_var_(&mutex_), + running_(true), +#ifndef NDEBUG + waiting_(false), + run_count_(0), +#endif + thread_([this] { thread(); }) { + } + + void cancel() { + { + InstrumentedMutexLock l(&mutex_); + if (!running_) { + return; + } + running_ = false; + cond_var_.SignalAll(); + } + thread_.join(); + } + + bool IsRunning() { return running_; } + + ~RepeatableThread() { cancel(); } + +#ifndef NDEBUG + // Wait until RepeatableThread starting waiting, call the optional callback, + // then wait for one run of RepeatableThread. Tests can use provide a + // custom clock object to mock time, and use the callback here to bump current + // time and trigger RepeatableThread. See repeatable_thread_test for example. + // + // Note: only support one caller of this method. + void TEST_WaitForRun(std::function callback = nullptr) { + InstrumentedMutexLock l(&mutex_); + while (!waiting_) { + cond_var_.Wait(); + } + uint64_t prev_count = run_count_; + if (callback != nullptr) { + callback(); + } + cond_var_.SignalAll(); + while (!(run_count_ > prev_count)) { + cond_var_.Wait(); + } + } +#endif + + private: + bool wait(uint64_t delay) { + InstrumentedMutexLock l(&mutex_); + if (running_ && delay > 0) { + uint64_t wait_until = clock_->NowMicros() + delay; +#ifndef NDEBUG + waiting_ = true; + cond_var_.SignalAll(); +#endif + while (running_) { + cond_var_.TimedWait(wait_until); + if (clock_->NowMicros() >= wait_until) { + break; + } + } +#ifndef NDEBUG + waiting_ = false; +#endif + } + return running_; + } + + void thread() { +#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) + // Set thread name. + auto thread_handle = thread_.native_handle(); + int ret __attribute__((__unused__)) = + pthread_setname_np(thread_handle, thread_name_.c_str()); + assert(ret == 0); +#endif +#endif + + assert(delay_us_ > 0); + if (!wait(initial_delay_us_)) { + return; + } + do { + function_(); +#ifndef NDEBUG + { + InstrumentedMutexLock l(&mutex_); + run_count_++; + cond_var_.SignalAll(); + } +#endif + } while (wait(delay_us_)); + } + + const std::function function_; + const std::string thread_name_; + SystemClock* clock_; + const uint64_t delay_us_; + const uint64_t initial_delay_us_; + + // Mutex lock should be held when accessing running_, waiting_ + // and run_count_. + InstrumentedMutex mutex_; + InstrumentedCondVar cond_var_; + bool running_; +#ifndef NDEBUG + // RepeatableThread waiting for timeout. + bool waiting_; + // Times function_ had run. 
+ uint64_t run_count_; +#endif + port::Thread thread_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/repeatable_thread_test.cc b/src/rocksdb/util/repeatable_thread_test.cc new file mode 100644 index 000000000..0b3e95464 --- /dev/null +++ b/src/rocksdb/util/repeatable_thread_test.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/repeatable_thread.h" + +#include +#include + +#include "db/db_test_util.h" +#include "test_util/mock_time_env.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" + +class RepeatableThreadTest : public testing::Test { + public: + RepeatableThreadTest() + : mock_clock_(std::make_shared( + ROCKSDB_NAMESPACE::SystemClock::Default())) {} + + protected: + std::shared_ptr mock_clock_; +}; + +TEST_F(RepeatableThreadTest, TimedTest) { + constexpr uint64_t kSecond = 1000000; // 1s = 1000000us + constexpr int kIteration = 3; + const auto& clock = ROCKSDB_NAMESPACE::SystemClock::Default(); + ROCKSDB_NAMESPACE::port::Mutex mutex; + ROCKSDB_NAMESPACE::port::CondVar test_cv(&mutex); + int count = 0; + uint64_t prev_time = clock->NowMicros(); + ROCKSDB_NAMESPACE::RepeatableThread thread( + [&] { + ROCKSDB_NAMESPACE::MutexLock l(&mutex); + count++; + uint64_t now = clock->NowMicros(); + assert(count == 1 || prev_time + 1 * kSecond <= now); + prev_time = now; + if (count >= kIteration) { + test_cv.SignalAll(); + } + }, + "rt_test", clock.get(), 1 * kSecond); + // Wait for execution finish. + { + ROCKSDB_NAMESPACE::MutexLock l(&mutex); + while (count < kIteration) { + test_cv.Wait(); + } + } + + // Test cancel + thread.cancel(); +} + +TEST_F(RepeatableThreadTest, MockEnvTest) { + constexpr uint64_t kSecond = 1000000; // 1s = 1000000us + constexpr int kIteration = 3; + mock_clock_->SetCurrentTime(0); // in seconds + std::atomic count{0}; + +#if defined(OS_MACOSX) && !defined(NDEBUG) + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "InstrumentedCondVar::TimedWaitInternal", [&](void* arg) { + // Obtain the current (real) time in seconds and add 1000 extra seconds + // to ensure that RepeatableThread::wait invokes TimedWait with a time + // greater than (real) current time. This is to prevent the TimedWait + // function from returning immediately without sleeping and releasing + // the mutex on certain platforms, e.g. OS X. If TimedWait returns + // immediately, the mutex will not be released, and + // RepeatableThread::TEST_WaitForRun never has a chance to execute the + // callback which, in this case, updates the result returned by + // mock_clock->NowMicros. Consequently, RepeatableThread::wait cannot + // break out of the loop, causing test to hang. The extra 1000 seconds + // is a best-effort approach because there seems no reliable and + // deterministic way to provide the aforementioned guarantee. By the + // time RepeatableThread::wait is called, it is no guarantee that the + // delay + mock_clock->NowMicros will be greater than the current real + // time. However, 1000 seconds should be sufficient in most cases. 
+ uint64_t time_us = *reinterpret_cast(arg); + if (time_us < mock_clock_->RealNowMicros()) { + *reinterpret_cast(arg) = + mock_clock_->RealNowMicros() + 1000; + } + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); +#endif // OS_MACOSX && !NDEBUG + + ROCKSDB_NAMESPACE::RepeatableThread thread( + [&] { count++; }, "rt_test", mock_clock_.get(), 1 * kSecond, 1 * kSecond); + for (int i = 1; i <= kIteration; i++) { + // Bump current time + thread.TEST_WaitForRun([&] { mock_clock_->SetCurrentTime(i); }); + } + // Test function should be exectued exactly kIteraion times. + ASSERT_EQ(kIteration, count.load()); + + // Test cancel + thread.cancel(); +} + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/ribbon_alg.h b/src/rocksdb/util/ribbon_alg.h new file mode 100644 index 000000000..f9afefc23 --- /dev/null +++ b/src/rocksdb/util/ribbon_alg.h @@ -0,0 +1,1225 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "util/math128.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_alg.h: generic versions of core algorithms. +// +// Ribbon is a Perfect Hash Static Function construction useful as a compact +// static Bloom filter alternative. It combines (a) a boolean (GF(2)) linear +// system construction that approximates a Band Matrix with hashing, +// (b) an incremental, on-the-fly Gaussian Elimination algorithm that is +// remarkably efficient and adaptable at constructing an upper-triangular +// band matrix from a set of band-approximating inputs from (a), and +// (c) a storage layout that is fast and adaptable as a filter. +// +// Footnotes: (a) "Efficient Gauss Elimination for Near-Quadratic Matrices +// with One Short Random Block per Row, with Applications" by Stefan +// Walzer and Martin Dietzfelbinger ("DW paper") +// (b) developed by Peter C. Dillinger, though not the first on-the-fly +// GE algorithm. See "On the fly Gaussian Elimination for LT codes" by +// Bioglio, Grangetto, Gaeta, and Sereno. +// (c) see "interleaved" solution storage below. +// +// See ribbon_impl.h for high-level behavioral summary. This file focuses +// on the core design details. +// +// ###################################################################### +// ################# PHSF -> static filter reduction #################### +// +// A Perfect Hash Static Function is a data structure representing a +// map from anything hashable (a "key") to values of some fixed size. +// Crucially, it is allowed to return garbage values for anything not in +// the original set of map keys, and it is a "static" structure: entries +// cannot be added or deleted after construction. PHSFs representing n +// mappings to b-bit values (assume uniformly distributed) require at least +// n * b bits to represent, or at least b bits per entry. We typically +// describe the compactness of a PHSF by typical bits per entry as some +// function of b. 
For example, the MWHC construction (k=3 "peeling") +// requires about 1.0222*b and a variant called Xor+ requires about +// 1.08*b + 0.5 bits per entry. +// +// With more hashing, a PHSF can over-approximate a set as a Bloom filter +// does, with no FN queries and predictable false positive (FP) query +// rate. Instead of the user providing a value to map each input key to, +// a hash function provides the value. Keys in the original set will +// return a positive membership query because the underlying PHSF returns +// the same value as hashing the key. When a key is not in the original set, +// the PHSF returns a "garbage" value, which is only equal to the key's +// hash with (false positive) probability 1 in 2^b. +// +// For a matching false positive rate, standard Bloom filters require +// 1.44*b bits per entry. Cache-local Bloom filters (like bloom_impl.h) +// require a bit more, around 1.5*b bits per entry. Thus, a Bloom +// alternative could save up to or nearly 1/3rd of memory and storage +// that RocksDB uses for SST (static) Bloom filters. (Memtable Bloom filter +// is dynamic.) +// +// Recommended reading: +// "Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters" +// by Graf and Lemire +// First three sections of "Fast Scalable Construction of (Minimal +// Perfect Hash) Functions" by Genuzio, Ottaviano, and Vigna +// +// ###################################################################### +// ################## PHSF vs. hash table vs. Bloom ##################### +// +// You can think of traditional hash tables and related filter variants +// such as Cuckoo filters as utilizing an "OR" construction: a hash +// function associates a key with some slots and the data is returned if +// the data is found in any one of those slots. The collision resolution +// is visible in the final data structure and requires extra information. +// For example, Cuckoo filter uses roughly 1.05b + 2 bits per entry, and +// Golomb-Rice code (aka "GCS") as little as b + 1.5. When the data +// structure associates each input key with data in one slot, the +// structure implicitly constructs a (near-)minimal (near-)perfect hash +// (MPH) of the keys, which requires at least 1.44 bits per key to +// represent. This is why approaches with visible collision resolution +// have a fixed + 1.5 or more in storage overhead per entry, often in +// addition to an overhead multiplier on b. +// +// By contrast Bloom filters utilize an "AND" construction: a query only +// returns true if all bit positions associated with a key are set to 1. +// There is no collision resolution, so Bloom filters do not suffer a +// fixed bits per entry overhead like the above structures. +// +// PHSFs typically use a bitwise XOR construction: the data you want is +// not in a single slot, but in a linear combination of several slots. +// For static data, this gives the best of "AND" and "OR" constructions: +// avoids the +1.44 or more fixed overhead by not approximating a MPH and +// can do much better than Bloom's 1.44 factor on b with collision +// resolution, which here is done ahead of time and invisible at query +// time. +// +// ###################################################################### +// ######################## PHSF construction ########################### +// +// For a typical PHSF, construction is solving a linear system of +// equations, typically in GF(2), which is to say that values are boolean +// and XOR serves both as addition and subtraction. 
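+// (A tiny, hypothetical illustration of that property, not part of this
+// file's API: for two equal-width bit vectors a and b,
+//   a ^= b;  // "adds" b to a in GF(2)
+//   a ^= b;  // "subtracts" it again, restoring the original a
+// so the same XOR row operation serves as both addition and subtraction
+// in the eliminations described below.)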
We can use matrices to +// represent the problem: +// +// C * S = R +// (n x m) (m x b) (n x b) +// where C = coefficients, S = solution, R = results +// and solving for S given C and R. +// +// Note that C and R each have n rows, one for each input entry for the +// PHSF. A row in C is given by a hash function on the PHSF input key, +// and the corresponding row in R is the b-bit value to associate with +// that input key. (In a filter, rows of R are given by another hash +// function on the input key.) +// +// On solving, the matrix S (solution) is the final PHSF data, as it +// maps any row from the original C to its corresponding desired result +// in R. We just have to hash our query inputs and compute a linear +// combination of rows in S. +// +// In theory, we could chose m = n and let a hash function associate +// each input key with random rows in C. A solution exists with high +// probability, and uses essentially minimum space, b bits per entry +// (because we set m = n) but this has terrible scaling, something +// like O(n^2) space and O(n^3) time during construction (Gaussian +// elimination) and O(n) query time. But computational efficiency is +// key, and the core of this is avoiding scanning all of S to answer +// each query. +// +// The traditional approach (MWHC, aka Xor filter) starts with setting +// only some small fixed number of columns (typically k=3) to 1 for each +// row of C, with remaining entries implicitly 0. This is implemented as +// three hash functions over [0,m), and S can be implemented as a vector +// of b-bit values. Now, a query only involves looking up k rows +// (values) in S and computing their bitwise XOR. Additionally, this +// construction can use a linear time algorithm called "peeling" for +// finding a solution in many cases of one existing, but peeling +// generally requires a larger space overhead factor in the solution +// (m/n) than is required with Gaussian elimination. +// +// Recommended reading: +// "Peeling Close to the Orientability Threshold - Spatial Coupling in +// Hashing-Based Data Structures" by Stefan Walzer +// +// ###################################################################### +// ##################### Ribbon PHSF construction ####################### +// +// Ribbon constructs coefficient rows essentially the same as in the +// Walzer/Dietzfelbinger paper cited above: for some chosen fixed width +// r (kCoeffBits in code), each key is hashed to a starting column in +// [0, m - r] (GetStart() in code) and an r-bit sequence of boolean +// coefficients (GetCoeffRow() in code). If you sort the rows by start, +// the C matrix would look something like this: +// +// [####00000000000000000000] +// [####00000000000000000000] +// [000####00000000000000000] +// [0000####0000000000000000] +// [0000000####0000000000000] +// [000000000####00000000000] +// [000000000####00000000000] +// [0000000000000####0000000] +// [0000000000000000####0000] +// [00000000000000000####000] +// [00000000000000000000####] +// +// where each # could be a 0 or 1, chosen uniformly by a hash function. +// (Except we typically set the start column value to 1.) 
This scheme +// uses hashing to approximate a band matrix, and it has a solution iff +// it reduces to an upper-triangular boolean r-band matrix, like this: +// +// [1###00000000000000000000] +// [01##00000000000000000000] +// [000000000000000000000000] +// [0001###00000000000000000] +// [000000000000000000000000] +// [000001##0000000000000000] +// [000000000000000000000000] +// [00000001###0000000000000] +// [000000001###000000000000] +// [0000000001##000000000000] +// ... +// [00000000000000000000001#] +// [000000000000000000000001] +// +// where we have expanded to an m x m matrix by filling with rows of +// all zeros as needed. As in Gaussian elimination, this form is ready for +// generating a solution through back-substitution. +// +// The awesome thing about the Ribbon construction (from the DW paper) is +// how row reductions keep each row representable as a start column and +// r coefficients, because row reductions are only needed when two rows +// have the same number of leading zero columns. Thus, the combination +// of those rows, the bitwise XOR of the r-bit coefficient rows, cancels +// out the leading 1s, so starts (at least) one column later and only +// needs (at most) r - 1 coefficients. +// +// ###################################################################### +// ###################### Ribbon PHSF scalability ####################### +// +// Although more practical detail is in ribbon_impl.h, it's worth +// understanding some of the overall benefits and limitations of the +// Ribbon PHSFs. +// +// High-end scalability is a primary issue for Ribbon PHSFs, because in +// a single Ribbon linear system with fixed r and fixed m/n ratio, the +// solution probability approaches zero as n approaches infinity. +// For a given n, solution probability improves with larger r and larger +// m/n. +// +// By contrast, peeling-based PHSFs have somewhat worse storage ratio +// or solution probability for small n (less than ~1000). This is +// especially true with spatial-coupling, where benefits are only +// notable for n on the order of 100k or 1m or more. +// +// To make best use of current hardware, r=128 seems to be closest to +// a "generally good" choice for Ribbon, at least in RocksDB where SST +// Bloom filters typically hold around 10-100k keys, and almost always +// less than 10m keys. r=128 ribbon has a high chance of encoding success +// (with first hash seed) when storage overhead is around 5% (m/n ~ 1.05) +// for roughly 10k - 10m keys in a single linear system. r=64 only scales +// up to about 10k keys with the same storage overhead. Construction and +// access times for r=128 are similar to r=64. r=128 tracks nearly +// twice as much data during construction, but in most cases we expect +// the scalability benefits of r=128 vs. r=64 to make it preferred. +// +// A natural approach to scaling Ribbon beyond ~10m keys is splitting +// (or "sharding") the inputs into multiple linear systems with their +// own hash seeds. This can also help to control peak memory consumption. +// TODO: much more to come +// +// ###################################################################### +// #################### Ribbon on-the-fly banding ####################### +// +// "Banding" is what we call the process of reducing the inputs to an +// upper-triangular r-band matrix ready for finishing a solution with +// back-substitution. 
Although the DW paper presents an algorithm for +// this ("SGauss"), the awesome properties of their construction enable +// an even simpler, faster, and more backtrackable algorithm. In simplest +// terms, the SGauss algorithm requires sorting the inputs by start +// columns, but it's possible to make Gaussian elimination resemble hash +// table insertion! +// +// The enhanced algorithm is based on these observations: +// - When processing a coefficient row with first 1 in column j, +// - If it's the first at column j to be processed, it can be part of +// the banding at row j. (And that decision never overwritten, with +// no loss of generality!) +// - Else, it can be combined with existing row j and re-processed, +// which will look for a later "empty" row or reach "no solution". +// +// We call our banding algorithm "incremental" and "on-the-fly" because +// (like hash table insertion) we are "finished" after each input +// processed, with respect to all inputs processed so far. Although the +// band matrix is an intermediate step to the solution structure, we have +// eliminated intermediate steps and unnecessary data tracking for +// banding. +// +// Building on "incremental" and "on-the-fly", the banding algorithm is +// easily backtrackable because no (non-empty) rows are overwritten in +// the banding. Thus, if we want to "try" adding an additional set of +// inputs to the banding, we only have to record which rows were written +// in order to efficiently backtrack to our state before considering +// the additional set. (TODO: how this can mitigate scalability and +// reach sub-1% overheads) +// +// Like in a linear-probed hash table, as the occupancy approaches and +// surpasses 90-95%, collision resolution dominates the construction +// time. (Ribbon doesn't usually pay at query time; see solution +// storage below.) This means that we can speed up construction time +// by using a higher m/n ratio, up to negative returns around 1.2. +// At m/n ~= 1.2, which still saves memory substantially vs. Bloom +// filter's 1.5, construction speed (including back-substitution) is not +// far from sorting speed, but still a few times slower than cache-local +// Bloom construction speed. +// +// Back-substitution from an upper-triangular boolean band matrix is +// especially fast and easy. All the memory accesses are sequential or at +// least local, no random. If the number of result bits (b) is a +// compile-time constant, the back-substitution state can even be tracked +// in CPU registers. Regardless of the solution representation, we prefer +// column-major representation for tracking back-substitution state, as +// r (the band width) will typically be much larger than b (result bits +// or columns), so better to handle r-bit values b times (per solution +// row) than b-bit values r times. +// +// ###################################################################### +// ##################### Ribbon solution storage ######################## +// +// Row-major layout is typical for boolean (bit) matrices, including for +// MWHC (Xor) filters where a query combines k b-bit values, and k is +// typically smaller than b. Even for k=4 and b=2, at least k=4 random +// look-ups are required regardless of layout. 
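+//
+// As a concrete, schematic point of reference, a row-major Xor-style filter
+// query with k=3 could be sketched as follows. This is illustrative only,
+// with hypothetical names, and not taken from any particular implementation;
+// it assumes a flat table of 8-bit (b=8) fingerprints:
+//
+//   uint8_t XorStyleQuery(uint64_t h0, uint64_t h1, uint64_t h2,
+//                         const uint8_t* table, size_t num_slots) {
+//     // k = 3 essentially random look-ups, one per hash, combined with XOR
+//     return table[h0 % num_slots] ^ table[h1 % num_slots] ^
+//            table[h2 % num_slots];
+//   }
+//
+// A query is judged positive iff the returned value equals the key's own
+// 8-bit fingerprint hash, for a false positive rate of roughly 1 in 2^8.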
+// +// Ribbon PHSFs are quite different, however, because +// (a) all of the solution rows relevant to a query are within a single +// range of r rows, and +// (b) the number of solution rows involved (r/2 on average, or r if +// avoiding conditional accesses) is typically much greater than +// b, the number of solution columns. +// +// Row-major for Ribbon PHSFs therefore tends to incur undue CPU overhead +// by processing (up to) r entries of b bits each, where b is typically +// less than 10 for filter applications. +// +// Column-major layout has poor locality because of accessing up to b +// memory locations in different pages (and obviously cache lines). Note +// that negative filter queries do not typically need to access all +// solution columns, as they can return when a mismatch is found in any +// result/solution column. This optimization doesn't always pay off on +// recent hardware, where the penalty for unpredictable conditional +// branching can exceed the penalty for unnecessary work, but the +// optimization is essentially unavailable with row-major layout. +// +// The best compromise seems to be interleaving column-major on the small +// scale with row-major on the large scale. For example, let a solution +// "block" be r rows column-major encoded as b r-bit values in sequence. +// Each query accesses (up to) 2 adjacent blocks, which will typically +// span 1-3 cache lines in adjacent memory. We get very close to the same +// locality as row-major, but with much faster reconstruction of each +// result column, at least for filter applications where b is relatively +// small and negative queries can return early. +// +// ###################################################################### +// ###################### Fractional result bits ######################## +// +// Bloom filters have great flexibility that alternatives mostly do not +// have. One of those flexibilities is in utilizing any ratio of data +// structure bits per key. With a typical memory allocator like jemalloc, +// this flexibility can save roughly 10% of the filters' footprint in +// DRAM by rounding up and down filter sizes to minimize memory internal +// fragmentation (see optimize_filters_for_memory RocksDB option). +// +// At first glance, PHSFs only offer a whole number of bits per "slot" +// (m rather than number of keys n), but coefficient locality in the +// Ribbon construction makes fractional bits/key quite possible and +// attractive for filter applications. This works by a prefix of the +// structure using b-1 solution columns and the rest using b solution +// columns. See InterleavedSolutionStorage below for more detail. +// +// Because false positive rates are non-linear in bits/key, this approach +// is not quite optimal in terms of information theory. In common cases, +// we see additional space overhead up to about 1.5% vs. theoretical +// optimal to achieve the same FP rate. We consider this a quite acceptable +// overhead for very efficiently utilizing space that might otherwise be +// wasted. +// +// This property of Ribbon even makes it "elastic." A Ribbon filter and +// its small metadata for answering queries can be adapted into another +// Ribbon filter filling any smaller multiple of r bits (plus small +// metadata), with a correspondingly higher FP rate. None of the data +// thrown away during construction needs to be recalled for this reduction. 
+// Similarly a single Ribbon construction can be separated (by solution +// column) into two or more structures (or "layers" or "levels") with +// independent filtering ability (no FP correlation, just as solution or +// result columns in a single structure) despite being constructed as part +// of a single linear system. (TODO: implement) +// See also "ElasticBF: Fine-grained and Elastic Bloom Filter Towards +// Efficient Read for LSM-tree-based KV Stores." +// + +// ###################################################################### +// ################### CODE: Ribbon core algorithms ##################### +// ###################################################################### +// +// These algorithms are templatized for genericity but near-maximum +// performance in a given application. The template parameters +// adhere to informal class/struct type concepts outlined below. (This +// code is written for C++11 so does not use formal C++ concepts.) + +// Rough architecture for these algorithms: +// +// +-----------+ +---+ +-----------------+ +// | AddInputs | --> | H | --> | BandingStorage | +// +-----------+ | a | +-----------------+ +// | s | | +// | h | Back substitution +// | e | V +// +-----------+ | r | +-----------------+ +// | Query Key | --> | | >+< | SolutionStorage | +// +-----------+ +---+ | +-----------------+ +// V +// Query result + +// Common to other concepts +// concept RibbonTypes { +// // An unsigned integer type for an r-bit subsequence of coefficients. +// // r (or kCoeffBits) is taken to be sizeof(CoeffRow) * 8, as it would +// // generally only hurt scalability to leave bits of CoeffRow unused. +// typename CoeffRow; +// // An unsigned integer type big enough to hold a result row (b bits, +// // or number of solution/result columns). +// // In many applications, especially filters, the number of result +// // columns is decided at run time, so ResultRow simply needs to be +// // big enough for the largest number of columns allowed. +// typename ResultRow; +// // An unsigned integer type sufficient for representing the number of +// // rows in the solution structure, and at least the arithmetic +// // promotion size (usually 32 bits). uint32_t recommended because a +// // single Ribbon construction doesn't really scale to billions of +// // entries. +// typename Index; +// }; + +// ###################################################################### +// ######################## Hashers and Banding ######################### + +// Hasher concepts abstract out hashing details. + +// concept PhsfQueryHasher extends RibbonTypes { +// // Type for a lookup key, which is hashable. +// typename Key; +// +// // Type for hashed summary of a Key. uint64_t is recommended. +// typename Hash; +// +// // Compute a hash value summarizing a Key +// Hash GetHash(const Key &) const; +// +// // Given a hash value and a number of columns that can start an +// // r-sequence of coefficients (== m - r + 1), return the start +// // column to associate with that hash value. (Starts can be chosen +// // uniformly or "smash" extra entries into the beginning and end for +// // better utilization at those extremes of the structure. Details in +// // ribbon.impl.h) +// Index GetStart(Hash, Index num_starts) const; +// +// // Given a hash value, return the r-bit sequence of coefficients to +// // associate with it. 
It's generally OK if +// // sizeof(CoeffRow) > sizeof(Hash) +// // as long as the hash itself is not too prone to collisions for the +// // applications and the CoeffRow is generated uniformly from +// // available hash data, but relatively independent of the start. +// // +// // Must be non-zero, because that's required for a solution to exist +// // when mapping to non-zero result row. (Note: BandingAdd could be +// // modified to allow 0 coeff row if that only occurs with 0 result +// // row, which really only makes sense for filter implementation, +// // where both values are hash-derived. Or BandingAdd could reject 0 +// // coeff row, forcing next seed, but that has potential problems with +// // generality/scalability.) +// CoeffRow GetCoeffRow(Hash) const; +// }; + +// concept FilterQueryHasher extends PhsfQueryHasher { +// // For building or querying a filter, this returns the expected +// // result row associated with a hashed input. For general PHSF, +// // this must return 0. +// // +// // Although not strictly required, there's a slightly better chance of +// // solver success if result row is masked down here to only the bits +// // actually needed. +// ResultRow GetResultRowFromHash(Hash) const; +// } + +// concept BandingHasher extends FilterQueryHasher { +// // For a filter, this will generally be the same as Key. +// // For a general PHSF, it must either +// // (a) include a key and a result it maps to (e.g. in a std::pair), or +// // (b) GetResultRowFromInput looks up the result somewhere rather than +// // extracting it. +// typename AddInput; +// +// // Instead of requiring a way to extract a Key from an +// // AddInput, we require getting the hash of the Key part +// // of an AddInput, which is trivial if AddInput == Key. +// Hash GetHash(const AddInput &) const; +// +// // For building a non-filter PHSF, this extracts or looks up the result +// // row to associate with an input. For filter PHSF, this must return 0. +// ResultRow GetResultRowFromInput(const AddInput &) const; +// +// // Whether the solver can assume the lowest bit of GetCoeffRow is +// // always 1. When true, it should improve solver efficiency slightly. +// static bool kFirstCoeffAlwaysOne; +// } + +// Abstract storage for the the result of "banding" the inputs (Gaussian +// elimination to an upper-triangular boolean band matrix). Because the +// banding is an incremental / on-the-fly algorithm, this also represents +// all the intermediate state between input entries. +// +// concept BandingStorage extends RibbonTypes { +// // Tells the banding algorithm to prefetch memory associated with +// // the next input before processing the current input. Generally +// // recommended iff the BandingStorage doesn't easily fit in CPU +// // cache. +// bool UsePrefetch() const; +// +// // Prefetches (e.g. __builtin_prefetch) memory associated with a +// // slot index i. +// void Prefetch(Index i) const; +// +// // Load or store CoeffRow and ResultRow for slot index i. +// // (Gaussian row operations involve both sides of the equation.) +// // Bool `for_back_subst` indicates that customizing values for +// // unconstrained solution rows (cr == 0) is allowed. +// void LoadRow(Index i, CoeffRow *cr, ResultRow *rr, bool for_back_subst) +// const; +// void StoreRow(Index i, CoeffRow cr, ResultRow rr); +// +// // Returns the number of columns that can start an r-sequence of +// // coefficients, which is the number of slots minus r (kCoeffBits) +// // plus one. 
(m - r + 1) +// Index GetNumStarts() const; +// }; + +// Optional storage for backtracking data in banding a set of input +// entries. It exposes an array structure which will generally be +// used as a stack. It must be able to accommodate as many entries +// as are passed in as inputs to `BandingAddRange`. +// +// concept BacktrackStorage extends RibbonTypes { +// // If false, backtracking support will be disabled in the algorithm. +// // This should preferably be an inline compile-time constant function. +// bool UseBacktrack() const; +// +// // Records `to_save` as the `i`th backtrack entry +// void BacktrackPut(Index i, Index to_save); +// +// // Recalls the `i`th backtrack entry +// Index BacktrackGet(Index i) const; +// } + +// Adds a single entry to BandingStorage (and optionally, BacktrackStorage), +// returning true if successful or false if solution is impossible with +// current hasher (and presumably its seed) and number of "slots" (solution +// or banding rows). (A solution is impossible when there is a linear +// dependence among the inputs that doesn't "cancel out".) +// +// Pre- and post-condition: the BandingStorage represents a band matrix +// ready for back substitution (row echelon form except for zero rows), +// augmented with result values such that back substitution would give a +// solution satisfying all the cr@start -> rr entries added. +template +bool BandingAdd(BandingStorage *bs, typename BandingStorage::Index start, + typename BandingStorage::ResultRow rr, + typename BandingStorage::CoeffRow cr, BacktrackStorage *bts, + typename BandingStorage::Index *backtrack_pos) { + using CoeffRow = typename BandingStorage::CoeffRow; + using ResultRow = typename BandingStorage::ResultRow; + using Index = typename BandingStorage::Index; + + Index i = start; + + if (!kFirstCoeffAlwaysOne) { + // Requires/asserts that cr != 0 + int tz = CountTrailingZeroBits(cr); + i += static_cast(tz); + cr >>= tz; + } + + for (;;) { + assert((cr & 1) == 1); + CoeffRow cr_at_i; + ResultRow rr_at_i; + bs->LoadRow(i, &cr_at_i, &rr_at_i, /* for_back_subst */ false); + if (cr_at_i == 0) { + bs->StoreRow(i, cr, rr); + bts->BacktrackPut(*backtrack_pos, i); + ++*backtrack_pos; + return true; + } + assert((cr_at_i & 1) == 1); + // Gaussian row reduction + cr ^= cr_at_i; + rr ^= rr_at_i; + if (cr == 0) { + // Inconsistency or (less likely) redundancy + break; + } + // Find relative offset of next non-zero coefficient. + int tz = CountTrailingZeroBits(cr); + i += static_cast(tz); + cr >>= tz; + } + + // Failed, unless result row == 0 because e.g. a duplicate input or a + // stock hash collision, with same result row. (For filter, stock hash + // collision implies same result row.) Or we could have a full equation + // equal to sum of other equations, which is very possible with + // small range of values for result row. + return rr == 0; +} + +// Adds a range of entries to BandingStorage returning true if successful +// or false if solution is impossible with current hasher (and presumably +// its seed) and number of "slots" (solution or banding rows). (A solution +// is impossible when there is a linear dependence among the inputs that +// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs. +// +// If UseBacktrack in the BacktrackStorage, this function call rolls back +// to prior state on failure. If !UseBacktrack, some subset of the entries +// will have been added to the BandingStorage, so best considered to be in +// an indeterminate state. 
+// +template +bool BandingAddRange(BandingStorage *bs, BacktrackStorage *bts, + const BandingHasher &bh, InputIterator begin, + InputIterator end) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + using Hash = typename BandingHasher::Hash; + + static_assert(IsUnsignedUpTo128::value, "must be unsigned"); + static_assert(IsUnsignedUpTo128::value, "must be unsigned"); + static_assert(IsUnsignedUpTo128::value, "must be unsigned"); + + constexpr bool kFCA1 = BandingHasher::kFirstCoeffAlwaysOne; + + if (begin == end) { + // trivial + return true; + } + + const Index num_starts = bs->GetNumStarts(); + + InputIterator cur = begin; + Index backtrack_pos = 0; + if (!bs->UsePrefetch()) { + // Simple version, no prefetch + for (;;) { + Hash h = bh.GetHash(*cur); + Index start = bh.GetStart(h, num_starts); + ResultRow rr = + bh.GetResultRowFromInput(*cur) | bh.GetResultRowFromHash(h); + CoeffRow cr = bh.GetCoeffRow(h); + + if (!BandingAdd(bs, start, rr, cr, bts, &backtrack_pos)) { + break; + } + if ((++cur) == end) { + return true; + } + } + } else { + // Pipelined w/prefetch + // Prime the pipeline + Hash h = bh.GetHash(*cur); + Index start = bh.GetStart(h, num_starts); + ResultRow rr = bh.GetResultRowFromInput(*cur); + bs->Prefetch(start); + + // Pipeline + for (;;) { + rr |= bh.GetResultRowFromHash(h); + CoeffRow cr = bh.GetCoeffRow(h); + if ((++cur) == end) { + if (!BandingAdd(bs, start, rr, cr, bts, &backtrack_pos)) { + break; + } + return true; + } + Hash next_h = bh.GetHash(*cur); + Index next_start = bh.GetStart(next_h, num_starts); + ResultRow next_rr = bh.GetResultRowFromInput(*cur); + bs->Prefetch(next_start); + if (!BandingAdd(bs, start, rr, cr, bts, &backtrack_pos)) { + break; + } + h = next_h; + start = next_start; + rr = next_rr; + } + } + // failed; backtrack (if implemented) + if (bts->UseBacktrack()) { + while (backtrack_pos > 0) { + --backtrack_pos; + Index i = bts->BacktrackGet(backtrack_pos); + // Clearing the ResultRow is not strictly required, but is required + // for good FP rate on inputs that might have been backtracked out. + // (We don't want anything we've backtracked on to leak into final + // result, as that might not be "harmless".) + bs->StoreRow(i, 0, 0); + } + } + return false; +} + +// Adds a range of entries to BandingStorage returning true if successful +// or false if solution is impossible with current hasher (and presumably +// its seed) and number of "slots" (solution or banding rows). (A solution +// is impossible when there is a linear dependence among the inputs that +// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs. +// +// On failure, some subset of the entries will have been added to the +// BandingStorage, so best considered to be in an indeterminate state. 
+// +template +bool BandingAddRange(BandingStorage *bs, const BandingHasher &bh, + InputIterator begin, InputIterator end) { + using Index = typename BandingStorage::Index; + struct NoopBacktrackStorage { + bool UseBacktrack() { return false; } + void BacktrackPut(Index, Index) {} + Index BacktrackGet(Index) { + assert(false); + return 0; + } + } nbts; + return BandingAddRange(bs, &nbts, bh, begin, end); +} + +// ###################################################################### +// ######################### Solution Storage ########################### + +// Back-substitution and query algorithms unfortunately depend on some +// details of data layout in the final data structure ("solution"). Thus, +// there is no common SolutionStorage covering all the reasonable +// possibilities. + +// ###################### SimpleSolutionStorage ######################### + +// SimpleSolutionStorage is for a row-major storage, typically with no +// unused bits in each ResultRow. This is mostly for demonstration +// purposes as the simplest solution storage scheme. It is relatively slow +// for filter queries. + +// concept SimpleSolutionStorage extends RibbonTypes { +// // This is called at the beginning of back-substitution for the +// // solution storage to do any remaining configuration before data +// // is stored to it. If configuration is previously finalized, this +// // could be a simple assertion or even no-op. Ribbon algorithms +// // only call this from back-substitution, and only once per call, +// // before other functions here. +// void PrepareForNumStarts(Index num_starts) const; +// // Must return num_starts passed to PrepareForNumStarts, or the most +// // recent call to PrepareForNumStarts if this storage object can be +// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because +// // there must be a run of kCoeffBits slots starting from each start. +// Index GetNumStarts() const; +// // Load the solution row (type ResultRow) for a slot +// ResultRow Load(Index slot_num) const; +// // Store the solution row (type ResultRow) for a slot +// void Store(Index slot_num, ResultRow data); +// }; + +// Back-substitution for generating a solution from BandingStorage to +// SimpleSolutionStorage. +template +void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &bs) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename SimpleSolutionStorage::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename SimpleSolutionStorage::CoeffRow), + "must be same"); + static_assert( + sizeof(ResultRow) == sizeof(typename SimpleSolutionStorage::ResultRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + constexpr auto kResultBits = static_cast(sizeof(ResultRow) * 8U); + + // A column-major buffer of the solution matrix, containing enough + // recently-computed solution data to compute the next solution row + // (based also on banding data). 
+ std::array state; + state.fill(0); + + const Index num_starts = bs.GetNumStarts(); + sss->PrepareForNumStarts(num_starts); + const Index num_slots = num_starts + kCoeffBits - 1; + + for (Index i = num_slots; i > 0;) { + --i; + CoeffRow cr; + ResultRow rr; + bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true); + // solution row + ResultRow sr = 0; + for (Index j = 0; j < kResultBits; ++j) { + // Compute next solution bit at row i, column j (see derivation below) + CoeffRow tmp = state[j] << 1; + bool bit = (BitParity(tmp & cr) ^ ((rr >> j) & 1)) != 0; + tmp |= bit ? CoeffRow{1} : CoeffRow{0}; + + // Now tmp is solution at column j from row i for next kCoeffBits + // more rows. Thus, for valid solution, the dot product of the + // solution column with the coefficient row has to equal the result + // at that column, + // BitParity(tmp & cr) == ((rr >> j) & 1) + + // Update state. + state[j] = tmp; + // add to solution row + sr |= (bit ? ResultRow{1} : ResultRow{0}) << j; + } + sss->Store(i, sr); + } +} + +// Common functionality for querying a key (already hashed) in +// SimpleSolutionStorage. +template +typename SimpleSolutionStorage::ResultRow SimpleQueryHelper( + typename SimpleSolutionStorage::Index start_slot, + typename SimpleSolutionStorage::CoeffRow cr, + const SimpleSolutionStorage &sss) { + using CoeffRow = typename SimpleSolutionStorage::CoeffRow; + using ResultRow = typename SimpleSolutionStorage::ResultRow; + + constexpr unsigned kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + ResultRow result = 0; + for (unsigned i = 0; i < kCoeffBits; ++i) { + // Bit masking whole value is generally faster here than 'if' + result ^= sss.Load(start_slot + i) & + (ResultRow{0} - (static_cast(cr >> i) & ResultRow{1})); + } + return result; +} + +// General PHSF query a key from SimpleSolutionStorage. +template +typename SimpleSolutionStorage::ResultRow SimplePhsfQuery( + const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher, + const SimpleSolutionStorage &sss) { + const typename PhsfQueryHasher::Hash hash = hasher.GetHash(key); + + static_assert(sizeof(typename SimpleSolutionStorage::Index) == + sizeof(typename PhsfQueryHasher::Index), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) == + sizeof(typename PhsfQueryHasher::CoeffRow), + "must be same"); + + return SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()), + hasher.GetCoeffRow(hash), sss); +} + +// Filter query a key from SimpleSolutionStorage. 
+template +bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key, + const FilterQueryHasher &hasher, + const SimpleSolutionStorage &sss) { + const typename FilterQueryHasher::Hash hash = hasher.GetHash(key); + const typename SimpleSolutionStorage::ResultRow expected = + hasher.GetResultRowFromHash(hash); + + static_assert(sizeof(typename SimpleSolutionStorage::Index) == + sizeof(typename FilterQueryHasher::Index), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) == + sizeof(typename FilterQueryHasher::CoeffRow), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::ResultRow) == + sizeof(typename FilterQueryHasher::ResultRow), + "must be same"); + + return expected == + SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()), + hasher.GetCoeffRow(hash), sss); +} + +// #################### InterleavedSolutionStorage ###################### + +// InterleavedSolutionStorage is row-major at a high level, for good +// locality, and column-major at a low level, for CPU efficiency +// especially in filter queries or relatively small number of result bits +// (== solution columns). The storage is a sequence of "blocks" where a +// block has one CoeffRow-sized segment for each solution column. Each +// query spans at most two blocks; the starting solution row is typically +// in the row-logical middle of a block and spans to the middle of the +// next block. (See diagram below.) +// +// InterleavedSolutionStorage supports choosing b (number of result or +// solution columns) at run time, and even supports mixing b and b-1 solution +// columns in a single linear system solution, for filters that can +// effectively utilize any size space (multiple of CoeffRow) for minimizing +// FP rate for any number of added keys. To simplify query implementation +// (with lower-index columns first), the b-bit portion comes after the b-1 +// portion of the structure. +// +// Diagram (=== marks logical block boundary; b=4; ### is data used by a +// query crossing the b-1 to b boundary, each Segment has type CoeffRow): +// ... +// +======================+ +// | S e g m e n t col=0 | +// +----------------------+ +// | S e g m e n t col=1 | +// +----------------------+ +// | S e g m e n t col=2 | +// +======================+ +// | S e g m e n #########| +// +----------------------+ +// | S e g m e n #########| +// +----------------------+ +// | S e g m e n #########| +// +======================+ Result/solution columns: above = 3, below = 4 +// |#############t col=0 | +// +----------------------+ +// |#############t col=1 | +// +----------------------+ +// |#############t col=2 | +// +----------------------+ +// | S e g m e n t col=3 | +// +======================+ +// | S e g m e n t col=0 | +// +----------------------+ +// | S e g m e n t col=1 | +// +----------------------+ +// | S e g m e n t col=2 | +// +----------------------+ +// | S e g m e n t col=3 | +// +======================+ +// ... +// +// InterleavedSolutionStorage will be adapted by the algorithms from +// simple array-like segment storage. That array-like storage is templatized +// in part so that an implementation may choose to handle byte ordering +// at access time. +// +// concept InterleavedSolutionStorage extends RibbonTypes { +// // This is called at the beginning of back-substitution for the +// // solution storage to do any remaining configuration before data +// // is stored to it. 
If configuration is previously finalized, this +// // could be a simple assertion or even no-op. Ribbon algorithms +// // only call this from back-substitution, and only once per call, +// // before other functions here. +// void PrepareForNumStarts(Index num_starts) const; +// // Must return num_starts passed to PrepareForNumStarts, or the most +// // recent call to PrepareForNumStarts if this storage object can be +// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because +// // there must be a run of kCoeffBits slots starting from each start. +// Index GetNumStarts() const; +// // The larger number of solution columns used (called "b" above). +// Index GetUpperNumColumns() const; +// // If returns > 0, then block numbers below that use +// // GetUpperNumColumns() - 1 columns per solution row, and the rest +// // use GetUpperNumColumns(). A block represents kCoeffBits "slots", +// // where all but the last kCoeffBits - 1 slots are also starts. And +// // a block contains a segment for each solution column. +// // An implementation may only support uniform columns per solution +// // row and return constant 0 here. +// Index GetUpperStartBlock() const; +// +// // ### "Array of segments" portion of API ### +// // The number of values of type CoeffRow used in this solution +// // representation. (This value can be inferred from the previous +// // three functions, but is expected at least for sanity / assertion +// // checking.) +// Index GetNumSegments() const; +// // Load an entry from the logical array of segments +// CoeffRow LoadSegment(Index segment_num) const; +// // Store an entry to the logical array of segments +// void StoreSegment(Index segment_num, CoeffRow data); +// }; + +// A helper for InterleavedBackSubst. +template +inline void BackSubstBlock(typename BandingStorage::CoeffRow *state, + typename BandingStorage::Index num_columns, + const BandingStorage &bs, + typename BandingStorage::Index start_slot) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + for (Index i = start_slot + kCoeffBits; i > start_slot;) { + --i; + CoeffRow cr; + ResultRow rr; + bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true); + for (Index j = 0; j < num_columns; ++j) { + // Compute next solution bit at row i, column j (see derivation below) + CoeffRow tmp = state[j] << 1; + int bit = BitParity(tmp & cr) ^ ((rr >> j) & 1); + tmp |= static_cast(bit); + + // Now tmp is solution at column j from row i for next kCoeffBits + // more rows. Thus, for valid solution, the dot product of the + // solution column with the coefficient row has to equal the result + // at that column, + // BitParity(tmp & cr) == ((rr >> j) & 1) + + // Update state. + state[j] = tmp; + } + } +} + +// Back-substitution for generating a solution from BandingStorage to +// InterleavedSolutionStorage. 
+template +void InterleavedBackSubst(InterleavedSolutionStorage *iss, + const BandingStorage &bs) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + + static_assert( + sizeof(Index) == sizeof(typename InterleavedSolutionStorage::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename InterleavedSolutionStorage::CoeffRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Index num_starts = bs.GetNumStarts(); + // Although it might be nice to have a filter that returns "always false" + // when no key is added, we aren't specifically supporting that here + // because it would require another condition branch in the query. + assert(num_starts > 0); + iss->PrepareForNumStarts(num_starts); + + const Index num_slots = num_starts + kCoeffBits - 1; + assert(num_slots % kCoeffBits == 0); + const Index num_blocks = num_slots / kCoeffBits; + const Index num_segments = iss->GetNumSegments(); + + // For now upper, then lower + Index num_columns = iss->GetUpperNumColumns(); + const Index upper_start_block = iss->GetUpperStartBlock(); + + if (num_columns == 0) { + // Nothing to do, presumably because there's not enough space for even + // a single segment. + assert(num_segments == 0); + // When num_columns == 0, a Ribbon filter query will always return true, + // or a PHSF query always 0. + return; + } + + // We should be utilizing all available segments + assert(num_segments == (upper_start_block * (num_columns - 1)) + + ((num_blocks - upper_start_block) * num_columns)); + + // TODO: consider fixed-column specializations with stack-allocated state + + // A column-major buffer of the solution matrix, containing enough + // recently-computed solution data to compute the next solution row + // (based also on banding data). + std::unique_ptr state{new CoeffRow[num_columns]()}; + + Index block = num_blocks; + Index segment_num = num_segments; + while (block > upper_start_block) { + --block; + BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits); + segment_num -= num_columns; + for (Index i = 0; i < num_columns; ++i) { + iss->StoreSegment(segment_num + i, state[i]); + } + } + // Now (if applicable), region using lower number of columns + // (This should be optimized away if GetUpperStartBlock() returns + // constant 0.) + --num_columns; + while (block > 0) { + --block; + BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits); + segment_num -= num_columns; + for (Index i = 0; i < num_columns; ++i) { + iss->StoreSegment(segment_num + i, state[i]); + } + } + // Verify everything processed + assert(block == 0); + assert(segment_num == 0); +} + +// Prefetch memory for a key in InterleavedSolutionStorage. 
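// A small sketch of the segment accounting asserted in InterleavedBackSubst
// above (Index assumed to be uint32_t): with b = GetUpperNumColumns(), the
// first upper_start_block blocks carry b - 1 segments each and the remaining
// blocks carry b segments each. For example, 10 blocks with
// upper_start_block = 4 and b = 7 gives 4*6 + 6*7 = 66 segments.
inline uint32_t TotalSegmentsSketch(uint32_t num_blocks,
                                    uint32_t upper_start_block,
                                    uint32_t upper_num_columns /* b */) {
  return upper_start_block * (upper_num_columns - 1) +
         (num_blocks - upper_start_block) * upper_num_columns;
}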
+template +inline void InterleavedPrepareQuery( + const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher, + const InterleavedSolutionStorage &iss, + typename PhsfQueryHasher::Hash *saved_hash, + typename InterleavedSolutionStorage::Index *saved_segment_num, + typename InterleavedSolutionStorage::Index *saved_num_columns, + typename InterleavedSolutionStorage::Index *saved_start_bit) { + using Hash = typename PhsfQueryHasher::Hash; + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + + static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index), + "must be same"); + + const Hash hash = hasher.GetHash(key); + const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts()); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Index upper_start_block = iss.GetUpperStartBlock(); + Index num_columns = iss.GetUpperNumColumns(); + Index start_block_num = start_slot / kCoeffBits; + Index segment_num = start_block_num * num_columns - + std::min(start_block_num, upper_start_block); + // Change to lower num columns if applicable. + // (This should not compile to a conditional branch.) + num_columns -= (start_block_num < upper_start_block) ? 1 : 0; + + Index start_bit = start_slot % kCoeffBits; + + Index segment_count = num_columns + (start_bit == 0 ? 0 : num_columns); + + iss.PrefetchSegmentRange(segment_num, segment_num + segment_count); + + *saved_hash = hash; + *saved_segment_num = segment_num; + *saved_num_columns = num_columns; + *saved_start_bit = start_bit; +} + +// General PHSF query from InterleavedSolutionStorage, using data for +// the query key from InterleavedPrepareQuery +template +inline typename InterleavedSolutionStorage::ResultRow InterleavedPhsfQuery( + typename PhsfQueryHasher::Hash hash, + typename InterleavedSolutionStorage::Index segment_num, + typename InterleavedSolutionStorage::Index num_columns, + typename InterleavedSolutionStorage::Index start_bit, + const PhsfQueryHasher &hasher, const InterleavedSolutionStorage &iss) { + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + using ResultRow = typename InterleavedSolutionStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index), + "must be same"); + static_assert(sizeof(CoeffRow) == sizeof(typename PhsfQueryHasher::CoeffRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const CoeffRow cr = hasher.GetCoeffRow(hash); + + ResultRow sr = 0; + const CoeffRow cr_left = cr << static_cast(start_bit); + for (Index i = 0; i < num_columns; ++i) { + sr ^= BitParity(iss.LoadSegment(segment_num + i) & cr_left) << i; + } + + if (start_bit > 0) { + segment_num += num_columns; + const CoeffRow cr_right = + cr >> static_cast(kCoeffBits - start_bit); + for (Index i = 0; i < num_columns; ++i) { + sr ^= BitParity(iss.LoadSegment(segment_num + i) & cr_right) << i; + } + } + + return sr; +} + +// Filter query a key from InterleavedFilterQuery. 
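// A usage sketch (names and batch size are illustrative assumptions) of the
// prepare / query split above: preparing several keys first issues their
// prefetches, so the subsequent InterleavedPhsfQuery calls tend to hit
// already-fetched cache lines. The filter-query variant below follows the
// same pattern with InterleavedFilterQuery.
template <typename InterleavedSolutionStorage, typename PhsfQueryHasher>
void PreparedBatchPhsfQuerySketch(
    const typename PhsfQueryHasher::Key *keys, int num_keys,
    const PhsfQueryHasher &hasher, const InterleavedSolutionStorage &iss,
    typename InterleavedSolutionStorage::ResultRow *results) {
  using Hash = typename PhsfQueryHasher::Hash;
  using Index = typename InterleavedSolutionStorage::Index;
  constexpr int kMaxBatch = 8;
  assert(num_keys <= kMaxBatch);
  Hash hashes[kMaxBatch];
  Index segment_nums[kMaxBatch];
  Index num_columns[kMaxBatch];
  Index start_bits[kMaxBatch];
  // Phase 1: hash each key and prefetch its solution segments.
  for (int i = 0; i < num_keys; ++i) {
    InterleavedPrepareQuery(keys[i], hasher, iss, &hashes[i], &segment_nums[i],
                            &num_columns[i], &start_bits[i]);
  }
  // Phase 2: resolve the queries against the (hopefully cached) segments.
  for (int i = 0; i < num_keys; ++i) {
    results[i] =
        InterleavedPhsfQuery(hashes[i], segment_nums[i], num_columns[i],
                             start_bits[i], hasher, iss);
  }
}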
+template +inline bool InterleavedFilterQuery( + typename FilterQueryHasher::Hash hash, + typename InterleavedSolutionStorage::Index segment_num, + typename InterleavedSolutionStorage::Index num_columns, + typename InterleavedSolutionStorage::Index start_bit, + const FilterQueryHasher &hasher, const InterleavedSolutionStorage &iss) { + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + using ResultRow = typename InterleavedSolutionStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename FilterQueryHasher::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename FilterQueryHasher::CoeffRow), + "must be same"); + static_assert( + sizeof(ResultRow) == sizeof(typename FilterQueryHasher::ResultRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const CoeffRow cr = hasher.GetCoeffRow(hash); + const ResultRow expected = hasher.GetResultRowFromHash(hash); + + // TODO: consider optimizations such as + // * get rid of start_bit == 0 condition with careful fetching & shifting + if (start_bit == 0) { + for (Index i = 0; i < num_columns; ++i) { + if (BitParity(iss.LoadSegment(segment_num + i) & cr) != + (static_cast(expected >> i) & 1)) { + return false; + } + } + } else { + const CoeffRow cr_left = cr << static_cast(start_bit); + const CoeffRow cr_right = + cr >> static_cast(kCoeffBits - start_bit); + + for (Index i = 0; i < num_columns; ++i) { + CoeffRow soln_data = + (iss.LoadSegment(segment_num + i) & cr_left) ^ + (iss.LoadSegment(segment_num + num_columns + i) & cr_right); + if (BitParity(soln_data) != (static_cast(expected >> i) & 1)) { + return false; + } + } + } + // otherwise, all match + return true; +} + +// TODO: refactor Interleaved*Query so that queries can be "prepared" by +// prefetching memory, to hide memory latency for multiple queries in a +// single thread. + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/ribbon_config.cc b/src/rocksdb/util/ribbon_config.cc new file mode 100644 index 000000000..c1046f4aa --- /dev/null +++ b/src/rocksdb/util/ribbon_config.cc @@ -0,0 +1,506 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/ribbon_config.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +namespace detail { + +// Each instantiation of this struct is sufficiently unique for configuration +// purposes, and is only instantiated for settings where we support the +// configuration API. An application might only reference one instantiation, +// meaning the rest could be pruned at link time. +template +struct BandingConfigHelperData { + static constexpr size_t kKnownSize = 18U; + + // Because of complexity in the data, for smaller numbers of slots + // (powers of two up to 2^17), we record known numbers that can be added + // with kCfc chance of construction failure and settings in template + // parameters. Zero means "unsupported (too small) number of slots". + // (GetNumToAdd below will use interpolation for numbers of slots + // between powers of two; double rather than integer values here make + // that more accurate.) 
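  // Worked sketch of that interpolation, assuming the first table below is
  // the kOneIn2, 128-bit, no-smash specialization (as in the upstream
  // ordering): num_slots = 3072 lies halfway between 2^11 and 2^12, whose
  // known values are 2029.47 and 4060.43, so GetNumToAdd returns about
  //   0.5 * 2029.47 + 0.5 * 4060.43 = ~3045 keys,
  // i.e. roughly 1.01x space overhead at that size.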
+ static const std::array kKnownToAddByPow2; + + // For sufficiently large number of slots, doubling the number of + // slots will increase the expected overhead (slots over number added) + // by approximately this constant. + // (This is roughly constant regardless of ConstructionFailureChance and + // smash setting.) + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetFactorPerPow2() { + if (kCoeffBits == 128U) { + return 0.0038; + } else { + assert(kCoeffBits == 64U); + return 0.0083; + } + } + + // Overhead factor for 2^(kKnownSize-1) slots + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetFinalKnownFactor() { + return 1.0 * (uint32_t{1} << (kKnownSize - 1)) / + kKnownToAddByPow2[kKnownSize - 1]; + } + + // GetFinalKnownFactor() - (kKnownSize-1) * GetFactorPerPow2() + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetBaseFactor() { + return GetFinalKnownFactor() - (kKnownSize - 1) * GetFactorPerPow2(); + } + + // Get overhead factor (slots over number to add) for sufficiently large + // number of slots (by log base 2) + static inline double GetFactorForLarge(double log2_num_slots) { + return GetBaseFactor() + log2_num_slots * GetFactorPerPow2(); + } + + // For a given power of two number of slots (specified by whole number + // log base 2), implements GetNumToAdd for such limited case, returning + // double for better interpolation in GetNumToAdd and GetNumSlots. + static inline double GetNumToAddForPow2(uint32_t log2_num_slots) { + assert(log2_num_slots <= 32); // help clang-analyze + if (log2_num_slots < kKnownSize) { + return kKnownToAddByPow2[log2_num_slots]; + } else { + return 1.0 * (uint64_t{1} << log2_num_slots) / + GetFactorForLarge(1.0 * log2_num_slots); + } + } +}; + +// Based on data from FindOccupancy in ribbon_test +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 252.984, + 506.109, + 1013.71, + 2029.47, + 4060.43, + 8115.63, + 16202.2, + 32305.1, + 64383.5, + 128274, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 126.274, + 254.279, + 510.27, + 1022.24, + 2046.02, + 4091.99, + 8154.98, + 16244.3, + 32349.7, + 64426.6, + 128307, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 124.94, + 249.968, + 501.234, + 1004.06, + 2006.15, + 3997.89, + 7946.99, + 15778.4, + 31306.9, + 62115.3, + 123284, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 62.2683, + 126.259, + 254.268, + 509.975, + 1019.98, + 2026.16, + 4019.75, + 7969.8, + 15798.2, + 31330.3, + 62134.2, + 123255, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 248.851, + 499.532, + 1001.26, + 2003.97, + 4005.59, + 8000.39, + 15966.6, + 31828.1, + 63447.3, + 126506, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 122.637, + 250.651, + 506.625, + 1018.54, + 2036.43, + 4041.6, + 8039.25, + 16005, + 31869.6, + 63492.8, + 126537, + }}; + +template <> +const std::array + 
BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 120.659, + 243.346, + 488.168, + 976.373, + 1948.86, + 3875.85, + 7704.97, + 15312.4, + 30395.1, + 60321.8, + 119813, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 58.6016, + 122.619, + 250.641, + 503.595, + 994.165, + 1967.36, + 3898.17, + 7727.21, + 15331.5, + 30405.8, + 60376.2, + 119836, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 242.61, + 491.887, + 983.603, + 1968.21, + 3926.98, + 7833.99, + 15629, + 31199.9, + 62307.8, + 123870, + }}; + +template <> +const std::array BandingConfigHelperData< + kOneIn1000, 128U, /*smash*/ true>::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 117.19, + 245.105, + 500.748, + 1010.67, + 1993.4, + 3950.01, + 7863.31, + 15652, + 31262.1, + 62462.8, + 124095, +}}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 114, + 234.8, + 471.498, + 940.165, + 1874, + 3721.5, + 7387.5, + 14592, + 29160, + 57745, + 115082, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{ + { + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 53.0434, + 117, + 245.312, + 483.571, + 950.251, + 1878, + 3736.34, + 7387.97, + 14618, + 29142.9, + 57838.8, + 114932, + }}; + +// We hide these implementation details from the .h file with explicit +// instantiations below these partial specializations. + +template +uint32_t BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + true /* kIsSupported */>::GetNumToAdd(uint32_t num_slots) { + using Data = detail::BandingConfigHelperData; + if (num_slots == 0) { + return 0; + } + uint32_t num_to_add; + double log2_num_slots = std::log(num_slots) * 1.4426950409; + uint32_t floor_log2 = static_cast(log2_num_slots); + if (floor_log2 + 1 < Data::kKnownSize) { + double ceil_portion = 1.0 * num_slots / (uint32_t{1} << floor_log2) - 1.0; + // Must be a supported number of slots + assert(Data::kKnownToAddByPow2[floor_log2] > 0.0); + // Weighted average of two nearest known data points + num_to_add = static_cast( + ceil_portion * Data::kKnownToAddByPow2[floor_log2 + 1] + + (1.0 - ceil_portion) * Data::kKnownToAddByPow2[floor_log2]); + } else { + // Use formula for large values + double factor = Data::GetFactorForLarge(log2_num_slots); + assert(factor >= 1.0); + num_to_add = static_cast(num_slots / factor); + } + if (kHomogeneous) { + // Even when standard filter construction would succeed, we might + // have loaded things up too much for Homogeneous filter. (Complete + // explanation not known but observed empirically.) This seems to + // correct for that, mostly affecting small filter configurations. 
+ if (num_to_add >= 8) { + num_to_add -= 8; + } else { + assert(false); + } + } + return num_to_add; +} + +template +uint32_t BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + true /* kIsSupported */>::GetNumSlots(uint32_t num_to_add) { + using Data = detail::BandingConfigHelperData; + + if (num_to_add == 0) { + return 0; + } + if (kHomogeneous) { + // Reverse of above in GetNumToAdd + num_to_add += 8; + } + double log2_num_to_add = std::log(num_to_add) * 1.4426950409; + uint32_t approx_log2_slots = static_cast(log2_num_to_add + 0.5); + assert(approx_log2_slots <= 32); // help clang-analyze + + double lower_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots); + double upper_num_to_add; + if (approx_log2_slots == 0 || lower_num_to_add == /* unsupported */ 0) { + // Return minimum non-zero slots in standard implementation + return kUseSmash ? kCoeffBits : 2 * kCoeffBits; + } else if (num_to_add < lower_num_to_add) { + upper_num_to_add = lower_num_to_add; + --approx_log2_slots; + lower_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots); + } else { + upper_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots + 1); + } + + assert(num_to_add >= lower_num_to_add); + assert(num_to_add < upper_num_to_add); + + double upper_portion = + (num_to_add - lower_num_to_add) / (upper_num_to_add - lower_num_to_add); + + double lower_num_slots = 1.0 * (uint64_t{1} << approx_log2_slots); + + // Interpolation, round up + return static_cast(upper_portion * lower_num_slots + + lower_num_slots + 0.999999999); +} + +// These explicit instantiations enable us to hide most of the +// implementation details from the .h file. (The .h file currently +// needs to determine whether settings are "supported" or not.) + +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; + +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; + +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ false, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ true, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ false, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ true, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 64U, /*sm*/ false, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 64U, /*sm*/ false, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported; + +} // namespace detail + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/ribbon_config.h 
b/src/rocksdb/util/ribbon_config.h new file mode 100644 index 000000000..0e3edf073 --- /dev/null +++ b/src/rocksdb/util/ribbon_config.h @@ -0,0 +1,182 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "port/lang.h" // for FALLTHROUGH_INTENDED +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_config.h: APIs for relating numbers of slots with numbers of +// additions for tolerable construction failure probabilities. This is +// separate from ribbon_impl.h because it might not be needed for +// some applications. +// +// This API assumes uint32_t for number of slots, as a single Ribbon +// linear system should not normally overflow that without big penalties. +// +// Template parameter kCoeffBits uses uint64_t for convenience in case it +// comes from size_t. +// +// Most of the complexity here is trying to optimize speed and +// compiled code size, using templates to minimize table look-ups and +// the compiled size of all linked look-up tables. Look-up tables are +// required because we don't have good formulas, and the data comes +// from running FindOccupancy in ribbon_test. + +// Represents a chosen chance of successful Ribbon construction for a single +// seed. Allowing higher chance of failed construction can reduce space +// overhead but takes extra time in construction. +enum ConstructionFailureChance { + kOneIn2, + kOneIn20, + // When using kHomogeneous==true, construction failure chance should + // not generally exceed target FP rate, so it unlikely useful to + // allow a higher "failure" chance. In some cases, even more overhead + // is appropriate. (TODO) + kOneIn1000, +}; + +namespace detail { + +// It is useful to compile ribbon_test linking to BandingConfigHelper with +// settings for which we do not have configuration data, as long as we don't +// run the code. This template hack supports that. +template +struct BandingConfigHelper1MaybeSupported { + public: + static uint32_t GetNumToAdd(uint32_t num_slots) { + // Unsupported + assert(num_slots == 0); + (void)num_slots; + return 0; + } + + static uint32_t GetNumSlots(uint32_t num_to_add) { + // Unsupported + assert(num_to_add == 0); + (void)num_to_add; + return 0; + } +}; + +// Base class for BandingConfigHelper1 and helper for BandingConfigHelper +// with core implementations built on above data +template +struct BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, true /* kIsSupported */> { + public: + // See BandingConfigHelper1. Implementation in ribbon_config.cc + static uint32_t GetNumToAdd(uint32_t num_slots); + + // See BandingConfigHelper1. Implementation in ribbon_config.cc + static uint32_t GetNumSlots(uint32_t num_to_add); +}; + +} // namespace detail + +template +struct BandingConfigHelper1 + : public detail::BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + /* kIsSupported */ kCoeffBits == 64 || kCoeffBits == 128> { + public: + // Returns a number of entries that can be added to a given number of + // slots, with roughly kCfc chance of construction failure per seed, + // or better. 
Does NOT do rounding for InterleavedSoln; call + // RoundUpNumSlots for that. + // + // inherited: + // static uint32_t GetNumToAdd(uint32_t num_slots); + + // Returns a number of slots for a given number of entries to add + // that should have roughly kCfc chance of construction failure per + // seed, or better. Does NOT do rounding for InterleavedSoln; call + // RoundUpNumSlots for that. + // + // num_to_add should not exceed roughly 2/3rds of the maximum value + // of the uint32_t type to avoid overflow. + // + // inherited: + // static uint32_t GetNumSlots(uint32_t num_to_add); +}; + +// Configured using TypesAndSettings as in ribbon_impl.h +template +struct BandingConfigHelper1TS + : public BandingConfigHelper1< + kCfc, + /* kCoeffBits */ sizeof(typename TypesAndSettings::CoeffRow) * 8U, + TypesAndSettings::kUseSmash, TypesAndSettings::kHomogeneous> {}; + +// Like BandingConfigHelper1TS except failure chance can be a runtime rather +// than compile time value. +template +struct BandingConfigHelper { + public: + static constexpr ConstructionFailureChance kDefaultFailureChance = + TypesAndSettings::kHomogeneous ? kOneIn1000 : kOneIn20; + + static uint32_t GetNumToAdd( + uint32_t num_slots, + ConstructionFailureChance max_failure = kDefaultFailureChance) { + switch (max_failure) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case kOneIn20: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + case kOneIn2: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + case kOneIn1000: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + } + } + + static uint32_t GetNumSlots( + uint32_t num_to_add, + ConstructionFailureChance max_failure = kDefaultFailureChance) { + switch (max_failure) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case kOneIn20: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + case kOneIn2: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + case kOneIn1000: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + } + } +}; + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/ribbon_impl.h b/src/rocksdb/util/ribbon_impl.h new file mode 100644 index 000000000..0afecc67d --- /dev/null +++ b/src/rocksdb/util/ribbon_impl.h @@ -0,0 +1,1137 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "port/port.h" // for PREFETCH +#include "util/fastrange.h" +#include "util/ribbon_alg.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_impl.h: templated (parameterized) standard implementations +// +// Ribbon is a Perfect Hash Static Function construction useful as a compact +// static Bloom filter alternative. See ribbon_alg.h for core algorithms +// and core design details. +// +// TODO: more details on trade-offs and practical issues. 
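// A typical configuration call sequence, as a sketch (MySettings stands in
// for a TypesAndSettings as described below; helper names per
// ribbon_config.h, which this file assumes is available):
//
//   using Config = ribbon::BandingConfigHelper<MySettings>;
//   uint32_t num_slots = Config::GetNumSlots(num_keys, ribbon::kOneIn20);
//   // Round for the interleaved layout before banding:
//   num_slots =
//       SerializableInterleavedSolution<MySettings>::RoundUpNumSlots(
//           num_slots);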
+// +// APIs for configuring Ribbon are in ribbon_config.h + +// Ribbon implementations in this file take these parameters, which must be +// provided in a class/struct type with members expressed in this concept: + +// concept TypesAndSettings { +// // See RibbonTypes and *Hasher in ribbon_alg.h, except here we have +// // the added constraint that Hash be equivalent to either uint32_t or +// // uint64_t. +// typename Hash; +// typename CoeffRow; +// typename ResultRow; +// typename Index; +// typename Key; +// static constexpr bool kFirstCoeffAlwaysOne; +// +// // An unsigned integer type for identifying a hash seed, typically +// // uint32_t or uint64_t. Importantly, this is the amount of data +// // stored in memory for identifying a raw seed. See StandardHasher. +// typename Seed; +// +// // When true, the PHSF implements a static filter, expecting just +// // keys as inputs for construction. When false, implements a general +// // PHSF and expects std::pair as inputs for +// // construction. +// static constexpr bool kIsFilter; +// +// // When true, enables a special "homogeneous" filter implementation that +// // is slightly faster to construct, and never fails to construct though +// // FP rate can quickly explode in cases where corresponding +// // non-homogeneous filter would fail (or nearly fail?) to construct. +// // For smaller filters, you can configure with ConstructionFailureChance +// // smaller than desired FP rate to largely counteract this effect. +// // TODO: configuring Homogeneous Ribbon for arbitrarily large filters +// // based on data from OptimizeHomogAtScale +// static constexpr bool kHomogeneous; +// +// // When true, adds a tiny bit more hashing logic on queries and +// // construction to improve utilization at the beginning and end of +// // the structure. Recommended when CoeffRow is only 64 bits (or +// // less), so typical num_starts < 10k. Although this is compatible +// // with kHomogeneous, the competing space vs. time priorities might +// // not be useful. +// static constexpr bool kUseSmash; +// +// // When true, allows number of "starts" to be zero, for best support +// // of the "no keys to add" case by always returning false for filter +// // queries. (This is distinct from the "keys added but no space for +// // any data" case, in which a filter always returns true.) The cost +// // supporting this is a conditional branch (probably predictable) in +// // queries. +// static constexpr bool kAllowZeroStarts; +// +// // A seedable stock hash function on Keys. All bits of Hash must +// // be reasonably high quality. XXH functions recommended, but +// // Murmur, City, Farm, etc. also work. +// static Hash HashFn(const Key &, Seed raw_seed); +// }; + +// A bit of a hack to automatically construct the type for +// AddInput based on a constexpr bool. 
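// As a concrete reading of the TypesAndSettings concept above, a filter
// configuration might look like the following sketch (the struct name, type
// choices, and hash helper are illustrative assumptions, not definitions
// from this file):
//
//   struct ExampleFilterSettings {
//     using CoeffRow = uint64_t;   // 64 coefficients per key
//     using ResultRow = uint8_t;   // 8 solution columns -> ~0.4% FP rate
//     using Index = uint32_t;
//     using Hash = uint64_t;
//     using Seed = uint32_t;
//     using Key = Slice;           // assumed key type
//     static constexpr bool kIsFilter = true;
//     static constexpr bool kHomogeneous = false;
//     static constexpr bool kFirstCoeffAlwaysOne = true;
//     static constexpr bool kUseSmash = false;
//     static constexpr bool kAllowZeroStarts = true;
//     static Hash HashFn(const Key& key, Seed raw_seed) {
//       return SomeSeedable64BitHash(key, raw_seed);  // assumed helper
//     }
//   };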
+template +struct AddInputSelector { + // For general PHSF, not filter + using T = std::pair; +}; + +template +struct AddInputSelector { + // For Filter + using T = Key; +}; + +// To avoid writing 'typename' everywhere that we use types like 'Index' +#define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \ + using CoeffRow = typename TypesAndSettings::CoeffRow; \ + using ResultRow = typename TypesAndSettings::ResultRow; \ + using Index = typename TypesAndSettings::Index; \ + using Hash = typename TypesAndSettings::Hash; \ + using Key = typename TypesAndSettings::Key; \ + using Seed = typename TypesAndSettings::Seed; \ + \ + /* Some more additions */ \ + using QueryInput = Key; \ + using AddInput = typename ROCKSDB_NAMESPACE::ribbon::AddInputSelector< \ + Key, ResultRow, TypesAndSettings::kIsFilter>::T; \ + static constexpr auto kCoeffBits = \ + static_cast(sizeof(CoeffRow) * 8U); \ + \ + /* Export to algorithm */ \ + static constexpr bool kFirstCoeffAlwaysOne = \ + TypesAndSettings::kFirstCoeffAlwaysOne; \ + \ + static_assert(sizeof(CoeffRow) + sizeof(ResultRow) + sizeof(Index) + \ + sizeof(Hash) + sizeof(Key) + sizeof(Seed) + \ + sizeof(QueryInput) + sizeof(AddInput) + kCoeffBits + \ + kFirstCoeffAlwaysOne > \ + 0, \ + "avoid unused warnings, semicolon expected after macro call") + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4309) // cast truncating constant +#pragma warning(disable : 4307) // arithmetic constant overflow +#endif + +// StandardHasher: A standard implementation of concepts RibbonTypes, +// PhsfQueryHasher, FilterQueryHasher, and BandingHasher from ribbon_alg.h. +// +// This implementation should be suitable for most all practical purposes +// as it "behaves" across a wide range of settings, with little room left +// for improvement. The key functionality in this hasher is generating +// CoeffRows, starts, and (for filters) ResultRows, which could be ~150 +// bits of data or more, from a modest hash of 64 or even just 32 bits, with +// enough uniformity and bitwise independence to be close to "the best you +// can do" with available hash information in terms of FP rate and +// compactness. (64 bits recommended and sufficient for PHSF practical +// purposes.) +// +// Another feature of this hasher is a minimal "premixing" of seeds before +// they are provided to TypesAndSettings::HashFn in case that function does +// not provide sufficiently independent hashes when iterating merely +// sequentially on seeds. (This for example works around a problem with the +// preview version 0.7.2 of XXH3 used in RocksDB, a.k.a. XXPH3 or Hash64, and +// MurmurHash1 used in RocksDB, a.k.a. Hash.) We say this pre-mixing step +// translates "ordinal seeds," which we iterate sequentially to find a +// solution, into "raw seeds," with many more bits changing for each +// iteration. The translation is an easily reversible lightweight mixing, +// not suitable for hashing on its own. An advantage of this approach is that +// StandardHasher can store just the raw seed (e.g. 64 bits) for fast query +// times, while from the application perspective, we can limit to a small +// number of ordinal keys (e.g. 64 in 6 bits) for saving in metadata. +// +// The default constructor initializes the seed to ordinal seed zero, which +// is equal to raw seed zero. 
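// A small usage sketch of the seed tracking described above (MySettings is
// an assumed TypesAndSettings):
//
//   StandardHasher<MySettings> hasher;
//   hasher.SetOrdinalSeed(3);              // small value, cheap to store in
//                                          // metadata (e.g. 6 bits)
//   auto raw = hasher.GetRawSeed();        // well-mixed value actually used
//                                          // for hashing and fast queries
//   assert(hasher.GetOrdinalSeed() == 3);  // the mixing is reversible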
+// +template +class StandardHasher { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + inline Hash GetHash(const Key& key) const { + return TypesAndSettings::HashFn(key, raw_seed_); + }; + // For when AddInput == pair (kIsFilter == false) + inline Hash GetHash(const std::pair& bi) const { + return GetHash(bi.first); + }; + inline Index GetStart(Hash h, Index num_starts) const { + // This is "critical path" code because it's required before memory + // lookup. + // + // FastRange gives us a fast and effective mapping from h to the + // appropriate range. This depends most, sometimes exclusively, on + // upper bits of h. + // + if (TypesAndSettings::kUseSmash) { + // Extra logic to "smash" entries at beginning and end, for + // better utilization. For example, without smash and with + // kFirstCoeffAlwaysOne, there's about a 30% chance that the + // first slot in the banding will be unused, and worse without + // kFirstCoeffAlwaysOne. The ending slots are even less utilized + // without smash. + // + // But since this only affects roughly kCoeffBits of the slots, + // it's usually small enough to be ignorable (less computation in + // this function) when number of slots is roughly 10k or larger. + // + // The best values for these smash weights might depend on how + // densely you're packing entries, and also kCoeffBits, but this + // seems to work well for roughly 95% success probability. + // + constexpr Index kFrontSmash = kCoeffBits / 4; + constexpr Index kBackSmash = kCoeffBits / 4; + Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash); + start = std::max(start, kFrontSmash); + start -= kFrontSmash; + start = std::min(start, num_starts - 1); + return start; + } else { + // For query speed, we allow small number of initial and final + // entries to be under-utilized. + // NOTE: This call statically enforces that Hash is equivalent to + // either uint32_t or uint64_t. + return FastRangeGeneric(h, num_starts); + } + } + inline CoeffRow GetCoeffRow(Hash h) const { + // This is not so much "critical path" code because it can be done in + // parallel (instruction level) with memory lookup. + // + // When we might have many entries squeezed into a single start, + // we need reasonably good remixing for CoeffRow. + if (TypesAndSettings::kUseSmash) { + // Reasonably good, reasonably fast, reasonably general. + // Probably not 1:1 but probably close enough. + Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1); + Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2); + auto cr = static_cast(b ^ (a << 64) ^ (a >> 64)); + + // Now ensure the value is non-zero + if (kFirstCoeffAlwaysOne) { + cr |= 1; + } else { + // Still have to ensure some bit is non-zero + cr |= (cr == 0) ? 1 : 0; + } + return cr; + } + // If not kUseSmash, we ensure we're not squeezing many entries into a + // single start, in part by ensuring num_starts > num_slots / 2. Thus, + // here we do not need good remixing for CoeffRow, but just enough that + // (a) every bit is reasonably independent from Start. + // (b) every Hash-length bit subsequence of the CoeffRow has full or + // nearly full entropy from h. + // (c) if nontrivial bit subsequences within are correlated, it needs to + // be more complicated than exact copy or bitwise not (at least without + // kFirstCoeffAlwaysOne), or else there seems to be a kind of + // correlated clustering effect. + // (d) the CoeffRow is not zero, so that no one input on its own can + // doom construction success. 
(Preferably a mix of 1's and 0's if + // satisfying above.) + + // First, establish sufficient bitwise independence from Start, with + // multiplication by a large random prime. + // Note that we cast to Hash because if we use product bits beyond + // original input size, that's going to correlate with Start (FastRange) + // even with a (likely) different multiplier here. + Hash a = h * kCoeffAndResultFactor; + + static_assert( + sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t), + "Supported sizes"); + // If that's big enough, we're done. If not, we have to expand it, + // maybe up to 4x size. + uint64_t b; + if (sizeof(Hash) < sizeof(uint64_t)) { + // Almost-trivial hash expansion (OK - see above), favoring roughly + // equal number of 1's and 0's in result + b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32); + } else { + b = a; + } + static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes"); + Unsigned128 c; + if (sizeof(uint64_t) < sizeof(CoeffRow)) { + // Almost-trivial hash expansion (OK - see above), favoring roughly + // equal number of 1's and 0's in result + c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64); + } else { + c = b; + } + auto cr = static_cast(c); + + // Now ensure the value is non-zero + if (kFirstCoeffAlwaysOne) { + cr |= 1; + } else if (sizeof(CoeffRow) == sizeof(Hash)) { + // Still have to ensure some bit is non-zero + cr |= (cr == 0) ? 1 : 0; + } else { + // (We did trivial expansion with constant xor, which ensures some + // bits are non-zero.) + } + return cr; + } + inline ResultRow GetResultRowMask() const { + // TODO: will be used with InterleavedSolutionStorage? + // For now, all bits set (note: might be a small type so might need to + // narrow after promotion) + return static_cast(~ResultRow{0}); + } + inline ResultRow GetResultRowFromHash(Hash h) const { + if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) { + // This is not so much "critical path" code because it can be done in + // parallel (instruction level) with memory lookup. + // + // ResultRow bits only needs to be independent from CoeffRow bits if + // many entries might have the same start location, where "many" is + // comparable to number of hash bits or kCoeffBits. If !kUseSmash + // and num_starts > kCoeffBits, it is safe and efficient to draw from + // the same bits computed for CoeffRow, which are reasonably + // independent from Start. (Inlining and common subexpression + // elimination with GetCoeffRow should make this + // a single shared multiplication in generated code when !kUseSmash.) + Hash a = h * kCoeffAndResultFactor; + + // The bits here that are *most* independent of Start are the highest + // order bits (as in Knuth multiplicative hash). To make those the + // most preferred for use in the result row, we do a bswap here. + auto rr = static_cast(EndianSwapValue(a)); + return rr & GetResultRowMask(); + } else { + // Must be zero + return 0; + } + } + // For when AddInput == Key (kIsFilter == true) + inline ResultRow GetResultRowFromInput(const Key&) const { + // Must be zero + return 0; + } + // For when AddInput == pair (kIsFilter == false) + inline ResultRow GetResultRowFromInput( + const std::pair& bi) const { + // Simple extraction + return bi.second; + } + + // Seed tracking APIs - see class comment + void SetRawSeed(Seed seed) { raw_seed_ = seed; } + Seed GetRawSeed() { return raw_seed_; } + void SetOrdinalSeed(Seed count) { + // A simple, reversible mixing of any size (whole bytes) up to 64 bits. 
+ // This allows casting the raw seed to any smaller size we use for + // ordinal seeds without risk of duplicate raw seeds for unique ordinal + // seeds. + + // Seed type might be smaller than numerical promotion size, but Hash + // should be at least that size, so we use Hash as intermediate type. + static_assert(sizeof(Seed) <= sizeof(Hash), + "Hash must be at least size of Seed"); + + // Multiply by a large random prime (one-to-one for any prefix of bits) + Hash tmp = count * kToRawSeedFactor; + // Within-byte one-to-one mixing + static_assert((kSeedMixMask & (kSeedMixMask >> kSeedMixShift)) == 0, + "Illegal mask+shift"); + tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift; + raw_seed_ = static_cast(tmp); + // dynamic verification + assert(GetOrdinalSeed() == count); + } + Seed GetOrdinalSeed() { + Hash tmp = raw_seed_; + // Within-byte one-to-one mixing (its own inverse) + tmp ^= (tmp & kSeedMixMask) >> kSeedMixShift; + // Multiply by 64-bit multiplicative inverse + static_assert(kToRawSeedFactor * kFromRawSeedFactor == Hash{1}, + "Must be inverses"); + return static_cast(tmp * kFromRawSeedFactor); + } + + protected: + // For expanding hash: + // large random prime + static constexpr Hash kCoeffAndResultFactor = + static_cast(0xc28f82822b650bedULL); + static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U; + static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU; + // random-ish data + static constexpr uint32_t kCoeffXor32 = 0xa6293635U; + static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU; + + // For pre-mixing seeds + static constexpr Hash kSeedMixMask = static_cast(0xf0f0f0f0f0f0f0f0ULL); + static constexpr unsigned kSeedMixShift = 4U; + static constexpr Hash kToRawSeedFactor = + static_cast(0xc78219a23eeadd03ULL); + static constexpr Hash kFromRawSeedFactor = + static_cast(0xfe1a137d14b475abULL); + + // See class description + Seed raw_seed_ = 0; +}; + +// StandardRehasher (and StandardRehasherAdapter): A variant of +// StandardHasher that uses the same type for keys as for hashes. +// This is primarily intended for building a Ribbon filter +// from existing hashes without going back to original inputs in +// order to apply a different seed. This hasher seeds a 1-to-1 mixing +// transformation to apply a seed to an existing hash. (Untested for +// hash-sized keys that are not already uniformly distributed.) This +// transformation builds on the seed pre-mixing done in StandardHasher. +// +// Testing suggests essentially no degradation of solution success rate +// vs. going back to original inputs when changing hash seeds. For example: +// Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys +// is about 1.10 for both StandardHasher and StandardRehasher. +// +// StandardRehasher is not really recommended for general PHSFs (not +// filters) because a collision in the original hash could prevent +// construction despite re-seeding the Rehasher. (Such collisions +// do not interfere with filter construction.) +// +// concept RehasherTypesAndSettings: like TypesAndSettings but +// does not require Key or HashFn. 
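// For example (a sketch; the settings names are assumptions): to build a
// Ribbon filter from previously stored 64-bit hashes rather than from the
// original keys, the adapter below can be plugged into the banding and
// hashing machinery:
//
//   // RehashSettings: like a TypesAndSettings, but without Key / HashFn
//   using RehashedSettings = StandardRehasherAdapter<RehashSettings>;
//   StandardBanding<RehashedSettings> banding;  // Key == Hash here
//   // ... add the stored hash values as if they were keys ...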
+template +class StandardRehasherAdapter : public RehasherTypesAndSettings { + public: + using Hash = typename RehasherTypesAndSettings::Hash; + using Key = Hash; + using Seed = typename RehasherTypesAndSettings::Seed; + + static Hash HashFn(const Hash& input, Seed raw_seed) { + // Note: raw_seed is already lightly pre-mixed, and this multiplication + // by a large prime is sufficient mixing (low-to-high bits) on top of + // that for good FastRange results, which depends primarily on highest + // bits. (The hashed CoeffRow and ResultRow are less sensitive to + // mixing than Start.) + // Also note: did consider adding ^ (input >> some) before the + // multiplication, but doesn't appear to be necessary. + return (input ^ raw_seed) * kRehashFactor; + } + + private: + static constexpr Hash kRehashFactor = + static_cast(0x6193d459236a3a0dULL); +}; + +// See comment on StandardRehasherAdapter +template +using StandardRehasher = + StandardHasher>; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +// Especially with smaller hashes (e.g. 32 bit), there can be noticeable +// false positives due to collisions in the Hash returned by GetHash. +// This function returns the expected FP rate due to those collisions, +// which can be added to the expected FP rate from the underlying data +// structure. (Note: technically, a + b is only a good approximation of +// 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.) +// The number of entries added can be a double here in case it's an +// average. +template +double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) { + // Standardize on the 'double' specialization + return ExpectedCollisionFpRate(hasher, 1.0 * added); +} +template +double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) { + // Technically, there could be overlap among the added, but ignoring that + // is typically close enough. + return added / std::pow(256.0, sizeof(typename Hasher::Hash)); +} + +// StandardBanding: a canonical implementation of BandingStorage and +// BacktrackStorage, with convenience API for banding (solving with on-the-fly +// Gaussian elimination) with and without backtracking. 
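// A worked instance of ExpectedCollisionFpRate above: with a 32-bit Hash and
// about one million added keys, the extra FP rate from hash collisions is
// roughly 1e6 / 2^32, i.e. about 0.00023 (0.023%), on top of the FP rate of
// the underlying structure; with a 64-bit Hash it is negligible (~5e-14).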
+template +class StandardBanding : public StandardHasher { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + StandardBanding(Index num_slots = 0, Index backtrack_size = 0) { + Reset(num_slots, backtrack_size); + } + + void Reset(Index num_slots, Index backtrack_size = 0) { + if (num_slots == 0) { + // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized" + num_starts_ = 0; + } else { + // Normal + assert(num_slots >= kCoeffBits); + if (num_slots > num_slots_allocated_) { + coeff_rows_.reset(new CoeffRow[num_slots]()); + if (!TypesAndSettings::kHomogeneous) { + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage, etc ;) + result_rows_.reset(new ResultRow[num_slots]()); + } + num_slots_allocated_ = num_slots; + } else { + for (Index i = 0; i < num_slots; ++i) { + coeff_rows_[i] = 0; + if (!TypesAndSettings::kHomogeneous) { + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage, etc ;) + result_rows_[i] = 0; + } + } + } + num_starts_ = num_slots - kCoeffBits + 1; + } + EnsureBacktrackSize(backtrack_size); + } + + void EnsureBacktrackSize(Index backtrack_size) { + if (backtrack_size > backtrack_size_) { + backtrack_.reset(new Index[backtrack_size]); + backtrack_size_ = backtrack_size; + } + } + + // ******************************************************************** + // From concept BandingStorage + + inline bool UsePrefetch() const { + // A rough guesstimate of when prefetching during construction pays off. + // TODO: verify/validate + return num_starts_ > 1500; + } + inline void Prefetch(Index i) const { + PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */); + if (!TypesAndSettings::kHomogeneous) { + PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */); + } + } + inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr, + bool for_back_subst) const { + *cr = coeff_rows_[i]; + if (TypesAndSettings::kHomogeneous) { + if (for_back_subst && *cr == 0) { + // Cheap pseudorandom data to fill unconstrained solution rows + *rr = static_cast(i * 0x9E3779B185EBCA87ULL); + } else { + *rr = 0; + } + } else { + *rr = result_rows_[i]; + } + } + inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) { + coeff_rows_[i] = cr; + if (TypesAndSettings::kHomogeneous) { + assert(rr == 0); + } else { + result_rows_[i] = rr; + } + } + inline Index GetNumStarts() const { return num_starts_; } + + // from concept BacktrackStorage, for when backtracking is used + inline bool UseBacktrack() const { return true; } + inline void BacktrackPut(Index i, Index to_save) { backtrack_[i] = to_save; } + inline Index BacktrackGet(Index i) const { return backtrack_[i]; } + + // ******************************************************************** + // Some useful API, still somewhat low level. Here an input is + // a Key for filters, or std::pair for general PHSF. + + // Adds a range of inputs to the banding, returning true if successful. + // False means none or some may have been successfully added, so it's + // best to Reset this banding before any further use. + // + // Adding can fail even before all the "slots" are completely "full". + // + template + bool AddRange(InputIterator begin, InputIterator end) { + assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Can't add any in this case. 
+ return begin == end; + } + // Normal + return BandingAddRange(this, *this, begin, end); + } + + // Adds a range of inputs to the banding, returning true if successful, + // or if unsuccessful, rolls back to state before this call and returns + // false. Caller guarantees that the number of inputs in this batch + // does not exceed `backtrack_size` provided to Reset. + // + // Adding can fail even before all the "slots" are completely "full". + // + template + bool AddRangeOrRollBack(InputIterator begin, InputIterator end) { + assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Can't add any in this case. + return begin == end; + } + // else Normal + return BandingAddRange(this, this, *this, begin, end); + } + + // Adds a single input to the banding, returning true if successful. + // If unsuccessful, returns false and banding state is unchanged. + // + // Adding can fail even before all the "slots" are completely "full". + // + bool Add(const AddInput& input) { + // Pointer can act as iterator + return AddRange(&input, &input + 1); + } + + // Return the number of "occupied" rows (with non-zero coefficients stored). + Index GetOccupiedCount() const { + Index count = 0; + if (num_starts_ > 0) { + const Index num_slots = num_starts_ + kCoeffBits - 1; + for (Index i = 0; i < num_slots; ++i) { + if (coeff_rows_[i] != 0) { + ++count; + } + } + } + return count; + } + + // Returns whether a row is "occupied" in the banding (non-zero + // coefficients stored). (Only recommended for debug/test) + bool IsOccupied(Index i) { return coeff_rows_[i] != 0; } + + // ******************************************************************** + // High-level API + + // Iteratively (a) resets the structure for `num_slots`, (b) attempts + // to add the range of inputs, and (c) if unsuccessful, chooses next + // hash seed, until either successful or unsuccessful with all the + // allowed seeds. Returns true if successful. In that case, use + // GetOrdinalSeed() or GetRawSeed() to get the successful seed. + // + // The allowed sequence of hash seeds is determined by + // `starting_ordinal_seed,` the first ordinal seed to be attempted + // (see StandardHasher), and `ordinal_seed_mask,` a bit mask (power of + // two minus one) for the range of ordinal seeds to consider. The + // max number of seeds considered will be ordinal_seed_mask + 1. + // For filters we suggest `starting_ordinal_seed` be chosen randomly + // or round-robin, to minimize false positive correlations between keys. + // + // If unsuccessful, how best to continue is going to be application + // specific. It should be possible to choose parameters such that + // failure is extremely unlikely, using max_seed around 32 to 64. + // (TODO: APIs to help choose parameters) One option for fallback in + // constructing a filter is to construct a Bloom filter instead. + // Increasing num_slots is an option, but should not be used often + // unless construction maximum latency is a concern (rather than + // average running time of construction). Instead, choose parameters + // appropriately and trust that seeds are independent. (Also, + // increasing num_slots without changing hash seed would have a + // significant correlation in success, rather than independence.) 
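  // For example (a sketch; MySettings, keys, and soln are assumed, with soln
  // an InterleavedSolutionStorage such as
  // SerializableInterleavedSolution<MySettings>):
  //
  //   StandardBanding<MySettings> banding;
  //   if (banding.ResetAndFindSeedToSolve(num_slots, keys.begin(),
  //                                       keys.end())) {
  //     soln.BackSubstFrom(banding);
  //     // Persist banding.GetOrdinalSeed() with the filter so queries can
  //     // reconstruct the hasher.
  //   } else {
  //     // All allowed seeds failed; fall back, e.g. to a Bloom filter.
  //   }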
+ template + bool ResetAndFindSeedToSolve(Index num_slots, InputIterator begin, + InputIterator end, + Seed starting_ordinal_seed = 0U, + Seed ordinal_seed_mask = 63U) { + // power of 2 minus 1 + assert((ordinal_seed_mask & (ordinal_seed_mask + 1)) == 0); + // starting seed is within mask + assert((starting_ordinal_seed & ordinal_seed_mask) == + starting_ordinal_seed); + starting_ordinal_seed &= ordinal_seed_mask; // if not debug + + Seed cur_ordinal_seed = starting_ordinal_seed; + do { + StandardHasher::SetOrdinalSeed(cur_ordinal_seed); + Reset(num_slots); + bool success = AddRange(begin, end); + if (success) { + return true; + } + cur_ordinal_seed = (cur_ordinal_seed + 1) & ordinal_seed_mask; + } while (cur_ordinal_seed != starting_ordinal_seed); + // Reached limit by circling around + return false; + } + + static std::size_t EstimateMemoryUsage(uint32_t num_slots) { + std::size_t bytes_coeff_rows = num_slots * sizeof(CoeffRow); + std::size_t bytes_result_rows = num_slots * sizeof(ResultRow); + std::size_t bytes_backtrack = 0; + std::size_t bytes_banding = + bytes_coeff_rows + bytes_result_rows + bytes_backtrack; + + return bytes_banding; + } + + protected: + // TODO: explore combining in a struct + std::unique_ptr coeff_rows_; + std::unique_ptr result_rows_; + // We generally store "starts" instead of slots for speed of GetStart(), + // as in StandardHasher. + Index num_starts_ = 0; + Index num_slots_allocated_ = 0; + std::unique_ptr backtrack_; + Index backtrack_size_ = 0; +}; + +// Implements concept SimpleSolutionStorage, mostly for demonstration +// purposes. This is "in memory" only because it does not handle byte +// ordering issues for serialization. +template +class InMemSimpleSolution { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + void PrepareForNumStarts(Index num_starts) { + if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) { + // Unusual + num_starts_ = 0; + } else { + // Normal + const Index num_slots = num_starts + kCoeffBits - 1; + assert(num_slots >= kCoeffBits); + if (num_slots > num_slots_allocated_) { + // Do not need to init the memory + solution_rows_.reset(new ResultRow[num_slots]); + num_slots_allocated_ = num_slots; + } + num_starts_ = num_starts; + } + } + + Index GetNumStarts() const { return num_starts_; } + + ResultRow Load(Index slot_num) const { return solution_rows_[slot_num]; } + + void Store(Index slot_num, ResultRow solution_row) { + solution_rows_[slot_num] = solution_row; + } + + // ******************************************************************** + // High-level API + + template + void BackSubstFrom(const BandingStorage& bs) { + if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { + // Unusual + PrepareForNumStarts(0); + } else { + // Normal + SimpleBackSubst(this, bs); + } + } + + template + ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { + // assert(!TypesAndSettings::kIsFilter); Can be useful in testing + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual + return 0; + } else { + // Normal + return SimplePhsfQuery(input, hasher, *this); + } + } + + template + bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Zero starts presumes no keys added -> always false + return false; + } else { + // Normal, or upper_num_columns_ == 0 means "no space for data" and + // thus will always return true. 
+ return SimpleFilterQuery(input, hasher, *this); + } + } + + double ExpectedFpRate() const { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual, but we don't have FPs if we always return false. + return 0.0; + } + // else Normal + + // Each result (solution) bit (column) cuts FP rate in half + return std::pow(0.5, 8U * sizeof(ResultRow)); + } + + // ******************************************************************** + // Static high-level API + + // Round up to a number of slots supported by this structure. Note that + // this needs to be must be taken into account for the banding if this + // solution layout/storage is to be used. + static Index RoundUpNumSlots(Index num_slots) { + // Must be at least kCoeffBits for at least one start + // Or if not smash, even more because hashing not equipped + // for stacking up so many entries on a single start location + auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2); + return std::max(num_slots, static_cast(min_slots)); + } + + protected: + // We generally store "starts" instead of slots for speed of GetStart(), + // as in StandardHasher. + Index num_starts_ = 0; + Index num_slots_allocated_ = 0; + std::unique_ptr solution_rows_; +}; + +// Implements concept InterleavedSolutionStorage always using little-endian +// byte order, so easy for serialization/deserialization. This implementation +// fully supports fractional bits per key, where any number of segments +// (number of bytes multiple of sizeof(CoeffRow)) can be used with any number +// of slots that is a multiple of kCoeffBits. +// +// The structure is passed an externally allocated/de-allocated byte buffer +// that is optionally pre-populated (from storage) for answering queries, +// or can be populated by BackSubstFrom. 
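// For example (a sketch; MySettings, num_slots, banding, and key are
// assumed, with num_slots already rounded via RoundUpNumSlots):
//
//   size_t bytes = SerializableInterleavedSolution<MySettings>::
//       GetBytesForFpRate(num_slots, /*desired_fp_rate=*/0.01,
//                         /*rounding_bias32=*/0);
//   std::unique_ptr<char[]> buf(new char[bytes]());
//   SerializableInterleavedSolution<MySettings> soln(buf.get(), bytes);
//   soln.BackSubstFrom(banding);   // populate from a solved banding, or
//                                  // reuse a buffer loaded from storage
//   bool may_match = soln.FilterQuery(key, /*hasher=*/banding);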
+// +template +class SerializableInterleavedSolution { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + // Does not take ownership of `data` but uses it (up to `data_len` bytes) + // throughout lifetime + SerializableInterleavedSolution(char* data, size_t data_len) + : data_(data), data_len_(data_len) {} + + void PrepareForNumStarts(Index num_starts) { + assert(num_starts == 0 || (num_starts % kCoeffBits == 1)); + num_starts_ = num_starts; + + InternalConfigure(); + } + + Index GetNumStarts() const { return num_starts_; } + + Index GetNumBlocks() const { + const Index num_slots = num_starts_ + kCoeffBits - 1; + return num_slots / kCoeffBits; + } + + Index GetUpperNumColumns() const { return upper_num_columns_; } + + Index GetUpperStartBlock() const { return upper_start_block_; } + + Index GetNumSegments() const { + return static_cast(data_len_ / sizeof(CoeffRow)); + } + + CoeffRow LoadSegment(Index segment_num) const { + assert(data_ != nullptr); // suppress clang analyzer report + return DecodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow)); + } + void StoreSegment(Index segment_num, CoeffRow val) { + assert(data_ != nullptr); // suppress clang analyzer report + EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val); + } + void PrefetchSegmentRange(Index begin_segment_num, + Index end_segment_num) const { + if (end_segment_num == begin_segment_num) { + // Nothing to do + return; + } + char* cur = data_ + begin_segment_num * sizeof(CoeffRow); + char* last = data_ + (end_segment_num - 1) * sizeof(CoeffRow); + while (cur < last) { + PREFETCH(cur, 0 /* rw */, 1 /* locality */); + cur += CACHE_LINE_SIZE; + } + PREFETCH(last, 0 /* rw */, 1 /* locality */); + } + + // ******************************************************************** + // High-level API + + void ConfigureForNumBlocks(Index num_blocks) { + if (num_blocks == 0) { + PrepareForNumStarts(0); + } else { + PrepareForNumStarts(num_blocks * kCoeffBits - kCoeffBits + 1); + } + } + + void ConfigureForNumSlots(Index num_slots) { + assert(num_slots % kCoeffBits == 0); + ConfigureForNumBlocks(num_slots / kCoeffBits); + } + + template + void BackSubstFrom(const BandingStorage& bs) { + if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { + // Unusual + PrepareForNumStarts(0); + } else { + // Normal + InterleavedBackSubst(this, bs); + } + } + + template + ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { + // assert(!TypesAndSettings::kIsFilter); Can be useful in testing + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual + return 0; + } else { + // Normal + // NOTE: not using a struct to encourage compiler optimization + Hash hash; + Index segment_num; + Index num_columns; + Index start_bit; + InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num, + &num_columns, &start_bit); + return InterleavedPhsfQuery(hash, segment_num, num_columns, start_bit, + hasher, *this); + } + } + + template + bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Zero starts presumes no keys added -> always false + return false; + } else { + // Normal, or upper_num_columns_ == 0 means "no space for data" and + // thus will always return true. 
+      // NOTE: not using a struct to encourage compiler optimization
+      Hash hash;
+      Index segment_num;
+      Index num_columns;
+      Index start_bit;
+      InterleavedPrepareQuery(input, hasher, *this, &hash, &segment_num,
+                              &num_columns, &start_bit);
+      return InterleavedFilterQuery(hash, segment_num, num_columns, start_bit,
+                                    hasher, *this);
+    }
+  }
+
+  double ExpectedFpRate() const {
+    assert(TypesAndSettings::kIsFilter);
+    if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) {
+      // Unusual. Zero starts presumes no keys added -> always false
+      return 0.0;
+    }
+    // else Normal
+
+    // Note: Ignoring smash setting; still close enough in that case
+    double lower_portion =
+        (upper_start_block_ * 1.0 * kCoeffBits) / num_starts_;
+
+    // Each result (solution) bit (column) cuts FP rate in half. Weight that
+    // for upper and lower number of bits (columns).
+    return lower_portion * std::pow(0.5, upper_num_columns_ - 1) +
+           (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_);
+  }
+
+  // ********************************************************************
+  // Static high-level API
+
+  // Round up to a number of slots supported by this structure. Note that
+  // this must be taken into account for the banding if this
+  // solution layout/storage is to be used.
+  static Index RoundUpNumSlots(Index num_slots) {
+    // Must be multiple of kCoeffBits
+    Index corrected = (num_slots + kCoeffBits - 1) / kCoeffBits * kCoeffBits;
+
+    // Do not use num_starts==1 unless kUseSmash, because the hashing
+    // might not be equipped for stacking up so many entries on a
+    // single start location.
+    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
+      corrected += kCoeffBits;
+    }
+    return corrected;
+  }
+
+  // Round down to a number of slots supported by this structure. Note that
+  // this must be taken into account for the banding if this
+  // solution layout/storage is to be used.
+  static Index RoundDownNumSlots(Index num_slots) {
+    // Must be multiple of kCoeffBits
+    Index corrected = num_slots / kCoeffBits * kCoeffBits;
+
+    // Do not use num_starts==1 unless kUseSmash, because the hashing
+    // might not be equipped for stacking up so many entries on a
+    // single start location.
+    if (!TypesAndSettings::kUseSmash && corrected == kCoeffBits) {
+      corrected = 0;
+    }
+    return corrected;
+  }
+
+  // Compute the number of bytes for a given number of slots and desired
+  // FP rate. Since desired FP rate might not be exactly achievable,
+  // rounding_bias32==0 means to always round toward lower FP rate
+  // than desired (more bytes); rounding_bias32==max uint32_t means always
+  // round toward higher FP rate than desired (fewer bytes); other values
+  // act as a proportional threshold or bias between the two.
+  static size_t GetBytesForFpRate(Index num_slots, double desired_fp_rate,
+                                  uint32_t rounding_bias32) {
+    return InternalGetBytesForFpRate(num_slots, desired_fp_rate,
+                                     1.0 / desired_fp_rate, rounding_bias32);
+  }
+
+  // The same, but specifying desired accuracy as 1.0 / FP rate, or
+  // one_in_fp_rate. E.g. desired_one_in_fp_rate=100 means 1% FP rate.
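As an aside before the sizing helpers continue below, a standalone sketch of the fractional-column weighting applied by the interleaved ExpectedFpRate above. It assumes num_starts is close to num_blocks * kCoeffBits, so fractions of blocks stand in for fractions of starts; the function and its names are illustrative, not part of the patch.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Illustrative only: blocks before upper_start_block carry one fewer
    // solution column than the rest, and each column halves the FP rate.
    double InterleavedFpRateSketch(uint32_t num_blocks, uint32_t num_segments) {
      uint32_t upper_cols = (num_segments + num_blocks - 1) / num_blocks;
      uint32_t upper_start_block = upper_cols * num_blocks - num_segments;
      double lower_portion = 1.0 * upper_start_block / num_blocks;
      return lower_portion * std::pow(0.5, upper_cols - 1) +
             (1.0 - lower_portion) * std::pow(0.5, upper_cols);
    }

    int main() {
      // 100 blocks, 750 segments -> 7.5 columns per slot on average,
      // so the rate lands between 1/128 and 1/256 (about 1/171 here).
      std::printf("%g\n", InterleavedFpRateSketch(100, 750));
      return 0;
    }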
+ static size_t GetBytesForOneInFpRate(Index num_slots, + double desired_one_in_fp_rate, + uint32_t rounding_bias32) { + return InternalGetBytesForFpRate(num_slots, 1.0 / desired_one_in_fp_rate, + desired_one_in_fp_rate, rounding_bias32); + } + + protected: + static size_t InternalGetBytesForFpRate(Index num_slots, + double desired_fp_rate, + double desired_one_in_fp_rate, + uint32_t rounding_bias32) { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts) { + if (num_slots == 0) { + // Unusual. Zero starts presumes no keys added -> always false (no FPs) + return 0U; + } + } else { + assert(num_slots > 0); + } + // Must be rounded up already. + assert(RoundUpNumSlots(num_slots) == num_slots); + + if (desired_one_in_fp_rate > 1.0 && desired_fp_rate < 1.0) { + // Typical: less than 100% FP rate + if (desired_one_in_fp_rate <= static_cast(-1)) { + // Typical: Less than maximum result row entropy + ResultRow rounded = static_cast(desired_one_in_fp_rate); + int lower_columns = FloorLog2(rounded); + double lower_columns_fp_rate = std::pow(2.0, -lower_columns); + double upper_columns_fp_rate = std::pow(2.0, -(lower_columns + 1)); + // Floating point don't let me down! + assert(lower_columns_fp_rate >= desired_fp_rate); + assert(upper_columns_fp_rate <= desired_fp_rate); + + double lower_portion = (desired_fp_rate - upper_columns_fp_rate) / + (lower_columns_fp_rate - upper_columns_fp_rate); + // Floating point don't let me down! + assert(lower_portion >= 0.0); + assert(lower_portion <= 1.0); + + double rounding_bias = (rounding_bias32 + 0.5) / double{0x100000000}; + assert(rounding_bias > 0.0); + assert(rounding_bias < 1.0); + + // Note: Ignoring smash setting; still close enough in that case + Index num_starts = num_slots - kCoeffBits + 1; + // Lower upper_start_block means lower FP rate (higher accuracy) + Index upper_start_block = static_cast( + (lower_portion * num_starts + rounding_bias) / kCoeffBits); + Index num_blocks = num_slots / kCoeffBits; + assert(upper_start_block < num_blocks); + + // Start by assuming all blocks use lower number of columns + Index num_segments = num_blocks * static_cast(lower_columns); + // Correct by 1 each for blocks using upper number of columns + num_segments += (num_blocks - upper_start_block); + // Total bytes + return num_segments * sizeof(CoeffRow); + } else { + // one_in_fp_rate too big, thus requested FP rate is smaller than + // supported. Use max number of columns for minimum supported FP rate. + return num_slots * sizeof(ResultRow); + } + } else { + // Effectively asking for 100% FP rate, or NaN etc. 
+ if (TypesAndSettings::kAllowZeroStarts) { + // Zero segments + return 0U; + } else { + // One segment (minimum size, maximizing FP rate) + return sizeof(CoeffRow); + } + } + } + + void InternalConfigure() { + const Index num_blocks = GetNumBlocks(); + Index num_segments = GetNumSegments(); + + if (num_blocks == 0) { + // Exceptional + upper_num_columns_ = 0; + upper_start_block_ = 0; + } else { + // Normal + upper_num_columns_ = + (num_segments + /*round up*/ num_blocks - 1) / num_blocks; + upper_start_block_ = upper_num_columns_ * num_blocks - num_segments; + // Unless that's more columns than supported by ResultRow data type + if (upper_num_columns_ > 8U * sizeof(ResultRow)) { + // Use maximum columns (there will be space unused) + upper_num_columns_ = static_cast(8U * sizeof(ResultRow)); + upper_start_block_ = 0; + num_segments = num_blocks * upper_num_columns_; + } + } + // Update data_len_ for correct rounding and/or unused space + // NOTE: unused space stays gone if we PrepareForNumStarts again. + // We are prioritizing minimizing the number of fields over making + // the "unusued space" feature work well. + data_len_ = num_segments * sizeof(CoeffRow); + } + + char* const data_; + size_t data_len_; + Index num_starts_ = 0; + Index upper_num_columns_ = 0; + Index upper_start_block_ = 0; +}; + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE + +// For convenience working with templates +#define IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings) \ + using Hasher = ROCKSDB_NAMESPACE::ribbon::StandardHasher; \ + using Banding = \ + ROCKSDB_NAMESPACE::ribbon::StandardBanding; \ + using SimpleSoln = \ + ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution; \ + using InterleavedSoln = \ + ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution< \ + TypesAndSettings>; \ + static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) + \ + sizeof(InterleavedSoln) > \ + 0, \ + "avoid unused warnings, semicolon expected after macro call") diff --git a/src/rocksdb/util/ribbon_test.cc b/src/rocksdb/util/ribbon_test.cc new file mode 100644 index 000000000..6519df3d5 --- /dev/null +++ b/src/rocksdb/util/ribbon_test.cc @@ -0,0 +1,1308 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "rocksdb/system_clock.h" +#include "test_util/testharness.h" +#include "util/bloom_impl.h" +#include "util/coding.h" +#include "util/hash.h" +#include "util/ribbon_config.h" +#include "util/ribbon_impl.h" +#include "util/stop_watch.h" +#include "util/string_util.h" + +#ifndef GFLAGS +uint32_t FLAGS_thoroughness = 5; +uint32_t FLAGS_max_add = 0; +uint32_t FLAGS_min_check = 4000; +uint32_t FLAGS_max_check = 100000; +bool FLAGS_verbose = false; + +bool FLAGS_find_occ = false; +bool FLAGS_find_slot_occ = false; +double FLAGS_find_next_factor = 1.618; +uint32_t FLAGS_find_iters = 10000; +uint32_t FLAGS_find_min_slots = 128; +uint32_t FLAGS_find_max_slots = 1000000; + +bool FLAGS_optimize_homog = false; +uint32_t FLAGS_optimize_homog_slots = 30000000; +uint32_t FLAGS_optimize_homog_check = 200000; +double FLAGS_optimize_homog_granularity = 0.002; +#else +#include "util/gflags_compat.h" +using GFLAGS_NAMESPACE::ParseCommandLineFlags; +// Using 500 is a good test when you have time to be thorough. +// Default is for general RocksDB regression test runs. 
+DEFINE_uint32(thoroughness, 5, "iterations per configuration"); +DEFINE_uint32(max_add, 0, + "Add up to this number of entries to a single filter in " + "CompactnessAndBacktrackAndFpRate; 0 == reasonable default"); +DEFINE_uint32(min_check, 4000, + "Minimum number of novel entries for testing FP rate"); +DEFINE_uint32(max_check, 10000, + "Maximum number of novel entries for testing FP rate"); +DEFINE_bool(verbose, false, "Print extra details"); + +// Options for FindOccupancy, which is more of a tool than a test. +DEFINE_bool(find_occ, false, "whether to run the FindOccupancy tool"); +DEFINE_bool(find_slot_occ, false, + "whether to show individual slot occupancies with " + "FindOccupancy tool"); +DEFINE_double(find_next_factor, 1.618, + "factor to next num_slots for FindOccupancy"); +DEFINE_uint32(find_iters, 10000, "number of samples for FindOccupancy"); +DEFINE_uint32(find_min_slots, 128, "number of slots for FindOccupancy"); +DEFINE_uint32(find_max_slots, 1000000, "number of slots for FindOccupancy"); + +// Options for OptimizeHomogAtScale, which is more of a tool than a test. +DEFINE_bool(optimize_homog, false, + "whether to run the OptimizeHomogAtScale tool"); +DEFINE_uint32(optimize_homog_slots, 30000000, + "number of slots for OptimizeHomogAtScale"); +DEFINE_uint32(optimize_homog_check, 200000, + "number of queries for checking FP rate in OptimizeHomogAtScale"); +DEFINE_double( + optimize_homog_granularity, 0.002, + "overhead change between FP rate checking in OptimizeHomogAtScale"); + +#endif // GFLAGS + +template +class RibbonTypeParamTest : public ::testing::Test {}; + +class RibbonTest : public ::testing::Test {}; + +namespace { + +// Different ways of generating keys for testing + +// Generate semi-sequential keys +struct StandardKeyGen { + StandardKeyGen(const std::string& prefix, uint64_t id) + : id_(id), str_(prefix) { + ROCKSDB_NAMESPACE::PutFixed64(&str_, /*placeholder*/ 0); + } + + // Prefix (only one required) + StandardKeyGen& operator++() { + ++id_; + return *this; + } + + StandardKeyGen& operator+=(uint64_t i) { + id_ += i; + return *this; + } + + const std::string& operator*() { + // Use multiplication to mix things up a little in the key + ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], + id_ * uint64_t{0x1500000001}); + return str_; + } + + bool operator==(const StandardKeyGen& other) { + // Same prefix is assumed + return id_ == other.id_; + } + bool operator!=(const StandardKeyGen& other) { + // Same prefix is assumed + return id_ != other.id_; + } + + uint64_t id_; + std::string str_; +}; + +// Generate small sequential keys, that can misbehave with sequential seeds +// as in https://github.com/Cyan4973/xxHash/issues/469. +// These keys are only heuristically unique, but that's OK with 64 bits, +// for testing purposes. 
+struct SmallKeyGen { + SmallKeyGen(const std::string& prefix, uint64_t id) : id_(id) { + // Hash the prefix for a heuristically unique offset + id_ += ROCKSDB_NAMESPACE::GetSliceHash64(prefix); + ROCKSDB_NAMESPACE::PutFixed64(&str_, id_); + } + + // Prefix (only one required) + SmallKeyGen& operator++() { + ++id_; + return *this; + } + + SmallKeyGen& operator+=(uint64_t i) { + id_ += i; + return *this; + } + + const std::string& operator*() { + ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], id_); + return str_; + } + + bool operator==(const SmallKeyGen& other) { return id_ == other.id_; } + bool operator!=(const SmallKeyGen& other) { return id_ != other.id_; } + + uint64_t id_; + std::string str_; +}; + +template +struct Hash32KeyGenWrapper : public KeyGen { + Hash32KeyGenWrapper(const std::string& prefix, uint64_t id) + : KeyGen(prefix, id) {} + uint32_t operator*() { + auto& key = *static_cast(*this); + // unseeded + return ROCKSDB_NAMESPACE::GetSliceHash(key); + } +}; + +template +struct Hash64KeyGenWrapper : public KeyGen { + Hash64KeyGenWrapper(const std::string& prefix, uint64_t id) + : KeyGen(prefix, id) {} + uint64_t operator*() { + auto& key = *static_cast(*this); + // unseeded + return ROCKSDB_NAMESPACE::GetSliceHash64(key); + } +}; + +using ROCKSDB_NAMESPACE::ribbon::ConstructionFailureChance; + +const std::vector kFailureOnly50Pct = { + ROCKSDB_NAMESPACE::ribbon::kOneIn2}; + +const std::vector kFailureOnlyRare = { + ROCKSDB_NAMESPACE::ribbon::kOneIn1000}; + +const std::vector kFailureAll = { + ROCKSDB_NAMESPACE::ribbon::kOneIn2, ROCKSDB_NAMESPACE::ribbon::kOneIn20, + ROCKSDB_NAMESPACE::ribbon::kOneIn1000}; + +} // namespace + +using ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate; +using ROCKSDB_NAMESPACE::ribbon::StandardHasher; +using ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter; + +struct DefaultTypesAndSettings { + using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128; + using ResultRow = uint8_t; + using Index = uint32_t; + using Hash = uint64_t; + using Seed = uint32_t; + using Key = ROCKSDB_NAMESPACE::Slice; + static constexpr bool kIsFilter = true; + static constexpr bool kHomogeneous = false; + static constexpr bool kFirstCoeffAlwaysOne = true; + static constexpr bool kUseSmash = false; + static constexpr bool kAllowZeroStarts = false; + static Hash HashFn(const Key& key, uint64_t raw_seed) { + // This version 0.7.2 preview of XXH3 (a.k.a. XXPH3) function does + // not pass SmallKeyGen tests below without some seed premixing from + // StandardHasher. 
See https://github.com/Cyan4973/xxHash/issues/469 + return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), raw_seed); + } + // For testing + using KeyGen = StandardKeyGen; + static const std::vector& FailureChanceToTest() { + return kFailureAll; + } +}; + +using TypesAndSettings_Coeff128 = DefaultTypesAndSettings; +struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings { + static constexpr bool kUseSmash = true; +}; +struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings { + using CoeffRow = uint64_t; +}; +struct TypesAndSettings_Coeff64Smash : public TypesAndSettings_Coeff64 { + static constexpr bool kUseSmash = true; +}; +struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash { + static constexpr bool kFirstCoeffAlwaysOne = false; +}; + +// Homogeneous Ribbon configurations +struct TypesAndSettings_Coeff128_Homog : public DefaultTypesAndSettings { + static constexpr bool kHomogeneous = true; + // Since our best construction success setting still has 1/1000 failure + // rate, the best FP rate we test is 1/256 + using ResultRow = uint8_t; + // Homogeneous only makes sense with sufficient slots for equivalent of + // almost sure construction success + static const std::vector& FailureChanceToTest() { + return kFailureOnlyRare; + } +}; +struct TypesAndSettings_Coeff128Smash_Homog + : public TypesAndSettings_Coeff128_Homog { + // Smash (extra time to save space) + Homog (extra space to save time) + // doesn't make much sense in practice, but we minimally test it + static constexpr bool kUseSmash = true; +}; +struct TypesAndSettings_Coeff64_Homog : public TypesAndSettings_Coeff128_Homog { + using CoeffRow = uint64_t; +}; +struct TypesAndSettings_Coeff64Smash_Homog + : public TypesAndSettings_Coeff64_Homog { + // Smash (extra time to save space) + Homog (extra space to save time) + // doesn't make much sense in practice, but we minimally test it + static constexpr bool kUseSmash = true; +}; + +// Less exhaustive mix of coverage, but still covering the most stressful case +// (only 50% construction success) +struct AbridgedTypesAndSettings : public DefaultTypesAndSettings { + static const std::vector& FailureChanceToTest() { + return kFailureOnly50Pct; + } +}; +struct TypesAndSettings_Result16 : public AbridgedTypesAndSettings { + using ResultRow = uint16_t; +}; +struct TypesAndSettings_Result32 : public AbridgedTypesAndSettings { + using ResultRow = uint32_t; +}; +struct TypesAndSettings_IndexSizeT : public AbridgedTypesAndSettings { + using Index = size_t; +}; +struct TypesAndSettings_Hash32 : public AbridgedTypesAndSettings { + using Hash = uint32_t; + static Hash HashFn(const Key& key, Hash raw_seed) { + // This MurmurHash1 function does not pass tests below without the + // seed premixing from StandardHasher. In fact, it needs more than + // just a multiplication mixer on the ordinal seed. + return ROCKSDB_NAMESPACE::Hash(key.data(), key.size(), raw_seed); + } +}; +struct TypesAndSettings_Hash32_Result16 : public AbridgedTypesAndSettings { + using ResultRow = uint16_t; +}; +struct TypesAndSettings_KeyString : public AbridgedTypesAndSettings { + using Key = std::string; +}; +struct TypesAndSettings_Seed8 : public AbridgedTypesAndSettings { + // This is not a generally recommended configuration. With the configured + // hash function, it would fail with SmallKeyGen due to insufficient + // independence among the seeds. 
+ using Seed = uint8_t; +}; +struct TypesAndSettings_NoAlwaysOne : public AbridgedTypesAndSettings { + static constexpr bool kFirstCoeffAlwaysOne = false; +}; +struct TypesAndSettings_AllowZeroStarts : public AbridgedTypesAndSettings { + static constexpr bool kAllowZeroStarts = true; +}; +struct TypesAndSettings_Seed64 : public AbridgedTypesAndSettings { + using Seed = uint64_t; +}; +struct TypesAndSettings_Rehasher + : public StandardRehasherAdapter { + using KeyGen = Hash64KeyGenWrapper; +}; +struct TypesAndSettings_Rehasher_Result16 : public TypesAndSettings_Rehasher { + using ResultRow = uint16_t; +}; +struct TypesAndSettings_Rehasher_Result32 : public TypesAndSettings_Rehasher { + using ResultRow = uint32_t; +}; +struct TypesAndSettings_Rehasher_Seed64 + : public StandardRehasherAdapter { + using KeyGen = Hash64KeyGenWrapper; + // Note: 64-bit seed with Rehasher gives slightly better average reseeds +}; +struct TypesAndSettings_Rehasher32 + : public StandardRehasherAdapter { + using KeyGen = Hash32KeyGenWrapper; +}; +struct TypesAndSettings_Rehasher32_Coeff64 + : public TypesAndSettings_Rehasher32 { + using CoeffRow = uint64_t; +}; +struct TypesAndSettings_SmallKeyGen : public AbridgedTypesAndSettings { + // SmallKeyGen stresses the independence of different hash seeds + using KeyGen = SmallKeyGen; +}; +struct TypesAndSettings_Hash32_SmallKeyGen : public TypesAndSettings_Hash32 { + // SmallKeyGen stresses the independence of different hash seeds + using KeyGen = SmallKeyGen; +}; +struct TypesAndSettings_Coeff32 : public DefaultTypesAndSettings { + using CoeffRow = uint32_t; +}; +struct TypesAndSettings_Coeff32Smash : public TypesAndSettings_Coeff32 { + static constexpr bool kUseSmash = true; +}; +struct TypesAndSettings_Coeff16 : public DefaultTypesAndSettings { + using CoeffRow = uint16_t; +}; +struct TypesAndSettings_Coeff16Smash : public TypesAndSettings_Coeff16 { + static constexpr bool kUseSmash = true; +}; + +using TestTypesAndSettings = ::testing::Types< + TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash, + TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash, + TypesAndSettings_Coeff64Smash0, TypesAndSettings_Coeff128_Homog, + TypesAndSettings_Coeff128Smash_Homog, TypesAndSettings_Coeff64_Homog, + TypesAndSettings_Coeff64Smash_Homog, TypesAndSettings_Result16, + TypesAndSettings_Result32, TypesAndSettings_IndexSizeT, + TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16, + TypesAndSettings_KeyString, TypesAndSettings_Seed8, + TypesAndSettings_NoAlwaysOne, TypesAndSettings_AllowZeroStarts, + TypesAndSettings_Seed64, TypesAndSettings_Rehasher, + TypesAndSettings_Rehasher_Result16, TypesAndSettings_Rehasher_Result32, + TypesAndSettings_Rehasher_Seed64, TypesAndSettings_Rehasher32, + TypesAndSettings_Rehasher32_Coeff64, TypesAndSettings_SmallKeyGen, + TypesAndSettings_Hash32_SmallKeyGen, TypesAndSettings_Coeff32, + TypesAndSettings_Coeff32Smash, TypesAndSettings_Coeff16, + TypesAndSettings_Coeff16Smash>; +TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings); + +namespace { + +// For testing Poisson-distributed (or similar) statistics, get value for +// `stddevs_allowed` standard deviations above expected mean +// `expected_count`. +// (Poisson approximates Binomial only if probability of a trial being +// in the count is low.) 
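To make the tolerance described above concrete: with 100000 novel queries against an 8-bit ResultRow filter, about 390 false positives are expected, and a 5-standard-deviation Poisson window allows roughly 100 either way. A standalone worked example, illustrative only and merely mirroring the helpers that follow:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Illustrative only: bounds at `stddevs` standard deviations around a
    // Poisson mean, as used by the test statistics below.
    uint64_t UpperBound(double mean, double stddevs) {
      return static_cast<uint64_t>(mean + stddevs * std::sqrt(mean) + 1.0);
    }
    uint64_t LowerBound(double mean, double stddevs) {
      return static_cast<uint64_t>(std::max(0.0, mean - stddevs * std::sqrt(mean)));
    }

    int main() {
      // 100000 novel queries, 8-bit result rows -> FP rate 1/256
      double expected_fps = 100000 * std::pow(0.5, 8);
      std::printf("expected ~%g FPs, accept [%llu, %llu]\n", expected_fps,
                  (unsigned long long)LowerBound(expected_fps, 5.0),
                  (unsigned long long)UpperBound(expected_fps, 5.0));
      return 0;
    }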
+uint64_t PoissonUpperBound(double expected_count, double stddevs_allowed) { + return static_cast( + expected_count + stddevs_allowed * std::sqrt(expected_count) + 1.0); +} + +uint64_t PoissonLowerBound(double expected_count, double stddevs_allowed) { + return static_cast(std::max( + 0.0, expected_count - stddevs_allowed * std::sqrt(expected_count))); +} + +uint64_t FrequentPoissonUpperBound(double expected_count) { + // Allow up to 5.0 standard deviations for frequently checked statistics + return PoissonUpperBound(expected_count, 5.0); +} + +uint64_t FrequentPoissonLowerBound(double expected_count) { + return PoissonLowerBound(expected_count, 5.0); +} + +uint64_t InfrequentPoissonUpperBound(double expected_count) { + // Allow up to 3 standard deviations for infrequently checked statistics + return PoissonUpperBound(expected_count, 3.0); +} + +uint64_t InfrequentPoissonLowerBound(double expected_count) { + return PoissonLowerBound(expected_count, 3.0); +} + +} // namespace + +TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); + IMPORT_RIBBON_IMPL_TYPES(TypeParam); + using KeyGen = typename TypeParam::KeyGen; + using ConfigHelper = + ROCKSDB_NAMESPACE::ribbon::BandingConfigHelper; + + if (sizeof(CoeffRow) < 8) { + ROCKSDB_GTEST_BYPASS("Not fully supported"); + return; + } + + const auto log2_thoroughness = + static_cast(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness)); + + // We are going to choose num_to_add using an exponential distribution, + // so that we have good representation of small-to-medium filters. + // Here we just pick some reasonable, practical upper bound based on + // kCoeffBits or option. + const double log_max_add = std::log( + FLAGS_max_add > 0 ? FLAGS_max_add + : static_cast(kCoeffBits * kCoeffBits) * + std::max(FLAGS_thoroughness, uint32_t{32})); + + // This needs to be enough below the minimum number of slots to get a + // reasonable number of samples with the minimum number of slots. 
+ const double log_min_add = std::log(0.66 * SimpleSoln::RoundUpNumSlots(1)); + + ASSERT_GT(log_max_add, log_min_add); + + const double diff_log_add = log_max_add - log_min_add; + + for (ConstructionFailureChance cs : TypeParam::FailureChanceToTest()) { + double expected_reseeds; + switch (cs) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case ROCKSDB_NAMESPACE::ribbon::kOneIn2: + fprintf(stderr, "== Failure: 50 percent\n"); + expected_reseeds = 1.0; + break; + case ROCKSDB_NAMESPACE::ribbon::kOneIn20: + fprintf(stderr, "== Failure: 95 percent\n"); + expected_reseeds = 0.053; + break; + case ROCKSDB_NAMESPACE::ribbon::kOneIn1000: + fprintf(stderr, "== Failure: 1/1000\n"); + expected_reseeds = 0.001; + break; + } + + uint64_t total_reseeds = 0; + uint64_t total_singles = 0; + uint64_t total_single_failures = 0; + uint64_t total_batch = 0; + uint64_t total_batch_successes = 0; + uint64_t total_fp_count = 0; + uint64_t total_added = 0; + uint64_t total_expand_trials = 0; + uint64_t total_expand_failures = 0; + double total_expand_overhead = 0.0; + + uint64_t soln_query_nanos = 0; + uint64_t soln_query_count = 0; + uint64_t bloom_query_nanos = 0; + uint64_t isoln_query_nanos = 0; + uint64_t isoln_query_count = 0; + + // Take different samples if you change thoroughness + ROCKSDB_NAMESPACE::Random32 rnd(FLAGS_thoroughness); + + for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) { + // We are going to choose num_to_add using an exponential distribution + // as noted above, but instead of randomly choosing them, we generate + // samples linearly using the golden ratio, which ensures a nice spread + // even for a small number of samples, and starting with the minimum + // number of slots to ensure it is tested. + double log_add = + std::fmod(0.6180339887498948482 * diff_log_add * i, diff_log_add) + + log_min_add; + uint32_t num_to_add = static_cast(std::exp(log_add)); + + // Most of the time, test the Interleaved solution storage, but when + // we do we have to make num_slots a multiple of kCoeffBits. So + // sometimes we want to test without that limitation. + bool test_interleaved = (i % 7) != 6; + + // Compute num_slots, and re-adjust num_to_add to get as close as possible + // to next num_slots, to stress that num_slots in terms of construction + // success. 
Ensure at least one iteration: + Index num_slots = Index{0} - 1; + --num_to_add; + for (;;) { + Index next_num_slots = SimpleSoln::RoundUpNumSlots( + ConfigHelper::GetNumSlots(num_to_add + 1, cs)); + if (test_interleaved) { + next_num_slots = InterleavedSoln::RoundUpNumSlots(next_num_slots); + // assert idempotent + EXPECT_EQ(next_num_slots, + InterleavedSoln::RoundUpNumSlots(next_num_slots)); + } + // assert idempotent with InterleavedSoln::RoundUpNumSlots + EXPECT_EQ(next_num_slots, SimpleSoln::RoundUpNumSlots(next_num_slots)); + + if (next_num_slots > num_slots) { + break; + } + num_slots = next_num_slots; + ++num_to_add; + } + assert(num_slots < Index{0} - 1); + + total_added += num_to_add; + + std::string prefix; + ROCKSDB_NAMESPACE::PutFixed32(&prefix, rnd.Next()); + + // Batch that must be added + std::string added_str = prefix + "added"; + KeyGen keys_begin(added_str, 0); + KeyGen keys_end(added_str, num_to_add); + + // A couple more that will probably be added + KeyGen one_more(prefix + "more", 1); + KeyGen two_more(prefix + "more", 2); + + // Batch that may or may not be added + uint32_t batch_size = + static_cast(2.0 * std::sqrt(num_slots - num_to_add)); + if (batch_size < 10U) { + batch_size = 0; + } + std::string batch_str = prefix + "batch"; + KeyGen batch_begin(batch_str, 0); + KeyGen batch_end(batch_str, batch_size); + + // Batch never (successfully) added, but used for querying FP rate + std::string not_str = prefix + "not"; + KeyGen other_keys_begin(not_str, 0); + KeyGen other_keys_end(not_str, FLAGS_max_check); + + double overhead_ratio = 1.0 * num_slots / num_to_add; + if (FLAGS_verbose) { + fprintf(stderr, "Adding(%s) %u / %u Overhead: %g Batch size: %u\n", + test_interleaved ? "i" : "s", (unsigned)num_to_add, + (unsigned)num_slots, overhead_ratio, (unsigned)batch_size); + } + + // Vary bytes for InterleavedSoln to use number of solution columns + // from 0 to max allowed by ResultRow type (and used by SimpleSoln). + // Specifically include 0 and max, and otherwise skew toward max. + uint32_t max_ibytes = + static_cast(sizeof(ResultRow) * num_slots); + size_t ibytes; + if (i == 0) { + ibytes = 0; + } else if (i == 1) { + ibytes = max_ibytes; + } else { + // Skewed + ibytes = + std::max(rnd.Uniformish(max_ibytes), rnd.Uniformish(max_ibytes)); + } + std::unique_ptr idata(new char[ibytes]); + InterleavedSoln isoln(idata.get(), ibytes); + + SimpleSoln soln; + Hasher hasher; + bool first_single; + bool second_single; + bool batch_success; + { + Banding banding; + // Traditional solve for a fixed set. + ASSERT_TRUE( + banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end)); + + Index occupied_count = banding.GetOccupiedCount(); + Index more_added = 0; + + if (TypeParam::kHomogeneous || overhead_ratio < 1.01 || + batch_size == 0) { + // Homogeneous not compatible with backtracking because add + // doesn't fail. Small overhead ratio too packed to expect more + first_single = false; + second_single = false; + batch_success = false; + } else { + // Now to test backtracking, starting with guaranteed fail. By using + // the keys that will be used to test FP rate, we are then doing an + // extra check that after backtracking there are no remnants (e.g. in + // result side of banding) of these entries. 
+ KeyGen other_keys_too_big_end = other_keys_begin; + other_keys_too_big_end += num_to_add; + banding.EnsureBacktrackSize(std::max(num_to_add, batch_size)); + EXPECT_FALSE(banding.AddRangeOrRollBack(other_keys_begin, + other_keys_too_big_end)); + EXPECT_EQ(occupied_count, banding.GetOccupiedCount()); + + // Check that we still have a good chance of adding a couple more + // individually + first_single = banding.Add(*one_more); + second_single = banding.Add(*two_more); + more_added += (first_single ? 1 : 0) + (second_single ? 1 : 0); + total_singles += 2U; + total_single_failures += 2U - more_added; + + // Or as a batch + batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end); + ++total_batch; + if (batch_success) { + more_added += batch_size; + ++total_batch_successes; + } + EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + } + + // Also verify that redundant adds are OK (no effect) + ASSERT_TRUE( + banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8))); + EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + + // Now back-substitution + soln.BackSubstFrom(banding); + if (test_interleaved) { + isoln.BackSubstFrom(banding); + } + + Seed reseeds = banding.GetOrdinalSeed(); + total_reseeds += reseeds; + + EXPECT_LE(reseeds, 8 + log2_thoroughness); + if (reseeds > log2_thoroughness + 1) { + fprintf( + stderr, "%s high reseeds at %u, %u/%u: %u\n", + reseeds > log2_thoroughness + 8 ? "ERROR Extremely" : "Somewhat", + static_cast(i), static_cast(num_to_add), + static_cast(num_slots), static_cast(reseeds)); + } + + if (reseeds > 0) { + // "Expand" test: given a failed construction, how likely is it to + // pass with same seed and more slots. At each step, we increase + // enough to ensure there is at least one shift within each coeff + // block. + ++total_expand_trials; + Index expand_count = 0; + Index ex_slots = num_slots; + banding.SetOrdinalSeed(0); + for (;; ++expand_count) { + ASSERT_LE(expand_count, log2_thoroughness); + ex_slots += ex_slots / kCoeffBits; + if (test_interleaved) { + ex_slots = InterleavedSoln::RoundUpNumSlots(ex_slots); + } + banding.Reset(ex_slots); + bool success = banding.AddRange(keys_begin, keys_end); + if (success) { + break; + } + } + total_expand_failures += expand_count; + total_expand_overhead += 1.0 * (ex_slots - num_slots) / num_slots; + } + + hasher.SetOrdinalSeed(reseeds); + } + // soln and hasher now independent of Banding object + + // Verify keys added + KeyGen cur = keys_begin; + while (cur != keys_end) { + ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); + ++cur; + } + // We (maybe) snuck these in! + if (first_single) { + ASSERT_TRUE(soln.FilterQuery(*one_more, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*one_more, hasher)); + } + if (second_single) { + ASSERT_TRUE(soln.FilterQuery(*two_more, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*two_more, hasher)); + } + if (batch_success) { + cur = batch_begin; + while (cur != batch_end) { + ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); + ++cur; + } + } + + // Check FP rate (depends only on number of result bits == solution + // columns) + Index fp_count = 0; + cur = other_keys_begin; + { + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); + while (cur != other_keys_end) { + bool fp = soln.FilterQuery(*cur, hasher); + fp_count += fp ? 
1 : 0; + ++cur; + } + soln_query_nanos += timer.ElapsedNanos(); + soln_query_count += FLAGS_max_check; + } + { + double expected_fp_count = soln.ExpectedFpRate() * FLAGS_max_check; + // For expected FP rate, also include false positives due to collisions + // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) + double correction = + FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add); + + // NOTE: rare violations expected with kHomogeneous + EXPECT_LE(fp_count, + FrequentPoissonUpperBound(expected_fp_count + correction)); + EXPECT_GE(fp_count, + FrequentPoissonLowerBound(expected_fp_count + correction)); + } + total_fp_count += fp_count; + + // And also check FP rate for isoln + if (test_interleaved) { + Index ifp_count = 0; + cur = other_keys_begin; + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); + while (cur != other_keys_end) { + ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0; + ++cur; + } + isoln_query_nanos += timer.ElapsedNanos(); + isoln_query_count += FLAGS_max_check; + { + double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check; + // For expected FP rate, also include false positives due to + // collisions in Hash value. (Negligible for 64-bit, can matter for + // 32-bit.) + double correction = + FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add); + + // NOTE: rare violations expected with kHomogeneous + EXPECT_LE(ifp_count, + FrequentPoissonUpperBound(expected_fp_count + correction)); + + // FIXME: why sometimes can we slightly "beat the odds"? + // (0.95 factor should not be needed) + EXPECT_GE(ifp_count, FrequentPoissonLowerBound( + 0.95 * expected_fp_count + correction)); + } + // Since the bits used in isoln are a subset of the bits used in soln, + // it cannot have fewer FPs + EXPECT_GE(ifp_count, fp_count); + } + + // And compare to Bloom time, for fun + if (ibytes >= /* minimum Bloom impl bytes*/ 64) { + Index bfp_count = 0; + cur = other_keys_begin; + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default().get(), true); + while (cur != other_keys_end) { + uint64_t h = hasher.GetHash(*cur); + uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h); + uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h) + : h1 * 0x9e3779b9; + bfp_count += + ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch( + h1, h2, static_cast(ibytes), 6, idata.get()) + ? 1 + : 0; + ++cur; + } + bloom_query_nanos += timer.ElapsedNanos(); + // ensure bfp_count is used + ASSERT_LT(bfp_count, FLAGS_max_check); + } + } + + // "outside" == key not in original set so either negative or false positive + fprintf(stderr, + "Simple outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * soln_query_nanos / soln_query_count); + fprintf(stderr, + "Interleaved outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * isoln_query_nanos / isoln_query_count); + fprintf(stderr, + "Bloom outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * bloom_query_nanos / soln_query_count); + + if (TypeParam::kHomogeneous) { + EXPECT_EQ(total_reseeds, 0U); + } else { + double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness; + fprintf(stderr, "Average re-seeds: %g\n", average_reseeds); + // Values above were chosen to target around 50% chance of encoding + // success rate (average of 1.0 re-seeds) or slightly better. But 1.15 is + // also close enough. 
+ EXPECT_LE(total_reseeds, + InfrequentPoissonUpperBound(1.15 * expected_reseeds * + FLAGS_thoroughness)); + // Would use 0.85 here instead of 0.75, but + // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of + // sequential keys with a small, cheap hash function. We accept that + // there are surely inputs that are somewhat bad for this setup, but + // these somewhat good inputs are probably more likely. + EXPECT_GE(total_reseeds, + InfrequentPoissonLowerBound(0.75 * expected_reseeds * + FLAGS_thoroughness)); + } + + if (total_expand_trials > 0) { + double average_expand_failures = + 1.0 * total_expand_failures / total_expand_trials; + fprintf(stderr, "Average expand failures, and overhead: %g, %g\n", + average_expand_failures, + total_expand_overhead / total_expand_trials); + // Seems to be a generous allowance + EXPECT_LE(total_expand_failures, + InfrequentPoissonUpperBound(1.0 * total_expand_trials)); + } else { + fprintf(stderr, "Average expand failures: N/A\n"); + } + + if (total_singles > 0) { + double single_failure_rate = 1.0 * total_single_failures / total_singles; + fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate); + // A rough bound (one sided) based on nothing in particular + double expected_single_failures = 1.0 * total_singles / + (sizeof(CoeffRow) == 16 ? 128 + : TypeParam::kUseSmash ? 64 + : 32); + EXPECT_LE(total_single_failures, + InfrequentPoissonUpperBound(expected_single_failures)); + } + + if (total_batch > 0) { + // Counting successes here for Poisson to approximate the Binomial + // distribution. + // A rough bound (one sided) based on nothing in particular. + double expected_batch_successes = 1.0 * total_batch / 2; + uint64_t lower_bound = + InfrequentPoissonLowerBound(expected_batch_successes); + fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n", + 1.0 * total_batch_successes / total_batch, + 1.0 * lower_bound / total_batch); + EXPECT_GE(total_batch_successes, lower_bound); + } + + { + uint64_t total_checked = uint64_t{FLAGS_max_check} * FLAGS_thoroughness; + double expected_total_fp_count = + total_checked * std::pow(0.5, 8U * sizeof(ResultRow)); + // For expected FP rate, also include false positives due to collisions + // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) 
+ double average_added = 1.0 * total_added / FLAGS_thoroughness; + expected_total_fp_count += + total_checked * ExpectedCollisionFpRate(Hasher(), average_added); + + uint64_t upper_bound = + InfrequentPoissonUpperBound(expected_total_fp_count); + uint64_t lower_bound = + InfrequentPoissonLowerBound(expected_total_fp_count); + fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n", + 1.0 * total_fp_count / total_checked, + expected_total_fp_count / total_checked, + 1.0 * upper_bound / total_checked, + 1.0 * lower_bound / total_checked); + EXPECT_LE(total_fp_count, upper_bound); + EXPECT_GE(total_fp_count, lower_bound); + } + } +} + +TYPED_TEST(RibbonTypeParamTest, Extremes) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); + IMPORT_RIBBON_IMPL_TYPES(TypeParam); + using KeyGen = typename TypeParam::KeyGen; + + size_t bytes = 128 * 1024; + std::unique_ptr buf(new char[bytes]); + InterleavedSoln isoln(buf.get(), bytes); + SimpleSoln soln; + Hasher hasher; + Banding banding; + + // ######################################## + // Add zero keys to minimal number of slots + KeyGen begin_and_end("foo", 123); + ASSERT_TRUE(banding.ResetAndFindSeedToSolve( + /*slots*/ kCoeffBits, begin_and_end, begin_and_end, /*first seed*/ 0, + /* seed mask*/ 0)); + + soln.BackSubstFrom(banding); + isoln.BackSubstFrom(banding); + + // Because there's plenty of memory, we expect the interleaved solution to + // use maximum supported columns (same as simple solution) + ASSERT_EQ(isoln.GetUpperNumColumns(), 8U * sizeof(ResultRow)); + ASSERT_EQ(isoln.GetUpperStartBlock(), 0U); + + // Somewhat oddly, we expect same FP rate as if we had essentially filled + // up the slots. + KeyGen other_keys_begin("not", 0); + KeyGen other_keys_end("not", FLAGS_max_check); + + Index fp_count = 0; + KeyGen cur = other_keys_begin; + while (cur != other_keys_end) { + bool isoln_query_result = isoln.FilterQuery(*cur, hasher); + bool soln_query_result = soln.FilterQuery(*cur, hasher); + // Solutions are equivalent + ASSERT_EQ(isoln_query_result, soln_query_result); + if (!TypeParam::kHomogeneous) { + // And in fact we only expect an FP when ResultRow is 0 + // (except Homogeneous) + ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash( + hasher.GetHash(*cur)) == ResultRow{0}); + } + fp_count += soln_query_result ? 
1 : 0; + ++cur; + } + { + ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate()); + double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check; + EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count)); + if (TypeParam::kHomogeneous) { + // Pseudorandom garbage in Homogeneous filter can "beat the odds" if + // nothing added + } else { + EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count)); + } + } + + // ###################################################### + // Use zero bytes for interleaved solution (key(s) added) + + // Add one key + KeyGen key_begin("added", 0); + KeyGen key_end("added", 1); + ASSERT_TRUE(banding.ResetAndFindSeedToSolve( + /*slots*/ kCoeffBits, key_begin, key_end, /*first seed*/ 0, + /* seed mask*/ 0)); + + InterleavedSoln isoln2(nullptr, /*bytes*/ 0); + + isoln2.BackSubstFrom(banding); + + ASSERT_EQ(isoln2.GetUpperNumColumns(), 0U); + ASSERT_EQ(isoln2.GetUpperStartBlock(), 0U); + + // All queries return true + ASSERT_TRUE(isoln2.FilterQuery(*other_keys_begin, hasher)); + ASSERT_EQ(isoln2.ExpectedFpRate(), 1.0); +} + +TEST(RibbonTest, AllowZeroStarts) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts); + IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts); + using KeyGen = StandardKeyGen; + + InterleavedSoln isoln(nullptr, /*bytes*/ 0); + SimpleSoln soln; + Hasher hasher; + Banding banding; + + KeyGen begin("foo", 0); + KeyGen end("foo", 1); + // Can't add 1 entry + ASSERT_FALSE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin, end)); + + KeyGen begin_and_end("foo", 123); + // Can add 0 entries + ASSERT_TRUE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end, + begin_and_end)); + + Seed reseeds = banding.GetOrdinalSeed(); + ASSERT_EQ(reseeds, 0U); + hasher.SetOrdinalSeed(reseeds); + + // Can construct 0-slot solutions + isoln.BackSubstFrom(banding); + soln.BackSubstFrom(banding); + + // Should always return false + ASSERT_FALSE(isoln.FilterQuery(*begin, hasher)); + ASSERT_FALSE(soln.FilterQuery(*begin, hasher)); + + // And report that in FP rate + ASSERT_EQ(isoln.ExpectedFpRate(), 0.0); + ASSERT_EQ(soln.ExpectedFpRate(), 0.0); +} + +TEST(RibbonTest, RawAndOrdinalSeeds) { + StandardHasher hasher64; + StandardHasher hasher64_32; + StandardHasher hasher32; + StandardHasher hasher8; + + for (uint32_t limit : {0xffU, 0xffffU}) { + std::vector seen(limit + 1); + for (uint32_t i = 0; i < limit; ++i) { + hasher64.SetOrdinalSeed(i); + auto raw64 = hasher64.GetRawSeed(); + hasher32.SetOrdinalSeed(i); + auto raw32 = hasher32.GetRawSeed(); + hasher8.SetOrdinalSeed(static_cast(i)); + auto raw8 = hasher8.GetRawSeed(); + { + hasher64_32.SetOrdinalSeed(i); + auto raw64_32 = hasher64_32.GetRawSeed(); + ASSERT_EQ(raw64_32, raw32); // Same size seed + } + if (i == 0) { + // Documented that ordinal seed 0 == raw seed 0 + ASSERT_EQ(raw64, 0U); + ASSERT_EQ(raw32, 0U); + ASSERT_EQ(raw8, 0U); + } else { + // Extremely likely that upper bits are set + ASSERT_GT(raw64, raw32); + ASSERT_GT(raw32, raw8); + } + // Hashers agree on lower bits + ASSERT_EQ(static_cast(raw64), raw32); + ASSERT_EQ(static_cast(raw32), raw8); + + // The translation is one-to-one for this size prefix + uint32_t v = static_cast(raw32 & limit); + ASSERT_EQ(raw64 & limit, v); + ASSERT_FALSE(seen[v]); + seen[v] = true; + } + } +} + +namespace { + +struct PhsfInputGen { + PhsfInputGen(const std::string& prefix, uint64_t id) : id_(id) { + val_.first = prefix; + ROCKSDB_NAMESPACE::PutFixed64(&val_.first, /*placeholder*/ 0); + } + + // Prefix (only one 
required) + PhsfInputGen& operator++() { + ++id_; + return *this; + } + + const std::pair& operator*() { + // Use multiplication to mix things up a little in the key + ROCKSDB_NAMESPACE::EncodeFixed64(&val_.first[val_.first.size() - 8], + id_ * uint64_t{0x1500000001}); + // Occasionally repeat values etc. + val_.second = static_cast(id_ * 7 / 8); + return val_; + } + + const std::pair* operator->() { return &**this; } + + bool operator==(const PhsfInputGen& other) { + // Same prefix is assumed + return id_ == other.id_; + } + bool operator!=(const PhsfInputGen& other) { + // Same prefix is assumed + return id_ != other.id_; + } + + uint64_t id_; + std::pair val_; +}; + +struct PhsfTypesAndSettings : public DefaultTypesAndSettings { + static constexpr bool kIsFilter = false; +}; +} // namespace + +TEST(RibbonTest, PhsfBasic) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(PhsfTypesAndSettings); + IMPORT_RIBBON_IMPL_TYPES(PhsfTypesAndSettings); + + Index num_slots = 12800; + Index num_to_add = static_cast(num_slots / 1.02); + + PhsfInputGen begin("in", 0); + PhsfInputGen end("in", num_to_add); + + std::unique_ptr idata(new char[/*bytes*/ num_slots]); + InterleavedSoln isoln(idata.get(), /*bytes*/ num_slots); + SimpleSoln soln; + Hasher hasher; + + { + Banding banding; + ASSERT_TRUE(banding.ResetAndFindSeedToSolve(num_slots, begin, end)); + + soln.BackSubstFrom(banding); + isoln.BackSubstFrom(banding); + + hasher.SetOrdinalSeed(banding.GetOrdinalSeed()); + } + + for (PhsfInputGen cur = begin; cur != end; ++cur) { + ASSERT_EQ(cur->second, soln.PhsfQuery(cur->first, hasher)); + ASSERT_EQ(cur->second, isoln.PhsfQuery(cur->first, hasher)); + } +} + +// Not a real test, but a tool used to build APIs in ribbon_config.h +TYPED_TEST(RibbonTypeParamTest, FindOccupancy) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); + IMPORT_RIBBON_IMPL_TYPES(TypeParam); + using KeyGen = typename TypeParam::KeyGen; + + if (!FLAGS_find_occ) { + ROCKSDB_GTEST_BYPASS("Tool disabled during unit test runs"); + return; + } + + KeyGen cur(std::to_string(testing::UnitTest::GetInstance()->random_seed()), + 0); + + Banding banding; + Index num_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_min_slots); + Index max_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_max_slots); + while (num_slots <= max_slots) { + std::map rem_histogram; + std::map slot_histogram; + if (FLAGS_find_slot_occ) { + for (Index i = 0; i < kCoeffBits; ++i) { + slot_histogram[i] = 0; + slot_histogram[num_slots - 1 - i] = 0; + slot_histogram[num_slots / 2 - kCoeffBits / 2 + i] = 0; + } + } + uint64_t total_added = 0; + for (uint32_t i = 0; i < FLAGS_find_iters; ++i) { + banding.Reset(num_slots); + uint32_t j = 0; + KeyGen end = cur; + end += num_slots + num_slots / 10; + for (; cur != end; ++cur) { + if (banding.Add(*cur)) { + ++j; + } else { + break; + } + } + total_added += j; + for (auto& slot : slot_histogram) { + slot.second += banding.IsOccupied(slot.first); + } + + int32_t bucket = + static_cast(num_slots) - static_cast(j); + rem_histogram[bucket]++; + if (FLAGS_verbose) { + fprintf(stderr, "num_slots: %u i: %u / %u avg_overhead: %g\r", + static_cast(num_slots), static_cast(i), + static_cast(FLAGS_find_iters), + 1.0 * (i + 1) * num_slots / total_added); + } + } + if (FLAGS_verbose) { + fprintf(stderr, "\n"); + } + + uint32_t cumulative = 0; + + double p50_rem = 0; + double p95_rem = 0; + double p99_9_rem = 0; + + for (auto& h : rem_histogram) { + double before = 1.0 * cumulative / FLAGS_find_iters; + double not_after = 1.0 * (cumulative + h.second) / 
FLAGS_find_iters; + if (FLAGS_verbose) { + fprintf(stderr, "overhead: %g before: %g not_after: %g\n", + 1.0 * num_slots / (num_slots - h.first), before, not_after); + } + cumulative += h.second; + if (before < 0.5 && 0.5 <= not_after) { + // fake it with linear interpolation + double portion = (0.5 - before) / (not_after - before); + p50_rem = h.first + portion; + } else if (before < 0.95 && 0.95 <= not_after) { + // fake it with linear interpolation + double portion = (0.95 - before) / (not_after - before); + p95_rem = h.first + portion; + } else if (before < 0.999 && 0.999 <= not_after) { + // fake it with linear interpolation + double portion = (0.999 - before) / (not_after - before); + p99_9_rem = h.first + portion; + } + } + for (auto& slot : slot_histogram) { + fprintf(stderr, "slot[%u] occupied: %g\n", (unsigned)slot.first, + 1.0 * slot.second / FLAGS_find_iters); + } + + double mean_rem = + (1.0 * FLAGS_find_iters * num_slots - total_added) / FLAGS_find_iters; + fprintf( + stderr, + "num_slots: %u iters: %u mean_ovr: %g p50_ovr: %g p95_ovr: %g " + "p99.9_ovr: %g mean_rem: %g p50_rem: %g p95_rem: %g p99.9_rem: %g\n", + static_cast(num_slots), + static_cast(FLAGS_find_iters), + 1.0 * num_slots / (num_slots - mean_rem), + 1.0 * num_slots / (num_slots - p50_rem), + 1.0 * num_slots / (num_slots - p95_rem), + 1.0 * num_slots / (num_slots - p99_9_rem), mean_rem, p50_rem, p95_rem, + p99_9_rem); + + num_slots = std::max( + num_slots + 1, static_cast(num_slots * FLAGS_find_next_factor)); + num_slots = InterleavedSoln::RoundUpNumSlots(num_slots); + } +} + +// Not a real test, but a tool to understand Homogeneous Ribbon +// behavior (TODO: configuration APIs & tests) +TYPED_TEST(RibbonTypeParamTest, OptimizeHomogAtScale) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); + IMPORT_RIBBON_IMPL_TYPES(TypeParam); + using KeyGen = typename TypeParam::KeyGen; + + if (!FLAGS_optimize_homog) { + ROCKSDB_GTEST_BYPASS("Tool disabled during unit test runs"); + return; + } + + if (!TypeParam::kHomogeneous) { + ROCKSDB_GTEST_BYPASS("Only for Homogeneous Ribbon"); + return; + } + + KeyGen cur(std::to_string(testing::UnitTest::GetInstance()->random_seed()), + 0); + + Banding banding; + Index num_slots = SimpleSoln::RoundUpNumSlots(FLAGS_optimize_homog_slots); + banding.Reset(num_slots); + + // This and "band_ovr" is the "allocated overhead", or slots over added. + // It does not take into account FP rates. + double target_overhead = 1.20; + uint32_t num_added = 0; + + do { + do { + (void)banding.Add(*cur); + ++cur; + ++num_added; + } while (1.0 * num_slots / num_added > target_overhead); + + SimpleSoln soln; + soln.BackSubstFrom(banding); + + std::array fp_counts_by_cols; + fp_counts_by_cols.fill(0U); + for (uint32_t i = 0; i < FLAGS_optimize_homog_check; ++i) { + ResultRow r = soln.PhsfQuery(*cur, banding); + ++cur; + for (size_t j = 0; j < fp_counts_by_cols.size(); ++j) { + if ((r & 1) == 1) { + break; + } + fp_counts_by_cols[j]++; + r /= 2; + } + } + fprintf(stderr, "band_ovr: %g ", 1.0 * num_slots / num_added); + for (unsigned j = 0; j < fp_counts_by_cols.size(); ++j) { + double inv_fp_rate = + 1.0 * FLAGS_optimize_homog_check / fp_counts_by_cols[j]; + double equiv_cols = std::log(inv_fp_rate) * 1.4426950409; + // Overhead vs. 
information-theoretic minimum based on observed + // FP rate (subject to sampling error, especially for low FP rates) + double actual_overhead = + 1.0 * (j + 1) * num_slots / (equiv_cols * num_added); + fprintf(stderr, "ovr_%u: %g ", j + 1, actual_overhead); + } + fprintf(stderr, "\n"); + target_overhead -= FLAGS_optimize_homog_granularity; + } while (target_overhead > 1.0); +} + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); +#ifdef GFLAGS + ParseCommandLineFlags(&argc, &argv, true); +#endif // GFLAGS + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/set_comparator.h b/src/rocksdb/util/set_comparator.h new file mode 100644 index 000000000..e0e64436a --- /dev/null +++ b/src/rocksdb/util/set_comparator.h @@ -0,0 +1,24 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/comparator.h" + +namespace ROCKSDB_NAMESPACE { +// A comparator to be used in std::set +struct SetComparator { + explicit SetComparator() : user_comparator_(BytewiseComparator()) {} + explicit SetComparator(const Comparator* user_comparator) + : user_comparator_(user_comparator ? user_comparator + : BytewiseComparator()) {} + bool operator()(const Slice& lhs, const Slice& rhs) const { + return user_comparator_->Compare(lhs, rhs) < 0; + } + + private: + const Comparator* user_comparator_; +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/single_thread_executor.h b/src/rocksdb/util/single_thread_executor.h new file mode 100644 index 000000000..c69f2a292 --- /dev/null +++ b/src/rocksdb/util/single_thread_executor.h @@ -0,0 +1,56 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + +#if USE_COROUTINES +#include + +#include "folly/CPortability.h" +#include "folly/CppAttributes.h" +#include "folly/Executor.h" +#include "util/async_file_reader.h" + +namespace ROCKSDB_NAMESPACE { +// Implements a simple executor that runs callback functions in the same +// thread, unlike CPUThreadExecutor which may schedule the callback on +// another thread. Runs in a tight loop calling the queued callbacks, +// and polls for async IO completions when idle. The completions will +// resume suspended coroutines and they get added to the queue, which +// will get picked up by this loop. +// Any possibility of deadlock is precluded because the file system +// guarantees that async IO completion callbacks will not be scheduled +// to run in this thread or this executor. 
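A rough standalone analogue of the loop just described, with folly and the async reader abstracted away, to show the drain-then-poll shape and the recursion guard. Names here are illustrative; this is not the class defined in the patch, which follows.

    #include <cstdio>
    #include <functional>
    #include <queue>
    #include <utility>

    // Illustrative only: run queued callbacks inline; when the queue drains,
    // poll for async completions, which may queue more work via Add().
    class InlineExecutor {
     public:
      explicit InlineExecutor(std::function<void()> wait_for_io)
          : wait_for_io_(std::move(wait_for_io)) {}

      void Add(std::function<void()> callback) {
        q_.push(std::move(callback));
        if (q_.size() == 1 && !busy_) {
          while (!q_.empty()) {
            q_.front()();  // run inline; may Add() more work
            q_.pop();
            if (q_.empty()) {
              busy_ = true;    // guard against re-entering this drain loop
              wait_for_io_();  // completions may Add() resumed work
              busy_ = false;
            }
          }
        }
      }

     private:
      std::queue<std::function<void()>> q_;
      std::function<void()> wait_for_io_;
      bool busy_ = false;
    };

    int main() {
      InlineExecutor ex([] { /* poll async IO; no-op in this sketch */ });
      ex.Add([] { std::puts("ran inline"); });
      return 0;
    }

Here wait_for_io stands in for the async reader's wait call: it is expected to return once any pending completions have been queued back through Add(), which is what precludes the deadlock discussed above.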
+class SingleThreadExecutor : public folly::Executor { + public: + explicit SingleThreadExecutor(AsyncFileReader& reader) + : reader_(reader), busy_(false) {} + + void add(folly::Func callback) override { + auto& q = q_; + q.push(std::move(callback)); + if (q.size() == 1 && !busy_) { + while (!q.empty()) { + q.front()(); + q.pop(); + + if (q.empty()) { + // Prevent recursion, as the Wait may queue resumed coroutines + busy_ = true; + reader_.Wait(); + busy_ = false; + } + } + } + } + + private: + std::queue q_; + AsyncFileReader& reader_; + bool busy_; +}; +} // namespace ROCKSDB_NAMESPACE +#endif // USE_COROUTINES diff --git a/src/rocksdb/util/slice.cc b/src/rocksdb/util/slice.cc new file mode 100644 index 000000000..1fa21afcb --- /dev/null +++ b/src/rocksdb/util/slice.cc @@ -0,0 +1,405 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/slice.h" + +#include + +#include + +#include "rocksdb/convenience.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +class FixedPrefixTransform : public SliceTransform { + private: + size_t prefix_len_; + std::string id_; + + public: + explicit FixedPrefixTransform(size_t prefix_len) : prefix_len_(prefix_len) { + id_ = std::string(kClassName()) + "." + std::to_string(prefix_len_); + } + + static const char* kClassName() { return "rocksdb.FixedPrefix"; } + static const char* kNickName() { return "fixed"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + + bool IsInstanceOf(const std::string& name) const override { + if (name == id_) { + return true; + } else if (StartsWith(name, kNickName())) { + std::string alt_id = + std::string(kNickName()) + ":" + std::to_string(prefix_len_); + if (name == alt_id) { + return true; + } + } + return SliceTransform::IsInstanceOf(name); + } + + std::string GetId() const override { return id_; } + + Slice Transform(const Slice& src) const override { + assert(InDomain(src)); + return Slice(src.data(), prefix_len_); + } + + bool InDomain(const Slice& src) const override { + return (src.size() >= prefix_len_); + } + + bool InRange(const Slice& dst) const override { + return (dst.size() == prefix_len_); + } + + bool FullLengthEnabled(size_t* len) const override { + *len = prefix_len_; + return true; + } + + bool SameResultWhenAppended(const Slice& prefix) const override { + return InDomain(prefix); + } +}; + +class CappedPrefixTransform : public SliceTransform { + private: + size_t cap_len_; + std::string id_; + + public: + explicit CappedPrefixTransform(size_t cap_len) : cap_len_(cap_len) { + id_ = std::string(kClassName()) + "." 
+ std::to_string(cap_len_); + } + + static const char* kClassName() { return "rocksdb.CappedPrefix"; } + static const char* kNickName() { return "capped"; } + const char* Name() const override { return kClassName(); } + const char* NickName() const override { return kNickName(); } + std::string GetId() const override { return id_; } + + bool IsInstanceOf(const std::string& name) const override { + if (name == id_) { + return true; + } else if (StartsWith(name, kNickName())) { + std::string alt_id = + std::string(kNickName()) + ":" + std::to_string(cap_len_); + if (name == alt_id) { + return true; + } + } + return SliceTransform::IsInstanceOf(name); + } + + Slice Transform(const Slice& src) const override { + assert(InDomain(src)); + return Slice(src.data(), std::min(cap_len_, src.size())); + } + + bool InDomain(const Slice& /*src*/) const override { return true; } + + bool InRange(const Slice& dst) const override { + return (dst.size() <= cap_len_); + } + + bool FullLengthEnabled(size_t* len) const override { + *len = cap_len_; + return true; + } + + bool SameResultWhenAppended(const Slice& prefix) const override { + return prefix.size() >= cap_len_; + } +}; + +class NoopTransform : public SliceTransform { + public: + explicit NoopTransform() {} + + static const char* kClassName() { return "rocksdb.Noop"; } + const char* Name() const override { return kClassName(); } + + Slice Transform(const Slice& src) const override { return src; } + + bool InDomain(const Slice& /*src*/) const override { return true; } + + bool InRange(const Slice& /*dst*/) const override { return true; } + + bool SameResultWhenAppended(const Slice& /*prefix*/) const override { + return false; + } +}; + +} // end namespace + +const SliceTransform* NewFixedPrefixTransform(size_t prefix_len) { + return new FixedPrefixTransform(prefix_len); +} + +const SliceTransform* NewCappedPrefixTransform(size_t cap_len) { + return new CappedPrefixTransform(cap_len); +} + +const SliceTransform* NewNoopTransform() { return new NoopTransform; } + +#ifndef ROCKSDB_LITE +static int RegisterBuiltinSliceTransform(ObjectLibrary& library, + const std::string& /*arg*/) { + // For the builtin transforms, the format is typically + // [Name].[0-9]+ or [NickName]:[0-9]+ + library.AddFactory( + NoopTransform::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* guard, + std::string* /*errmsg*/) { + guard->reset(NewNoopTransform()); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(FixedPrefixTransform::kNickName(), false) + .AddNumber(":"), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + auto colon = uri.find(":"); + auto len = ParseSizeT(uri.substr(colon + 1)); + guard->reset(NewFixedPrefixTransform(len)); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(FixedPrefixTransform::kClassName(), false) + .AddNumber("."), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + auto len = ParseSizeT( + uri.substr(strlen(FixedPrefixTransform::kClassName()) + 1)); + guard->reset(NewFixedPrefixTransform(len)); + return guard->get(); + }); + library.AddFactory( + ObjectLibrary::PatternEntry(CappedPrefixTransform::kNickName(), false) + .AddNumber(":"), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + auto colon = uri.find(":"); + auto len = ParseSizeT(uri.substr(colon + 1)); + guard->reset(NewCappedPrefixTransform(len)); + return guard->get(); + }); + library.AddFactory( + 
ObjectLibrary::PatternEntry(CappedPrefixTransform::kClassName(), false) + .AddNumber("."), + [](const std::string& uri, std::unique_ptr* guard, + std::string* /*errmsg*/) { + auto len = ParseSizeT( + uri.substr(strlen(CappedPrefixTransform::kClassName()) + 1)); + guard->reset(NewCappedPrefixTransform(len)); + return guard->get(); + }); + size_t num_types; + return static_cast(library.GetFactoryCount(&num_types)); +} +#endif // ROCKSDB_LITE + +Status SliceTransform::CreateFromString( + const ConfigOptions& config_options, const std::string& value, + std::shared_ptr* result) { +#ifndef ROCKSDB_LITE + static std::once_flag once; + std::call_once(once, [&]() { + RegisterBuiltinSliceTransform(*(ObjectLibrary::Default().get()), ""); + }); +#endif // ROCKSDB_LITE + std::string id; + std::unordered_map opt_map; + Status status = Customizable::GetOptionsMap(config_options, result->get(), + value, &id, &opt_map); + if (!status.ok()) { // GetOptionsMap failed + return status; + } else if (id.empty() && opt_map.empty()) { + result->reset(); + } else { +#ifndef ROCKSDB_LITE + status = config_options.registry->NewSharedObject(id, result); +#else + auto Matches = [](const std::string& input, size_t size, + const char* pattern, char sep) { + auto plen = strlen(pattern); + return (size > plen + 2 && input[plen] == sep && + StartsWith(input, pattern)); + }; + + auto size = id.size(); + if (id == NoopTransform::kClassName()) { + result->reset(NewNoopTransform()); + } else if (Matches(id, size, FixedPrefixTransform::kNickName(), ':')) { + auto fixed = strlen(FixedPrefixTransform::kNickName()); + auto len = ParseSizeT(id.substr(fixed + 1)); + result->reset(NewFixedPrefixTransform(len)); + } else if (Matches(id, size, CappedPrefixTransform::kNickName(), ':')) { + auto capped = strlen(CappedPrefixTransform::kNickName()); + auto len = ParseSizeT(id.substr(capped + 1)); + result->reset(NewCappedPrefixTransform(len)); + } else if (Matches(id, size, CappedPrefixTransform::kClassName(), '.')) { + auto capped = strlen(CappedPrefixTransform::kClassName()); + auto len = ParseSizeT(id.substr(capped + 1)); + result->reset(NewCappedPrefixTransform(len)); + } else if (Matches(id, size, FixedPrefixTransform::kClassName(), '.')) { + auto fixed = strlen(FixedPrefixTransform::kClassName()); + auto len = ParseSizeT(id.substr(fixed + 1)); + result->reset(NewFixedPrefixTransform(len)); + } else { + status = Status::NotSupported("Cannot load object in LITE mode ", id); + } +#endif // ROCKSDB_LITE + if (config_options.ignore_unsupported_options && status.IsNotSupported()) { + return Status::OK(); + } else if (status.ok()) { + SliceTransform* transform = const_cast(result->get()); + status = + Customizable::ConfigureNewObject(config_options, transform, opt_map); + } + } + return status; +} + +std::string SliceTransform::AsString() const { +#ifndef ROCKSDB_LITE + if (HasRegisteredOptions()) { + ConfigOptions opts; + opts.delimiter = ";"; + return ToString(opts); + } +#endif // ROCKSDB_LITE + return GetId(); +} + +// 2 small internal utility functions, for efficient hex conversions +// and no need for snprintf, toupper etc... 
+// Originally from wdt/util/EncryptionUtils.cpp - for +// std::to_string(true)/DecodeHex: +char toHex(unsigned char v) { + if (v <= 9) { + return '0' + v; + } + return 'A' + v - 10; +} +// most of the code is for validation/error check +int fromHex(char c) { + // toupper: + if (c >= 'a' && c <= 'f') { + c -= ('a' - 'A'); // aka 0x20 + } + // validation + if (c < '0' || (c > '9' && (c < 'A' || c > 'F'))) { + return -1; // invalid not 0-9A-F hex char + } + if (c <= '9') { + return c - '0'; + } + return c - 'A' + 10; +} + +Slice::Slice(const SliceParts& parts, std::string* buf) { + size_t length = 0; + for (int i = 0; i < parts.num_parts; ++i) { + length += parts.parts[i].size(); + } + buf->reserve(length); + + for (int i = 0; i < parts.num_parts; ++i) { + buf->append(parts.parts[i].data(), parts.parts[i].size()); + } + data_ = buf->data(); + size_ = buf->size(); +} + +// Return a string that contains the copy of the referenced data. +std::string Slice::ToString(bool hex) const { + std::string result; // RVO/NRVO/move + if (hex) { + result.reserve(2 * size_); + for (size_t i = 0; i < size_; ++i) { + unsigned char c = data_[i]; + result.push_back(toHex(c >> 4)); + result.push_back(toHex(c & 0xf)); + } + return result; + } else { + result.assign(data_, size_); + return result; + } +} + +// Originally from rocksdb/utilities/ldb_cmd.h +bool Slice::DecodeHex(std::string* result) const { + std::string::size_type len = size_; + if (len % 2) { + // Hex string must be even number of hex digits to get complete bytes back + return false; + } + if (!result) { + return false; + } + result->clear(); + result->reserve(len / 2); + + for (size_t i = 0; i < len;) { + int h1 = fromHex(data_[i++]); + if (h1 < 0) { + return false; + } + int h2 = fromHex(data_[i++]); + if (h2 < 0) { + return false; + } + result->push_back(static_cast((h1 << 4) | h2)); + } + return true; +} + +PinnableSlice::PinnableSlice(PinnableSlice&& other) { + *this = std::move(other); +} + +PinnableSlice& PinnableSlice::operator=(PinnableSlice&& other) { + if (this != &other) { + Cleanable::Reset(); + Cleanable::operator=(std::move(other)); + size_ = other.size_; + pinned_ = other.pinned_; + if (pinned_) { + data_ = other.data_; + // When it's pinned, buf should no longer be of use. + } else { + if (other.buf_ == &other.self_space_) { + self_space_ = std::move(other.self_space_); + buf_ = &self_space_; + data_ = buf_->data(); + } else { + buf_ = other.buf_; + data_ = other.data_; + } + } + other.self_space_.clear(); + other.buf_ = &other.self_space_; + other.pinned_ = false; + other.PinSelf(); + } + return *this; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/slice_test.cc b/src/rocksdb/util/slice_test.cc new file mode 100644 index 000000000..e1c35d567 --- /dev/null +++ b/src/rocksdb/util/slice_test.cc @@ -0,0 +1,191 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
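// --- Editor's note (illustrative sketch, not part of the upstream patch):
// round-tripping the hex helpers defined in slice.cc above. The byte values
// are arbitrary examples.
#include <cassert>
#include <string>
#include "rocksdb/slice.h"

void SliceHexRoundTrip() {
  using ROCKSDB_NAMESPACE::Slice;
  const char bytes[] = {'\x00', '\x7f', '\xab'};
  Slice s(bytes, sizeof(bytes));
  std::string hex = s.ToString(/*hex=*/true);  // "007FAB" (uppercase digits)
  std::string raw;
  bool ok = Slice(hex).DecodeHex(&raw);  // decodes back to the original bytes
  assert(ok && raw == std::string(bytes, sizeof(bytes)));
  (void)ok;
}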
+ +#include "rocksdb/slice.h" + +#include + +#include "port/port.h" +#include "port/stack_trace.h" +#include "rocksdb/data_structure.h" +#include "rocksdb/types.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" + +namespace ROCKSDB_NAMESPACE { + +TEST(SliceTest, StringView) { + std::string s = "foo"; + std::string_view sv = s; + ASSERT_EQ(Slice(s), Slice(sv)); + ASSERT_EQ(Slice(s), Slice(std::move(sv))); +} + +// Use this to keep track of the cleanups that were actually performed +void Multiplier(void* arg1, void* arg2) { + int* res = reinterpret_cast(arg1); + int* num = reinterpret_cast(arg2); + *res *= *num; +} + +class PinnableSliceTest : public testing::Test { + public: + void AssertSameData(const std::string& expected, const PinnableSlice& slice) { + std::string got; + got.assign(slice.data(), slice.size()); + ASSERT_EQ(expected, got); + } +}; + +// Test that the external buffer is moved instead of being copied. +TEST_F(PinnableSliceTest, MoveExternalBuffer) { + Slice s("123"); + std::string buf; + PinnableSlice v1(&buf); + v1.PinSelf(s); + + PinnableSlice v2(std::move(v1)); + ASSERT_EQ(buf.data(), v2.data()); + ASSERT_EQ(&buf, v2.GetSelf()); + + PinnableSlice v3; + v3 = std::move(v2); + ASSERT_EQ(buf.data(), v3.data()); + ASSERT_EQ(&buf, v3.GetSelf()); +} + +TEST_F(PinnableSliceTest, Move) { + int n2 = 2; + int res = 1; + const std::string const_str1 = "123"; + const std::string const_str2 = "ABC"; + Slice slice1(const_str1); + Slice slice2(const_str2); + + { + // Test move constructor on a pinned slice. + res = 1; + PinnableSlice v1; + v1.PinSlice(slice1, Multiplier, &res, &n2); + PinnableSlice v2(std::move(v1)); + + // Since v1's Cleanable has been moved to v2, + // no cleanup should happen in Reset. + v1.Reset(); + ASSERT_EQ(1, res); + + AssertSameData(const_str1, v2); + } + // v2 is cleaned up. + ASSERT_EQ(2, res); + + { + // Test move constructor on an unpinned slice. + PinnableSlice v1; + v1.PinSelf(slice1); + PinnableSlice v2(std::move(v1)); + + AssertSameData(const_str1, v2); + } + + { + // Test move assignment from a pinned slice to + // another pinned slice. + res = 1; + PinnableSlice v1; + v1.PinSlice(slice1, Multiplier, &res, &n2); + PinnableSlice v2; + v2.PinSlice(slice2, Multiplier, &res, &n2); + v2 = std::move(v1); + + // v2's Cleanable will be Reset before moving + // anything from v1. + ASSERT_EQ(2, res); + // Since v1's Cleanable has been moved to v2, + // no cleanup should happen in Reset. + v1.Reset(); + ASSERT_EQ(2, res); + + AssertSameData(const_str1, v2); + } + // The Cleanable moved from v1 to v2 will be Reset. + ASSERT_EQ(4, res); + + { + // Test move assignment from a pinned slice to + // an unpinned slice. + res = 1; + PinnableSlice v1; + v1.PinSlice(slice1, Multiplier, &res, &n2); + PinnableSlice v2; + v2.PinSelf(slice2); + v2 = std::move(v1); + + // Since v1's Cleanable has been moved to v2, + // no cleanup should happen in Reset. + v1.Reset(); + ASSERT_EQ(1, res); + + AssertSameData(const_str1, v2); + } + // The Cleanable moved from v1 to v2 will be Reset. + ASSERT_EQ(2, res); + + { + // Test move assignment from an upinned slice to + // another unpinned slice. + PinnableSlice v1; + v1.PinSelf(slice1); + PinnableSlice v2; + v2.PinSelf(slice2); + v2 = std::move(v1); + + AssertSameData(const_str1, v2); + } + + { + // Test move assignment from an upinned slice to + // a pinned slice. 
+ res = 1; + PinnableSlice v1; + v1.PinSelf(slice1); + PinnableSlice v2; + v2.PinSlice(slice2, Multiplier, &res, &n2); + v2 = std::move(v1); + + // v2's Cleanable will be Reset before moving + // anything from v1. + ASSERT_EQ(2, res); + + AssertSameData(const_str1, v2); + } + // No Cleanable is moved from v1 to v2, so no more cleanup. + ASSERT_EQ(2, res); +} + +// ***************************************************************** // +// Unit test for SmallEnumSet +class SmallEnumSetTest : public testing::Test { + public: + SmallEnumSetTest() {} + ~SmallEnumSetTest() {} +}; + +TEST_F(SmallEnumSetTest, SmallSetTest) { + FileTypeSet fs; + ASSERT_TRUE(fs.Add(FileType::kIdentityFile)); + ASSERT_FALSE(fs.Add(FileType::kIdentityFile)); + ASSERT_TRUE(fs.Add(FileType::kInfoLogFile)); + ASSERT_TRUE(fs.Contains(FileType::kIdentityFile)); + ASSERT_FALSE(fs.Contains(FileType::kDBLockFile)); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/slice_transform_test.cc b/src/rocksdb/util/slice_transform_test.cc new file mode 100644 index 000000000..64ac8bb1f --- /dev/null +++ b/src/rocksdb/util/slice_transform_test.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/slice_transform.h" + +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/statistics.h" +#include "rocksdb/table.h" +#include "test_util/testharness.h" + +namespace ROCKSDB_NAMESPACE { + +class SliceTransformTest : public testing::Test {}; + +TEST_F(SliceTransformTest, CapPrefixTransform) { + std::string s; + s = "abcdefge"; + + std::unique_ptr transform; + + transform.reset(NewCappedPrefixTransform(6)); + ASSERT_EQ(transform->Transform(s).ToString(), "abcdef"); + ASSERT_TRUE(transform->SameResultWhenAppended("123456")); + ASSERT_TRUE(transform->SameResultWhenAppended("1234567")); + ASSERT_TRUE(!transform->SameResultWhenAppended("12345")); + + transform.reset(NewCappedPrefixTransform(8)); + ASSERT_EQ(transform->Transform(s).ToString(), "abcdefge"); + + transform.reset(NewCappedPrefixTransform(10)); + ASSERT_EQ(transform->Transform(s).ToString(), "abcdefge"); + + transform.reset(NewCappedPrefixTransform(0)); + ASSERT_EQ(transform->Transform(s).ToString(), ""); + + transform.reset(NewCappedPrefixTransform(0)); + ASSERT_EQ(transform->Transform("").ToString(), ""); +} + +class SliceTransformDBTest : public testing::Test { + private: + std::string dbname_; + Env* env_; + DB* db_; + + public: + SliceTransformDBTest() : env_(Env::Default()), db_(nullptr) { + dbname_ = test::PerThreadDBPath("slice_transform_db_test"); + EXPECT_OK(DestroyDB(dbname_, last_options_)); + } + + ~SliceTransformDBTest() override { + delete db_; + EXPECT_OK(DestroyDB(dbname_, last_options_)); + } + + DB* db() { return db_; } + + // Return the current option configuration. 
+ Options* GetOptions() { return &last_options_; } + + void DestroyAndReopen() { + // Destroy using last options + Destroy(); + ASSERT_OK(TryReopen()); + } + + void Destroy() { + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, last_options_)); + } + + Status TryReopen() { + delete db_; + db_ = nullptr; + last_options_.create_if_missing = true; + + return DB::Open(last_options_, dbname_, &db_); + } + + Options last_options_; +}; + +namespace { +uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) { + return options.statistics->getTickerCount(ticker_type); +} +} // namespace + +TEST_F(SliceTransformDBTest, CapPrefix) { + last_options_.prefix_extractor.reset(NewCappedPrefixTransform(8)); + last_options_.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + bbto.whole_key_filtering = false; + last_options_.table_factory.reset(NewBlockBasedTableFactory(bbto)); + ASSERT_OK(TryReopen()); + + ReadOptions ro; + FlushOptions fo; + WriteOptions wo; + + ASSERT_OK(db()->Put(wo, "barbarbar", "foo")); + ASSERT_OK(db()->Put(wo, "barbarbar2", "foo2")); + ASSERT_OK(db()->Put(wo, "foo", "bar")); + ASSERT_OK(db()->Put(wo, "foo3", "bar3")); + ASSERT_OK(db()->Flush(fo)); + + std::unique_ptr iter(db()->NewIterator(ro)); + + iter->Seek("foo"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value().ToString(), "bar"); + ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 0U); + + iter->Seek("foo2"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 1U); + + iter->Seek("barbarbar"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value().ToString(), "foo"); + ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 1U); + + iter->Seek("barfoofoo"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 2U); + + iter->Seek("foobarbar"); + ASSERT_OK(iter->status()); + ASSERT_TRUE(!iter->Valid()); + ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL), 3U); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/status.cc b/src/rocksdb/util/status.cc new file mode 100644 index 000000000..72fdfdbcc --- /dev/null +++ b/src/rocksdb/util/status.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
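// --- Editor's note (illustrative sketch, not part of the upstream patch):
// creating a built-in prefix extractor from one of the short string IDs that
// RegisterBuiltinSliceTransform() in slice.cc above registers ("fixed:<N>",
// "capped:<N>", or the long "rocksdb.CappedPrefix.<N>" form). The
// std::shared_ptr<const SliceTransform> result type is assumed from the
// public SliceTransform::CreateFromString declaration.
#include <cassert>
#include <memory>
#include "rocksdb/convenience.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/status.h"

void CreatePrefixExtractorFromString() {
  using namespace ROCKSDB_NAMESPACE;
  ConfigOptions config_options;
  std::shared_ptr<const SliceTransform> transform;
  Status s =
      SliceTransform::CreateFromString(config_options, "capped:8", &transform);
  assert(s.ok());
  // A capped prefix of length 8 truncates longer keys to their first 8 bytes.
  assert(transform->Transform(Slice("abcdefghij")).ToString() == "abcdefgh");
}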
+ +#include "rocksdb/status.h" + +#include +#ifdef OS_WIN +#include +#endif +#include + +#include "port/port.h" + +namespace ROCKSDB_NAMESPACE { + +std::unique_ptr Status::CopyState(const char* s) { + const size_t cch = std::strlen(s) + 1; // +1 for the null terminator + char* rv = new char[cch]; + std::strncpy(rv, s, cch); + return std::unique_ptr(rv); +} + +static const char* msgs[static_cast(Status::kMaxSubCode)] = { + "", // kNone + "Timeout Acquiring Mutex", // kMutexTimeout + "Timeout waiting to lock key", // kLockTimeout + "Failed to acquire lock due to max_num_locks limit", // kLockLimit + "No space left on device", // kNoSpace + "Deadlock", // kDeadlock + "Stale file handle", // kStaleFile + "Memory limit reached", // kMemoryLimit + "Space limit reached", // kSpaceLimit + "No such file or directory", // kPathNotFound + // KMergeOperandsInsufficientCapacity + "Insufficient capacity for merge operands", + // kManualCompactionPaused + "Manual compaction paused", + " (overwritten)", // kOverwritten, subcode of OK + "Txn not prepared", // kTxnNotPrepared + "IO fenced off", // kIOFenced +}; + +Status::Status(Code _code, SubCode _subcode, const Slice& msg, + const Slice& msg2, Severity sev) + : code_(_code), + subcode_(_subcode), + sev_(sev), + retryable_(false), + data_loss_(false), + scope_(0) { + assert(subcode_ != kMaxSubCode); + const size_t len1 = msg.size(); + const size_t len2 = msg2.size(); + const size_t size = len1 + (len2 ? (2 + len2) : 0); + char* const result = new char[size + 1]; // +1 for null terminator + memcpy(result, msg.data(), len1); + if (len2) { + result[len1] = ':'; + result[len1 + 1] = ' '; + memcpy(result + len1 + 2, msg2.data(), len2); + } + result[size] = '\0'; // null terminator for C style string + state_.reset(result); +} + +std::string Status::ToString() const { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + checked_ = true; +#endif // ROCKSDB_ASSERT_STATUS_CHECKED + const char* type = nullptr; + switch (code_) { + case kOk: + return "OK"; + case kNotFound: + type = "NotFound: "; + break; + case kCorruption: + type = "Corruption: "; + break; + case kNotSupported: + type = "Not implemented: "; + break; + case kInvalidArgument: + type = "Invalid argument: "; + break; + case kIOError: + type = "IO error: "; + break; + case kMergeInProgress: + type = "Merge in progress: "; + break; + case kIncomplete: + type = "Result incomplete: "; + break; + case kShutdownInProgress: + type = "Shutdown in progress: "; + break; + case kTimedOut: + type = "Operation timed out: "; + break; + case kAborted: + type = "Operation aborted: "; + break; + case kBusy: + type = "Resource busy: "; + break; + case kExpired: + type = "Operation expired: "; + break; + case kTryAgain: + type = "Operation failed. Try again.: "; + break; + case kCompactionTooLarge: + type = "Compaction too large: "; + break; + case kColumnFamilyDropped: + type = "Column family dropped: "; + break; + case kMaxCode: + assert(false); + break; + } + char tmp[30]; + if (type == nullptr) { + // This should not happen since `code_` should be a valid non-`kMaxCode` + // member of the `Code` enum. The above switch-statement should have had a + // case assigning `type` to a corresponding string. 
+ assert(false); + snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", static_cast(code())); + type = tmp; + } + std::string result(type); + if (subcode_ != kNone) { + uint32_t index = static_cast(subcode_); + assert(sizeof(msgs) / sizeof(msgs[0]) > index); + result.append(msgs[index]); + } + + if (state_ != nullptr) { + if (subcode_ != kNone) { + result.append(": "); + } + result.append(state_.get()); + } + return result; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/stderr_logger.cc b/src/rocksdb/util/stderr_logger.cc new file mode 100644 index 000000000..6044b8b93 --- /dev/null +++ b/src/rocksdb/util/stderr_logger.cc @@ -0,0 +1,30 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/stderr_logger.h" + +#include "port/sys_time.h" + +namespace ROCKSDB_NAMESPACE { +StderrLogger::~StderrLogger() {} + +void StderrLogger::Logv(const char* format, va_list ap) { + const uint64_t thread_id = Env::Default()->GetThreadID(); + + port::TimeVal now_tv; + port::GetTimeOfDay(&now_tv, nullptr); + const time_t seconds = now_tv.tv_sec; + struct tm t; + port::LocalTimeR(&seconds, &t); + fprintf(stderr, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", t.tm_year + 1900, + t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec, + static_cast(now_tv.tv_usec), + static_cast(thread_id)); + + vfprintf(stderr, format, ap); + fprintf(stderr, "\n"); +} +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/stderr_logger.h b/src/rocksdb/util/stderr_logger.h new file mode 100644 index 000000000..c3b01210c --- /dev/null +++ b/src/rocksdb/util/stderr_logger.h @@ -0,0 +1,31 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +// Prints logs to stderr for faster debugging +class StderrLogger : public Logger { + public: + explicit StderrLogger(const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL) + : Logger(log_level) {} + + ~StderrLogger() override; + + // Brings overloaded Logv()s into scope so they're not hidden when we override + // a subset of them. + using Logger::Logv; + + virtual void Logv(const char* format, va_list ap) override; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/stop_watch.h b/src/rocksdb/util/stop_watch.h new file mode 100644 index 000000000..e26380d97 --- /dev/null +++ b/src/rocksdb/util/stop_watch.h @@ -0,0 +1,118 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once +#include "monitoring/statistics.h" +#include "rocksdb/system_clock.h" + +namespace ROCKSDB_NAMESPACE { +// Auto-scoped. +// Records the measure time into the corresponding histogram if statistics +// is not nullptr. It is also saved into *elapsed if the pointer is not nullptr +// and overwrite is true, it will be added to *elapsed if overwrite is false. 
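// --- Editor's note (illustrative sketch, not part of the upstream patch):
// how Status::ToString() in status.cc above composes its output from the code
// string and the two message slices passed at construction. The message text
// is invented for illustration.
#include <cassert>
#include "rocksdb/status.h"

void StatusToStringSketch() {
  using ROCKSDB_NAMESPACE::Status;
  Status s = Status::NotFound("key42", "no such entry");
  assert(s.IsNotFound());
  // "NotFound: " (code) + "key42" + ": " + "no such entry" (state built by
  // the Status constructor from msg and msg2).
  assert(s.ToString() == "NotFound: key42: no such entry");
}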
+class StopWatch { + public: + StopWatch(SystemClock* clock, Statistics* statistics, + const uint32_t hist_type, uint64_t* elapsed = nullptr, + bool overwrite = true, bool delay_enabled = false) + : clock_(clock), + statistics_(statistics), + hist_type_(hist_type), + elapsed_(elapsed), + overwrite_(overwrite), + stats_enabled_(statistics && + statistics->get_stats_level() >= + StatsLevel::kExceptTimers && + statistics->HistEnabledForType(hist_type)), + delay_enabled_(delay_enabled), + total_delay_(0), + delay_start_time_(0), + start_time_((stats_enabled_ || elapsed != nullptr) ? clock->NowMicros() + : 0) {} + + ~StopWatch() { + if (elapsed_) { + if (overwrite_) { + *elapsed_ = clock_->NowMicros() - start_time_; + } else { + *elapsed_ += clock_->NowMicros() - start_time_; + } + } + if (elapsed_ && delay_enabled_) { + *elapsed_ -= total_delay_; + } + if (stats_enabled_) { + statistics_->reportTimeToHistogram( + hist_type_, (elapsed_ != nullptr) + ? *elapsed_ + : (clock_->NowMicros() - start_time_)); + } + } + + void DelayStart() { + // if delay_start_time_ is not 0, it means we are already tracking delay, + // so delay_start_time_ should not be overwritten + if (elapsed_ && delay_enabled_ && delay_start_time_ == 0) { + delay_start_time_ = clock_->NowMicros(); + } + } + + void DelayStop() { + if (elapsed_ && delay_enabled_ && delay_start_time_ != 0) { + total_delay_ += clock_->NowMicros() - delay_start_time_; + } + // reset to 0 means currently no delay is being tracked, so two consecutive + // calls to DelayStop will not increase total_delay_ + delay_start_time_ = 0; + } + + uint64_t GetDelay() const { return delay_enabled_ ? total_delay_ : 0; } + + uint64_t start_time() const { return start_time_; } + + private: + SystemClock* clock_; + Statistics* statistics_; + const uint32_t hist_type_; + uint64_t* elapsed_; + bool overwrite_; + bool stats_enabled_; + bool delay_enabled_; + uint64_t total_delay_; + uint64_t delay_start_time_; + const uint64_t start_time_; +}; + +// a nano second precision stopwatch +class StopWatchNano { + public: + explicit StopWatchNano(SystemClock* clock, bool auto_start = false) + : clock_(clock), start_(0) { + if (auto_start) { + Start(); + } + } + + void Start() { start_ = clock_->NowNanos(); } + + uint64_t ElapsedNanos(bool reset = false) { + auto now = clock_->NowNanos(); + auto elapsed = now - start_; + if (reset) { + start_ = now; + } + return elapsed; + } + + uint64_t ElapsedNanosSafe(bool reset = false) { + return (clock_ != nullptr) ? ElapsedNanos(reset) : 0U; + } + + private: + SystemClock* clock_; + uint64_t start_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/string_util.cc b/src/rocksdb/util/string_util.cc new file mode 100644 index 000000000..324482a4c --- /dev/null +++ b/src/rocksdb/util/string_util.cc @@ -0,0 +1,504 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
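// --- Editor's note (illustrative sketch, not part of the upstream patch):
// timing a scope with the StopWatch/StopWatchNano helpers from stop_watch.h
// above. Passing a null Statistics pointer means only *elapsed is recorded;
// the histogram id 0 is a placeholder since no histogram is reported.
#include <cstdint>
#include "rocksdb/system_clock.h"
#include "util/stop_watch.h"

uint64_t TimeSomeWork() {
  using namespace ROCKSDB_NAMESPACE;
  SystemClock* clock = SystemClock::Default().get();
  uint64_t elapsed_micros = 0;
  {
    // On destruction, elapsed_micros is set to the scope's duration in micros.
    StopWatch sw(clock, /*statistics=*/nullptr, /*hist_type=*/0,
                 &elapsed_micros);
    // ... the work being measured would go here ...
  }
  StopWatchNano nano(clock, /*auto_start=*/true);
  uint64_t nanos = nano.ElapsedNanos();  // nanoseconds since construction
  (void)nanos;
  return elapsed_micros;
}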
+// +#include "util/string_util.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "port/port.h" +#include "port/sys_time.h" +#include "rocksdb/slice.h" + +#ifndef __has_cpp_attribute +#define ROCKSDB_HAS_CPP_ATTRIBUTE(x) 0 +#else +#define ROCKSDB_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#endif + +#if ROCKSDB_HAS_CPP_ATTRIBUTE(maybe_unused) && __cplusplus >= 201703L +#define ROCKSDB_MAYBE_UNUSED [[maybe_unused]] +#elif ROCKSDB_HAS_CPP_ATTRIBUTE(gnu::unused) || __GNUC__ +#define ROCKSDB_MAYBE_UNUSED [[gnu::unused]] +#else +#define ROCKSDB_MAYBE_UNUSED +#endif + +namespace ROCKSDB_NAMESPACE { + +const std::string kNullptrString = "nullptr"; + +std::vector StringSplit(const std::string& arg, char delim) { + std::vector splits; + std::stringstream ss(arg); + std::string item; + while (std::getline(ss, item, delim)) { + splits.push_back(item); + } + return splits; +} + +// for micros < 10ms, print "XX us". +// for micros < 10sec, print "XX ms". +// for micros >= 10 sec, print "XX sec". +// for micros <= 1 hour, print Y:X M:S". +// for micros > 1 hour, print Z:Y:X H:M:S". +int AppendHumanMicros(uint64_t micros, char* output, int len, + bool fixed_format) { + if (micros < 10000 && !fixed_format) { + return snprintf(output, len, "%" PRIu64 " us", micros); + } else if (micros < 10000000 && !fixed_format) { + return snprintf(output, len, "%.3lf ms", + static_cast(micros) / 1000); + } else if (micros < 1000000l * 60 && !fixed_format) { + return snprintf(output, len, "%.3lf sec", + static_cast(micros) / 1000000); + } else if (micros < 1000000ll * 60 * 60 && !fixed_format) { + return snprintf(output, len, "%02" PRIu64 ":%05.3f M:S", + micros / 1000000 / 60, + static_cast(micros % 60000000) / 1000000); + } else { + return snprintf(output, len, "%02" PRIu64 ":%02" PRIu64 ":%05.3f H:M:S", + micros / 1000000 / 3600, (micros / 1000000 / 60) % 60, + static_cast(micros % 60000000) / 1000000); + } +} + +// for sizes >=10TB, print "XXTB" +// for sizes >=10GB, print "XXGB" +// etc. +// append file size summary to output and return the len +int AppendHumanBytes(uint64_t bytes, char* output, int len) { + const uint64_t ull10 = 10; + if (bytes >= ull10 << 40) { + return snprintf(output, len, "%" PRIu64 "TB", bytes >> 40); + } else if (bytes >= ull10 << 30) { + return snprintf(output, len, "%" PRIu64 "GB", bytes >> 30); + } else if (bytes >= ull10 << 20) { + return snprintf(output, len, "%" PRIu64 "MB", bytes >> 20); + } else if (bytes >= ull10 << 10) { + return snprintf(output, len, "%" PRIu64 "KB", bytes >> 10); + } else { + return snprintf(output, len, "%" PRIu64 "B", bytes); + } +} + +void AppendNumberTo(std::string* str, uint64_t num) { + char buf[30]; + snprintf(buf, sizeof(buf), "%" PRIu64, num); + str->append(buf); +} + +void AppendEscapedStringTo(std::string* str, const Slice& value) { + for (size_t i = 0; i < value.size(); i++) { + char c = value[i]; + if (c >= ' ' && c <= '~') { + str->push_back(c); + } else { + char buf[10]; + snprintf(buf, sizeof(buf), "\\x%02x", + static_cast(c) & 0xff); + str->append(buf); + } + } +} + +std::string NumberToHumanString(int64_t num) { + char buf[19]; + int64_t absnum = num < 0 ? 
-num : num; + if (absnum < 10000) { + snprintf(buf, sizeof(buf), "%" PRIi64, num); + } else if (absnum < 10000000) { + snprintf(buf, sizeof(buf), "%" PRIi64 "K", num / 1000); + } else if (absnum < 10000000000LL) { + snprintf(buf, sizeof(buf), "%" PRIi64 "M", num / 1000000); + } else { + snprintf(buf, sizeof(buf), "%" PRIi64 "G", num / 1000000000); + } + return std::string(buf); +} + +std::string BytesToHumanString(uint64_t bytes) { + const char* size_name[] = {"KB", "MB", "GB", "TB"}; + double final_size = static_cast(bytes); + size_t size_idx; + + // always start with KB + final_size /= 1024; + size_idx = 0; + + while (size_idx < 3 && final_size >= 1024) { + final_size /= 1024; + size_idx++; + } + + char buf[20]; + snprintf(buf, sizeof(buf), "%.2f %s", final_size, size_name[size_idx]); + return std::string(buf); +} + +std::string TimeToHumanString(int unixtime) { + char time_buffer[80]; + time_t rawtime = unixtime; + struct tm tInfo; + struct tm* timeinfo = port::LocalTimeR(&rawtime, &tInfo); + assert(timeinfo == &tInfo); + strftime(time_buffer, 80, "%c", timeinfo); + return std::string(time_buffer); +} + +std::string EscapeString(const Slice& value) { + std::string r; + AppendEscapedStringTo(&r, value); + return r; +} + +bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { + uint64_t v = 0; + int digits = 0; + while (!in->empty()) { + char c = (*in)[0]; + if (c >= '0' && c <= '9') { + ++digits; + const unsigned int delta = (c - '0'); + static const uint64_t kMaxUint64 = ~static_cast(0); + if (v > kMaxUint64 / 10 || + (v == kMaxUint64 / 10 && delta > kMaxUint64 % 10)) { + // Overflow + return false; + } + v = (v * 10) + delta; + in->remove_prefix(1); + } else { + break; + } + } + *val = v; + return (digits > 0); +} + +bool isSpecialChar(const char c) { + if (c == '\\' || c == '#' || c == ':' || c == '\r' || c == '\n') { + return true; + } + return false; +} + +namespace { +using CharMap = std::pair; +} + +char UnescapeChar(const char c) { + static const CharMap convert_map[] = {{'r', '\r'}, {'n', '\n'}}; + + auto iter = std::find_if(std::begin(convert_map), std::end(convert_map), + [c](const CharMap& p) { return p.first == c; }); + + if (iter == std::end(convert_map)) { + return c; + } + return iter->second; +} + +char EscapeChar(const char c) { + static const CharMap convert_map[] = {{'\n', 'n'}, {'\r', 'r'}}; + + auto iter = std::find_if(std::begin(convert_map), std::end(convert_map), + [c](const CharMap& p) { return p.first == c; }); + + if (iter == std::end(convert_map)) { + return c; + } + return iter->second; +} + +std::string EscapeOptionString(const std::string& raw_string) { + std::string output; + for (auto c : raw_string) { + if (isSpecialChar(c)) { + output += '\\'; + output += EscapeChar(c); + } else { + output += c; + } + } + + return output; +} + +std::string UnescapeOptionString(const std::string& escaped_string) { + bool escaped = false; + std::string output; + + for (auto c : escaped_string) { + if (escaped) { + output += UnescapeChar(c); + escaped = false; + } else { + if (c == '\\') { + escaped = true; + continue; + } + output += c; + } + } + return output; +} + +std::string trim(const std::string& str) { + if (str.empty()) return std::string(); + size_t start = 0; + size_t end = str.size() - 1; + while (isspace(str[start]) != 0 && start < end) { + ++start; + } + while (isspace(str[end]) != 0 && start < end) { + --end; + } + if (start <= end) { + return str.substr(start, end - start + 1); + } + return std::string(); +} + +bool EndsWith(const std::string& string, const 
std::string& pattern) { + size_t plen = pattern.size(); + size_t slen = string.size(); + if (plen <= slen) { + return string.compare(slen - plen, plen, pattern) == 0; + } else { + return false; + } +} + +bool StartsWith(const std::string& string, const std::string& pattern) { + return string.compare(0, pattern.size(), pattern) == 0; +} + +#ifndef ROCKSDB_LITE + +bool ParseBoolean(const std::string& type, const std::string& value) { + if (value == "true" || value == "1") { + return true; + } else if (value == "false" || value == "0") { + return false; + } + throw std::invalid_argument(type); +} + +uint8_t ParseUint8(const std::string& value) { + uint64_t num = ParseUint64(value); + if ((num >> 8LL) == 0) { + return static_cast(num); + } else { + throw std::out_of_range(value); + } +} + +uint32_t ParseUint32(const std::string& value) { + uint64_t num = ParseUint64(value); + if ((num >> 32LL) == 0) { + return static_cast(num); + } else { + throw std::out_of_range(value); + } +} + +int32_t ParseInt32(const std::string& value) { + int64_t num = ParseInt64(value); + if (num <= std::numeric_limits::max() && + num >= std::numeric_limits::min()) { + return static_cast(num); + } else { + throw std::out_of_range(value); + } +} + +#endif + +uint64_t ParseUint64(const std::string& value) { + size_t endchar; +#ifndef CYGWIN + uint64_t num = std::stoull(value.c_str(), &endchar); +#else + char* endptr; + uint64_t num = std::strtoul(value.c_str(), &endptr, 0); + endchar = endptr - value.c_str(); +#endif + + if (endchar < value.length()) { + char c = value[endchar]; + if (c == 'k' || c == 'K') + num <<= 10LL; + else if (c == 'm' || c == 'M') + num <<= 20LL; + else if (c == 'g' || c == 'G') + num <<= 30LL; + else if (c == 't' || c == 'T') + num <<= 40LL; + } + + return num; +} + +int64_t ParseInt64(const std::string& value) { + size_t endchar; +#ifndef CYGWIN + int64_t num = std::stoll(value.c_str(), &endchar); +#else + char* endptr; + int64_t num = std::strtoll(value.c_str(), &endptr, 0); + endchar = endptr - value.c_str(); +#endif + + if (endchar < value.length()) { + char c = value[endchar]; + if (c == 'k' || c == 'K') + num <<= 10LL; + else if (c == 'm' || c == 'M') + num <<= 20LL; + else if (c == 'g' || c == 'G') + num <<= 30LL; + else if (c == 't' || c == 'T') + num <<= 40LL; + } + + return num; +} + +int ParseInt(const std::string& value) { + size_t endchar; +#ifndef CYGWIN + int num = std::stoi(value.c_str(), &endchar); +#else + char* endptr; + int num = std::strtoul(value.c_str(), &endptr, 0); + endchar = endptr - value.c_str(); +#endif + + if (endchar < value.length()) { + char c = value[endchar]; + if (c == 'k' || c == 'K') + num <<= 10; + else if (c == 'm' || c == 'M') + num <<= 20; + else if (c == 'g' || c == 'G') + num <<= 30; + } + + return num; +} + +double ParseDouble(const std::string& value) { +#ifndef CYGWIN + return std::stod(value); +#else + return std::strtod(value.c_str(), 0); +#endif +} + +size_t ParseSizeT(const std::string& value) { + return static_cast(ParseUint64(value)); +} + +std::vector ParseVectorInt(const std::string& value) { + std::vector result; + size_t start = 0; + while (start < value.size()) { + size_t end = value.find(':', start); + if (end == std::string::npos) { + result.push_back(ParseInt(value.substr(start))); + break; + } else { + result.push_back(ParseInt(value.substr(start, end - start))); + start = end + 1; + } + } + return result; +} + +bool SerializeIntVector(const std::vector& vec, std::string* value) { + *value = ""; + for (size_t i = 0; i < vec.size(); 
++i) { + if (i > 0) { + *value += ":"; + } + *value += std::to_string(vec[i]); + } + return true; +} + +// Copied from folly/string.cpp: +// https://github.com/facebook/folly/blob/0deef031cb8aab76dc7e736f8b7c22d701d5f36b/folly/String.cpp#L457 +// There are two variants of `strerror_r` function, one returns +// `int`, and another returns `char*`. Selecting proper version using +// preprocessor macros portably is extremely hard. +// +// For example, on Android function signature depends on `__USE_GNU` and +// `__ANDROID_API__` macros (https://git.io/fjBBE). +// +// So we are using C++ overloading trick: we pass a pointer of +// `strerror_r` to `invoke_strerror_r` function, and C++ compiler +// selects proper function. + +#if !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))) +ROCKSDB_MAYBE_UNUSED +static std::string invoke_strerror_r(int (*strerror_r)(int, char*, size_t), + int err, char* buf, size_t buflen) { + // Using XSI-compatible strerror_r + int r = strerror_r(err, buf, buflen); + + // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero + if (r != 0) { + snprintf(buf, buflen, "Unknown error %d (strerror_r failed with error %d)", + err, errno); + } + return buf; +} + +ROCKSDB_MAYBE_UNUSED +static std::string invoke_strerror_r(char* (*strerror_r)(int, char*, size_t), + int err, char* buf, size_t buflen) { + // Using GNU strerror_r + return strerror_r(err, buf, buflen); +} +#endif // !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))) + +std::string errnoStr(int err) { + char buf[1024]; + buf[0] = '\0'; + + std::string result; + + // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html + // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html +#if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)) + // mingw64 has no strerror_r, but Windows has strerror_s, which C11 added + // as well. So maybe we should use this across all platforms (together + // with strerrorlen_s). Note strerror_r and _s have swapped args. + int r = strerror_s(buf, sizeof(buf), err); + if (r != 0) { + snprintf(buf, sizeof(buf), + "Unknown error %d (strerror_r failed with error %d)", err, errno); + } + result.assign(buf); +#else + // Using any strerror_r + result.assign(invoke_strerror_r(strerror_r, err, buf, sizeof(buf))); +#endif + + return result; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/string_util.h b/src/rocksdb/util/string_util.h new file mode 100644 index 000000000..11178fd1d --- /dev/null +++ b/src/rocksdb/util/string_util.h @@ -0,0 +1,177 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#pragma once + +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class Slice; + +extern std::vector StringSplit(const std::string& arg, char delim); + +// Append a human-readable printout of "num" to *str +extern void AppendNumberTo(std::string* str, uint64_t num); + +// Append a human-readable printout of "value" to *str. +// Escapes any non-printable characters found in "value". +extern void AppendEscapedStringTo(std::string* str, const Slice& value); + +// Put n digits from v in base kBase to (*buf)[0] to (*buf)[n-1] and +// advance *buf to the position after what was written. 
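// --- Editor's note (illustrative sketch, not part of the upstream patch):
// a few of the helpers implemented in string_util.cc above, with the values
// they produce per that implementation. The inputs are arbitrary examples.
#include <cassert>
#include <string>
#include "util/string_util.h"

void StringUtilExamples() {
  using namespace ROCKSDB_NAMESPACE;
  // ParseUint64 treats trailing k/m/g/t as binary shifts.
  assert(ParseUint64("4k") == 4096u);
  // BytesToHumanString starts at KB and keeps two decimals; 1 MiB formats as
  // "1.00 MB" per the implementation above.
  assert(BytesToHumanString(1048576) == "1.00 MB");
  // NumberToHumanString switches to K/M/G only at 10^4/10^7/10^10.
  assert(NumberToHumanString(9999) == "9999");
  assert(NumberToHumanString(123456) == "123K");
  // EscapeOptionString/UnescapeOptionString round-trip special characters.
  std::string escaped = EscapeOptionString("a:b\nc");
  assert(UnescapeOptionString(escaped) == "a:b\nc");
}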
+template +inline void PutBaseChars(char** buf, size_t n, uint64_t v, bool uppercase) { + const char* digitChars = uppercase ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + : "0123456789abcdefghijklmnopqrstuvwxyz"; + for (size_t i = n; i > 0; --i) { + (*buf)[i - 1] = digitChars[static_cast(v % kBase)]; + v /= kBase; + } + *buf += n; +} + +// Parse n digits from *buf in base kBase to *v and advance *buf to the +// position after what was read. On success, true is returned. On failure, +// false is returned, *buf is placed at the first bad character, and *v +// contains the partial parsed data. Overflow is not checked but the +// result is accurate mod 2^64. Requires the starting value of *v to be +// zero or previously accumulated parsed digits, i.e. +// ParseBaseChars(&b, n, &v); +// is equivalent to n calls to +// ParseBaseChars(&b, 1, &v); +template +inline bool ParseBaseChars(const char** buf, size_t n, uint64_t* v) { + while (n) { + char c = **buf; + *v *= static_cast(kBase); + if (c >= '0' && (kBase >= 10 ? c <= '9' : c < '0' + kBase)) { + *v += static_cast(c - '0'); + } else if (kBase > 10 && c >= 'A' && c < 'A' + kBase - 10) { + *v += static_cast(c - 'A' + 10); + } else if (kBase > 10 && c >= 'a' && c < 'a' + kBase - 10) { + *v += static_cast(c - 'a' + 10); + } else { + return false; + } + --n; + ++*buf; + } + return true; +} + +// Return a human-readable version of num. +// for num >= 10.000, prints "xxK" +// for num >= 10.000.000, prints "xxM" +// for num >= 10.000.000.000, prints "xxG" +extern std::string NumberToHumanString(int64_t num); + +// Return a human-readable version of bytes +// ex: 1048576 -> 1.00 GB +extern std::string BytesToHumanString(uint64_t bytes); + +// Return a human-readable version of unix time +// ex: 1562116015 -> "Tue Jul 2 18:06:55 2019" +extern std::string TimeToHumanString(int unixtime); + +// Append a human-readable time in micros. +int AppendHumanMicros(uint64_t micros, char* output, int len, + bool fixed_format); + +// Append a human-readable size in bytes +int AppendHumanBytes(uint64_t bytes, char* output, int len); + +// Return a human-readable version of "value". +// Escapes any non-printable characters found in "value". +extern std::string EscapeString(const Slice& value); + +// Parse a human-readable number from "*in" into *value. On success, +// advances "*in" past the consumed number and sets "*val" to the +// numeric value. Otherwise, returns false and leaves *in in an +// unspecified state. +extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val); + +// Returns true if the input char "c" is considered as a special character +// that will be escaped when EscapeOptionString() is called. +// +// @param c the input char +// @return true if the input char "c" is considered as a special character. +// @see EscapeOptionString +bool isSpecialChar(const char c); + +// If the input char is an escaped char, it will return the its +// associated raw-char. Otherwise, the function will simply return +// the original input char. +char UnescapeChar(const char c); + +// If the input char is a control char, it will return the its +// associated escaped char. Otherwise, the function will simply return +// the original input char. +char EscapeChar(const char c); + +// Converts a raw string to an escaped string. Escaped-characters are +// defined via the isSpecialChar() function. When a char in the input +// string "raw_string" is classified as a special characters, then it +// will be prefixed by '\' in the output. 
+// +// It's inverse function is UnescapeOptionString(). +// @param raw_string the input string +// @return the '\' escaped string of the input "raw_string" +// @see isSpecialChar, UnescapeOptionString +std::string EscapeOptionString(const std::string& raw_string); + +// The inverse function of EscapeOptionString. It converts +// an '\' escaped string back to a raw string. +// +// @param escaped_string the input '\' escaped string +// @return the raw string of the input "escaped_string" +std::string UnescapeOptionString(const std::string& escaped_string); + +std::string trim(const std::string& str); + +// Returns true if "string" ends with "pattern" +bool EndsWith(const std::string& string, const std::string& pattern); + +// Returns true if "string" starts with "pattern" +bool StartsWith(const std::string& string, const std::string& pattern); + +#ifndef ROCKSDB_LITE +bool ParseBoolean(const std::string& type, const std::string& value); + +uint8_t ParseUint8(const std::string& value); + +uint32_t ParseUint32(const std::string& value); + +int32_t ParseInt32(const std::string& value); +#endif + +uint64_t ParseUint64(const std::string& value); + +int ParseInt(const std::string& value); + +int64_t ParseInt64(const std::string& value); + +double ParseDouble(const std::string& value); + +size_t ParseSizeT(const std::string& value); + +std::vector ParseVectorInt(const std::string& value); + +bool SerializeIntVector(const std::vector& vec, std::string* value); + +extern const std::string kNullptrString; + +// errnoStr() function returns a string that describes the error code passed in +// the argument err +extern std::string errnoStr(int err); + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/thread_guard.h b/src/rocksdb/util/thread_guard.h new file mode 100644 index 000000000..b2bb06a1b --- /dev/null +++ b/src/rocksdb/util/thread_guard.h @@ -0,0 +1,41 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "port/port.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Resource management object for threads that joins the thread upon +// destruction. Has unique ownership of the thread object, so copying it is not +// allowed, while moving it transfers ownership. +class ThreadGuard { + public: + ThreadGuard() = default; + + explicit ThreadGuard(port::Thread&& thread) : thread_(std::move(thread)) {} + + ThreadGuard(const ThreadGuard&) = delete; + ThreadGuard& operator=(const ThreadGuard&) = delete; + + ThreadGuard(ThreadGuard&&) noexcept = default; + ThreadGuard& operator=(ThreadGuard&&) noexcept = default; + + ~ThreadGuard() { + if (thread_.joinable()) { + thread_.join(); + } + } + + const port::Thread& GetThread() const { return thread_; } + port::Thread& GetThread() { return thread_; } + + private: + port::Thread thread_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/thread_list_test.cc b/src/rocksdb/util/thread_list_test.cc new file mode 100644 index 000000000..af4e62355 --- /dev/null +++ b/src/rocksdb/util/thread_list_test.cc @@ -0,0 +1,360 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include +#include + +#include "monitoring/thread_status_updater.h" +#include "rocksdb/db.h" +#include "test_util/testharness.h" + +#ifdef ROCKSDB_USING_THREAD_STATUS + +namespace ROCKSDB_NAMESPACE { + +class SimulatedBackgroundTask { + public: + SimulatedBackgroundTask( + const void* db_key, const std::string& db_name, const void* cf_key, + const std::string& cf_name, + const ThreadStatus::OperationType operation_type = + ThreadStatus::OP_UNKNOWN, + const ThreadStatus::StateType state_type = ThreadStatus::STATE_UNKNOWN) + : db_key_(db_key), + db_name_(db_name), + cf_key_(cf_key), + cf_name_(cf_name), + operation_type_(operation_type), + state_type_(state_type), + should_run_(true), + running_count_(0) { + Env::Default()->GetThreadStatusUpdater()->NewColumnFamilyInfo( + db_key_, db_name_, cf_key_, cf_name_); + } + + ~SimulatedBackgroundTask() { + Env::Default()->GetThreadStatusUpdater()->EraseDatabaseInfo(db_key_); + } + + void Run() { + std::unique_lock l(mutex_); + running_count_++; + bg_cv_.notify_all(); + Env::Default()->GetThreadStatusUpdater()->SetColumnFamilyInfoKey(cf_key_); + Env::Default()->GetThreadStatusUpdater()->SetThreadOperation( + operation_type_); + Env::Default()->GetThreadStatusUpdater()->SetThreadState(state_type_); + while (should_run_) { + bg_cv_.wait(l); + } + Env::Default()->GetThreadStatusUpdater()->ClearThreadState(); + Env::Default()->GetThreadStatusUpdater()->ClearThreadOperation(); + Env::Default()->GetThreadStatusUpdater()->SetColumnFamilyInfoKey(nullptr); + running_count_--; + bg_cv_.notify_all(); + } + + void FinishAllTasks() { + std::unique_lock l(mutex_); + should_run_ = false; + bg_cv_.notify_all(); + } + + void WaitUntilScheduled(int job_count) { + std::unique_lock l(mutex_); + while (running_count_ < job_count) { + bg_cv_.wait(l); + } + } + + void WaitUntilDone() { + std::unique_lock l(mutex_); + while (running_count_ > 0) { + bg_cv_.wait(l); + } + } + + static void DoSimulatedTask(void* arg) { + reinterpret_cast(arg)->Run(); + } + + private: + const void* db_key_; + const std::string db_name_; + const void* cf_key_; + const std::string cf_name_; + const ThreadStatus::OperationType operation_type_; + const ThreadStatus::StateType state_type_; + std::mutex mutex_; + std::condition_variable bg_cv_; + bool should_run_; + std::atomic running_count_; +}; + +class ThreadListTest : public testing::Test { + public: + ThreadListTest() {} +}; + +TEST_F(ThreadListTest, GlobalTables) { + // verify the global tables for operations and states are properly indexed. 
+ for (int type = 0; type != ThreadStatus::NUM_OP_TYPES; ++type) { + ASSERT_EQ(global_operation_table[type].type, type); + ASSERT_EQ( + global_operation_table[type].name, + ThreadStatus::GetOperationName(ThreadStatus::OperationType(type))); + } + + for (int type = 0; type != ThreadStatus::NUM_STATE_TYPES; ++type) { + ASSERT_EQ(global_state_table[type].type, type); + ASSERT_EQ(global_state_table[type].name, + ThreadStatus::GetStateName(ThreadStatus::StateType(type))); + } + + for (int stage = 0; stage != ThreadStatus::NUM_OP_STAGES; ++stage) { + ASSERT_EQ(global_op_stage_table[stage].stage, stage); + ASSERT_EQ(global_op_stage_table[stage].name, + ThreadStatus::GetOperationStageName( + ThreadStatus::OperationStage(stage))); + } +} + +TEST_F(ThreadListTest, SimpleColumnFamilyInfoTest) { + Env* env = Env::Default(); + const int kHighPriorityThreads = 3; + const int kLowPriorityThreads = 5; + const int kSimulatedHighPriThreads = kHighPriorityThreads - 1; + const int kSimulatedLowPriThreads = kLowPriorityThreads / 3; + const int kDelayMicros = 1000000; + env->SetBackgroundThreads(kHighPriorityThreads, Env::HIGH); + env->SetBackgroundThreads(kLowPriorityThreads, Env::LOW); + // Wait 1 second so that threads start + Env::Default()->SleepForMicroseconds(kDelayMicros); + SimulatedBackgroundTask running_task(reinterpret_cast(1234), "running", + reinterpret_cast(5678), + "pikachu"); + + for (int test = 0; test < kSimulatedHighPriThreads; ++test) { + env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, &running_task, + Env::Priority::HIGH); + } + + for (int test = 0; test < kSimulatedLowPriThreads; ++test) { + env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, &running_task, + Env::Priority::LOW); + } + running_task.WaitUntilScheduled(kSimulatedHighPriThreads + + kSimulatedLowPriThreads); + // We can only reserve limited number of waiting threads + ASSERT_EQ(kHighPriorityThreads - kSimulatedHighPriThreads, + env->ReserveThreads(kHighPriorityThreads, Env::Priority::HIGH)); + ASSERT_EQ(kLowPriorityThreads - kSimulatedLowPriThreads, + env->ReserveThreads(kLowPriorityThreads, Env::Priority::LOW)); + + // Reservation shall not affect the existing thread list + std::vector thread_list; + + // Verify the number of running threads in each pool. 
+ ASSERT_OK(env->GetThreadList(&thread_list)); + int running_count[ThreadStatus::NUM_THREAD_TYPES] = {0}; + for (auto thread_status : thread_list) { + if (thread_status.cf_name == "pikachu" && + thread_status.db_name == "running") { + running_count[thread_status.thread_type]++; + } + } + // Cannot reserve more threads + ASSERT_EQ(0, env->ReserveThreads(kHighPriorityThreads, Env::Priority::HIGH)); + ASSERT_EQ(0, env->ReserveThreads(kLowPriorityThreads, Env::Priority::LOW)); + + ASSERT_EQ(running_count[ThreadStatus::HIGH_PRIORITY], + kSimulatedHighPriThreads); + ASSERT_EQ(running_count[ThreadStatus::LOW_PRIORITY], kSimulatedLowPriThreads); + ASSERT_EQ(running_count[ThreadStatus::USER], 0); + + running_task.FinishAllTasks(); + running_task.WaitUntilDone(); + + ASSERT_EQ(kHighPriorityThreads - kSimulatedHighPriThreads, + env->ReleaseThreads(kHighPriorityThreads, Env::Priority::HIGH)); + ASSERT_EQ(kLowPriorityThreads - kSimulatedLowPriThreads, + env->ReleaseThreads(kLowPriorityThreads, Env::Priority::LOW)); + // Verify none of the threads are running + ASSERT_OK(env->GetThreadList(&thread_list)); + + for (int i = 0; i < ThreadStatus::NUM_THREAD_TYPES; ++i) { + running_count[i] = 0; + } + for (auto thread_status : thread_list) { + if (thread_status.cf_name == "pikachu" && + thread_status.db_name == "running") { + running_count[thread_status.thread_type]++; + } + } + + ASSERT_EQ(running_count[ThreadStatus::HIGH_PRIORITY], 0); + ASSERT_EQ(running_count[ThreadStatus::LOW_PRIORITY], 0); + ASSERT_EQ(running_count[ThreadStatus::USER], 0); +} + +namespace { +void UpdateStatusCounts(const std::vector& thread_list, + int operation_counts[], int state_counts[]) { + for (auto thread_status : thread_list) { + operation_counts[thread_status.operation_type]++; + state_counts[thread_status.state_type]++; + } +} + +void VerifyAndResetCounts(const int correct_counts[], int collected_counts[], + int size) { + for (int i = 0; i < size; ++i) { + ASSERT_EQ(collected_counts[i], correct_counts[i]); + collected_counts[i] = 0; + } +} + +void UpdateCount(int operation_counts[], int from_event, int to_event, + int amount) { + operation_counts[from_event] -= amount; + operation_counts[to_event] += amount; +} +} // namespace + +TEST_F(ThreadListTest, SimpleEventTest) { + Env* env = Env::Default(); + + // simulated tasks + const int kFlushWriteTasks = 3; + SimulatedBackgroundTask flush_write_task( + reinterpret_cast(1234), "running", reinterpret_cast(5678), + "pikachu", ThreadStatus::OP_FLUSH); + + const int kCompactionWriteTasks = 4; + SimulatedBackgroundTask compaction_write_task( + reinterpret_cast(1234), "running", reinterpret_cast(5678), + "pikachu", ThreadStatus::OP_COMPACTION); + + const int kCompactionReadTasks = 5; + SimulatedBackgroundTask compaction_read_task( + reinterpret_cast(1234), "running", reinterpret_cast(5678), + "pikachu", ThreadStatus::OP_COMPACTION); + + const int kCompactionWaitTasks = 6; + SimulatedBackgroundTask compaction_wait_task( + reinterpret_cast(1234), "running", reinterpret_cast(5678), + "pikachu", ThreadStatus::OP_COMPACTION); + + // setup right answers + int correct_operation_counts[ThreadStatus::NUM_OP_TYPES] = {0}; + correct_operation_counts[ThreadStatus::OP_FLUSH] = kFlushWriteTasks; + correct_operation_counts[ThreadStatus::OP_COMPACTION] = + kCompactionWriteTasks + kCompactionReadTasks + kCompactionWaitTasks; + + env->SetBackgroundThreads(correct_operation_counts[ThreadStatus::OP_FLUSH], + Env::HIGH); + env->SetBackgroundThreads( + correct_operation_counts[ThreadStatus::OP_COMPACTION], 
Env::LOW); + + // schedule the simulated tasks + for (int t = 0; t < kFlushWriteTasks; ++t) { + env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, &flush_write_task, + Env::Priority::HIGH); + } + flush_write_task.WaitUntilScheduled(kFlushWriteTasks); + + for (int t = 0; t < kCompactionWriteTasks; ++t) { + env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, + &compaction_write_task, Env::Priority::LOW); + } + compaction_write_task.WaitUntilScheduled(kCompactionWriteTasks); + + for (int t = 0; t < kCompactionReadTasks; ++t) { + env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, + &compaction_read_task, Env::Priority::LOW); + } + compaction_read_task.WaitUntilScheduled(kCompactionReadTasks); + + for (int t = 0; t < kCompactionWaitTasks; ++t) { + env->Schedule(&SimulatedBackgroundTask::DoSimulatedTask, + &compaction_wait_task, Env::Priority::LOW); + } + compaction_wait_task.WaitUntilScheduled(kCompactionWaitTasks); + + // verify the thread-status + int operation_counts[ThreadStatus::NUM_OP_TYPES] = {0}; + int state_counts[ThreadStatus::NUM_STATE_TYPES] = {0}; + + std::vector thread_list; + ASSERT_OK(env->GetThreadList(&thread_list)); + UpdateStatusCounts(thread_list, operation_counts, state_counts); + VerifyAndResetCounts(correct_operation_counts, operation_counts, + ThreadStatus::NUM_OP_TYPES); + + // terminate compaction-wait tasks and see if the thread-status + // reflects this update + compaction_wait_task.FinishAllTasks(); + compaction_wait_task.WaitUntilDone(); + UpdateCount(correct_operation_counts, ThreadStatus::OP_COMPACTION, + ThreadStatus::OP_UNKNOWN, kCompactionWaitTasks); + + ASSERT_OK(env->GetThreadList(&thread_list)); + UpdateStatusCounts(thread_list, operation_counts, state_counts); + VerifyAndResetCounts(correct_operation_counts, operation_counts, + ThreadStatus::NUM_OP_TYPES); + + // terminate flush-write tasks and see if the thread-status + // reflects this update + flush_write_task.FinishAllTasks(); + flush_write_task.WaitUntilDone(); + UpdateCount(correct_operation_counts, ThreadStatus::OP_FLUSH, + ThreadStatus::OP_UNKNOWN, kFlushWriteTasks); + + ASSERT_OK(env->GetThreadList(&thread_list)); + UpdateStatusCounts(thread_list, operation_counts, state_counts); + VerifyAndResetCounts(correct_operation_counts, operation_counts, + ThreadStatus::NUM_OP_TYPES); + + // terminate compaction-write tasks and see if the thread-status + // reflects this update + compaction_write_task.FinishAllTasks(); + compaction_write_task.WaitUntilDone(); + UpdateCount(correct_operation_counts, ThreadStatus::OP_COMPACTION, + ThreadStatus::OP_UNKNOWN, kCompactionWriteTasks); + + ASSERT_OK(env->GetThreadList(&thread_list)); + UpdateStatusCounts(thread_list, operation_counts, state_counts); + VerifyAndResetCounts(correct_operation_counts, operation_counts, + ThreadStatus::NUM_OP_TYPES); + + // terminate compaction-write tasks and see if the thread-status + // reflects this update + compaction_read_task.FinishAllTasks(); + compaction_read_task.WaitUntilDone(); + UpdateCount(correct_operation_counts, ThreadStatus::OP_COMPACTION, + ThreadStatus::OP_UNKNOWN, kCompactionReadTasks); + + ASSERT_OK(env->GetThreadList(&thread_list)); + UpdateStatusCounts(thread_list, operation_counts, state_counts); + VerifyAndResetCounts(correct_operation_counts, operation_counts, + ThreadStatus::NUM_OP_TYPES); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + 
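// Illustrative sketch (editorial, not part of the test above): outside of tests,
// an application can poll Env::GetThreadList() the same way to see what the
// background threads are doing. Field and enum names follow
// rocksdb/thread_status.h; the summary printed here is only an example, and the
// call is only meaningful when thread status tracking is compiled in (see the
// #else branch just below).
#include <cstdio>
#include <vector>

#include "rocksdb/env.h"
#include "rocksdb/thread_status.h"

void PrintBackgroundOperationCounts() {
  std::vector<ROCKSDB_NAMESPACE::ThreadStatus> thread_list;
  ROCKSDB_NAMESPACE::Status s =
      ROCKSDB_NAMESPACE::Env::Default()->GetThreadList(&thread_list);
  if (!s.ok()) {
    return;  // e.g. not supported by this Env
  }
  int op_counts[ROCKSDB_NAMESPACE::ThreadStatus::NUM_OP_TYPES] = {0};
  for (const auto& ts : thread_list) {
    op_counts[ts.operation_type]++;
  }
  std::printf("flushing: %d, compacting: %d\n",
              op_counts[ROCKSDB_NAMESPACE::ThreadStatus::OP_FLUSH],
              op_counts[ROCKSDB_NAMESPACE::ThreadStatus::OP_COMPACTION]);
}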
+#else + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return 0; +} + +#endif // ROCKSDB_USING_THREAD_STATUS diff --git a/src/rocksdb/util/thread_local.cc b/src/rocksdb/util/thread_local.cc new file mode 100644 index 000000000..969639d9b --- /dev/null +++ b/src/rocksdb/util/thread_local.cc @@ -0,0 +1,521 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "util/thread_local.h" + +#include + +#include "port/likely.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +struct Entry { + Entry() : ptr(nullptr) {} + Entry(const Entry& e) : ptr(e.ptr.load(std::memory_order_relaxed)) {} + std::atomic ptr; +}; + +class StaticMeta; + +// This is the structure that is declared as "thread_local" storage. +// The vector keep list of atomic pointer for all instances for "current" +// thread. The vector is indexed by an Id that is unique in process and +// associated with one ThreadLocalPtr instance. The Id is assigned by a +// global StaticMeta singleton. So if we instantiated 3 ThreadLocalPtr +// instances, each thread will have a ThreadData with a vector of size 3: +// --------------------------------------------------- +// | | instance 1 | instance 2 | instance 3 | +// --------------------------------------------------- +// | thread 1 | void* | void* | void* | <- ThreadData +// --------------------------------------------------- +// | thread 2 | void* | void* | void* | <- ThreadData +// --------------------------------------------------- +// | thread 3 | void* | void* | void* | <- ThreadData +// --------------------------------------------------- +struct ThreadData { + explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst) + : entries(), next(nullptr), prev(nullptr), inst(_inst) {} + std::vector entries; + ThreadData* next; + ThreadData* prev; + ThreadLocalPtr::StaticMeta* inst; +}; + +class ThreadLocalPtr::StaticMeta { + public: + StaticMeta(); + + // Return the next available Id + uint32_t GetId(); + // Return the next available Id without claiming it + uint32_t PeekId() const; + // Return the given Id back to the free pool. This also triggers + // UnrefHandler for associated pointer value (if not NULL) for all threads. + void ReclaimId(uint32_t id); + + // Return the pointer value for the given id for the current thread. + void* Get(uint32_t id) const; + // Reset the pointer value for the given id for the current thread. + void Reset(uint32_t id, void* ptr); + // Atomically swap the supplied ptr and return the previous value + void* Swap(uint32_t id, void* ptr); + // Atomically compare and swap the provided value only if it equals + // to expected value. + bool CompareAndSwap(uint32_t id, void* ptr, void*& expected); + // Reset all thread local data to replacement, and return non-nullptr + // data for all existing threads + void Scrape(uint32_t id, autovector* ptrs, void* const replacement); + // Update res by applying func on each thread-local value. 
Holds a lock that + // prevents unref handler from running during this call, but clients must + // still provide external synchronization since the owning thread can + // access the values without internal locking, e.g., via Get() and Reset(). + void Fold(uint32_t id, FoldFunc func, void* res); + + // Register the UnrefHandler for id + void SetHandler(uint32_t id, UnrefHandler handler); + + // protect inst, next_instance_id_, free_instance_ids_, head_, + // ThreadData.entries + // + // Note that here we prefer function static variable instead of the usual + // global static variable. The reason is that c++ destruction order of + // static variables in the reverse order of their construction order. + // However, C++ does not guarantee any construction order when global + // static variables are defined in different files, while the function + // static variables are initialized when their function are first called. + // As a result, the construction order of the function static variables + // can be controlled by properly invoke their first function calls in + // the right order. + // + // For instance, the following function contains a function static + // variable. We place a dummy function call of this inside + // Env::Default() to ensure the construction order of the construction + // order. + static port::Mutex* Mutex(); + + // Returns the member mutex of the current StaticMeta. In general, + // Mutex() should be used instead of this one. However, in case where + // the static variable inside Instance() goes out of scope, MemberMutex() + // should be used. One example is OnThreadExit() function. + port::Mutex* MemberMutex() { return &mutex_; } + + private: + // Get UnrefHandler for id with acquiring mutex + // REQUIRES: mutex locked + UnrefHandler GetHandler(uint32_t id); + + // Triggered before a thread terminates + static void OnThreadExit(void* ptr); + + // Add current thread's ThreadData to the global chain + // REQUIRES: mutex locked + void AddThreadData(ThreadData* d); + + // Remove current thread's ThreadData from the global chain + // REQUIRES: mutex locked + void RemoveThreadData(ThreadData* d); + + static ThreadData* GetThreadLocal(); + + uint32_t next_instance_id_; + // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed + // frequently. This also prevents it from blowing up the vector space. + autovector free_instance_ids_; + // Chain all thread local structure together. This is necessary since + // when one ThreadLocalPtr gets destroyed, we need to loop over each + // thread's version of pointer corresponding to that instance and + // call UnrefHandler for it. + ThreadData head_; + + std::unordered_map handler_map_; + + // The private mutex. Developers should always use Mutex() instead of + // using this variable directly. + port::Mutex mutex_; + // Thread local storage + static thread_local ThreadData* tls_; + + // Used to make thread exit trigger possible if !defined(OS_MACOSX). + // Otherwise, used to retrieve thread data. + pthread_key_t pthread_key_; +}; + +thread_local ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr; + +// Windows doesn't support a per-thread destructor with its +// TLS primitives. So, we build it manually by inserting a +// function to be called on each thread's exit. +// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way +// and http://www.nynaeve.net/?p=183 +// +// really we do this to have clear conscience since using TLS with thread-pools +// is iffy +// although OK within a request. 
But otherwise, threads have no identity in its +// modern use. + +// This runs on windows only called from the System Loader +#ifdef OS_WIN + +// Windows cleanup routine is invoked from a System Loader with a different +// signature so we can not directly hookup the original OnThreadExit which is +// private member +// so we make StaticMeta class share with the us the address of the function so +// we can invoke it. +namespace wintlscleanup { + +// This is set to OnThreadExit in StaticMeta singleton constructor +UnrefHandler thread_local_inclass_routine = nullptr; +pthread_key_t thread_local_key = pthread_key_t(-1); + +// Static callback function to call with each thread termination. +void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) { + // We decided to punt on PROCESS_EXIT + if (DLL_THREAD_DETACH == reason) { + if (thread_local_key != pthread_key_t(-1) && + thread_local_inclass_routine != nullptr) { + void* tls = TlsGetValue(thread_local_key); + if (tls != nullptr) { + thread_local_inclass_routine(tls); + } + } + } +} + +} // namespace wintlscleanup + +// extern "C" suppresses C++ name mangling so we know the symbol name for the +// linker /INCLUDE:symbol pragma above. +extern "C" { + +#ifdef _MSC_VER +// The linker must not discard thread_callback_on_exit. (We force a reference +// to this variable with a linker /include:symbol pragma to ensure that.) If +// this variable is discarded, the OnThreadExit function will never be called. +#ifndef _X86_ + +// .CRT section is merged with .rdata on x64 so it must be constant data. +#pragma const_seg(".CRT$XLB") +// When defining a const variable, it must have external linkage to be sure the +// linker doesn't discard it. +extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit; +const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = + wintlscleanup::WinOnThreadExit; +// Reset the default section. +#pragma const_seg() + +#pragma comment(linker, "/include:_tls_used") +#pragma comment(linker, "/include:p_thread_callback_on_exit") + +#else // _X86_ + +#pragma data_seg(".CRT$XLB") +PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit; +// Reset the default section. +#pragma data_seg() + +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit") + +#endif // _X86_ + +#else +// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc +BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) + wintlscleanup::WinOnThreadExit(h, dwReason, pv); + return TRUE; +} +#endif +} // extern "C" + +#endif // OS_WIN + +void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); } + +ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() { + // Here we prefer function static variable instead of global + // static variable as function static variable is initialized + // when the function is first call. As a result, we can properly + // control their construction order by properly preparing their + // first function call. + // + // Note that here we decide to make "inst" a static pointer w/o deleting + // it at the end instead of a static variable. This is to avoid the following + // destruction order disaster happens when a child thread using ThreadLocalPtr + // dies AFTER the main thread dies: When a child thread happens to use + // ThreadLocalPtr, it will try to delete its thread-local data on its + // OnThreadExit when the child thread dies. 
However, OnThreadExit depends + // on the following variable. As a result, if the main thread dies before any + // child thread happen to use ThreadLocalPtr dies, then the destruction of + // the following variable will go first, then OnThreadExit, therefore causing + // invalid access. + // + // The above problem can be solved by using thread_local to store tls_. + // thread_local supports dynamic construction and destruction of + // non-primitive typed variables. As a result, we can guarantee the + // destruction order even when the main thread dies before any child threads. + static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta(); + return inst; +} + +port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; } + +void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) { + auto* tls = static_cast(ptr); + assert(tls != nullptr); + + // Use the cached StaticMeta::Instance() instead of directly calling + // the variable inside StaticMeta::Instance() might already go out of + // scope here in case this OnThreadExit is called after the main thread + // dies. + auto* inst = tls->inst; + pthread_setspecific(inst->pthread_key_, nullptr); + + MutexLock l(inst->MemberMutex()); + inst->RemoveThreadData(tls); + // Unref stored pointers of current thread from all instances + uint32_t id = 0; + for (auto& e : tls->entries) { + void* raw = e.ptr.load(); + if (raw != nullptr) { + auto unref = inst->GetHandler(id); + if (unref != nullptr) { + unref(raw); + } + } + ++id; + } + // Delete thread local structure no matter if it is Mac platform + delete tls; +} + +ThreadLocalPtr::StaticMeta::StaticMeta() + : next_instance_id_(0), head_(this), pthread_key_(0) { + if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) { + abort(); + } + + // OnThreadExit is not getting called on the main thread. + // Call through the static destructor mechanism to avoid memory leak. + // + // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global + // singleton (destructors are invoked in reverse order of constructor + // _completion_); the latter must not mutate internal members. This + // cleanup mechanism inherently relies on use-after-release of the + // StaticMeta, and is brittle with respect to compiler-specific handling + // of memory backing destructed statically-scoped objects. Perhaps + // registering with atexit(3) would be more robust. + // +// This is not required on Windows. 
+#if !defined(OS_WIN) + static struct A { + ~A() { + if (tls_) { + OnThreadExit(tls_); + } + } + } a; +#endif // !defined(OS_WIN) + + head_.next = &head_; + head_.prev = &head_; + +#ifdef OS_WIN + // Share with Windows its cleanup routine and the key + wintlscleanup::thread_local_inclass_routine = OnThreadExit; + wintlscleanup::thread_local_key = pthread_key_; +#endif +} + +void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) { + Mutex()->AssertHeld(); + d->next = &head_; + d->prev = head_.prev; + head_.prev->next = d; + head_.prev = d; +} + +void ThreadLocalPtr::StaticMeta::RemoveThreadData(ThreadData* d) { + Mutex()->AssertHeld(); + d->next->prev = d->prev; + d->prev->next = d->next; + d->next = d->prev = d; +} + +ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() { + if (UNLIKELY(tls_ == nullptr)) { + auto* inst = Instance(); + tls_ = new ThreadData(inst); + { + // Register it in the global chain, needs to be done before thread exit + // handler registration + MutexLock l(Mutex()); + inst->AddThreadData(tls_); + } + // Even it is not OS_MACOSX, need to register value for pthread_key_ so that + // its exit handler will be triggered. + if (pthread_setspecific(inst->pthread_key_, tls_) != 0) { + { + MutexLock l(Mutex()); + inst->RemoveThreadData(tls_); + } + delete tls_; + abort(); + } + } + return tls_; +} + +void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const { + auto* tls = GetThreadLocal(); + if (UNLIKELY(id >= tls->entries.size())) { + return nullptr; + } + return tls->entries[id].ptr.load(std::memory_order_acquire); +} + +void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) { + auto* tls = GetThreadLocal(); + if (UNLIKELY(id >= tls->entries.size())) { + // Need mutex to protect entries access within ReclaimId + MutexLock l(Mutex()); + tls->entries.resize(id + 1); + } + tls->entries[id].ptr.store(ptr, std::memory_order_release); +} + +void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) { + auto* tls = GetThreadLocal(); + if (UNLIKELY(id >= tls->entries.size())) { + // Need mutex to protect entries access within ReclaimId + MutexLock l(Mutex()); + tls->entries.resize(id + 1); + } + return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire); +} + +bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr, + void*& expected) { + auto* tls = GetThreadLocal(); + if (UNLIKELY(id >= tls->entries.size())) { + // Need mutex to protect entries access within ReclaimId + MutexLock l(Mutex()); + tls->entries.resize(id + 1); + } + return tls->entries[id].ptr.compare_exchange_strong( + expected, ptr, std::memory_order_release, std::memory_order_relaxed); +} + +void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector* ptrs, + void* const replacement) { + MutexLock l(Mutex()); + for (ThreadData* t = head_.next; t != &head_; t = t->next) { + if (id < t->entries.size()) { + void* ptr = + t->entries[id].ptr.exchange(replacement, std::memory_order_acquire); + if (ptr != nullptr) { + ptrs->push_back(ptr); + } + } + } +} + +void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) { + MutexLock l(Mutex()); + for (ThreadData* t = head_.next; t != &head_; t = t->next) { + if (id < t->entries.size()) { + void* ptr = t->entries[id].ptr.load(); + if (ptr != nullptr) { + func(ptr, res); + } + } + } +} + +uint32_t ThreadLocalPtr::TEST_PeekId() { return Instance()->PeekId(); } + +void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) { + MutexLock l(Mutex()); + handler_map_[id] = handler; +} + 
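// Illustrative sketch (editorial, not part of the RocksDB sources): the core of
// the scheme implemented above is "one process-wide id per instance, one lazily
// grown vector per thread, indexed by that id". The simplified class below
// (SimpleTls, a hypothetical name) shows just that skeleton; the real StaticMeta
// additionally stores std::atomic<void*> entries, links every per-thread
// structure into a global list protected by Mutex(), and runs UnrefHandlers on
// thread exit.
#include <atomic>
#include <cstdint>
#include <vector>

class SimpleTls {
 public:
  SimpleTls() : id_(NextId()) {}

  // Return the value this thread stored for this instance, if any.
  void* Get() const {
    auto& entries = Entries();
    return id_ < entries.size() ? entries[id_] : nullptr;
  }

  // Store a value visible only to the calling thread.
  void Reset(void* ptr) {
    auto& entries = Entries();
    if (id_ >= entries.size()) {
      entries.resize(id_ + 1, nullptr);
    }
    entries[id_] = ptr;
  }

 private:
  // Each instance claims the next id from a process-wide counter.
  static uint32_t NextId() {
    static std::atomic<uint32_t> next{0};
    return next.fetch_add(1, std::memory_order_relaxed);
  }

  // One vector per thread; slot i belongs to the instance whose id is i.
  static std::vector<void*>& Entries() {
    static thread_local std::vector<void*> entries;
    return entries;
  }

  const uint32_t id_;
};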
+UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) { + Mutex()->AssertHeld(); + auto iter = handler_map_.find(id); + if (iter == handler_map_.end()) { + return nullptr; + } + return iter->second; +} + +uint32_t ThreadLocalPtr::StaticMeta::GetId() { + MutexLock l(Mutex()); + if (free_instance_ids_.empty()) { + return next_instance_id_++; + } + + uint32_t id = free_instance_ids_.back(); + free_instance_ids_.pop_back(); + return id; +} + +uint32_t ThreadLocalPtr::StaticMeta::PeekId() const { + MutexLock l(Mutex()); + if (!free_instance_ids_.empty()) { + return free_instance_ids_.back(); + } + return next_instance_id_; +} + +void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) { + // This id is not used, go through all thread local data and release + // corresponding value + MutexLock l(Mutex()); + auto unref = GetHandler(id); + for (ThreadData* t = head_.next; t != &head_; t = t->next) { + if (id < t->entries.size()) { + void* ptr = t->entries[id].ptr.exchange(nullptr); + if (ptr != nullptr && unref != nullptr) { + unref(ptr); + } + } + } + handler_map_[id] = nullptr; + free_instance_ids_.push_back(id); +} + +ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler) + : id_(Instance()->GetId()) { + if (handler != nullptr) { + Instance()->SetHandler(id_, handler); + } +} + +ThreadLocalPtr::~ThreadLocalPtr() { Instance()->ReclaimId(id_); } + +void* ThreadLocalPtr::Get() const { return Instance()->Get(id_); } + +void ThreadLocalPtr::Reset(void* ptr) { Instance()->Reset(id_, ptr); } + +void* ThreadLocalPtr::Swap(void* ptr) { return Instance()->Swap(id_, ptr); } + +bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) { + return Instance()->CompareAndSwap(id_, ptr, expected); +} + +void ThreadLocalPtr::Scrape(autovector* ptrs, void* const replacement) { + Instance()->Scrape(id_, ptrs, replacement); +} + +void ThreadLocalPtr::Fold(FoldFunc func, void* res) { + Instance()->Fold(id_, func, res); +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/thread_local.h b/src/rocksdb/util/thread_local.h new file mode 100644 index 000000000..fde68f86f --- /dev/null +++ b/src/rocksdb/util/thread_local.h @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include +#include +#include +#include +#include + +#include "port/port.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +// Cleanup function that will be called for a stored thread local +// pointer (if not NULL) when one of the following happens: +// (1) a thread terminates +// (2) a ThreadLocalPtr is destroyed +// +// Warning: this function is called while holding a global mutex. The same mutex +// is used (at least in some cases) by most methods of ThreadLocalPtr, and it's +// shared across all instances of ThreadLocalPtr. Thereforere extra care +// is needed to avoid deadlocks. In particular, the handler shouldn't lock any +// mutexes and shouldn't call any methods of any ThreadLocalPtr instances, +// unless you know what you're doing. +using UnrefHandler = void (*)(void* ptr); + +// ThreadLocalPtr stores only values of pointer type. 
Different from +// the usual thread-local-storage, ThreadLocalPtr has the ability to +// distinguish data coming from different threads and different +// ThreadLocalPtr instances. For example, if a regular thread_local +// variable A is declared in DBImpl, two DBImpl objects would share +// the same A. However, a ThreadLocalPtr that is defined under the +// scope of DBImpl can avoid such confliction. As a result, its memory +// usage would be O(# of threads * # of ThreadLocalPtr instances). +class ThreadLocalPtr { + public: + explicit ThreadLocalPtr(UnrefHandler handler = nullptr); + + ThreadLocalPtr(const ThreadLocalPtr&) = delete; + ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete; + + ~ThreadLocalPtr(); + + // Return the current pointer stored in thread local + void* Get() const; + + // Set a new pointer value to the thread local storage. + void Reset(void* ptr); + + // Atomically swap the supplied ptr and return the previous value + void* Swap(void* ptr); + + // Atomically compare the stored value with expected. Set the new + // pointer value to thread local only if the comparison is true. + // Otherwise, expected returns the stored value. + // Return true on success, false on failure + bool CompareAndSwap(void* ptr, void*& expected); + + // Reset all thread local data to replacement, and return non-nullptr + // data for all existing threads + void Scrape(autovector* ptrs, void* const replacement); + + using FoldFunc = std::function; + // Update res by applying func on each thread-local value. Holds a lock that + // prevents unref handler from running during this call, but clients must + // still provide external synchronization since the owning thread can + // access the values without internal locking, e.g., via Get() and Reset(). + void Fold(FoldFunc func, void* res); + + // Add here for testing + // Return the next available Id without claiming it + static uint32_t TEST_PeekId(); + + // Initialize the static singletons of the ThreadLocalPtr. + // + // If this function is not called, then the singletons will be + // automatically initialized when they are used. + // + // Calling this function twice or after the singletons have been + // initialized will be no-op. + static void InitSingletons(); + + class StaticMeta; + + private: + static StaticMeta* Instance(); + + const uint32_t id_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/thread_local_test.cc b/src/rocksdb/util/thread_local_test.cc new file mode 100644 index 000000000..25ef5c0ee --- /dev/null +++ b/src/rocksdb/util/thread_local_test.cc @@ -0,0 +1,582 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
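// Illustrative usage sketch (editorial, not part of the tests below): the
// typical pattern for the ThreadLocalPtr interface declared in
// util/thread_local.h is a lazily allocated per-thread object, an UnrefHandler
// that frees it when a thread exits or the ThreadLocalPtr is destroyed, and
// Fold() to aggregate across all live threads. Helper names are hypothetical.
#include <atomic>
#include <cstdint>

#include "util/thread_local.h"

namespace {

ROCKSDB_NAMESPACE::ThreadLocalPtr per_thread_counter([](void* ptr) {
  delete static_cast<std::atomic<uint64_t>*>(ptr);  // UnrefHandler
});

// Increment this thread's private counter, allocating it on first use.
void BumpCounter() {
  auto* c = static_cast<std::atomic<uint64_t>*>(per_thread_counter.Get());
  if (c == nullptr) {
    c = new std::atomic<uint64_t>(0);
    per_thread_counter.Reset(c);  // stored only for the calling thread
  }
  c->fetch_add(1, std::memory_order_relaxed);
}

// Sum the counters of every thread that has touched per_thread_counter.
uint64_t TotalCount() {
  uint64_t sum = 0;
  per_thread_counter.Fold(
      [](void* ptr, void* res) {
        *static_cast<uint64_t*>(res) +=
            static_cast<std::atomic<uint64_t>*>(ptr)->load();
      },
      &sum);
  return sum;
}

}  // anonymous namespace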
+ +#include "util/thread_local.h" + +#include +#include +#include + +#include "port/port.h" +#include "rocksdb/env.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class ThreadLocalTest : public testing::Test { + public: + ThreadLocalTest() : env_(Env::Default()) {} + + Env* env_; +}; + +namespace { + +struct Params { + Params(port::Mutex* m, port::CondVar* c, int* u, int n, + UnrefHandler handler = nullptr) + : mu(m), + cv(c), + unref(u), + total(n), + started(0), + completed(0), + doWrite(false), + tls1(handler), + tls2(nullptr) {} + + port::Mutex* mu; + port::CondVar* cv; + int* unref; + int total; + int started; + int completed; + bool doWrite; + ThreadLocalPtr tls1; + ThreadLocalPtr* tls2; +}; + +class IDChecker : public ThreadLocalPtr { + public: + static uint32_t PeekId() { return TEST_PeekId(); } +}; + +} // anonymous namespace + +// Suppress false positive clang analyzer warnings. +#ifndef __clang_analyzer__ +TEST_F(ThreadLocalTest, UniqueIdTest) { + port::Mutex mu; + port::CondVar cv(&mu); + + uint32_t base_id = IDChecker::PeekId(); + // New ThreadLocal instance bumps id by 1 + { + // Id used 0 + Params p1(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 1u); + // Id used 1 + Params p2(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 2u); + // Id used 2 + Params p3(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); + // Id used 3 + Params p4(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 4u); + } + // id 3, 2, 1, 0 are in the free queue in order + ASSERT_EQ(IDChecker::PeekId(), base_id + 0u); + + // pick up 0 + Params p1(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 1u); + // pick up 1 + Params* p2 = new Params(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 2u); + // pick up 2 + Params p3(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); + // return up 1 + delete p2; + ASSERT_EQ(IDChecker::PeekId(), base_id + 1u); + // Now we have 3, 1 in queue + // pick up 1 + Params p4(&mu, &cv, nullptr, 1u); + ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); + // pick up 3 + Params p5(&mu, &cv, nullptr, 1u); + // next new id + ASSERT_EQ(IDChecker::PeekId(), base_id + 4u); + // After exit, id sequence in queue: + // 3, 1, 2, 0 +} +#endif // __clang_analyzer__ + +TEST_F(ThreadLocalTest, SequentialReadWriteTest) { + // global id list carries over 3, 1, 2, 0 + uint32_t base_id = IDChecker::PeekId(); + + port::Mutex mu; + port::CondVar cv(&mu); + Params p(&mu, &cv, nullptr, 1); + ThreadLocalPtr tls2; + p.tls2 = &tls2; + + ASSERT_GT(IDChecker::PeekId(), base_id); + base_id = IDChecker::PeekId(); + + auto func = [](Params* ptr) { + Params& params = *ptr; + ASSERT_TRUE(params.tls1.Get() == nullptr); + params.tls1.Reset(reinterpret_cast(1)); + ASSERT_TRUE(params.tls1.Get() == reinterpret_cast(1)); + params.tls1.Reset(reinterpret_cast(2)); + ASSERT_TRUE(params.tls1.Get() == reinterpret_cast(2)); + + ASSERT_TRUE(params.tls2->Get() == nullptr); + params.tls2->Reset(reinterpret_cast(1)); + ASSERT_TRUE(params.tls2->Get() == reinterpret_cast(1)); + params.tls2->Reset(reinterpret_cast(2)); + ASSERT_TRUE(params.tls2->Get() == reinterpret_cast(2)); + + params.mu->Lock(); + ++(params.completed); + params.cv->SignalAll(); + params.mu->Unlock(); + }; + + for (int iter = 0; iter < 1024; ++iter) { + ASSERT_EQ(IDChecker::PeekId(), base_id); + // Another new 
thread, read/write should not see value from previous thread + env_->StartThreadTyped(func, &p); + + mu.Lock(); + while (p.completed != iter + 1) { + cv.Wait(); + } + mu.Unlock(); + ASSERT_EQ(IDChecker::PeekId(), base_id); + } +} + +TEST_F(ThreadLocalTest, ConcurrentReadWriteTest) { + // global id list carries over 3, 1, 2, 0 + uint32_t base_id = IDChecker::PeekId(); + + ThreadLocalPtr tls2; + port::Mutex mu1; + port::CondVar cv1(&mu1); + Params p1(&mu1, &cv1, nullptr, 16); + p1.tls2 = &tls2; + + port::Mutex mu2; + port::CondVar cv2(&mu2); + Params p2(&mu2, &cv2, nullptr, 16); + p2.doWrite = true; + p2.tls2 = &tls2; + + auto func = [](void* ptr) { + auto& p = *static_cast(ptr); + + p.mu->Lock(); + // Size_T switches size along with the ptr size + // we want to cast to. + size_t own = ++(p.started); + p.cv->SignalAll(); + while (p.started != p.total) { + p.cv->Wait(); + } + p.mu->Unlock(); + + // Let write threads write a different value from the read threads + if (p.doWrite) { + own += 8192; + } + + ASSERT_TRUE(p.tls1.Get() == nullptr); + ASSERT_TRUE(p.tls2->Get() == nullptr); + + auto* env = Env::Default(); + auto start = env->NowMicros(); + + p.tls1.Reset(reinterpret_cast(own)); + p.tls2->Reset(reinterpret_cast(own + 1)); + // Loop for 1 second + while (env->NowMicros() - start < 1000 * 1000) { + for (int iter = 0; iter < 100000; ++iter) { + ASSERT_TRUE(p.tls1.Get() == reinterpret_cast(own)); + ASSERT_TRUE(p.tls2->Get() == reinterpret_cast(own + 1)); + if (p.doWrite) { + p.tls1.Reset(reinterpret_cast(own)); + p.tls2->Reset(reinterpret_cast(own + 1)); + } + } + } + + p.mu->Lock(); + ++(p.completed); + p.cv->SignalAll(); + p.mu->Unlock(); + }; + + // Initiate 2 instnaces: one keeps writing and one keeps reading. + // The read instance should not see data from the write instance. + // Each thread local copy of the value are also different from each + // other. 
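+  // Launch the reader group (p1) and the writer group (p2), then wait for
+  // each group to finish on its own mutex/condvar pair.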
+ for (int th = 0; th < p1.total; ++th) { + env_->StartThreadTyped(func, &p1); + } + for (int th = 0; th < p2.total; ++th) { + env_->StartThreadTyped(func, &p2); + } + + mu1.Lock(); + while (p1.completed != p1.total) { + cv1.Wait(); + } + mu1.Unlock(); + + mu2.Lock(); + while (p2.completed != p2.total) { + cv2.Wait(); + } + mu2.Unlock(); + + ASSERT_EQ(IDChecker::PeekId(), base_id + 3u); +} + +TEST_F(ThreadLocalTest, Unref) { + auto unref = [](void* ptr) { + auto& p = *static_cast(ptr); + p.mu->Lock(); + ++(*p.unref); + p.mu->Unlock(); + }; + + // Case 0: no unref triggered if ThreadLocalPtr is never accessed + auto func0 = [](Params* ptr) { + auto& p = *ptr; + p.mu->Lock(); + ++(p.started); + p.cv->SignalAll(); + while (p.started != p.total) { + p.cv->Wait(); + } + p.mu->Unlock(); + }; + + for (int th = 1; th <= 128; th += th) { + port::Mutex mu; + port::CondVar cv(&mu); + int unref_count = 0; + Params p(&mu, &cv, &unref_count, th, unref); + + for (int i = 0; i < p.total; ++i) { + env_->StartThreadTyped(func0, &p); + } + env_->WaitForJoin(); + ASSERT_EQ(unref_count, 0); + } + + // Case 1: unref triggered by thread exit + auto func1 = [](Params* ptr) { + auto& p = *ptr; + + p.mu->Lock(); + ++(p.started); + p.cv->SignalAll(); + while (p.started != p.total) { + p.cv->Wait(); + } + p.mu->Unlock(); + + ASSERT_TRUE(p.tls1.Get() == nullptr); + ASSERT_TRUE(p.tls2->Get() == nullptr); + + p.tls1.Reset(ptr); + p.tls2->Reset(ptr); + + p.tls1.Reset(ptr); + p.tls2->Reset(ptr); + }; + + for (int th = 1; th <= 128; th += th) { + port::Mutex mu; + port::CondVar cv(&mu); + int unref_count = 0; + ThreadLocalPtr tls2(unref); + Params p(&mu, &cv, &unref_count, th, unref); + p.tls2 = &tls2; + + for (int i = 0; i < p.total; ++i) { + env_->StartThreadTyped(func1, &p); + } + + env_->WaitForJoin(); + + // N threads x 2 ThreadLocal instance cleanup on thread exit + ASSERT_EQ(unref_count, 2 * p.total); + } + + // Case 2: unref triggered by ThreadLocal instance destruction + auto func2 = [](Params* ptr) { + auto& p = *ptr; + + p.mu->Lock(); + ++(p.started); + p.cv->SignalAll(); + while (p.started != p.total) { + p.cv->Wait(); + } + p.mu->Unlock(); + + ASSERT_TRUE(p.tls1.Get() == nullptr); + ASSERT_TRUE(p.tls2->Get() == nullptr); + + p.tls1.Reset(ptr); + p.tls2->Reset(ptr); + + p.tls1.Reset(ptr); + p.tls2->Reset(ptr); + + p.mu->Lock(); + ++(p.completed); + p.cv->SignalAll(); + + // Waiting for instruction to exit thread + while (p.completed != 0) { + p.cv->Wait(); + } + p.mu->Unlock(); + }; + + for (int th = 1; th <= 128; th += th) { + port::Mutex mu; + port::CondVar cv(&mu); + int unref_count = 0; + Params p(&mu, &cv, &unref_count, th, unref); + p.tls2 = new ThreadLocalPtr(unref); + + for (int i = 0; i < p.total; ++i) { + env_->StartThreadTyped(func2, &p); + } + + // Wait for all threads to finish using Params + mu.Lock(); + while (p.completed != p.total) { + cv.Wait(); + } + mu.Unlock(); + + // Now destroy one ThreadLocal instance + delete p.tls2; + p.tls2 = nullptr; + // instance destroy for N threads + ASSERT_EQ(unref_count, p.total); + + // Signal to exit + mu.Lock(); + p.completed = 0; + cv.SignalAll(); + mu.Unlock(); + env_->WaitForJoin(); + // additional N threads exit unref for the left instance + ASSERT_EQ(unref_count, 2 * p.total); + } +} + +TEST_F(ThreadLocalTest, Swap) { + ThreadLocalPtr tls; + tls.Reset(reinterpret_cast(1)); + ASSERT_EQ(reinterpret_cast(tls.Swap(nullptr)), 1); + ASSERT_TRUE(tls.Swap(reinterpret_cast(2)) == nullptr); + ASSERT_EQ(reinterpret_cast(tls.Get()), 2); + 
ASSERT_EQ(reinterpret_cast(tls.Swap(reinterpret_cast(3))), 2); +} + +TEST_F(ThreadLocalTest, Scrape) { + auto unref = [](void* ptr) { + auto& p = *static_cast(ptr); + p.mu->Lock(); + ++(*p.unref); + p.mu->Unlock(); + }; + + auto func = [](void* ptr) { + auto& p = *static_cast(ptr); + + ASSERT_TRUE(p.tls1.Get() == nullptr); + ASSERT_TRUE(p.tls2->Get() == nullptr); + + p.tls1.Reset(ptr); + p.tls2->Reset(ptr); + + p.tls1.Reset(ptr); + p.tls2->Reset(ptr); + + p.mu->Lock(); + ++(p.completed); + p.cv->SignalAll(); + + // Waiting for instruction to exit thread + while (p.completed != 0) { + p.cv->Wait(); + } + p.mu->Unlock(); + }; + + for (int th = 1; th <= 128; th += th) { + port::Mutex mu; + port::CondVar cv(&mu); + int unref_count = 0; + Params p(&mu, &cv, &unref_count, th, unref); + p.tls2 = new ThreadLocalPtr(unref); + + for (int i = 0; i < p.total; ++i) { + env_->StartThreadTyped(func, &p); + } + + // Wait for all threads to finish using Params + mu.Lock(); + while (p.completed != p.total) { + cv.Wait(); + } + mu.Unlock(); + + ASSERT_EQ(unref_count, 0); + + // Scrape all thread local data. No unref at thread + // exit or ThreadLocalPtr destruction + autovector ptrs; + p.tls1.Scrape(&ptrs, nullptr); + p.tls2->Scrape(&ptrs, nullptr); + delete p.tls2; + // Signal to exit + mu.Lock(); + p.completed = 0; + cv.SignalAll(); + mu.Unlock(); + env_->WaitForJoin(); + + ASSERT_EQ(unref_count, 0); + } +} + +TEST_F(ThreadLocalTest, Fold) { + auto unref = [](void* ptr) { + delete static_cast*>(ptr); + }; + static const int kNumThreads = 16; + static const int kItersPerThread = 10; + port::Mutex mu; + port::CondVar cv(&mu); + Params params(&mu, &cv, nullptr, kNumThreads, unref); + auto func = [](void* ptr) { + auto& p = *static_cast(ptr); + ASSERT_TRUE(p.tls1.Get() == nullptr); + p.tls1.Reset(new std::atomic(0)); + + for (int i = 0; i < kItersPerThread; ++i) { + static_cast*>(p.tls1.Get())->fetch_add(1); + } + + p.mu->Lock(); + ++(p.completed); + p.cv->SignalAll(); + + // Waiting for instruction to exit thread + while (p.completed != 0) { + p.cv->Wait(); + } + p.mu->Unlock(); + }; + + for (int th = 0; th < params.total; ++th) { + env_->StartThread(func, ¶ms); + } + + // Wait for all threads to finish using Params + mu.Lock(); + while (params.completed != params.total) { + cv.Wait(); + } + mu.Unlock(); + + // Verify Fold() behavior + int64_t sum = 0; + params.tls1.Fold( + [](void* ptr, void* res) { + auto sum_ptr = static_cast(res); + *sum_ptr += static_cast*>(ptr)->load(); + }, + &sum); + ASSERT_EQ(sum, kNumThreads * kItersPerThread); + + // Signal to exit + mu.Lock(); + params.completed = 0; + cv.SignalAll(); + mu.Unlock(); + env_->WaitForJoin(); +} + +TEST_F(ThreadLocalTest, CompareAndSwap) { + ThreadLocalPtr tls; + ASSERT_TRUE(tls.Swap(reinterpret_cast(1)) == nullptr); + void* expected = reinterpret_cast(1); + // Swap in 2 + ASSERT_TRUE(tls.CompareAndSwap(reinterpret_cast(2), expected)); + expected = reinterpret_cast(100); + // Fail Swap, still 2 + ASSERT_TRUE(!tls.CompareAndSwap(reinterpret_cast(2), expected)); + ASSERT_EQ(expected, reinterpret_cast(2)); + // Swap in 3 + expected = reinterpret_cast(2); + ASSERT_TRUE(tls.CompareAndSwap(reinterpret_cast(3), expected)); + ASSERT_EQ(tls.Get(), reinterpret_cast(3)); +} + +namespace { + +void* AccessThreadLocal(void* /*arg*/) { + TEST_SYNC_POINT("AccessThreadLocal:Start"); + ThreadLocalPtr tlp; + tlp.Reset(new std::string("hello RocksDB")); + TEST_SYNC_POINT("AccessThreadLocal:End"); + return nullptr; +} + +} // namespace + +// The following test is disabled 
as it requires manual steps to run it +// correctly. +// +// Currently we have no way to acess SyncPoint w/o ASAN error when the +// child thread dies after the main thread dies. So if you manually enable +// this test and only see an ASAN error on SyncPoint, it means you pass the +// test. +TEST_F(ThreadLocalTest, DISABLED_MainThreadDiesFirst) { + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"AccessThreadLocal:Start", "MainThreadDiesFirst:End"}, + {"PosixEnv::~PosixEnv():End", "AccessThreadLocal:End"}}); + + // Triggers the initialization of singletons. + Env::Default(); + +#ifndef ROCKSDB_LITE + try { +#endif // ROCKSDB_LITE + ROCKSDB_NAMESPACE::port::Thread th(&AccessThreadLocal, nullptr); + th.detach(); + TEST_SYNC_POINT("MainThreadDiesFirst:End"); +#ifndef ROCKSDB_LITE + } catch (const std::system_error& ex) { + std::cerr << "Start thread: " << ex.code() << std::endl; + FAIL(); + } +#endif // ROCKSDB_LITE +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/thread_operation.h b/src/rocksdb/util/thread_operation.h new file mode 100644 index 000000000..c24fccd5c --- /dev/null +++ b/src/rocksdb/util/thread_operation.h @@ -0,0 +1,112 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// This file defines the structures for thread operation and state. +// Thread operations are used to describe high level action of a +// thread such as doing compaction or flush, while thread state +// are used to describe lower-level action such as reading / +// writing a file or waiting for a mutex. Operations and states +// are designed to be independent. Typically, a thread usually involves +// in one operation and one state at any specific point in time. + +#pragma once + +#include + +#include "rocksdb/thread_status.h" + +namespace ROCKSDB_NAMESPACE { + +#ifdef ROCKSDB_USING_THREAD_STATUS + +// The structure that describes a major thread operation. +struct OperationInfo { + const ThreadStatus::OperationType type; + const std::string name; +}; + +// The global operation table. +// +// When updating a status of a thread, the pointer of the OperationInfo +// of the current ThreadStatusData will be pointing to one of the +// rows in this global table. +// +// Note that it's not designed to be constant as in the future we +// might consider adding global count to the OperationInfo. +static OperationInfo global_operation_table[] = { + {ThreadStatus::OP_UNKNOWN, ""}, + {ThreadStatus::OP_COMPACTION, "Compaction"}, + {ThreadStatus::OP_FLUSH, "Flush"}}; + +struct OperationStageInfo { + const ThreadStatus::OperationStage stage; + const std::string name; +}; + +// A table maintains the mapping from stage type to stage string. +// Note that the string must be changed accordingly when the +// associated function name changed. 
+static OperationStageInfo global_op_stage_table[] = { + {ThreadStatus::STAGE_UNKNOWN, ""}, + {ThreadStatus::STAGE_FLUSH_RUN, "FlushJob::Run"}, + {ThreadStatus::STAGE_FLUSH_WRITE_L0, "FlushJob::WriteLevel0Table"}, + {ThreadStatus::STAGE_COMPACTION_PREPARE, "CompactionJob::Prepare"}, + {ThreadStatus::STAGE_COMPACTION_RUN, "CompactionJob::Run"}, + {ThreadStatus::STAGE_COMPACTION_PROCESS_KV, + "CompactionJob::ProcessKeyValueCompaction"}, + {ThreadStatus::STAGE_COMPACTION_INSTALL, "CompactionJob::Install"}, + {ThreadStatus::STAGE_COMPACTION_SYNC_FILE, + "CompactionJob::FinishCompactionOutputFile"}, + {ThreadStatus::STAGE_PICK_MEMTABLES_TO_FLUSH, + "MemTableList::PickMemtablesToFlush"}, + {ThreadStatus::STAGE_MEMTABLE_ROLLBACK, + "MemTableList::RollbackMemtableFlush"}, + {ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS, + "MemTableList::TryInstallMemtableFlushResults"}, +}; + +// The structure that describes a state. +struct StateInfo { + const ThreadStatus::StateType type; + const std::string name; +}; + +// The global state table. +// +// When updating a status of a thread, the pointer of the StateInfo +// of the current ThreadStatusData will be pointing to one of the +// rows in this global table. +static StateInfo global_state_table[] = { + {ThreadStatus::STATE_UNKNOWN, ""}, + {ThreadStatus::STATE_MUTEX_WAIT, "Mutex Wait"}, +}; + +struct OperationProperty { + int code; + std::string name; +}; + +static OperationProperty compaction_operation_properties[] = { + {ThreadStatus::COMPACTION_JOB_ID, "JobID"}, + {ThreadStatus::COMPACTION_INPUT_OUTPUT_LEVEL, "InputOutputLevel"}, + {ThreadStatus::COMPACTION_PROP_FLAGS, "Manual/Deletion/Trivial"}, + {ThreadStatus::COMPACTION_TOTAL_INPUT_BYTES, "TotalInputBytes"}, + {ThreadStatus::COMPACTION_BYTES_READ, "BytesRead"}, + {ThreadStatus::COMPACTION_BYTES_WRITTEN, "BytesWritten"}, +}; + +static OperationProperty flush_operation_properties[] = { + {ThreadStatus::FLUSH_JOB_ID, "JobID"}, + {ThreadStatus::FLUSH_BYTES_MEMTABLES, "BytesMemtables"}, + {ThreadStatus::FLUSH_BYTES_WRITTEN, "BytesWritten"}}; + +#else + +struct OperationInfo {}; + +struct StateInfo {}; + +#endif // ROCKSDB_USING_THREAD_STATUS +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/threadpool_imp.cc b/src/rocksdb/util/threadpool_imp.cc new file mode 100644 index 000000000..09706cac5 --- /dev/null +++ b/src/rocksdb/util/threadpool_imp.cc @@ -0,0 +1,551 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
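// Illustrative sketch (editorial, relating to the tables defined in
// util/thread_operation.h above): callers are not expected to scan those static
// tables directly; the public ThreadStatus accessors expose the same strings.
// The helper below is hypothetical and only shows the intended lookup, assuming
// ThreadStatus::GetOperationName() as declared in rocksdb/thread_status.h.
#include <cinttypes>
#include <cstdio>

#include "rocksdb/thread_status.h"

namespace {

void PrintOperation(const ROCKSDB_NAMESPACE::ThreadStatus& ts) {
  // GetOperationName() maps the enum to the matching row of the operation
  // table, e.g. OP_COMPACTION -> "Compaction".
  std::printf("thread %" PRIu64 ": %s\n", ts.thread_id,
              ROCKSDB_NAMESPACE::ThreadStatus::GetOperationName(
                  ts.operation_type)
                  .c_str());
}

}  // anonymous namespace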
+ +#include "util/threadpool_imp.h" + +#ifndef OS_WIN +#include +#endif + +#ifdef OS_LINUX +#include +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "monitoring/thread_status_util.h" +#include "port/port.h" +#include "test_util/sync_point.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +void ThreadPoolImpl::PthreadCall(const char* label, int result) { + if (result != 0) { + fprintf(stderr, "pthread %s: %s\n", label, errnoStr(result).c_str()); + abort(); + } +} + +struct ThreadPoolImpl::Impl { + Impl(); + ~Impl(); + + void JoinThreads(bool wait_for_jobs_to_complete); + + void SetBackgroundThreadsInternal(int num, bool allow_reduce); + int GetBackgroundThreads(); + + unsigned int GetQueueLen() const { + return queue_len_.load(std::memory_order_relaxed); + } + + void LowerIOPriority(); + + void LowerCPUPriority(CpuPriority pri); + + void WakeUpAllThreads() { bgsignal_.notify_all(); } + + void BGThread(size_t thread_id); + + void StartBGThreads(); + + void Submit(std::function&& schedule, + std::function&& unschedule, void* tag); + + int UnSchedule(void* arg); + + void SetHostEnv(Env* env) { env_ = env; } + + Env* GetHostEnv() const { return env_; } + + bool HasExcessiveThread() const { + return static_cast(bgthreads_.size()) > total_threads_limit_; + } + + // Return true iff the current thread is the excessive thread to terminate. + // Always terminate the running thread that is added last, even if there are + // more than one thread to terminate. + bool IsLastExcessiveThread(size_t thread_id) const { + return HasExcessiveThread() && thread_id == bgthreads_.size() - 1; + } + + bool IsExcessiveThread(size_t thread_id) const { + return static_cast(thread_id) >= total_threads_limit_; + } + + // Return the thread priority. + // This would allow its member-thread to know its priority. + Env::Priority GetThreadPriority() const { return priority_; } + + // Set the thread priority. + void SetThreadPriority(Env::Priority priority) { priority_ = priority; } + + int ReserveThreads(int threads_to_be_reserved) { + std::unique_lock lock(mu_); + // We can reserve at most num_waiting_threads_ in total so the number of + // threads that can be reserved might be fewer than the desired one. In + // rare cases, num_waiting_threads_ could be less than reserved_threads + // due to SetBackgroundThreadInternal or last excessive threads. If that + // happens, we cannot reserve any other threads. + int reserved_threads_in_success = + std::min(std::max(num_waiting_threads_ - reserved_threads_, 0), + threads_to_be_reserved); + reserved_threads_ += reserved_threads_in_success; + return reserved_threads_in_success; + } + + int ReleaseThreads(int threads_to_be_released) { + std::unique_lock lock(mu_); + // We cannot release more than reserved_threads_ + int released_threads_in_success = + std::min(reserved_threads_, threads_to_be_released); + reserved_threads_ -= released_threads_in_success; + WakeUpAllThreads(); + return released_threads_in_success; + } + + private: + static void BGThreadWrapper(void* arg); + + bool low_io_priority_; + CpuPriority cpu_priority_; + Env::Priority priority_; + Env* env_; + + int total_threads_limit_; + std::atomic_uint queue_len_; // Queue length. Used for stats reporting + // Number of reserved threads, managed by ReserveThreads(..) 
and + // ReleaseThreads(..), if num_waiting_threads_ is no larger than + // reserved_threads_, its thread will be blocked to ensure the reservation + // mechanism + int reserved_threads_; + // Number of waiting threads (Maximum number of threads that can be + // reserved), in rare cases, num_waiting_threads_ could be less than + // reserved_threads due to SetBackgroundThreadInternal or last + // excessive threads. + int num_waiting_threads_; + bool exit_all_threads_; + bool wait_for_jobs_to_complete_; + + // Entry per Schedule()/Submit() call + struct BGItem { + void* tag = nullptr; + std::function function; + std::function unschedFunction; + }; + + using BGQueue = std::deque; + BGQueue queue_; + + std::mutex mu_; + std::condition_variable bgsignal_; + std::vector bgthreads_; +}; + +inline ThreadPoolImpl::Impl::Impl() + : low_io_priority_(false), + cpu_priority_(CpuPriority::kNormal), + priority_(Env::LOW), + env_(nullptr), + total_threads_limit_(0), + queue_len_(), + reserved_threads_(0), + num_waiting_threads_(0), + exit_all_threads_(false), + wait_for_jobs_to_complete_(false), + queue_(), + mu_(), + bgsignal_(), + bgthreads_() {} + +inline ThreadPoolImpl::Impl::~Impl() { assert(bgthreads_.size() == 0U); } + +void ThreadPoolImpl::Impl::JoinThreads(bool wait_for_jobs_to_complete) { + std::unique_lock lock(mu_); + assert(!exit_all_threads_); + + wait_for_jobs_to_complete_ = wait_for_jobs_to_complete; + exit_all_threads_ = true; + // prevent threads from being recreated right after they're joined, in case + // the user is concurrently submitting jobs. + total_threads_limit_ = 0; + reserved_threads_ = 0; + num_waiting_threads_ = 0; + + lock.unlock(); + + bgsignal_.notify_all(); + + for (auto& th : bgthreads_) { + th.join(); + } + + bgthreads_.clear(); + + exit_all_threads_ = false; + wait_for_jobs_to_complete_ = false; +} + +inline void ThreadPoolImpl::Impl::LowerIOPriority() { + std::lock_guard lock(mu_); + low_io_priority_ = true; +} + +inline void ThreadPoolImpl::Impl::LowerCPUPriority(CpuPriority pri) { + std::lock_guard lock(mu_); + cpu_priority_ = pri; +} + +void ThreadPoolImpl::Impl::BGThread(size_t thread_id) { + bool low_io_priority = false; + CpuPriority current_cpu_priority = CpuPriority::kNormal; + + while (true) { + // Wait until there is an item that is ready to run + std::unique_lock lock(mu_); + // Stop waiting if the thread needs to do work or needs to terminate. + // Increase num_waiting_threads_ once this task has started waiting + num_waiting_threads_++; + + TEST_SYNC_POINT("ThreadPoolImpl::BGThread::WaitingThreadsInc"); + TEST_IDX_SYNC_POINT("ThreadPoolImpl::BGThread::Start:th", thread_id); + // When not exist_all_threads and the current thread id is not the last + // excessive thread, it may be blocked due to 3 reasons: 1) queue is empty + // 2) it is the excessive thread (not the last one) + // 3) the number of waiting threads is not greater than reserved threads + // (i.e, no available threads due to full reservation") + while (!exit_all_threads_ && !IsLastExcessiveThread(thread_id) && + (queue_.empty() || IsExcessiveThread(thread_id) || + num_waiting_threads_ <= reserved_threads_)) { + bgsignal_.wait(lock); + } + // Decrease num_waiting_threads_ once the thread is not waiting + num_waiting_threads_--; + + if (exit_all_threads_) { // mechanism to let BG threads exit safely + + if (!wait_for_jobs_to_complete_ || queue_.empty()) { + break; + } + } else if (IsLastExcessiveThread(thread_id)) { + // Current thread is the last generated one and is excessive. 
+ // We always terminate excessive thread in the reverse order of + // generation time. But not when `exit_all_threads_ == true`, + // otherwise `JoinThreads()` could try to `join()` a `detach()`ed + // thread. + auto& terminating_thread = bgthreads_.back(); + terminating_thread.detach(); + bgthreads_.pop_back(); + if (HasExcessiveThread()) { + // There is still at least more excessive thread to terminate. + WakeUpAllThreads(); + } + TEST_IDX_SYNC_POINT("ThreadPoolImpl::BGThread::Termination:th", + thread_id); + TEST_SYNC_POINT("ThreadPoolImpl::BGThread::Termination"); + break; + } + + auto func = std::move(queue_.front().function); + queue_.pop_front(); + + queue_len_.store(static_cast(queue_.size()), + std::memory_order_relaxed); + + bool decrease_io_priority = (low_io_priority != low_io_priority_); + CpuPriority cpu_priority = cpu_priority_; + lock.unlock(); + + if (cpu_priority < current_cpu_priority) { + TEST_SYNC_POINT_CALLBACK("ThreadPoolImpl::BGThread::BeforeSetCpuPriority", + ¤t_cpu_priority); + // 0 means current thread. + port::SetCpuPriority(0, cpu_priority); + current_cpu_priority = cpu_priority; + TEST_SYNC_POINT_CALLBACK("ThreadPoolImpl::BGThread::AfterSetCpuPriority", + ¤t_cpu_priority); + } + +#ifdef OS_LINUX + if (decrease_io_priority) { +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + // Put schedule into IOPRIO_CLASS_IDLE class (lowest) + // These system calls only have an effect when used in conjunction + // with an I/O scheduler that supports I/O priorities. As at + // kernel 2.6.17 the only such scheduler is the Completely + // Fair Queuing (CFQ) I/O scheduler. + // To change scheduler: + // echo cfq > /sys/block//queue/schedule + // Tunables to consider: + // /sys/block//queue/slice_idle + // /sys/block//queue/slice_sync + syscall(SYS_ioprio_set, 1, // IOPRIO_WHO_PROCESS + 0, // current thread + IOPRIO_PRIO_VALUE(3, 0)); + low_io_priority = true; + } +#else + (void)decrease_io_priority; // avoid 'unused variable' error +#endif + + TEST_SYNC_POINT_CALLBACK("ThreadPoolImpl::Impl::BGThread:BeforeRun", + &priority_); + + func(); + } +} + +// Helper struct for passing arguments when creating threads. +struct BGThreadMetadata { + ThreadPoolImpl::Impl* thread_pool_; + size_t thread_id_; // Thread count in the thread. 
+ BGThreadMetadata(ThreadPoolImpl::Impl* thread_pool, size_t thread_id) + : thread_pool_(thread_pool), thread_id_(thread_id) {} +}; + +void ThreadPoolImpl::Impl::BGThreadWrapper(void* arg) { + BGThreadMetadata* meta = reinterpret_cast(arg); + size_t thread_id = meta->thread_id_; + ThreadPoolImpl::Impl* tp = meta->thread_pool_; +#ifdef ROCKSDB_USING_THREAD_STATUS + // initialize it because compiler isn't good enough to see we don't use it + // uninitialized + ThreadStatus::ThreadType thread_type = ThreadStatus::NUM_THREAD_TYPES; + switch (tp->GetThreadPriority()) { + case Env::Priority::HIGH: + thread_type = ThreadStatus::HIGH_PRIORITY; + break; + case Env::Priority::LOW: + thread_type = ThreadStatus::LOW_PRIORITY; + break; + case Env::Priority::BOTTOM: + thread_type = ThreadStatus::BOTTOM_PRIORITY; + break; + case Env::Priority::USER: + thread_type = ThreadStatus::USER; + break; + case Env::Priority::TOTAL: + assert(false); + return; + } + assert(thread_type != ThreadStatus::NUM_THREAD_TYPES); + ThreadStatusUtil::RegisterThread(tp->GetHostEnv(), thread_type); +#endif + delete meta; + tp->BGThread(thread_id); +#ifdef ROCKSDB_USING_THREAD_STATUS + ThreadStatusUtil::UnregisterThread(); +#endif + return; +} + +void ThreadPoolImpl::Impl::SetBackgroundThreadsInternal(int num, + bool allow_reduce) { + std::lock_guard lock(mu_); + if (exit_all_threads_) { + return; + } + if (num > total_threads_limit_ || + (num < total_threads_limit_ && allow_reduce)) { + total_threads_limit_ = std::max(0, num); + WakeUpAllThreads(); + StartBGThreads(); + } +} + +int ThreadPoolImpl::Impl::GetBackgroundThreads() { + std::unique_lock lock(mu_); + return total_threads_limit_; +} + +void ThreadPoolImpl::Impl::StartBGThreads() { + // Start background thread if necessary + while ((int)bgthreads_.size() < total_threads_limit_) { + port::Thread p_t(&BGThreadWrapper, + new BGThreadMetadata(this, bgthreads_.size())); + +// Set the thread name to aid debugging +#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) + auto th_handle = p_t.native_handle(); + std::string thread_priority = Env::PriorityToString(GetThreadPriority()); + std::ostringstream thread_name_stream; + thread_name_stream << "rocksdb:"; + for (char c : thread_priority) { + thread_name_stream << static_cast(tolower(c)); + } + pthread_setname_np(th_handle, thread_name_stream.str().c_str()); +#endif +#endif + bgthreads_.push_back(std::move(p_t)); + } +} + +void ThreadPoolImpl::Impl::Submit(std::function&& schedule, + std::function&& unschedule, + void* tag) { + std::lock_guard lock(mu_); + + if (exit_all_threads_) { + return; + } + + StartBGThreads(); + + // Add to priority queue + queue_.push_back(BGItem()); + TEST_SYNC_POINT("ThreadPoolImpl::Submit::Enqueue"); + auto& item = queue_.back(); + item.tag = tag; + item.function = std::move(schedule); + item.unschedFunction = std::move(unschedule); + + queue_len_.store(static_cast(queue_.size()), + std::memory_order_relaxed); + + if (!HasExcessiveThread()) { + // Wake up at least one waiting thread. + bgsignal_.notify_one(); + } else { + // Need to wake up all threads to make sure the one woken + // up is not the one to terminate. 
+ WakeUpAllThreads(); + } +} + +int ThreadPoolImpl::Impl::UnSchedule(void* arg) { + int count = 0; + + std::vector> candidates; + { + std::lock_guard lock(mu_); + + // Remove from priority queue + BGQueue::iterator it = queue_.begin(); + while (it != queue_.end()) { + if (arg == (*it).tag) { + if (it->unschedFunction) { + candidates.push_back(std::move(it->unschedFunction)); + } + it = queue_.erase(it); + count++; + } else { + ++it; + } + } + queue_len_.store(static_cast(queue_.size()), + std::memory_order_relaxed); + } + + // Run unschedule functions outside the mutex + for (auto& f : candidates) { + f(); + } + + return count; +} + +ThreadPoolImpl::ThreadPoolImpl() : impl_(new Impl()) {} + +ThreadPoolImpl::~ThreadPoolImpl() {} + +void ThreadPoolImpl::JoinAllThreads() { impl_->JoinThreads(false); } + +void ThreadPoolImpl::SetBackgroundThreads(int num) { + impl_->SetBackgroundThreadsInternal(num, true); +} + +int ThreadPoolImpl::GetBackgroundThreads() { + return impl_->GetBackgroundThreads(); +} + +unsigned int ThreadPoolImpl::GetQueueLen() const { + return impl_->GetQueueLen(); +} + +void ThreadPoolImpl::WaitForJobsAndJoinAllThreads() { + impl_->JoinThreads(true); +} + +void ThreadPoolImpl::LowerIOPriority() { impl_->LowerIOPriority(); } + +void ThreadPoolImpl::LowerCPUPriority(CpuPriority pri) { + impl_->LowerCPUPriority(pri); +} + +void ThreadPoolImpl::IncBackgroundThreadsIfNeeded(int num) { + impl_->SetBackgroundThreadsInternal(num, false); +} + +void ThreadPoolImpl::SubmitJob(const std::function& job) { + auto copy(job); + impl_->Submit(std::move(copy), std::function(), nullptr); +} + +void ThreadPoolImpl::SubmitJob(std::function&& job) { + impl_->Submit(std::move(job), std::function(), nullptr); +} + +void ThreadPoolImpl::Schedule(void (*function)(void* arg1), void* arg, + void* tag, void (*unschedFunction)(void* arg)) { + if (unschedFunction == nullptr) { + impl_->Submit(std::bind(function, arg), std::function(), tag); + } else { + impl_->Submit(std::bind(function, arg), std::bind(unschedFunction, arg), + tag); + } +} + +int ThreadPoolImpl::UnSchedule(void* arg) { return impl_->UnSchedule(arg); } + +void ThreadPoolImpl::SetHostEnv(Env* env) { impl_->SetHostEnv(env); } + +Env* ThreadPoolImpl::GetHostEnv() const { return impl_->GetHostEnv(); } + +// Return the thread priority. +// This would allow its member-thread to know its priority. +Env::Priority ThreadPoolImpl::GetThreadPriority() const { + return impl_->GetThreadPriority(); +} + +// Set the thread priority. +void ThreadPoolImpl::SetThreadPriority(Env::Priority priority) { + impl_->SetThreadPriority(priority); +} + +// Reserve a specific number of threads, prevent them from running other +// functions The number of reserved threads could be fewer than the desired one +int ThreadPoolImpl::ReserveThreads(int threads_to_be_reserved) { + return impl_->ReserveThreads(threads_to_be_reserved); +} + +// Release a specific number of threads +int ThreadPoolImpl::ReleaseThreads(int threads_to_be_released) { + return impl_->ReleaseThreads(threads_to_be_released); +} + +ThreadPool* NewThreadPool(int num_threads) { + ThreadPoolImpl* thread_pool = new ThreadPoolImpl(); + thread_pool->SetBackgroundThreads(num_threads); + return thread_pool; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/threadpool_imp.h b/src/rocksdb/util/threadpool_imp.h new file mode 100644 index 000000000..a5109e38f --- /dev/null +++ b/src/rocksdb/util/threadpool_imp.h @@ -0,0 +1,120 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "rocksdb/env.h"
+#include "rocksdb/threadpool.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class ThreadPoolImpl : public ThreadPool {
+ public:
+  ThreadPoolImpl();
+  ~ThreadPoolImpl();
+
+  ThreadPoolImpl(ThreadPoolImpl&&) = delete;
+  ThreadPoolImpl& operator=(ThreadPoolImpl&&) = delete;
+
+  // Implement ThreadPool interfaces
+
+  // Wait for all threads to finish. Discards all jobs that have not started
+  // executing and waits for the running ones to complete.
+  void JoinAllThreads() override;
+
+  // Set the number of background threads that will be executing the
+  // scheduled jobs.
+  void SetBackgroundThreads(int num) override;
+  int GetBackgroundThreads() override;
+
+  // Get the number of jobs scheduled in the ThreadPool queue.
+  unsigned int GetQueueLen() const override;
+
+  // Waits for all jobs to complete: both those that have already started
+  // running and those that have not started yet.
+  void WaitForJobsAndJoinAllThreads() override;
+
+  // Make threads run at a lower kernel I/O priority.
+  // Currently only has an effect on Linux.
+  void LowerIOPriority();
+
+  // Make threads run at a lower kernel CPU priority.
+  // Currently only has an effect on Linux.
+  void LowerCPUPriority(CpuPriority pri);
+
+  // Ensure there are at least `num` threads in the pool,
+  // but do not kill threads if there are more.
+  void IncBackgroundThreadsIfNeeded(int num);
+
+  // Submit a fire-and-forget job. These jobs cannot be unscheduled.
+
+  // This overload allows submitting the same job multiple times.
+  void SubmitJob(const std::function<void()>&) override;
+  // This overload moves the function in for efficiency.
+  void SubmitJob(std::function<void()>&&) override;
+
+  // Schedule a job with a tag and an optional unschedule function.
+  // The tag can later be used to filter and unschedule jobs that are
+  // still in the queue and have not started running.
+  void Schedule(void (*function)(void* arg1), void* arg, void* tag,
+                void (*unschedFunction)(void* arg));
+
+  // Remove jobs that are still in the queue and match the given tag.
+  // For each removed job, execute its unschedule function if one was
+  // given at scheduling time.
+  int UnSchedule(void* tag);
+
+  void SetHostEnv(Env* env);
+
+  Env* GetHostEnv() const;
+
+  // Return the thread priority.
+  // This allows its member threads to know the pool's priority.
+  Env::Priority GetThreadPriority() const;
+
+  // Set the thread priority.
+  void SetThreadPriority(Env::Priority priority);
+
+  // Reserve a specific number of threads and prevent them from running
+  // other functions. The number of reserved threads may be fewer than
+  // the number requested.
+  int ReserveThreads(int threads_to_be_reserved) override;
+
+  // Release a specific number of threads.
+  int ReleaseThreads(int threads_to_be_released) override;
+
+  static void PthreadCall(const char* label, int result);
+
+  struct Impl;
+
+ private:
+  // The current public virtual interface does not provide usable
+  // functionality and thus cannot be used internally as a facade over
+  // different implementations.
+ // + // We propose a pimpl idiom in order to easily replace the thread pool impl + // w/o touching the header file but providing a different .cc potentially + // CMake option driven. + // + // Another option is to introduce a Env::MakeThreadPool() virtual interface + // and override the environment. This would require refactoring ThreadPool + // usage. + // + // We can also combine these two approaches + std::unique_ptr impl_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/timer.h b/src/rocksdb/util/timer.h new file mode 100644 index 000000000..db71cefaf --- /dev/null +++ b/src/rocksdb/util/timer.h @@ -0,0 +1,340 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "monitoring/instrumented_mutex.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "util/mutexlock.h" + +namespace ROCKSDB_NAMESPACE { + +// A Timer class to handle repeated work. +// +// `Start()` and `Shutdown()` are currently not thread-safe. The client must +// serialize calls to these two member functions. +// +// A single timer instance can handle multiple functions via a single thread. +// It is better to leave long running work to a dedicated thread pool. +// +// Timer can be started by calling `Start()`, and ended by calling `Shutdown()`. +// Work (in terms of a `void function`) can be scheduled by calling `Add` with +// a unique function name and de-scheduled by calling `Cancel`. +// Many functions can be added. +// +// Impl Details: +// A heap is used to keep track of when the next timer goes off. +// A map from a function name to the function keeps track of all the functions. +class Timer { + public: + explicit Timer(SystemClock* clock) + : clock_(clock), + mutex_(clock), + cond_var_(&mutex_), + running_(false), + executing_task_(false) {} + + ~Timer() { Shutdown(); } + + // Add a new function to run. + // fn_name has to be identical, otherwise it will fail to add and return false + // start_after_us is the initial delay. + // repeat_every_us is the interval between ending time of the last call and + // starting time of the next call. For example, repeat_every_us = 2000 and + // the function takes 1000us to run. If it starts at time [now]us, then it + // finishes at [now]+1000us, 2nd run starting time will be at [now]+3000us. + // repeat_every_us == 0 means do not repeat. + bool Add(std::function fn, const std::string& fn_name, + uint64_t start_after_us, uint64_t repeat_every_us) { + auto fn_info = std::make_unique(std::move(fn), fn_name, 0, + repeat_every_us); + InstrumentedMutexLock l(&mutex_); + // Assign time within mutex to make sure the next_run_time is larger than + // the current running one + fn_info->next_run_time_us = clock_->NowMicros() + start_after_us; + // the new task start time should never before the current task executing + // time, as the executing task can only be running if it's next_run_time_us + // is due (<= clock_->NowMicros()). 
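To make the Add() contract concrete: with start_after_us = 1 s and repeat_every_us = 2 s, a task that itself takes about 1 s runs at roughly t = 1 s, 4 s, 7 s, ..., because each interval is measured from the end of the previous run. A hedged usage sketch with illustrative names and intervals:

// Illustrative only: a periodic task on a Timer backed by the real clock.
#include <cstdio>
#include <memory>

#include "rocksdb/system_clock.h"
#include "util/timer.h"

int main() {
  using namespace ROCKSDB_NAMESPACE;
  std::shared_ptr<SystemClock> clock = SystemClock::Default();
  Timer timer(clock.get());

  // First run after ~1 s, then ~2 s after each completion.
  timer.Add([] { std::printf("tick\n"); }, "example_tick",
            /*start_after_us=*/1 * 1000 * 1000,
            /*repeat_every_us=*/2 * 1000 * 1000);

  timer.Start();
  clock->SleepForMicroseconds(5 * 1000 * 1000);  // let it fire a few times
  timer.Cancel("example_tick");
  timer.Shutdown();
  return 0;
}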
+ if (executing_task_ && + fn_info->next_run_time_us < heap_.top()->next_run_time_us) { + return false; + } + auto it = map_.find(fn_name); + if (it == map_.end()) { + heap_.push(fn_info.get()); + map_.try_emplace(fn_name, std::move(fn_info)); + } else { + // timer doesn't support duplicated function name + return false; + } + cond_var_.SignalAll(); + return true; + } + + void Cancel(const std::string& fn_name) { + InstrumentedMutexLock l(&mutex_); + + // Mark the function with fn_name as invalid so that it will not be + // requeued. + auto it = map_.find(fn_name); + if (it != map_.end() && it->second) { + it->second->Cancel(); + } + + // If the currently running function is fn_name, then we need to wait + // until it finishes before returning to caller. + while (!heap_.empty() && executing_task_) { + FunctionInfo* func_info = heap_.top(); + assert(func_info); + if (func_info->name == fn_name) { + WaitForTaskCompleteIfNecessary(); + } else { + break; + } + } + } + + void CancelAll() { + InstrumentedMutexLock l(&mutex_); + CancelAllWithLock(); + } + + // Start the Timer + bool Start() { + InstrumentedMutexLock l(&mutex_); + if (running_) { + return false; + } + + running_ = true; + thread_ = std::make_unique(&Timer::Run, this); + return true; + } + + // Shutdown the Timer + bool Shutdown() { + { + InstrumentedMutexLock l(&mutex_); + if (!running_) { + return false; + } + running_ = false; + CancelAllWithLock(); + cond_var_.SignalAll(); + } + + if (thread_) { + thread_->join(); + } + return true; + } + + bool HasPendingTask() const { + InstrumentedMutexLock l(&mutex_); + for (const auto& fn_info : map_) { + if (fn_info.second->IsValid()) { + return true; + } + } + return false; + } + +#ifndef NDEBUG + // Wait until Timer starting waiting, call the optional callback, then wait + // for Timer waiting again. + // Tests can provide a custom Clock object to mock time, and use the callback + // here to bump current time and trigger Timer. See timer_test for example. + // + // Note: only support one caller of this method. + void TEST_WaitForRun(const std::function& callback = nullptr) { + InstrumentedMutexLock l(&mutex_); + // It act as a spin lock + while (executing_task_ || + (!heap_.empty() && + heap_.top()->next_run_time_us <= clock_->NowMicros())) { + cond_var_.TimedWait(clock_->NowMicros() + 1000); + } + if (callback != nullptr) { + callback(); + } + cond_var_.SignalAll(); + do { + cond_var_.TimedWait(clock_->NowMicros() + 1000); + } while (executing_task_ || + (!heap_.empty() && + heap_.top()->next_run_time_us <= clock_->NowMicros())); + } + + size_t TEST_GetPendingTaskNum() const { + InstrumentedMutexLock l(&mutex_); + size_t ret = 0; + for (const auto& fn_info : map_) { + if (fn_info.second->IsValid()) { + ret++; + } + } + return ret; + } + + void TEST_OverrideTimer(SystemClock* clock) { + InstrumentedMutexLock l(&mutex_); + clock_ = clock; + } +#endif // NDEBUG + + private: + void Run() { + InstrumentedMutexLock l(&mutex_); + + while (running_) { + if (heap_.empty()) { + // wait + TEST_SYNC_POINT("Timer::Run::Waiting"); + cond_var_.Wait(); + continue; + } + + FunctionInfo* current_fn = heap_.top(); + assert(current_fn); + + if (!current_fn->IsValid()) { + heap_.pop(); + map_.erase(current_fn->name); + continue; + } + + if (current_fn->next_run_time_us <= clock_->NowMicros()) { + // make a copy of the function so it won't be changed after + // mutex_.unlock. 
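The copy-then-unlock step noted above is a common shape: move the std::function out while holding the mutex, release the mutex for the (possibly slow) user callback, then re-acquire it for bookkeeping. A minimal standalone sketch of that pattern, not RocksDB code:

// Minimal sketch: run queued callbacks without holding the lock during the call.
#include <functional>
#include <mutex>
#include <queue>

class CallbackRunner {
 public:
  void Post(std::function<void()> fn) {
    std::lock_guard<std::mutex> guard(mu_);
    pending_.push(std::move(fn));
  }

  void RunOne() {
    std::function<void()> fn;
    {
      std::lock_guard<std::mutex> guard(mu_);
      if (pending_.empty()) {
        return;
      }
      fn = std::move(pending_.front());  // take it out under the lock
      pending_.pop();
    }
    fn();  // user code runs with the lock released
  }

 private:
  std::mutex mu_;
  std::queue<std::function<void()>> pending_;
};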
+ std::function fn = current_fn->fn; + executing_task_ = true; + mutex_.Unlock(); + // Execute the work + fn(); + mutex_.Lock(); + executing_task_ = false; + cond_var_.SignalAll(); + + // Remove the work from the heap once it is done executing, make sure + // it's the same function after executing the work while mutex is + // released. + // Note that we are just removing the pointer from the heap. Its + // memory is still managed in the map (as it holds a unique ptr). + // So current_fn is still a valid ptr. + assert(heap_.top() == current_fn); + heap_.pop(); + + // current_fn may be cancelled already. + if (current_fn->IsValid() && current_fn->repeat_every_us > 0) { + assert(running_); + current_fn->next_run_time_us = + clock_->NowMicros() + current_fn->repeat_every_us; + + // Schedule new work into the heap with new time. + heap_.push(current_fn); + } else { + // if current_fn is cancelled or no need to repeat, remove it from the + // map to avoid leak. + map_.erase(current_fn->name); + } + } else { + cond_var_.TimedWait(current_fn->next_run_time_us); + } + } + } + + void CancelAllWithLock() { + mutex_.AssertHeld(); + if (map_.empty() && heap_.empty()) { + return; + } + + // With mutex_ held, set all tasks to invalid so that they will not be + // re-queued. + for (auto& elem : map_) { + auto& func_info = elem.second; + assert(func_info); + func_info->Cancel(); + } + + // WaitForTaskCompleteIfNecessary() may release mutex_ + WaitForTaskCompleteIfNecessary(); + + while (!heap_.empty()) { + heap_.pop(); + } + map_.clear(); + } + + // A wrapper around std::function to keep track when it should run next + // and at what frequency. + struct FunctionInfo { + // the actual work + std::function fn; + // name of the function + std::string name; + // when the function should run next + uint64_t next_run_time_us; + // repeat interval + uint64_t repeat_every_us; + // controls whether this function is valid. + // A function is valid upon construction and until someone explicitly + // calls `Cancel()`. + bool valid; + + FunctionInfo(std::function&& _fn, std::string _name, + const uint64_t _next_run_time_us, uint64_t _repeat_every_us) + : fn(std::move(_fn)), + name(std::move(_name)), + next_run_time_us(_next_run_time_us), + repeat_every_us(_repeat_every_us), + valid(true) {} + + void Cancel() { valid = false; } + + bool IsValid() const { return valid; } + }; + + void WaitForTaskCompleteIfNecessary() { + mutex_.AssertHeld(); + while (executing_task_) { + TEST_SYNC_POINT("Timer::WaitForTaskCompleteIfNecessary:TaskExecuting"); + cond_var_.Wait(); + } + } + + struct RunTimeOrder { + bool operator()(const FunctionInfo* f1, const FunctionInfo* f2) { + return f1->next_run_time_us > f2->next_run_time_us; + } + }; + + SystemClock* clock_; + // This mutex controls both the heap_ and the map_. It needs to be held for + // making any changes in them. + mutable InstrumentedMutex mutex_; + InstrumentedCondVar cond_var_; + std::unique_ptr thread_; + bool running_; + bool executing_task_; + + std::priority_queue, RunTimeOrder> + heap_; + + // In addition to providing a mapping from a function name to a function, + // it is also responsible for memory management. + std::unordered_map> map_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/timer_queue.h b/src/rocksdb/util/timer_queue.h new file mode 100644 index 000000000..36a1744ac --- /dev/null +++ b/src/rocksdb/util/timer_queue.h @@ -0,0 +1,231 @@ +// Portions Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Borrowed from +// http://www.crazygaze.com/blog/2016/03/24/portable-c-timer-queue/ +// Timer Queue +// +// License +// +// The source code in this article is licensed under the CC0 license, so feel +// free to copy, modify, share, do whatever you want with it. +// No attribution is required, but Ill be happy if you do. +// CC0 license + +// The person who associated a work with this deed has dedicated the work to the +// public domain by waiving all of his or her rights to the work worldwide +// under copyright law, including all related and neighboring rights, to the +// extent allowed by law. You can copy, modify, distribute and perform the +// work, even for commercial purposes, all without asking permission. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "port/port.h" +#include "test_util/sync_point.h" + +// Allows execution of handlers at a specified time in the future +// Guarantees: +// - All handlers are executed ONCE, even if cancelled (aborted parameter will +// be set to true) +// - If TimerQueue is destroyed, it will cancel all handlers. +// - Handlers are ALWAYS executed in the Timer Queue worker thread. +// - Handlers execution order is NOT guaranteed +// +//////////////////////////////////////////////////////////////////////////////// +// borrowed from +// http://www.crazygaze.com/blog/2016/03/24/portable-c-timer-queue/ +class TimerQueue { + public: + TimerQueue() : m_th(&TimerQueue::run, this) {} + + ~TimerQueue() { shutdown(); } + + // This function is not thread-safe. + void shutdown() { + if (closed_) { + return; + } + cancelAll(); + // Abusing the timer queue to trigger the shutdown. + add(0, [this](bool) { + m_finish = true; + return std::make_pair(false, 0); + }); + m_th.join(); + closed_ = true; + } + + // Adds a new timer + // \return + // Returns the ID of the new timer. You can use this ID to cancel the + // timer + uint64_t add(int64_t milliseconds, + std::function(bool)> handler) { + WorkItem item; + Clock::time_point tp = Clock::now(); + item.end = tp + std::chrono::milliseconds(milliseconds); + TEST_SYNC_POINT_CALLBACK("TimeQueue::Add:item.end", &item.end); + item.period = milliseconds; + item.handler = std::move(handler); + + std::unique_lock lk(m_mtx); + uint64_t id = ++m_idcounter; + item.id = id; + m_items.push(std::move(item)); + + // Something changed, so wake up timer thread + m_checkWork.notify_one(); + return id; + } + + // Cancels the specified timer + // \return + // 1 if the timer was cancelled. + // 0 if you were too late to cancel (or the timer ID was never valid to + // start with) + size_t cancel(uint64_t id) { + // Instead of removing the item from the container (thus breaking the + // heap integrity), we set the item as having no handler, and put + // that handler on a new item at the top for immediate execution + // The timer thread will then ignore the original item, since it has no + // handler. 
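The add()/cancel() contract described here (every handler runs exactly once, with aborted == true if it was cancelled, and the returned pair<bool, int64_t> controlling rescheduling) can be exercised with a small hedged sketch; the 100 ms values are illustrative only.

// Illustrative TimerQueue usage: one one-shot handler and one cancelled handler.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <utility>

#include "util/timer_queue.h"

int main() {
  TimerQueue q;

  // Fires once after ~100 ms; returning {false, 0} means "do not reschedule".
  q.add(100, [](bool aborted) {
    std::printf("one-shot, aborted=%d\n", aborted ? 1 : 0);
    return std::make_pair(false, int64_t{0});
  });

  // Would repeat every ~100 ms, but we cancel it; it still runs exactly once,
  // with aborted == true.
  uint64_t id = q.add(100, [](bool aborted) {
    std::printf("repeating, aborted=%d\n", aborted ? 1 : 0);
    return std::make_pair(!aborted, int64_t{100});
  });
  q.cancel(id);

  std::this_thread::sleep_for(std::chrono::milliseconds(300));
  return 0;  // ~TimerQueue() cancels leftovers and joins the worker thread
}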
+ std::unique_lock lk(m_mtx); + for (auto&& item : m_items.getContainer()) { + if (item.id == id && item.handler) { + WorkItem newItem; + // Zero time, so it stays at the top for immediate execution + newItem.end = Clock::time_point(); + newItem.id = 0; // Means it is a canceled item + // Move the handler from item to newitem (thus clearing item) + newItem.handler = std::move(item.handler); + m_items.push(std::move(newItem)); + + // Something changed, so wake up timer thread + m_checkWork.notify_one(); + return 1; + } + } + return 0; + } + + // Cancels all timers + // \return + // The number of timers cancelled + size_t cancelAll() { + // Setting all "end" to 0 (for immediate execution) is ok, + // since it maintains the heap integrity + std::unique_lock lk(m_mtx); + m_cancel = true; + for (auto&& item : m_items.getContainer()) { + if (item.id && item.handler) { + item.end = Clock::time_point(); + item.id = 0; + } + } + auto ret = m_items.size(); + + m_checkWork.notify_one(); + return ret; + } + + private: + using Clock = std::chrono::steady_clock; + TimerQueue(const TimerQueue&) = delete; + TimerQueue& operator=(const TimerQueue&) = delete; + + void run() { + std::unique_lock lk(m_mtx); + while (!m_finish) { + auto end = calcWaitTime_lock(); + if (end.first) { + // Timers found, so wait until it expires (or something else + // changes) + m_checkWork.wait_until(lk, end.second); + } else { + // No timers exist, so wait forever until something changes + m_checkWork.wait(lk); + } + + // Check and execute as much work as possible, such as, all expired + // timers + checkWork(&lk); + } + + // If we are shutting down, we should not have any items left, + // since the shutdown cancels all items + assert(m_items.size() == 0); + } + + std::pair calcWaitTime_lock() { + while (m_items.size()) { + if (m_items.top().handler) { + // Item present, so return the new wait time + return std::make_pair(true, m_items.top().end); + } else { + // Discard empty handlers (they were cancelled) + m_items.pop(); + } + } + + // No items found, so return no wait time (causes the thread to wait + // indefinitely) + return std::make_pair(false, Clock::time_point()); + } + + void checkWork(std::unique_lock* lk) { + while (m_items.size() && m_items.top().end <= Clock::now()) { + WorkItem item(m_items.top()); + m_items.pop(); + + if (item.handler) { + (*lk).unlock(); + auto reschedule_pair = item.handler(item.id == 0); + (*lk).lock(); + if (!m_cancel && reschedule_pair.first) { + int64_t new_period = (reschedule_pair.second == -1) + ? 
item.period + : reschedule_pair.second; + + item.period = new_period; + item.end = Clock::now() + std::chrono::milliseconds(new_period); + m_items.push(std::move(item)); + } + } + } + } + + bool m_finish = false; + bool m_cancel = false; + uint64_t m_idcounter = 0; + std::condition_variable m_checkWork; + + struct WorkItem { + Clock::time_point end; + int64_t period; + uint64_t id; // id==0 means it was cancelled + std::function(bool)> handler; + bool operator>(const WorkItem& other) const { return end > other.end; } + }; + + std::mutex m_mtx; + // Inheriting from priority_queue, so we can access the internal container + class Queue : public std::priority_queue, + std::greater> { + public: + std::vector& getContainer() { return this->c; } + } m_items; + ROCKSDB_NAMESPACE::port::Thread m_th; + bool closed_ = false; +}; diff --git a/src/rocksdb/util/timer_queue_test.cc b/src/rocksdb/util/timer_queue_test.cc new file mode 100644 index 000000000..b3c3768ec --- /dev/null +++ b/src/rocksdb/util/timer_queue_test.cc @@ -0,0 +1,73 @@ +// Portions Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// borrowed from +// http://www.crazygaze.com/blog/2016/03/24/portable-c-timer-queue/ +// Timer Queue +// +// License +// +// The source code in this article is licensed under the CC0 license, so feel +// free +// to copy, modify, share, do whatever you want with it. +// No attribution is required, but Ill be happy if you do. +// CC0 license + +// The person who associated a work with this deed has dedicated the work to the +// public domain by waiving all of his or her rights to the work worldwide +// under copyright law, including all related and neighboring rights, to the +// extent allowed by law. You can copy, modify, distribute and perform the +// work, even for +// commercial purposes, all without asking permission. See Other Information +// below. +// + +#include "util/timer_queue.h" + +#include + +namespace Timing { + +using Clock = std::chrono::high_resolution_clock; +double now() { + static auto start = Clock::now(); + return std::chrono::duration(Clock::now() - start) + .count(); +} + +} // namespace Timing + +int main() { + TimerQueue q; + + double tnow = Timing::now(); + + q.add(10000, [tnow](bool aborted) mutable { + printf("T 1: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); + return std::make_pair(false, 0); + }); + q.add(10001, [tnow](bool aborted) mutable { + printf("T 2: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); + return std::make_pair(false, 0); + }); + + q.add(1000, [tnow](bool aborted) mutable { + printf("T 3: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); + return std::make_pair(!aborted, 1000); + }); + + auto id = q.add(2000, [tnow](bool aborted) mutable { + printf("T 4: %d, Elapsed %4.2fms\n", aborted, Timing::now() - tnow); + return std::make_pair(!aborted, 2000); + }); + + (void)id; + // auto ret = q.cancel(id); + // assert(ret == 1); + // q.cancelAll(); + + return 0; +} +////////////////////////////////////////// diff --git a/src/rocksdb/util/timer_test.cc b/src/rocksdb/util/timer_test.cc new file mode 100644 index 000000000..0ebfa9f3d --- /dev/null +++ b/src/rocksdb/util/timer_test.cc @@ -0,0 +1,402 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/timer.h" + +#include "db/db_test_util.h" +#include "rocksdb/file_system.h" +#include "test_util/mock_time_env.h" + +namespace ROCKSDB_NAMESPACE { + +class TimerTest : public testing::Test { + public: + TimerTest() + : mock_clock_(std::make_shared(SystemClock::Default())) { + } + + protected: + std::shared_ptr mock_clock_; + + void SetUp() override { mock_clock_->InstallTimedWaitFixCallback(); } + + const int kUsPerSec = 1000000; +}; + +TEST_F(TimerTest, SingleScheduleOnce) { + const int kInitDelayUs = 1 * kUsPerSec; + Timer timer(mock_clock_.get()); + + int count = 0; + timer.Add([&] { count++; }, "fn_sch_test", kInitDelayUs, 0); + + ASSERT_TRUE(timer.Start()); + + ASSERT_EQ(0, count); + // Wait for execution to finish + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); + ASSERT_EQ(1, count); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, MultipleScheduleOnce) { + const int kInitDelay1Us = 1 * kUsPerSec; + const int kInitDelay2Us = 3 * kUsPerSec; + Timer timer(mock_clock_.get()); + + int count1 = 0; + timer.Add([&] { count1++; }, "fn_sch_test1", kInitDelay1Us, 0); + + int count2 = 0; + timer.Add([&] { count2++; }, "fn_sch_test2", kInitDelay2Us, 0); + + ASSERT_TRUE(timer.Start()); + ASSERT_EQ(0, count1); + ASSERT_EQ(0, count2); + + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelay1Us); }); + + ASSERT_EQ(1, count1); + ASSERT_EQ(0, count2); + + timer.TEST_WaitForRun([&] { + mock_clock_->SleepForMicroseconds(kInitDelay2Us - kInitDelay1Us); + }); + + ASSERT_EQ(1, count1); + ASSERT_EQ(1, count2); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, SingleScheduleRepeatedly) { + const int kIterations = 5; + const int kInitDelayUs = 1 * kUsPerSec; + const int kRepeatUs = 1 * kUsPerSec; + + Timer timer(mock_clock_.get()); + int count = 0; + timer.Add([&] { count++; }, "fn_sch_test", kInitDelayUs, kRepeatUs); + + ASSERT_TRUE(timer.Start()); + ASSERT_EQ(0, count); + + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); + + ASSERT_EQ(1, count); + + // Wait for execution to finish + for (int i = 1; i < kIterations; i++) { + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kRepeatUs); }); + } + ASSERT_EQ(kIterations, count); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, MultipleScheduleRepeatedly) { + const int kIterations = 5; + const int kInitDelay1Us = 0 * kUsPerSec; + const int kInitDelay2Us = 1 * kUsPerSec; + const int kInitDelay3Us = 0 * kUsPerSec; + const int kRepeatUs = 2 * kUsPerSec; + const int kLargeRepeatUs = 100 * kUsPerSec; + + Timer timer(mock_clock_.get()); + + int count1 = 0; + timer.Add([&] { count1++; }, "fn_sch_test1", kInitDelay1Us, kRepeatUs); + + int count2 = 0; + timer.Add([&] { count2++; }, "fn_sch_test2", kInitDelay2Us, kRepeatUs); + + // Add a function with relatively large repeat interval + int count3 = 0; + timer.Add([&] { count3++; }, "fn_sch_test3", kInitDelay3Us, kLargeRepeatUs); + + ASSERT_TRUE(timer.Start()); + + ASSERT_EQ(0, count2); + // Wait for execution to finish + for (int i = 1; i < kIterations * (kRepeatUs / kUsPerSec); i++) { + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(1 * kUsPerSec); }); + ASSERT_EQ((i + 2) / (kRepeatUs / kUsPerSec), count1); + ASSERT_EQ((i + 1) / (kRepeatUs / kUsPerSec), 
count2); + + // large interval function should only run once (the first one). + ASSERT_EQ(1, count3); + } + + timer.Cancel("fn_sch_test1"); + + // Wait for execution to finish + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(1 * kUsPerSec); }); + ASSERT_EQ(kIterations, count1); + ASSERT_EQ(kIterations, count2); + ASSERT_EQ(1, count3); + + timer.Cancel("fn_sch_test2"); + + ASSERT_EQ(kIterations, count1); + ASSERT_EQ(kIterations, count2); + + // execute the long interval one + timer.TEST_WaitForRun([&] { + mock_clock_->SleepForMicroseconds( + kLargeRepeatUs - static_cast(mock_clock_->NowMicros())); + }); + ASSERT_EQ(2, count3); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, AddAfterStartTest) { + const int kIterations = 5; + const int kInitDelayUs = 1 * kUsPerSec; + const int kRepeatUs = 1 * kUsPerSec; + + // wait timer to run and then add a new job + SyncPoint::GetInstance()->LoadDependency( + {{"Timer::Run::Waiting", "TimerTest:AddAfterStartTest:1"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + Timer timer(mock_clock_.get()); + + ASSERT_TRUE(timer.Start()); + + TEST_SYNC_POINT("TimerTest:AddAfterStartTest:1"); + int count = 0; + timer.Add([&] { count++; }, "fn_sch_test", kInitDelayUs, kRepeatUs); + ASSERT_EQ(0, count); + // Wait for execution to finish + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); + ASSERT_EQ(1, count); + + for (int i = 1; i < kIterations; i++) { + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kRepeatUs); }); + } + ASSERT_EQ(kIterations, count); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, CancelRunningTask) { + static constexpr char kTestFuncName[] = "test_func"; + const int kRepeatUs = 1 * kUsPerSec; + Timer timer(mock_clock_.get()); + ASSERT_TRUE(timer.Start()); + int* value = new int; + *value = 0; + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->LoadDependency({ + {"TimerTest::CancelRunningTask:test_func:0", + "TimerTest::CancelRunningTask:BeforeCancel"}, + {"Timer::WaitForTaskCompleteIfNecessary:TaskExecuting", + "TimerTest::CancelRunningTask:test_func:1"}, + }); + SyncPoint::GetInstance()->EnableProcessing(); + timer.Add( + [&]() { + *value = 1; + TEST_SYNC_POINT("TimerTest::CancelRunningTask:test_func:0"); + TEST_SYNC_POINT("TimerTest::CancelRunningTask:test_func:1"); + }, + kTestFuncName, 0, kRepeatUs); + port::Thread control_thr([&]() { + TEST_SYNC_POINT("TimerTest::CancelRunningTask:BeforeCancel"); + timer.Cancel(kTestFuncName); + // Verify that *value has been set to 1. 
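These tests rely on RocksDB's SyncPoint facility: LoadDependency({{"A", "B"}}) makes a thread that reaches TEST_SYNC_POINT("B") wait until some thread has passed TEST_SYNC_POINT("A"), which is how Cancel() is forced to overlap the running task here. A hedged minimal sketch of that pattern with made-up point names (sync points are active only in debug builds with processing enabled):

// Sketch: make one thread wait until another has reached a marker.
#include <thread>

#include "test_util/sync_point.h"

using ROCKSDB_NAMESPACE::SyncPoint;

void SyncPointExample() {
  SyncPoint::GetInstance()->LoadDependency(
      {{"Example:AfterWrite", "Example:BeforeRead"}});
  SyncPoint::GetInstance()->EnableProcessing();

  int shared = 0;
  std::thread writer([&] {
    shared = 42;
    TEST_SYNC_POINT("Example:AfterWrite");  // predecessor
  });
  std::thread reader([&] {
    TEST_SYNC_POINT("Example:BeforeRead");  // waits for AfterWrite
    // With sync points active, `shared` has already been written here.
  });
  writer.join();
  reader.join();
  SyncPoint::GetInstance()->DisableProcessing();
}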
+ ASSERT_EQ(1, *value); + delete value; + value = nullptr; + }); + mock_clock_->SleepForMicroseconds(kRepeatUs); + control_thr.join(); + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, ShutdownRunningTask) { + const int kRepeatUs = 1 * kUsPerSec; + constexpr char kTestFunc1Name[] = "test_func1"; + constexpr char kTestFunc2Name[] = "test_func2"; + Timer timer(mock_clock_.get()); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->LoadDependency({ + {"TimerTest::ShutdownRunningTest:test_func:0", + "TimerTest::ShutdownRunningTest:BeforeShutdown"}, + {"Timer::WaitForTaskCompleteIfNecessary:TaskExecuting", + "TimerTest::ShutdownRunningTest:test_func:1"}, + }); + SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_TRUE(timer.Start()); + + int* value = new int; + *value = 0; + timer.Add( + [&]() { + TEST_SYNC_POINT("TimerTest::ShutdownRunningTest:test_func:0"); + *value = 1; + TEST_SYNC_POINT("TimerTest::ShutdownRunningTest:test_func:1"); + }, + kTestFunc1Name, 0, kRepeatUs); + + timer.Add([&]() { ++(*value); }, kTestFunc2Name, 0, kRepeatUs); + + port::Thread control_thr([&]() { + TEST_SYNC_POINT("TimerTest::ShutdownRunningTest:BeforeShutdown"); + timer.Shutdown(); + }); + mock_clock_->SleepForMicroseconds(kRepeatUs); + control_thr.join(); + delete value; +} + +TEST_F(TimerTest, AddSameFuncName) { + const int kInitDelayUs = 1 * kUsPerSec; + const int kRepeat1Us = 5 * kUsPerSec; + const int kRepeat2Us = 4 * kUsPerSec; + + Timer timer(mock_clock_.get()); + ASSERT_TRUE(timer.Start()); + + int func_counter1 = 0; + ASSERT_TRUE(timer.Add([&] { func_counter1++; }, "duplicated_func", + kInitDelayUs, kRepeat1Us)); + + int func2_counter = 0; + ASSERT_TRUE( + timer.Add([&] { func2_counter++; }, "func2", kInitDelayUs, kRepeat2Us)); + + // New function with the same name should fail to add + int func_counter2 = 0; + ASSERT_FALSE(timer.Add([&] { func_counter2++; }, "duplicated_func", + kInitDelayUs, kRepeat1Us)); + + ASSERT_EQ(0, func_counter1); + ASSERT_EQ(0, func2_counter); + + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); + + ASSERT_EQ(1, func_counter1); + ASSERT_EQ(1, func2_counter); + + timer.TEST_WaitForRun([&] { mock_clock_->SleepForMicroseconds(kRepeat1Us); }); + + ASSERT_EQ(2, func_counter1); + ASSERT_EQ(2, func2_counter); + ASSERT_EQ(0, func_counter2); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, RepeatIntervalWithFuncRunningTime) { + const int kInitDelayUs = 1 * kUsPerSec; + const int kRepeatUs = 5 * kUsPerSec; + const int kFuncRunningTimeUs = 1 * kUsPerSec; + + Timer timer(mock_clock_.get()); + ASSERT_TRUE(timer.Start()); + + int func_counter = 0; + timer.Add( + [&] { + mock_clock_->SleepForMicroseconds(kFuncRunningTimeUs); + func_counter++; + }, + "func", kInitDelayUs, kRepeatUs); + + ASSERT_EQ(0, func_counter); + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); + ASSERT_EQ(1, func_counter); + ASSERT_EQ(kInitDelayUs + kFuncRunningTimeUs, mock_clock_->NowMicros()); + + // After repeat interval time, the function is not executed, as running + // the function takes some time (`kFuncRunningTimeSec`). The repeat interval + // is the time between ending time of the last call and starting time of the + // next call. 
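Concretely, with the constants in this test (kInitDelayUs = 1 s, kRepeatUs = 5 s, kFuncRunningTimeUs = 1 s), the firing times follow "previous end + repeat interval". A small sketch of that arithmetic, reusing the constant names only for illustration:

// First run starts at kInitDelayUs and ends kFuncRunningTimeUs later; each
// later run starts kRepeatUs after the previous END, not the previous start:
//   start(0) = 1,000,000 us   end(0) = 2,000,000 us
//   start(1) = end(0) + 5,000,000 = 7,000,000 us   (7 s, not 6 s)
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t kInitDelayUs = 1000000;
  const uint64_t kRepeatUs = 5000000;
  const uint64_t kFuncRunningTimeUs = 1000000;
  uint64_t start = kInitDelayUs;
  for (int i = 0; i < 3; i++) {
    const uint64_t end = start + kFuncRunningTimeUs;
    std::printf("run %d: start=%llu us end=%llu us\n", i,
                static_cast<unsigned long long>(start),
                static_cast<unsigned long long>(end));
    start = end + kRepeatUs;  // interval measured from the end of the run
  }
  return 0;
}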
+ uint64_t next_abs_interval_time_us = kInitDelayUs + kRepeatUs; + timer.TEST_WaitForRun([&] { + mock_clock_->SetCurrentTime(next_abs_interval_time_us / kUsPerSec); + }); + ASSERT_EQ(1, func_counter); + + // After the function running time, it's executed again + timer.TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kFuncRunningTimeUs); }); + ASSERT_EQ(2, func_counter); + + ASSERT_TRUE(timer.Shutdown()); +} + +TEST_F(TimerTest, DestroyRunningTimer) { + const int kInitDelayUs = 1 * kUsPerSec; + const int kRepeatUs = 1 * kUsPerSec; + + auto timer_ptr = new Timer(mock_clock_.get()); + + int count = 0; + timer_ptr->Add([&] { count++; }, "fn_sch_test", kInitDelayUs, kRepeatUs); + ASSERT_TRUE(timer_ptr->Start()); + + timer_ptr->TEST_WaitForRun( + [&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); }); + + // delete a running timer should not cause any exception + delete timer_ptr; +} + +TEST_F(TimerTest, DestroyTimerWithRunningFunc) { + const int kRepeatUs = 1 * kUsPerSec; + auto timer_ptr = new Timer(mock_clock_.get()); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->LoadDependency({ + {"TimerTest::DestroyTimerWithRunningFunc:test_func:0", + "TimerTest::DestroyTimerWithRunningFunc:BeforeDelete"}, + {"Timer::WaitForTaskCompleteIfNecessary:TaskExecuting", + "TimerTest::DestroyTimerWithRunningFunc:test_func:1"}, + }); + SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_TRUE(timer_ptr->Start()); + + int count = 0; + timer_ptr->Add( + [&]() { + TEST_SYNC_POINT("TimerTest::DestroyTimerWithRunningFunc:test_func:0"); + count++; + TEST_SYNC_POINT("TimerTest::DestroyTimerWithRunningFunc:test_func:1"); + }, + "fn_running_test", 0, kRepeatUs); + + port::Thread control_thr([&] { + TEST_SYNC_POINT("TimerTest::DestroyTimerWithRunningFunc:BeforeDelete"); + delete timer_ptr; + }); + mock_clock_->SleepForMicroseconds(kRepeatUs); + control_thr.join(); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/user_comparator_wrapper.h b/src/rocksdb/util/user_comparator_wrapper.h new file mode 100644 index 000000000..59ebada12 --- /dev/null +++ b/src/rocksdb/util/user_comparator_wrapper.h @@ -0,0 +1,64 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once + +#include "monitoring/perf_context_imp.h" +#include "rocksdb/comparator.h" + +namespace ROCKSDB_NAMESPACE { + +// Wrapper of user comparator, with auto increment to +// perf_context.user_key_comparison_count. +class UserComparatorWrapper { + public: + // `UserComparatorWrapper`s constructed with the default constructor are not + // usable and will segfault on any attempt to use them for comparisons. 
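As a hedged illustration of what the wrapper adds, the sketch below wraps the built-in bytewise comparator; the perf counter only moves when perf context collection is enabled in the build.

// Sketch: comparisons routed through the wrapper bump
// perf_context.user_key_comparison_count when perf collection is on.
#include <cassert>
#include <cstdint>

#include "rocksdb/comparator.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice.h"
#include "util/user_comparator_wrapper.h"

void WrapperExample() {
  using namespace ROCKSDB_NAMESPACE;
  UserComparatorWrapper wrapped(BytewiseComparator());
  const uint64_t before = get_perf_context()->user_key_comparison_count;
  assert(wrapped.Compare(Slice("a"), Slice("b")) < 0);
  assert(wrapped.Equal(Slice("a"), Slice("a")));
  assert(get_perf_context()->user_key_comparison_count >= before);
}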
+ UserComparatorWrapper() : user_comparator_(nullptr) {} + + explicit UserComparatorWrapper(const Comparator* const user_cmp) + : user_comparator_(user_cmp) {} + + ~UserComparatorWrapper() = default; + + const Comparator* user_comparator() const { return user_comparator_; } + + int Compare(const Slice& a, const Slice& b) const { + PERF_COUNTER_ADD(user_key_comparison_count, 1); + return user_comparator_->Compare(a, b); + } + + bool Equal(const Slice& a, const Slice& b) const { + PERF_COUNTER_ADD(user_key_comparison_count, 1); + return user_comparator_->Equal(a, b); + } + + int CompareTimestamp(const Slice& ts1, const Slice& ts2) const { + return user_comparator_->CompareTimestamp(ts1, ts2); + } + + int CompareWithoutTimestamp(const Slice& a, const Slice& b) const { + PERF_COUNTER_ADD(user_key_comparison_count, 1); + return user_comparator_->CompareWithoutTimestamp(a, b); + } + + int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b, + bool b_has_ts) const { + PERF_COUNTER_ADD(user_key_comparison_count, 1); + return user_comparator_->CompareWithoutTimestamp(a, a_has_ts, b, b_has_ts); + } + + bool EqualWithoutTimestamp(const Slice& a, const Slice& b) const { + return user_comparator_->EqualWithoutTimestamp(a, b); + } + + private: + const Comparator* user_comparator_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/vector_iterator.h b/src/rocksdb/util/vector_iterator.h new file mode 100644 index 000000000..c4cc01d56 --- /dev/null +++ b/src/rocksdb/util/vector_iterator.h @@ -0,0 +1,118 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +#pragma once + +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/comparator.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice.h" +#include "table/internal_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +// Iterator over a vector of keys/values +class VectorIterator : public InternalIterator { + public: + VectorIterator(std::vector keys, std::vector values, + const CompareInterface* icmp = nullptr) + : keys_(std::move(keys)), + values_(std::move(values)), + current_(keys_.size()), + indexed_cmp_(icmp, &keys_) { + assert(keys_.size() == values_.size()); + + indices_.reserve(keys_.size()); + for (size_t i = 0; i < keys_.size(); i++) { + indices_.push_back(i); + } + if (icmp != nullptr) { + std::sort(indices_.begin(), indices_.end(), indexed_cmp_); + } + } + + virtual bool Valid() const override { + return !indices_.empty() && current_ < indices_.size(); + } + + virtual void SeekToFirst() override { current_ = 0; } + virtual void SeekToLast() override { current_ = indices_.size() - 1; } + + virtual void Seek(const Slice& target) override { + if (indexed_cmp_.cmp != nullptr) { + current_ = std::lower_bound(indices_.begin(), indices_.end(), target, + indexed_cmp_) - + indices_.begin(); + } else { + current_ = + std::lower_bound(keys_.begin(), keys_.end(), target.ToString()) - + keys_.begin(); + } + } + + virtual void SeekForPrev(const Slice& target) override { + if (indexed_cmp_.cmp != nullptr) { + current_ = std::upper_bound(indices_.begin(), indices_.end(), target, + indexed_cmp_) - + indices_.begin(); + } else { + current_ = + std::upper_bound(keys_.begin(), keys_.end(), target.ToString()) - + keys_.begin(); + } + if (!Valid()) { + SeekToLast(); + } else { + Prev(); + } + } + + virtual void Next() override { current_++; } + virtual void Prev() override { current_--; } + + virtual Slice key() const override { + return Slice(keys_[indices_[current_]]); + } + 
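A hedged usage sketch of this iterator with made-up keys and values: passing a comparator sorts the index array so Seek() can binary-search, while pre-sorted input may omit it.

// Sketch: iterate a small in-memory key/value set with VectorIterator.
#include <string>
#include <utility>
#include <vector>

#include "rocksdb/comparator.h"
#include "util/vector_iterator.h"

void VectorIteratorExample() {
  using namespace ROCKSDB_NAMESPACE;
  std::vector<std::string> keys{"b", "a", "c"};
  std::vector<std::string> values{"2", "1", "3"};
  // BytewiseComparator() provides the CompareInterface, so the index array
  // gets sorted and Seek() can binary-search.
  VectorIterator iter(std::move(keys), std::move(values), BytewiseComparator());
  for (iter.Seek("a"); iter.Valid(); iter.Next()) {
    Slice k = iter.key();  // "a", "b", "c" in order
    Slice v = iter.value();
    (void)k;
    (void)v;
  }
}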
virtual Slice value() const override { + return Slice(values_[indices_[current_]]); + } + + virtual Status status() const override { return Status::OK(); } + + virtual bool IsKeyPinned() const override { return true; } + virtual bool IsValuePinned() const override { return true; } + + protected: + std::vector keys_; + std::vector values_; + size_t current_; + + private: + struct IndexedKeyComparator { + IndexedKeyComparator(const CompareInterface* c, + const std::vector* ks) + : cmp(c), keys(ks) {} + + bool operator()(size_t a, size_t b) const { + return cmp->Compare((*keys)[a], (*keys)[b]) < 0; + } + + bool operator()(size_t a, const Slice& b) const { + return cmp->Compare((*keys)[a], b) < 0; + } + + bool operator()(const Slice& a, size_t b) const { + return cmp->Compare(a, (*keys)[b]) < 0; + } + + const CompareInterface* cmp; + const std::vector* keys; + }; + + IndexedKeyComparator indexed_cmp_; + std::vector indices_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/work_queue.h b/src/rocksdb/util/work_queue.h new file mode 100644 index 000000000..94ece85d9 --- /dev/null +++ b/src/rocksdb/util/work_queue.h @@ -0,0 +1,150 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +/// Unbounded thread-safe work queue. +// +// This file is an excerpt from Facebook's zstd repo at +// https://github.com/facebook/zstd/. The relevant file is +// contrib/pzstd/utils/WorkQueue.h. + +template +class WorkQueue { + // Protects all member variable access + std::mutex mutex_; + std::condition_variable readerCv_; + std::condition_variable writerCv_; + std::condition_variable finishCv_; + + std::queue queue_; + bool done_; + std::size_t maxSize_; + + // Must have lock to call this function + bool full() const { + if (maxSize_ == 0) { + return false; + } + return queue_.size() >= maxSize_; + } + + public: + /** + * Constructs an empty work queue with an optional max size. + * If `maxSize == 0` the queue size is unbounded. + * + * @param maxSize The maximum allowed size of the work queue. + */ + WorkQueue(std::size_t maxSize = 0) : done_(false), maxSize_(maxSize) {} + + /** + * Push an item onto the work queue. Notify a single thread that work is + * available. If `finish()` has been called, do nothing and return false. + * If `push()` returns false, then `item` has not been copied from. + * + * @param item Item to push onto the queue. + * @returns True upon success, false if `finish()` has been called. An + * item was pushed iff `push()` returns true. + */ + template + bool push(U&& item) { + { + std::unique_lock lock(mutex_); + while (full() && !done_) { + writerCv_.wait(lock); + } + if (done_) { + return false; + } + queue_.push(std::forward(item)); + } + readerCv_.notify_one(); + return true; + } + + /** + * Attempts to pop an item off the work queue. 
It will block until data is + * available or `finish()` has been called. + * + * @param[out] item If `pop` returns `true`, it contains the popped item. + * If `pop` returns `false`, it is unmodified. + * @returns True upon success. False if the queue is empty and + * `finish()` has been called. + */ + bool pop(T& item) { + { + std::unique_lock lock(mutex_); + while (queue_.empty() && !done_) { + readerCv_.wait(lock); + } + if (queue_.empty()) { + assert(done_); + return false; + } + item = queue_.front(); + queue_.pop(); + } + writerCv_.notify_one(); + return true; + } + + /** + * Sets the maximum queue size. If `maxSize == 0` then it is unbounded. + * + * @param maxSize The new maximum queue size. + */ + void setMaxSize(std::size_t maxSize) { + { + std::lock_guard lock(mutex_); + maxSize_ = maxSize; + } + writerCv_.notify_all(); + } + + /** + * Promise that `push()` won't be called again, so once the queue is empty + * there will never any more work. + */ + void finish() { + { + std::lock_guard lock(mutex_); + assert(!done_); + done_ = true; + } + readerCv_.notify_all(); + writerCv_.notify_all(); + finishCv_.notify_all(); + } + + /// Blocks until `finish()` has been called (but the queue may not be empty). + void waitUntilFinished() { + std::unique_lock lock(mutex_); + while (!done_) { + finishCv_.wait(lock); + } + } +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/src/rocksdb/util/work_queue_test.cc b/src/rocksdb/util/work_queue_test.cc new file mode 100644 index 000000000..c23a51279 --- /dev/null +++ b/src/rocksdb/util/work_queue_test.cc @@ -0,0 +1,272 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ +#include "util/work_queue.h" + +#include + +#include +#include +#include +#include +#include + +#include "port/stack_trace.h" + +namespace ROCKSDB_NAMESPACE { + +// Unit test for work_queue.h. +// +// This file is an excerpt from Facebook's zstd repo at +// https://github.com/facebook/zstd/. The relevant file is +// contrib/pzstd/utils/test/WorkQueueTest.cpp. 
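Before the tests, a compact hedged sketch of the WorkQueue contract they exercise: push() blocks while a bounded queue is full, pop() blocks until an item arrives or finish() is called, and pop() returning false is the drain signal.

// Sketch: single producer, single consumer over a bounded WorkQueue<int>.
#include <thread>

#include "util/work_queue.h"

void WorkQueueExample() {
  ROCKSDB_NAMESPACE::WorkQueue<int> queue(/*maxSize=*/4);  // bounded

  std::thread consumer([&queue] {
    int item;
    while (queue.pop(item)) {  // returns false once empty AND finished
      // ...process item...
    }
  });

  for (int i = 0; i < 100; ++i) {
    queue.push(i);  // blocks briefly whenever the bounded queue is full
  }
  queue.finish();  // no more pushes; lets the consumer drain and exit
  consumer.join();
}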
+ +struct Popper { + WorkQueue* queue; + int* results; + std::mutex* mutex; + + void operator()() { + int result; + while (queue->pop(result)) { + std::lock_guard lock(*mutex); + results[result] = result; + } + } +}; + +TEST(WorkQueue, SingleThreaded) { + WorkQueue queue; + int result; + + queue.push(5); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(5, result); + + queue.push(1); + queue.push(2); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(1, result); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(2, result); + + queue.push(1); + queue.push(2); + queue.finish(); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(1, result); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(2, result); + EXPECT_FALSE(queue.pop(result)); + + queue.waitUntilFinished(); +} + +TEST(WorkQueue, SPSC) { + WorkQueue queue; + const int max = 100; + + for (int i = 0; i < 10; ++i) { + queue.push(i); + } + + std::thread thread([&queue, max] { + int result; + for (int i = 0;; ++i) { + if (!queue.pop(result)) { + EXPECT_EQ(i, max); + break; + } + EXPECT_EQ(i, result); + } + }); + + std::this_thread::yield(); + for (int i = 10; i < max; ++i) { + queue.push(i); + } + queue.finish(); + + thread.join(); +} + +TEST(WorkQueue, SPMC) { + WorkQueue queue; + std::vector results(50, -1); + std::mutex mutex; + std::vector threads; + for (int i = 0; i < 5; ++i) { + threads.emplace_back(Popper{&queue, results.data(), &mutex}); + } + + for (int i = 0; i < 50; ++i) { + queue.push(i); + } + queue.finish(); + + for (auto& thread : threads) { + thread.join(); + } + + for (int i = 0; i < 50; ++i) { + EXPECT_EQ(i, results[i]); + } +} + +TEST(WorkQueue, MPMC) { + WorkQueue queue; + std::vector results(100, -1); + std::mutex mutex; + std::vector popperThreads; + for (int i = 0; i < 4; ++i) { + popperThreads.emplace_back(Popper{&queue, results.data(), &mutex}); + } + + std::vector pusherThreads; + for (int i = 0; i < 2; ++i) { + auto min = i * 50; + auto max = (i + 1) * 50; + pusherThreads.emplace_back([&queue, min, max] { + for (int j = min; j < max; ++j) { + queue.push(j); + } + }); + } + + for (auto& thread : pusherThreads) { + thread.join(); + } + queue.finish(); + + for (auto& thread : popperThreads) { + thread.join(); + } + + for (int i = 0; i < 100; ++i) { + EXPECT_EQ(i, results[i]); + } +} + +TEST(WorkQueue, BoundedSizeWorks) { + WorkQueue queue(1); + int result; + queue.push(5); + queue.pop(result); + queue.push(5); + queue.pop(result); + queue.push(5); + queue.finish(); + queue.pop(result); + EXPECT_EQ(5, result); +} + +TEST(WorkQueue, BoundedSizePushAfterFinish) { + WorkQueue queue(1); + int result; + queue.push(5); + std::thread pusher([&queue] { queue.push(6); }); + // Dirtily try and make sure that pusher has run. + std::this_thread::sleep_for(std::chrono::seconds(1)); + queue.finish(); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(5, result); + EXPECT_FALSE(queue.pop(result)); + + pusher.join(); +} + +TEST(WorkQueue, SetMaxSize) { + WorkQueue queue(2); + int result; + queue.push(5); + queue.push(6); + queue.setMaxSize(1); + std::thread pusher([&queue] { queue.push(7); }); + // Dirtily try and make sure that pusher has run. 
+ std::this_thread::sleep_for(std::chrono::seconds(1)); + queue.finish(); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(5, result); + EXPECT_TRUE(queue.pop(result)); + EXPECT_EQ(6, result); + EXPECT_FALSE(queue.pop(result)); + + pusher.join(); +} + +TEST(WorkQueue, BoundedSizeMPMC) { + WorkQueue queue(10); + std::vector results(200, -1); + std::mutex mutex; + std::cerr << "Creating popperThreads" << std::endl; + std::vector popperThreads; + for (int i = 0; i < 4; ++i) { + popperThreads.emplace_back(Popper{&queue, results.data(), &mutex}); + } + + std::cerr << "Creating pusherThreads" << std::endl; + std::vector pusherThreads; + for (int i = 0; i < 2; ++i) { + auto min = i * 100; + auto max = (i + 1) * 100; + pusherThreads.emplace_back([&queue, min, max] { + for (int j = min; j < max; ++j) { + queue.push(j); + } + }); + } + + std::cerr << "Joining pusherThreads" << std::endl; + for (auto& thread : pusherThreads) { + thread.join(); + } + std::cerr << "Finishing queue" << std::endl; + queue.finish(); + + std::cerr << "Joining popperThreads" << std::endl; + for (auto& thread : popperThreads) { + thread.join(); + } + + std::cerr << "Inspecting results" << std::endl; + for (int i = 0; i < 200; ++i) { + EXPECT_EQ(i, results[i]); + } +} + +TEST(WorkQueue, FailedPush) { + WorkQueue queue; + EXPECT_TRUE(queue.push(1)); + queue.finish(); + EXPECT_FALSE(queue.push(1)); +} + +TEST(WorkQueue, FailedPop) { + WorkQueue queue; + int x = 5; + EXPECT_TRUE(queue.push(x)); + queue.finish(); + x = 0; + EXPECT_TRUE(queue.pop(x)); + EXPECT_EQ(5, x); + EXPECT_FALSE(queue.pop(x)); + EXPECT_EQ(5, x); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/rocksdb/util/xxhash.cc b/src/rocksdb/util/xxhash.cc new file mode 100644 index 000000000..88852c330 --- /dev/null +++ b/src/rocksdb/util/xxhash.cc @@ -0,0 +1,48 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2012-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * xxhash.c instantiates functions defined in xxhash.h + */ +// clang-format off +#ifndef XXH_STATIC_LINKING_ONLY +#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ +#endif // !defined(XXH_STATIC_LINKING_ONLY) +#define XXH_IMPLEMENTATION /* access definitions */ + +#include "xxhash.h" diff --git a/src/rocksdb/util/xxhash.h b/src/rocksdb/util/xxhash.h new file mode 100644 index 000000000..195f06b39 --- /dev/null +++ b/src/rocksdb/util/xxhash.h @@ -0,0 +1,5346 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +/* BEGIN RocksDB customizations */ +#ifndef XXH_STATIC_LINKING_ONLY +// Using compiled xxhash.cc +#define XXH_STATIC_LINKING_ONLY 1 +#endif // !defined(XXH_STATIC_LINKING_ONLY) +#ifndef XXH_NAMESPACE +#define XXH_NAMESPACE ROCKSDB_ +#endif // !defined(XXH_NAMESPACE) + +// for FALLTHROUGH_INTENDED, inserted as appropriate +#include "port/lang.h" +/* END RocksDB customizations */ + +// clang-format off +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (C) 2012-2020 Yann Collet + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - xxHash homepage: https://www.xxhash.com + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ +/*! 
+ * @mainpage xxHash + * + * @file xxhash.h + * xxHash prototypes and implementation + */ +/* TODO: update */ +/* Notice extracted from xxHash homepage: + +xxHash is an extremely fast hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MurmurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +Note: SMHasher's CRC32 implementation is not the fastest one. +Other speed-oriented implementations can be faster, +especially in combination with PCLMUL instruction: +https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735 + +A 64-bit version, named XXH64, is available since r35. +It offers much better speed, but for 64-bit applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* **************************** + * INLINE mode + ******************************/ +/*! + * XXH_INLINE_ALL (and XXH_PRIVATE_API) + * Use these build macros to inline xxhash into the target unit. + * Inlining improves performance on small inputs, especially when the length is + * expressed as a compile-time constant: + * + * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html + * + * It also keeps xxHash symbols private to the unit, so they are not exported. + * + * Usage: + * #define XXH_INLINE_ALL + * #include "xxhash.h" + * + * Do not compile and link xxhash.o as a separate object, as it is not useful. + */ +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ + && !defined(XXH_INLINE_ALL_31684351384) + /* this section should be traversed only once */ +# define XXH_INLINE_ALL_31684351384 + /* give access to the advanced API, required to compile implementations */ +# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +# define XXH_STATIC_LINKING_ONLY + /* make all functions private */ +# undef XXH_PUBLIC_API +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* note: this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif + + /* + * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, such + * as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. 
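As a concrete illustration of the inline mode described above, a consuming translation unit might look like the sketch below. This is an editorial example, not part of the patch, and it assumes the unmodified upstream xxhash.h: this RocksDB copy pre-defines XXH_NAMESPACE, which the header rejects in combination with XXH_INLINE_ALL (see the #error just below).

/* sketch only: inline all of xxHash into this one unit */
#define XXH_INLINE_ALL
#include "xxhash.h"
#include <stdio.h>

int main(void)
{
    const char msg[] = "hello";
    /* symbols stay private to this unit; nothing is exported */
    printf("%08x\n", (unsigned)XXH32(msg, sizeof(msg) - 1, 0));
    return 0;
}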
+ */ +# ifdef XXH_NAMESPACE +# error "XXH_INLINE_ALL with XXH_NAMESPACE is not supported" + /* + * Note: Alternative: #undef all symbols (it's a pretty large list). + * Without #error: it compiles, but functions are actually not inlined. + */ +# endif +# define XXH_NAMESPACE XXH_INLINE_ + /* + * Some identifiers (enums, type names) are not symbols, but they must + * still be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and is a more dispersed action. + * Meanwhile, renaming can be achieved in a single block + */ +# define XXH_IPREF(Id) XXH_INLINE_ ## Id +# define XXH_OK XXH_IPREF(XXH_OK) +# define XXH_ERROR XXH_IPREF(XXH_ERROR) +# define XXH_errorcode XXH_IPREF(XXH_errorcode) +# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) +# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) +# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) +# define XXH32_state_s XXH_IPREF(XXH32_state_s) +# define XXH32_state_t XXH_IPREF(XXH32_state_t) +# define XXH64_state_s XXH_IPREF(XXH64_state_s) +# define XXH64_state_t XXH_IPREF(XXH64_state_t) +# define XXH3_state_s XXH_IPREF(XXH3_state_s) +# define XXH3_state_t XXH_IPREF(XXH3_state_t) +# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) + /* Ensure the header is parsed again, even if it was previously included */ +# undef XXHASH_H_5627135585666179 +# undef XXHASH_H_STATIC_13879238742 +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + + + +/* **************************************************************** + * Stable API + *****************************************************************/ +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/*! + * @defgroup public Public API + * Contains details on the public xxHash functions. + * @{ + */ +/* specific declaration modes for Windows */ +#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif + +#ifdef XXH_DOXYGEN +/*! + * @brief Emulate a namespace by transparently prefixing all symbols. + * + * If you want to include _and expose_ xxHash functions from within your own + * library, but also want to avoid symbol collisions with other libraries which + * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix + * any public symbol from xxhash library with the value of XXH_NAMESPACE + * (therefore, avoid empty or numeric values). + * + * Note that no change is required within the calling program as long as it + * includes `xxhash.h`: Regular symbol names will be automatically translated + * by this header. 
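To make the namespacing mechanism concrete: this vendored copy defines XXH_NAMESPACE as ROCKSDB_ (see the customization block at the top of the header), so callers are written against the plain names while the linker sees prefixed symbols. A minimal sketch, with an illustrative include path:

#include "util/xxhash.h"   /* illustrative path to this vendored header */

XXH32_hash_t page_checksum(const void* page, size_t n)
{
    /* The preprocessor expands XXH32 to ROCKSDB_XXH32, so this cannot
     * collide with another xxHash linked into the same binary. */
    return XXH32(page, n, 0);
}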
+ */ +# define XXH_NAMESPACE /* YOUR NAME HERE */ +# undef XXH_NAMESPACE +#endif + +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +/* XXH32 */ +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +/* XXH64 */ +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +/* XXH3_64bits */ +# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) +# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) +# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) +# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) +# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) +# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) +# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) +# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) +# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) +# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) +# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest) +# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) +/* XXH3_128bits */ +# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) +# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) +# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed) +# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) +# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) +# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) +# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) +# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) +# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) +# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) +# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) +# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) +# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 
+#define XXH_VERSION_MINOR 8
+#define XXH_VERSION_RELEASE 1
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is only useful when xxHash is compiled as a shared library, as it is
+ * independent of the version defined in the header.
+ *
+ * @return `XXH_VERSION_NUMBER` as of when the library was compiled.
+ */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+#elif !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint32_t XXH32_hash_t;
+#else
+# include <limits.h>
+# if UINT_MAX == 0xFFFFFFFFUL
+ typedef unsigned int XXH32_hash_t;
+# else
+# if ULONG_MAX == 0xFFFFFFFFUL
+ typedef unsigned long XXH32_hash_t;
+# else
+# error "unsupported platform: need a 32-bit type"
+# endif
+# endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup xxh32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ * XXH32 is considered rather weak by today's standards.
+ * The @ref xxh3_family provides competitive speed for both 32-bit and 64-bit
+ * systems, and offers true 64/128 bit hash results. It provides a superior
+ * level of dispersion, and greatly reduces the risks of collisions.
+ *
+ * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
+ * @see @ref xxh32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit hash value.
+ *
+ * @see
+ * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+/*!
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
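Before the streaming walkthrough continues below, a minimal one-shot call, the path the text above recommends for small inputs, looks like this sketch; the key string is illustrative, and 0xbaad5eed is simply the seed reused by the file's own streaming example:

#include <string.h>
#include "xxhash.h"

XXH32_hash_t hash_key(const char* key)
{
    /* one shot: no state to create, update, or free */
    return XXH32(key, strlen(key), 0xbaad5eed);
}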
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ * Example code for incrementally hashing a file:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <assert.h>
+ * #define BUFFER_SIZE 256
+ *
+ * // Note: XXH64 and XXH3 use the same interface.
+ * XXH32_hash_t
+ * hashFile(FILE* stream)
+ * {
+ * XXH32_state_t* state;
+ * unsigned char buf[BUFFER_SIZE];
+ * size_t amt;
+ * XXH32_hash_t hash;
+ *
+ * state = XXH32_createState(); // Create a state
+ * assert(state != NULL); // Error check here
+ * XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
+ * while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
+ * XXH32_update(state, buf, amt); // Hash the file in chunks
+ * }
+ * hash = XXH32_digest(state); // Finalize the hash
+ * XXH32_freeState(state); // Clean up
+ * return hash;
+ * }
+ * @endcode
+ */
+
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * Must be freed with XXH32_freeState().
+ * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * Must be allocated with XXH32_createState().
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash32 value from that state.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+/******* Canonical representation *******/
+
+/*
+ * The default return values from XXH functions are unsigned 32 and 64 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ */
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+ unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+#elif !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t XXH64_hash_t;
+#else
+# include <limits.h>
+# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+ /* LP64 ABI says uint64_t is unsigned long */
+ typedef unsigned long XXH64_hash_t;
+# else
+ /* the following type must have a width of 64-bit */
+ typedef unsigned long long XXH64_hash_t;
+# endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup xxh64_family XXH64 family
+ * @ingroup public
+ * @{
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ * and offers true 64/128 bit hash results.
It provides a superior level of + * dispersion, and greatly reduces the risks of collisions. + */ + + +/*! + * @brief Calculates the 64-bit hash of @p input using xxHash64. + * + * This function usually runs faster on 64-bit systems, but slower on 32-bit + * systems (see benchmark). + * + * @param input The block of data to be hashed, at least @p length bytes in size. + * @param length The length of @p input, in bytes. + * @param seed The 64-bit seed to alter the hash's output predictably. + * + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return The calculated 64-bit hash. + * + * @see + * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): + * Direct equivalents for the other variants of xxHash. + * @see + * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version. + */ +XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed); + +/******* Streaming *******/ +/*! + * @brief The opaque state struct for the XXH64 streaming API. + * + * @see XXH64_state_s for details. + */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +/*! + * @} + * ************************************************************************ + * @defgroup xxh3_family XXH3 family + * @ingroup public + * @{ + * + * XXH3 is a more recent hash algorithm featuring: + * - Improved speed for both small and large inputs + * - True 64-bit and 128-bit outputs + * - SIMD acceleration + * - Improved 32-bit viability + * + * Speed analysis methodology is explained here: + * + * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html + * + * Compared to XXH64, expect XXH3 to run approximately + * ~2x faster on large inputs and >3x faster on small ones, + * exact differences vary depending on platform. + * + * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, + * but does not require it. + * Any 32-bit and 64-bit targets that can run XXH32 smoothly + * can run XXH3 at competitive speeds, even without vector support. + * Further details are explained in the implementation. + * + * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8, + * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro. + * + * XXH3 implementation is portable: + * it has a generic C90 formulation that can be compiled on any platform, + * all implementations generage exactly the same hash value on all platforms. + * Starting from v0.8.0, it's also labelled "stable", meaning that + * any future version will also generate the same hash value. 
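Tying together the XXH64 one-shot call and the canonical representation declared above, a small editorial sketch (the buffer and zero seed are illustrative):

#include <stdio.h>
#include "xxhash.h"

void store_hash64(const void* data, size_t len)
{
    XXH64_hash_t h = XXH64(data, len, 0);
    XXH64_canonical_t c;
    size_t i;
    XXH64_canonicalFromHash(&c, h);          /* fixed big-endian byte order */
    for (i = 0; i < sizeof(c.digest); i++) { /* safe to write to disk or wire */
        printf("%02x", c.digest[i]);
    }
    printf("\n");
}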
+ * + * XXH3 offers 2 variants, _64bits and _128bits. + * + * When only 64 bits are needed, prefer invoking the _64bits variant, as it + * reduces the amount of mixing, resulting in faster speed on small inputs. + * It's also generally simpler to manipulate a scalar return type than a struct. + * + * The API supports one-shot hashing, streaming mode, and custom secrets. + */ + +/*-********************************************************************** +* XXH3 64-bit variant +************************************************************************/ + +/* XXH3_64bits(): + * default 64-bit variant, using default secret and default seed of 0. + * It's the fastest variant. */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len); + +/* + * XXH3_64bits_withSeed(): + * This variant generates a custom secret on the fly + * based on default secret altered using the `seed` value. + * While this operation is decently fast, note that it's not completely free. + * Note: seed==0 produces the same results as XXH3_64bits(). + */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); + +/*! + * The bare minimum size for a custom secret. + * + * @see + * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(), + * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret(). + */ +#define XXH3_SECRET_SIZE_MIN 136 + +/* + * XXH3_64bits_withSecret(): + * It's possible to provide any blob of bytes as a "secret" to generate the hash. + * This makes it more difficult for an external actor to prepare an intentional collision. + * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN). + * However, the quality of produced hash values depends on secret's entropy. + * Technically, the secret must look like a bunch of random bytes. + * Avoid "trivial" or structured data such as repeated sequences or a text document. + * Whenever unsure about the "randomness" of the blob of bytes, + * consider relabelling it as a "custom seed" instead, + * and employ "XXH3_generateSecret()" (see below) + * to generate a high entropy secret derived from the custom seed. + */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); + + +/******* Streaming *******/ +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. + * For better performance, prefer one-shot functions whenever applicable. + */ + +/*! + * @brief The state struct for the XXH3 streaming API. + * + * @see XXH3_state_s for details. + */ +typedef struct XXH3_state_s XXH3_state_t; +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); +XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state); + +/* + * XXH3_64bits_reset(): + * Initialize with default parameters. + * digest will be equivalent to `XXH3_64bits()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr); +/* + * XXH3_64bits_reset_withSeed(): + * Generate a custom secret from `seed`, and store it into `statePtr`. + * digest will be equivalent to `XXH3_64bits_withSeed()`. + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed); +/* + * XXH3_64bits_reset_withSecret(): + * `secret` is referenced, it _must outlive_ the hash streaming session. 
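A short sketch of the XXH3 64-bit one-shot entry points declared above; the data pointer, length, and seed are illustrative:

#include "xxhash.h"

XXH64_hash_t fast_hash(const void* data, size_t len)
{
    return XXH3_64bits(data, len);           /* default secret, seed of 0 */
}

XXH64_hash_t seeded_hash(const void* data, size_t len, XXH64_hash_t seed)
{
    return XXH3_64bits_withSeed(data, len, seed);
}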
+ * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`, + * and the quality of produced hash values depends on secret's entropy + * (secret's content should look like a bunch of random bytes). + * When in doubt about the randomness of a candidate `secret`, + * consider employing `XXH3_generateSecret()` instead (see below). + */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize); + +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr); + +/* note : canonical representation of XXH3 is the same as XXH64 + * since they both produce XXH64_hash_t values */ + + +/*-********************************************************************** +* XXH3 128-bit variant +************************************************************************/ + +/*! + * @brief The return value from 128-bit hashes. + * + * Stored in little endian order, although the fields themselves are in native + * endianness. + */ +typedef struct { + XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ + XXH64_hash_t high64; /*!< `value >> 64` */ +} XXH128_hash_t; + +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); + +/******* Streaming *******/ +/* + * Streaming requires state maintenance. + * This operation costs memory and CPU. + * As a consequence, streaming is slower than one-shot hashing. + * For better performance, prefer one-shot functions whenever applicable. + * + * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits(). + * Use already declared XXH3_createState() and XXH3_freeState(). + * + * All reset and streaming functions have same meaning as their 64-bit counterpart. + */ + +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize); + +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr); + +/* Following helper functions make it possible to compare XXH128_hast_t values. + * Since XXH128_hash_t is a structure, this capability is not offered by the language. + * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */ + +/*! + * XXH128_isEqual(): + * Return: 1 if `h1` and `h2` are equal, 0 if they are not. + */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); + +/*! + * XXH128_cmp(): + * + * This comparator is compatible with stdlib's `qsort()`/`bsearch()`. 
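For the 128-bit helpers above, a sketch of equality testing and qsort-based ordering; the buffers and array are illustrative:

#include <stdlib.h>
#include "xxhash.h"

int same_content(const void* a, size_t la, const void* b, size_t lb)
{
    return XXH128_isEqual(XXH3_128bits(a, la), XXH3_128bits(b, lb));
}

void sort_hashes(XXH128_hash_t* hashes, size_t count)
{
    /* XXH128_cmp already has the (const void*, const void*) signature
     * that qsort() and bsearch() expect. */
    qsort(hashes, count, sizeof(XXH128_hash_t), XXH128_cmp);
}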
+ * + * return: >0 if *h128_1 > *h128_2 + * =0 if *h128_1 == *h128_2 + * <0 if *h128_1 < *h128_2 + */ +XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2); + + +/******* Canonical representation *******/ +typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t; +XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash); +XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src); + + +#endif /* XXH_NO_LONG_LONG */ + +/*! + * @} + */ +#endif /* XXHASH_H_5627135585666179 */ + + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* **************************************************************************** + * This section contains declarations which are not guaranteed to remain stable. + * They may change in future versions, becoming incompatible with a different + * version of the library. + * These declarations should only be used with static linking. + * Never use them in association with dynamic linking! + ***************************************************************************** */ + +/* + * These definitions are only present to allow static allocation + * of XXH states, on stack or in a struct, for example. + * Never **ever** access their members directly. + */ + +/*! + * @internal + * @brief Structure for XXH32 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is + * an opaque type. This allows fields to safely be changed. + * + * Typedef'd to @ref XXH32_state_t. + * Do not access the members of this struct directly. + * @see XXH64_state_s, XXH3_state_s + */ +struct XXH32_state_s { + XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ + XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ + XXH32_hash_t v1; /*!< First accumulator lane */ + XXH32_hash_t v2; /*!< Second accumulator lane */ + XXH32_hash_t v3; /*!< Third accumulator lane */ + XXH32_hash_t v4; /*!< Fourth accumulator lane */ + XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */ + XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */ + XXH32_hash_t reserved; /*!< Reserved field. Do not read or write to it, it may be removed. */ +}; /* typedef'd to XXH32_state_t */ + + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + +/*! + * @internal + * @brief Structure for XXH64 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is + * an opaque type. This allows fields to safely be changed. + * + * Typedef'd to @ref XXH64_state_t. + * Do not access the members of this struct directly. + * @see XXH32_state_s, XXH3_state_s + */ +struct XXH64_state_s { + XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ + XXH64_hash_t v1; /*!< First accumulator lane */ + XXH64_hash_t v2; /*!< Second accumulator lane */ + XXH64_hash_t v3; /*!< Third accumulator lane */ + XXH64_hash_t v4; /*!< Fourth accumulator lane */ + XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */ + XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */ + XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ + XXH64_hash_t reserved64; /*!< Reserved field. 
Do not read or write to it, it may be removed. */
+}; /* typedef'd to XXH64_state_t */
+
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */
+# include <stdalign.h>
+# define XXH_ALIGN(n) alignas(n)
+#elif defined(__GNUC__)
+# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+# define XXH_ALIGN(n) __declspec(align(n))
+#else
+# define XXH_ALIGN(n) /* disabled */
+#endif
+
+/* Old GCC versions only accept the attribute after the type in structures. */
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
+ && defined(__GNUC__)
+# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+#else
+# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+#endif
+
+/*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64b_update(), XXH3_128b_update().
+ */
+#define XXH3_INTERNALBUFFER_SIZE 256
+
+/*!
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * @note **This structure has a strict alignment requirement of 64 bytes.** Do
+ * not allocate this with `malloc()` or `new`, it will not be sufficiently
+ * aligned. Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack
+ * allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Do not access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+ XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+ /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
+ XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+ /*!< Used to store a custom secret generated from a seed. */
+ XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+ /*!< The internal buffer. @see XXH32_state_s::mem32 */
+ XXH32_hash_t bufferedSize;
+ /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+ XXH32_hash_t reserved32;
+ /*!< Reserved field. Needed for padding on 64-bit. */
+ size_t nbStripesSoFar;
+ /*!< Number of stripes processed. */
+ XXH64_hash_t totalLen;
+ /*!< Total length hashed. 64-bit even on 32-bit targets. */
+ size_t nbStripesPerBlock;
+ /*!< Number of stripes per block. */
+ size_t secretLimit;
+ /*!< Size of @ref customSecret or @ref extSecret */
+ XXH64_hash_t seed;
+ /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+ XXH64_hash_t reserved64;
+ /*!< Reserved field. */
+ const unsigned char* extSecret;
+ /*!< Reference to an external secret for the _withSecret variants, NULL
+ * for other variants. */
+ /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+#undef XXH_ALIGN_MEMBER
+
+/*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ * + * When the @ref XXH3_state_t structure is merely emplaced on stack, + * it should be initialized with XXH3_INITSTATE() or a memset() + * in case its first reset uses XXH3_NNbits_reset_withSeed(). + * This init can be omitted if the first reset uses default or _withSecret mode. + * This operation isn't necessary when the state is created with XXH3_createState(). + * Note that this doesn't prepare the state for a streaming operation, + * it's still necessary to use XXH3_NNbits_reset*() afterwards. + */ +#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; } + + +/* === Experimental API === */ +/* Symbols defined below must be considered tied to a specific library version. */ + +/* + * XXH3_generateSecret(): + * + * Derive a high-entropy secret from any user-defined content, named customSeed. + * The generated secret can be used in combination with `*_withSecret()` functions. + * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed, + * as it becomes much more difficult for an external actor to guess how to impact the calculation logic. + * + * The function accepts as input a custom seed of any length and any content, + * and derives from it a high-entropy secret of length XXH3_SECRET_DEFAULT_SIZE + * into an already allocated buffer secretBuffer. + * The generated secret is _always_ XXH_SECRET_DEFAULT_SIZE bytes long. + * + * The generated secret can then be used with any `*_withSecret()` variant. + * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`, + * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()` + * are part of this list. They all accept a `secret` parameter + * which must be very long for implementation reasons (>= XXH3_SECRET_SIZE_MIN) + * _and_ feature very high entropy (consist of random-looking bytes). + * These conditions can be a high bar to meet, so + * this function can be used to generate a secret of proper quality. + * + * customSeed can be anything. It can have any size, even small ones, + * and its content can be anything, even stupidly "low entropy" source such as a bunch of zeroes. + * The resulting `secret` will nonetheless provide all expected qualities. + * + * Supplying NULL as the customSeed copies the default secret into `secretBuffer`. + * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior. + */ +XXH_PUBLIC_API void XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize); + + +/* simple short-cut to pre-selected XXH3_128bits variant */ +XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed); + + +#endif /* XXH_NO_LONG_LONG */ +#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) +# define XXH_IMPLEMENTATION +#endif + +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ + + +/* ======================================================================== */ +/* ======================================================================== */ +/* ======================================================================== */ + + +/*-********************************************************************** + * xxHash implementation + *-********************************************************************** + * xxHash's implementation used to be hosted inside xxhash.c. + * + * However, inlining requires implementation to be visible to the compiler, + * hence be included alongside the header. 
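Combining the two facilities documented above, XXH3_INITSTATE() for a stack-allocated state and XXH3_generateSecret() for deriving a usable secret, here is an editorial sketch. The custom seed string and input are illustrative; the struct definition is visible because this copy sets XXH_STATIC_LINKING_ONLY:

#include <string.h>
#include "xxhash.h"

XXH64_hash_t seeded_stream(const void* data, size_t len, XXH64_hash_t seed)
{
    XXH3_state_t state;              /* stack allocation, not malloc() */
    XXH3_INITSTATE(&state);          /* required before a _withSeed reset */
    XXH3_64bits_reset_withSeed(&state, seed);
    XXH3_64bits_update(&state, data, len);
    return XXH3_64bits_digest(&state);
}

XXH64_hash_t secret_hash(const void* data, size_t len)
{
    unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
    const char customSeed[] = "any bytes, even low entropy";
    XXH3_generateSecret(secret, customSeed, sizeof(customSeed) - 1);
    return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
}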
+ * Previously, implementation was hosted inside xxhash.c, + * which was then #included when inlining was activated. + * This construction created issues with a few build and install systems, + * as it required xxhash.c to be stored in /include directory. + * + * xxHash implementation is now directly integrated within xxhash.h. + * As a consequence, xxhash.c is no longer needed in /include. + * + * xxhash.c is still available and is still useful. + * In a "normal" setup, when xxhash is not inlined, + * xxhash.h only exposes the prototypes and public symbols, + * while xxhash.c can be built into an object file xxhash.o + * which can then be linked into the final binary. + ************************************************************************/ + +#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ + || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) +# define XXH_IMPLEM_13a8737387 + +/* ************************************* +* Tuning parameters +***************************************/ + +/*! + * @defgroup tuning Tuning parameters + * @{ + * + * Various macros to control xxHash's behavior. + */ +#ifdef XXH_DOXYGEN +/*! + * @brief Define this to disable 64-bit code. + * + * Useful if only using the @ref xxh32_family and you have a strict C90 compiler. + */ +# define XXH_NO_LONG_LONG +# undef XXH_NO_LONG_LONG /* don't actually */ +/*! + * @brief Controls how unaligned memory is accessed. + * + * By default, access to unaligned memory is controlled by `memcpy()`, which is + * safe and portable. + * + * Unfortunately, on some target/compiler combinations, the generated assembly + * is sub-optimal. + * + * The below switch allow selection of a different access method + * in the search for improved performance. + * + * @par Possible options: + * + * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy` + * @par + * Use `memcpy()`. Safe and portable. Note that most modern compilers will + * eliminate the function call and treat it as an unaligned access. + * + * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))` + * @par + * Depends on compiler extensions and is therefore not portable. + * This method is safe _if_ your compiler supports it, + * and *generally* as fast or faster than `memcpy`. + * + * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast + * @par + * Casts directly and dereferences. This method doesn't depend on the + * compiler, but it violates the C standard as it directly dereferences an + * unaligned pointer. It can generate buggy code on targets which do not + * support unaligned memory accesses, but in some circumstances, it's the + * only known way to get the most performance. + * + * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift + * @par + * Also portable. This can generate the best code on old compilers which don't + * inline small `memcpy()` calls, and it might also be faster on big-endian + * systems which lack a native byteswap instruction. However, some compilers + * will emit literal byteshifts even if the target supports unaligned access. + * . + * + * @warning + * Methods 1 and 2 rely on implementation-defined behavior. Use these with + * care, as what works on one compiler/platform/optimization level may cause + * another to read garbage data or even crash. + * + * See https://stackoverflow.com/a/32095106/646947 for details. + * + * Prefer these methods in priority order (0 > 3 > 1 > 2) + */ +# define XXH_FORCE_MEMORY_ACCESS 0 +/*! + * @def XXH_ACCEPT_NULL_INPUT_POINTER + * @brief Whether to add explicit `NULL` checks. 
+ * + * If the input pointer is `NULL` and the length is non-zero, xxHash's default + * behavior is to dereference it, triggering a segfault. + * + * When this macro is enabled, xxHash actively checks the input for a null pointer. + * If it is, the result for null input pointers is the same as a zero-length input. + */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +/*! + * @def XXH_FORCE_ALIGN_CHECK + * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32() + * and XXH64() only). + * + * This is an important performance trick for architectures without decent + * unaligned memory access performance. + * + * It checks for input alignment, and when conditions are met, uses a "fast + * path" employing direct 32-bit/64-bit reads, resulting in _dramatically + * faster_ read speed. + * + * The check costs one initial branch per hash, which is generally negligible, + * but not zero. + * + * Moreover, it's not useful to generate an additional code path if memory + * access uses the same instruction for both aligned and unaligned + * addresses (e.g. x86 and aarch64). + * + * In these cases, the alignment check can be removed by setting this macro to 0. + * Then the code will always use unaligned memory access. + * Align check is automatically disabled on x86, x64 & arm64, + * which are platforms known to offer good unaligned memory accesses performance. + * + * This option does not affect XXH3 (only XXH32 and XXH64). + */ +# define XXH_FORCE_ALIGN_CHECK 0 + +/*! + * @def XXH_NO_INLINE_HINTS + * @brief When non-zero, sets all functions to `static`. + * + * By default, xxHash tries to force the compiler to inline almost all internal + * functions. + * + * This can usually improve performance due to reduced jumping and improved + * constant folding, but significantly increases the size of the binary which + * might not be favorable. + * + * Additionally, sometimes the forced inlining can be detrimental to performance, + * depending on the architecture. + * + * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the + * compiler full control on whether to inline or not. + * + * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using + * -fno-inline with GCC or Clang, this will automatically be defined. + */ +# define XXH_NO_INLINE_HINTS 0 + +/*! + * @def XXH_REROLL + * @brief Whether to reroll `XXH32_finalize` and `XXH64_finalize`. + * + * For performance, `XXH32_finalize` and `XXH64_finalize` use an unrolled loop + * in the form of a switch statement. + * + * This is not always desirable, as it generates larger code, and depending on + * the architecture, may even be slower + * + * This is automatically defined with `-Os`/`-Oz` on GCC and Clang. + */ +# define XXH_REROLL 0 + +/*! + * @internal + * @brief Redefines old internal names. + * + * For compatibility with code that uses xxHash's internals before the names + * were changed to improve namespacing. There is no other reason to use this. + */ +# define XXH_OLD_NAMES +# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ +#endif /* XXH_DOXYGEN */ +/*! 
+ * @}
+ */
+
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+ /* prefer __packed__ structures (method 1) for gcc on armv7 and armv8 */
+# if !defined(__clang__) && ( \
+ (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+ (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)) )
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \
+ || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+#ifndef XXH_NO_INLINE_HINTS
+# if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
+ || defined(__NO_INLINE__) /* -O0, -fno-inline */
+# define XXH_NO_INLINE_HINTS 1
+# else
+# define XXH_NO_INLINE_HINTS 0
+# endif
+#endif
+
+#ifndef XXH_REROLL
+# if defined(__OPTIMIZE_SIZE__)
+# define XXH_REROLL 1
+# else
+# define XXH_REROLL 0
+# endif
+#endif
+
+/*!
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+ return memcpy(dest,src,size);
+}
+
+#include <limits.h> /* ULLONG_MAX */
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS /* disable inlining hints */
+# if defined(__GNUC__)
+# define XXH_FORCE_INLINE static __attribute__((unused))
+# else
+# define XXH_FORCE_INLINE static
+# endif
+# define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(_MSC_VER) /* Visual Studio */
+# define XXH_FORCE_INLINE static __forceinline
+# define XXH_NO_INLINE static __declspec(noinline)
+#elif defined(__GNUC__)
+# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+# define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
+# define XXH_FORCE_INLINE static inline
+# define XXH_NO_INLINE static
+#else
+# define XXH_FORCE_INLINE static
+# define XXH_NO_INLINE static
+#endif
+
+
+
+/* *************************************
+* Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
+ */
+#ifndef XXH_DEBUGLEVEL
+# ifdef DEBUGLEVEL /* backwards compat */
+# define XXH_DEBUGLEVEL DEBUGLEVEL
+# else
+# define XXH_DEBUGLEVEL 0
+# endif
+#endif
+
+#if (XXH_DEBUGLEVEL>=1)
+# include <assert.h> /* note: can still be disabled with NDEBUG */
+# define XXH_ASSERT(c) assert(c)
+#else
+# define XXH_ASSERT(c) ((void)0)
+#endif
+
+/* note: use after variable declarations */
+#define XXH_STATIC_ASSERT(c) do { enum { XXH_sa = 1/(int)(!!(c)) }; } while (0)
+
+/*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
+ */
+#ifdef __GNUC__
+# define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+#else
+# define XXH_COMPILER_GUARD(var) ((void)0)
+#endif
+
+/* *************************************
+* Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t xxh_u8;
+#else
+ typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+#ifdef XXH_OLD_NAMES
+# define BYTE xxh_u8
+# define U8 xxh_u8
+# define U32 xxh_u32
+#endif
+
+/* *** Memory access *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit big endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
+ * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
+ * always @ref XXH_alignment::XXH_unaligned.
+ *
+ * @param ptr The pointer to read from.
+ * @param align Whether @p ptr is aligned.
+ * @pre
+ * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
+ * aligned.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
+/*
+ * Manual byteshift. Best for old compilers which don't inline memcpy.
+ * We actually directly use XXH_readLE32 and XXH_readBE32.
+ */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* + * Force direct memory access. Only works on CPU which support unaligned memory + * access in hardware. + */ +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __pack instructions are safer but compiler specific, hence potentially + * problematic for some compilers. + * + * Currently only defined for GCC and ICC. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +#endif +static xxh_u32 XXH_read32(const void* ptr) +{ + typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign; + return ((const xxh_unalign*)ptr)->u32; +} + +#else + +/* + * Portable and safe solution. Generally efficient. + * see: https://stackoverflow.com/a/32095106/646947 + */ +static xxh_u32 XXH_read32(const void* memPtr) +{ + xxh_u32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* *** Endianness *** */ +/*! + * @ingroup tuning + * @def XXH_CPU_LITTLE_ENDIAN + * @brief Whether the target is little endian. + * + * Defined to 1 if the target is little endian, or 0 if it is big endian. + * It can be defined externally, for example on the compiler command line. + * + * If it is not defined, a runtime check (which is usually constant folded) + * is used instead. + * + * @note + * This is not necessarily defined to an integer constant. + * + * @see XXH_isLittleEndian() for the runtime check. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +/* + * Try to detect endianness automatically, to avoid the nonstandard behavior + * in `XXH_isLittleEndian()` + */ +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 0 +# else +/*! + * @internal + * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. + * + * Most compilers will constant fold this. + */ +static int XXH_isLittleEndian(void) +{ + /* + * Portable and well-defined behavior. + * Don't use static: it is detrimental to performance. + */ + const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +# endif +#endif + + + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifdef __has_builtin +# define XXH_HAS_BUILTIN(x) __has_builtin(x) +#else +# define XXH_HAS_BUILTIN(x) 0 +#endif + +/*! + * @internal + * @def XXH_rotl32(x,r) + * @brief 32-bit rotate left. + * + * @param x The 32-bit integer to be rotated. + * @param r The number of bits to rotate. + * @pre + * @p r > 0 && @p r < 32 + * @note + * @p x and @p r may be evaluated multiple times. + * @return The rotated result. 
+ */ +#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ + && XXH_HAS_BUILTIN(__builtin_rotateleft64) +# define XXH_rotl32 __builtin_rotateleft32 +# define XXH_rotl64 __builtin_rotateleft64 +/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ +#elif defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + +/*! + * @internal + * @fn xxh_u32 XXH_swap32(xxh_u32 x) + * @brief A 32-bit byteswap. + * + * @param x The 32-bit integer to byteswap. + * @return @p x, byteswapped. + */ +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static xxh_u32 XXH_swap32 (xxh_u32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* *************************** +* Memory reads +*****************************/ + +/*! + * @internal + * @brief Enum to indicate whether a pointer is aligned. + */ +typedef enum { + XXH_aligned, /*!< Aligned */ + XXH_unaligned /*!< Possibly unaligned */ +} XXH_alignment; + +/* + * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. + * + * This is ideal for older compilers which don't inline memcpy. + */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u32)bytePtr[1] << 8) + | ((xxh_u32)bytePtr[2] << 16) + | ((xxh_u32)bytePtr[3] << 24); +} + +XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[3] + | ((xxh_u32)bytePtr[2] << 8) + | ((xxh_u32)bytePtr[1] << 16) + | ((xxh_u32)bytePtr[0] << 24); +} + +#else +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +} + +static xxh_u32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u32 +XXH_readLE32_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) { + return XXH_readLE32(ptr); + } else { + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +/*! @ingroup public */ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +/*! 
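+ * (An illustrative sketch follows; the XXH32 implementation group
+ *  documentation resumes immediately after it.)
+ */
+
+/*
+ * Editor's note: the sketch below is NOT part of upstream xxHash; it only
+ * illustrates the contract of the read helpers defined above. XXH_readLE32()
+ * is a defined little-endian load on every host, XXH_readBE32() is its
+ * byte-swapped counterpart, and XXH_swap32() converts between the two.
+ * The function name XXH_example_readers_agree is hypothetical.
+ */
+XXH_FORCE_INLINE int XXH_example_readers_agree(void)
+{
+    const xxh_u8 bytes[4] = { 0x01, 0x02, 0x03, 0x04 };
+    /* A little-endian load yields 0x04030201 regardless of host endianness. */
+    return XXH_readLE32(bytes) == 0x04030201U
+        && XXH_readBE32(bytes) == 0x01020304U
+        && XXH_swap32(XXH_readLE32(bytes)) == XXH_readBE32(bytes);
+}
+
+/*!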
+ * @} + * @defgroup xxh32_impl XXH32 implementation + * @ingroup impl + * @{ + */ + /* #define instead of static const, to be used as initializers */ +#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ +#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ +#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ +#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ +#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ + +#ifdef XXH_OLD_NAMES +# define PRIME32_1 XXH_PRIME32_1 +# define PRIME32_2 XXH_PRIME32_2 +# define PRIME32_3 XXH_PRIME32_3 +# define PRIME32_4 XXH_PRIME32_4 +# define PRIME32_5 XXH_PRIME32_5 +#endif + +/*! + * @internal + * @brief Normal stripe processing routine. + * + * This shuffles the bits so that any bit from @p input impacts several bits in + * @p acc. + * + * @param acc The accumulator lane. + * @param input The stripe of input to mix. + * @return The mixed accumulator lane. + */ +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) +{ + acc += input * XXH_PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= XXH_PRIME32_1; +#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) + /* + * UGLY HACK: + * A compiler fence is the only thing that prevents GCC and Clang from + * autovectorizing the XXH32 loop (pragmas and attributes don't work for some + * reason) without globally disabling SSE4.1. + * + * The reason we want to avoid vectorization is because despite working on + * 4 integers at a time, there are multiple factors slowing XXH32 down on + * SSE4: + * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on + * newer chips!) making it slightly slower to multiply four integers at + * once compared to four integers independently. Even when pmulld was + * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE + * just to multiply unless doing a long operation. + * + * - Four instructions are required to rotate, + * movqda tmp, v // not required with VEX encoding + * pslld tmp, 13 // tmp <<= 13 + * psrld v, 19 // x >>= 19 + * por v, tmp // x |= tmp + * compared to one for scalar: + * roll v, 13 // reliably fast across the board + * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason + * + * - Instruction level parallelism is actually more beneficial here because + * the SIMD actually serializes this operation: While v1 is rotating, v2 + * can load data, while v3 can multiply. SSE forces them to operate + * together. + * + * This is also enabled on AArch64, as Clang autovectorizes it incorrectly + * and it is pointless writing a NEON implementation that is basically the + * same speed as scalar for XXH32. + */ + XXH_COMPILER_GUARD(acc); +#endif + return acc; +} + +/*! + * @internal + * @brief Mixes all bits to finalize the hash. + * + * The final mix ensures that all input bits have a chance to impact any bit in + * the output digest, resulting in an unbiased distribution. + * + * @param h32 The hash to avalanche. + * @return The avalanched hash. + */ +static xxh_u32 XXH32_avalanche(xxh_u32 h32) +{ + h32 ^= h32 >> 15; + h32 *= XXH_PRIME32_2; + h32 ^= h32 >> 13; + h32 *= XXH_PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, align) + +/*! + * @internal + * @brief Processes the last 0-15 bytes of @p ptr. + * + * There may be up to 15 bytes remaining to consume from the input. 
+ * This final stage will digest them to ensure that all input bytes are present + * in the final mix. + * + * @param h32 The hash to finalize. + * @param ptr The pointer to the remaining input. + * @param len The remaining length, modulo 16. + * @param align Whether @p ptr is aligned. + * @return The finalized hash. + */ +static xxh_u32 +XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define XXH_PROCESS1 do { \ + h32 += (*ptr++) * XXH_PRIME32_5; \ + h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; \ +} while (0) + +#define XXH_PROCESS4 do { \ + h32 += XXH_get32bits(ptr) * XXH_PRIME32_3; \ + ptr += 4; \ + h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; \ +} while (0) + + /* Compact rerolled version */ + if (XXH_REROLL) { + len &= 15; + while (len >= 4) { + XXH_PROCESS4; + len -= 4; + } + while (len > 0) { + XXH_PROCESS1; + --len; + } + return XXH32_avalanche(h32); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 8: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 4: XXH_PROCESS4; + return XXH32_avalanche(h32); + + case 13: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 9: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 5: XXH_PROCESS4; + XXH_PROCESS1; + return XXH32_avalanche(h32); + + case 14: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 10: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 6: XXH_PROCESS4; + XXH_PROCESS1; + XXH_PROCESS1; + return XXH32_avalanche(h32); + + case 15: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 11: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 7: XXH_PROCESS4; + FALLTHROUGH_INTENDED; + case 3: XXH_PROCESS1; + FALLTHROUGH_INTENDED; + case 2: XXH_PROCESS1; + FALLTHROUGH_INTENDED; + case 1: XXH_PROCESS1; + FALLTHROUGH_INTENDED; + case 0: return XXH32_avalanche(h32); + } + XXH_ASSERT(0); + return h32; /* reaching this point is deemed impossible */ + } +} + +#ifdef XXH_OLD_NAMES +# define PROCESS1 XXH_PROCESS1 +# define PROCESS4 XXH_PROCESS4 +#else +# undef XXH_PROCESS1 +# undef XXH_PROCESS4 +#endif + +/*! + * @internal + * @brief The implementation for @ref XXH32(). + * + * @param input, len, seed Directly passed from @ref XXH32(). + * @param align Whether @p input is aligned. + * @return The calculated hash. + */ +XXH_FORCE_INLINE xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input ? input + len : NULL; + xxh_u32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)16; + } +#endif + + if (len>=16) { + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + xxh_u32 v2 = seed + XXH_PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - XXH_PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + XXH_PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + +/*! 
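+ * (An illustrative usage sketch follows; the public XXH32() definition and
+ *  its @ingroup tag resume immediately after it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. It shows
+ * the intended use of the one-shot entry point defined just below (and
+ * declared earlier in this header). The name XXH_example_hash_cstring is
+ * hypothetical.
+ */
+XXH_FORCE_INLINE XXH32_hash_t XXH_example_hash_cstring(const char* str, XXH32_hash_t seed)
+{
+    /* strlen() excludes the terminating NUL, so "abc" hashes only 3 bytes;
+     * any 32-bit seed selects a different but equally valid hash. */
+    return XXH32(str, strlen(str), seed);
+}
+
+/*!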
@ingroup xxh32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); +#else + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ +/*! + * @ingroup xxh32_family + */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +/*! @ingroup xxh32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup xxh32_family */ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup xxh32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + state.v2 = seed + XXH_PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - XXH_PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +/*! @ingroup xxh32_family */ +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + /* uintptr_t casts avoid UB or compiler warning on out-of-bounds + * pointer arithmetic */ + if ((uintptr_t)p <= (uintptr_t)bEnd - 16) { + const uintptr_t limit = (uintptr_t)bEnd - 16; + xxh_u32 v1 = state->v1; + xxh_u32 v2 = state->v2; + xxh_u32 v3 = state->v3; + xxh_u32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4; + } while ((uintptr_t)p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + 
XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +/*! @ingroup xxh32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + XXH_PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} + + +/******* Canonical representation *******/ + +/*! + * @ingroup xxh32_family + * The default return values from XXH functions are unsigned 32 and 64 bit + * integers. + * + * The canonical representation uses big endian convention, the same convention + * as human-readable numbers (large digits first). + * + * This way, hash values can be written into a file or buffer, remaining + * comparable across different systems. + * + * The following functions allow transformation of hash values to and from their + * canonical format. + */ +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} +/*! @ingroup xxh32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ +/*! + * @} + * @ingroup impl + * @{ + */ +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + +#ifdef XXH_OLD_NAMES +# define U64 xxh_u64 +#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE64 and XXH_readBE64. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) +{ + return *(const xxh_u64*) memPtr; +} + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __pack instructions are safer, but compiler specific, hence potentially + * problematic for some compilers. + * + * Currently only defined for GCC and ICC. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +#endif +static xxh_u64 XXH_read64(const void* ptr) +{ + typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64; + return ((const xxh_unalign64*)ptr)->u64; +} + +#else + +/* + * Portable and safe solution. Generally efficient. 
+ * see: https://stackoverflow.com/a/32095106/646947 + */ +static xxh_u64 XXH_read64(const void* memPtr) +{ + xxh_u64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static xxh_u64 XXH_swap64(xxh_u64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u64)bytePtr[1] << 8) + | ((xxh_u64)bytePtr[2] << 16) + | ((xxh_u64)bytePtr[3] << 24) + | ((xxh_u64)bytePtr[4] << 32) + | ((xxh_u64)bytePtr[5] << 40) + | ((xxh_u64)bytePtr[6] << 48) + | ((xxh_u64)bytePtr[7] << 56); +} + +XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[7] + | ((xxh_u64)bytePtr[6] << 8) + | ((xxh_u64)bytePtr[5] << 16) + | ((xxh_u64)bytePtr[4] << 24) + | ((xxh_u64)bytePtr[3] << 32) + | ((xxh_u64)bytePtr[2] << 40) + | ((xxh_u64)bytePtr[1] << 48) + | ((xxh_u64)bytePtr[0] << 56); +} + +#else +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); +} + +static xxh_u64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u64 +XXH_readLE64_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) + return XXH_readLE64(ptr); + else + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); +} + + +/******* xxh64 *******/ +/*! 
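+ * (An illustrative sketch follows; the XXH64 implementation group
+ *  documentation resumes immediately after it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. It ties
+ * together the XXH32 pieces defined above: feeding the input incrementally
+ * through the streaming state produces the same digest as the one-shot call,
+ * and the canonical (big-endian) form round-trips losslessly.
+ * The name XXH_example_xxh32_consistency is hypothetical.
+ */
+XXH_FORCE_INLINE int XXH_example_xxh32_consistency(const xxh_u8* data, size_t size,
+                                                   XXH32_hash_t seed)
+{
+    XXH32_hash_t const oneshot = XXH32(data, size, seed);
+    XXH32_state_t state;
+    XXH32_canonical_t canon;
+    XXH32_hash_t streamed;
+    size_t i;
+
+    /* Same bytes, same seed, delivered one byte at a time. */
+    XXH32_reset(&state, seed);
+    for (i = 0; i < size; i++) {
+        XXH32_update(&state, data + i, 1);
+    }
+    streamed = XXH32_digest(&state);
+
+    /* Canonical form: a portable big-endian serialization of the hash. */
+    XXH32_canonicalFromHash(&canon, oneshot);
+
+    return (streamed == oneshot)
+        && (XXH32_hashFromCanonical(&canon) == oneshot);
+}
+
+/*!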
+ * @} + * @defgroup xxh64_impl XXH64 implementation + * @ingroup impl + * @{ + */ +/* #define rather that static const, to be used as initializers */ +#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ +#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ +#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ +#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ +#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */ + +#ifdef XXH_OLD_NAMES +# define PRIME64_1 XXH_PRIME64_1 +# define PRIME64_2 XXH_PRIME64_2 +# define PRIME64_3 XXH_PRIME64_3 +# define PRIME64_4 XXH_PRIME64_4 +# define PRIME64_5 XXH_PRIME64_5 +#endif + +static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) +{ + acc += input * XXH_PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= XXH_PRIME64_1; + return acc; +} + +static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; + return acc; +} + +static xxh_u64 XXH64_avalanche(xxh_u64 h64) +{ + h64 ^= h64 >> 33; + h64 *= XXH_PRIME64_2; + h64 ^= h64 >> 29; + h64 *= XXH_PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, align) + +static xxh_u64 +XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ + len &= 31; + while (len >= 8) { + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); + ptr += 8; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; + } + if (len >= 4) { + h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; + ptr += 4; + h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) { + h64 ^= (*ptr++) * XXH_PRIME64_5; + h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; + --len; + } + return XXH64_avalanche(h64); +} + +#ifdef XXH_OLD_NAMES +# define PROCESS1_64 XXH_PROCESS1_64 +# define PROCESS4_64 XXH_PROCESS4_64 +# define PROCESS8_64 XXH_PROCESS8_64 +#else +# undef XXH_PROCESS1_64 +# undef XXH_PROCESS4_64 +# undef XXH_PROCESS8_64 +#endif + +XXH_FORCE_INLINE xxh_u64 +XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) +{ + const xxh_u8* bEnd = input ? input + len : NULL; + xxh_u64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (input==NULL) { + len=0; + bEnd=input=(const xxh_u8*)(size_t)32; + } +#endif + + if (len>=32) { + const xxh_u8* const limit = bEnd - 32; + xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + xxh_u64 v2 = seed + XXH_PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - XXH_PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; + v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; + v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; + v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; + } while (input<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + XXH_PRIME64_5; + } + + h64 += (xxh_u64) len; + + return XXH64_finalize(h64, input, len, align); +} + + +/*! 
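+ * (An illustrative usage sketch follows; the public XXH64() definition and
+ *  its @ingroup tag resume immediately after it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. As the
+ * accumulator setup above shows, the seed enters XXH64 through v1..v4, so
+ * the same bytes hashed under two different seeds give unrelated-looking
+ * results. The type XXH_example_record and the function name
+ * XXH_example_hash_record are hypothetical.
+ */
+typedef struct { xxh_u32 id; xxh_u64 timestamp; } XXH_example_record;
+
+XXH_FORCE_INLINE XXH64_hash_t XXH_example_hash_record(const XXH_example_record* rec,
+                                                      XXH64_hash_t seed)
+{
+    /* Hashing raw object bytes also hashes padding bytes, so zero-initialize
+     * such records (or hash the fields individually) for stable results. */
+    return XXH64(rec, sizeof(*rec), seed);
+}
+
+/*!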
@ingroup xxh64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8*)input, len); + return XXH64_digest(&state); +#else + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + +#endif +} + +/******* Hash Streaming *******/ + +/*! @ingroup xxh64_family*/ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +/*! @ingroup xxh64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup xxh64_family */ +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup xxh64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH64_state_t state; /* use a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + state.v2 = seed + XXH_PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - XXH_PRIME64_1; + /* do not write into reserved64, might be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64)); + return XXH_OK; +} + +/*! @ingroup xxh64_family */ +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH64_state_t* state, const void* input, size_t len) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3)); + p += 32 - state->memsize; + state->memsize = 0; + } + + /* uintptr_t casts avoid UB or compiler warning on out-of-bounds + * pointer arithmetic */ + if ((uintptr_t)p + 32 <= (uintptr_t)bEnd) { + const uintptr_t limit = (uintptr_t)bEnd - 32; + xxh_u64 v1 = state->v1; + xxh_u64 v2 = state->v2; + xxh_u64 v3 = state->v3; + xxh_u64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8; + } while ((uintptr_t)p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +/*! 
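+ * (An illustrative usage sketch follows; the XXH64_digest() definition and
+ *  its @ingroup tag resume immediately after it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. The
+ * streaming functions above buffer partial stripes in state->mem64, so input
+ * may arrive in chunks of any size and the digest equals the one-shot hash
+ * of the concatenated data. The name XXH_example_hash_two_parts is
+ * hypothetical.
+ */
+XXH_FORCE_INLINE XXH64_hash_t
+XXH_example_hash_two_parts(const void* part1, size_t size1,
+                           const void* part2, size_t size2,
+                           XXH64_hash_t seed)
+{
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, part1, size1);
+    XXH64_update(&state, part2, size2);
+    /* Equals XXH64() applied to part1 followed by part2 with the same seed. */
+    return XXH64_digest(&state);
+}
+
+/*!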
@ingroup xxh64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + xxh_u64 const v1 = state->v1; + xxh_u64 const v2 = state->v2; + xxh_u64 const v3 = state->v3; + xxh_u64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + XXH_PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} + + +/******* Canonical representation *******/ + +/*! @ingroup xxh64_family */ +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +/*! @ingroup xxh64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + +#ifndef XXH_NO_XXH3 + +/* ********************************************************************* +* XXH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ +/*! + * @} + * @defgroup xxh3_impl XXH3 implementation + * @ingroup impl + * @{ + */ + +/* === Compiler specifics === */ + +#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +# define XXH_RESTRICT /* disable */ +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +# define XXH_RESTRICT restrict +#else +/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */ +# define XXH_RESTRICT /* disable */ +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ + || defined(__clang__) +# define XXH_likely(x) __builtin_expect(x, 1) +# define XXH_unlikely(x) __builtin_expect(x, 0) +#else +# define XXH_likely(x) (x) +# define XXH_unlikely(x) (x) +#endif + +#if defined(__GNUC__) +# if defined(__AVX2__) +# include +# elif defined(__SSE2__) +# include +# elif defined(__ARM_NEON__) || defined(__ARM_NEON) +# define inline __inline__ /* circumvent a clang bug */ +# include +# undef inline +# endif +#elif defined(_MSC_VER) +# include +#endif + +/* + * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while + * remaining a true 64-bit/128-bit hash function. + * + * This is done by prioritizing a subset of 64-bit operations that can be + * emulated without too many steps on the average 32-bit machine. + * + * For example, these two lines seem similar, and run equally fast on 64-bit: + * + * xxh_u64 x; + * x ^= (x >> 47); // good + * x ^= (x >> 13); // bad + * + * However, to a 32-bit machine, there is a major difference. + * + * x ^= (x >> 47) looks like this: + * + * x.lo ^= (x.hi >> (47 - 32)); + * + * while x ^= (x >> 13) looks like this: + * + * // note: funnel shifts are not usually cheap. + * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13)); + * x.hi ^= (x.hi >> 13); + * + * The first one is significantly faster than the second, simply because the + * shift is larger than 32. 
This means: + * - All the bits we need are in the upper 32 bits, so we can ignore the lower + * 32 bits in the shift. + * - The shift result will always fit in the lower 32 bits, and therefore, + * we can ignore the upper 32 bits in the xor. + * + * Thanks to this optimization, XXH3 only requires these features to be efficient: + * + * - Usable unaligned access + * - A 32-bit or 64-bit ALU + * - If 32-bit, a decent ADC instruction + * - A 32 or 64-bit multiply with a 64-bit result + * - For the 128-bit variant, a decent byteswap helps short inputs. + * + * The first two are already required by XXH32, and almost all 32-bit and 64-bit + * platforms which can run XXH32 can run XXH3 efficiently. + * + * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one + * notable exception. + * + * First of all, Thumb-1 lacks support for the UMULL instruction which + * performs the important long multiply. This means numerous __aeabi_lmul + * calls. + * + * Second of all, the 8 functional registers are just not enough. + * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need + * Lo registers, and this shuffling results in thousands more MOVs than A32. + * + * A32 and T32 don't have this limitation. They can access all 14 registers, + * do a 32->64 multiply with UMULL, and the flexible operand allowing free + * shifts is helpful, too. + * + * Therefore, we do a quick sanity check. + * + * If compiling Thumb-1 for a target which supports ARM instructions, we will + * emit a warning, as it is not a "sane" platform to compile for. + * + * Usually, if this happens, it is because of an accident and you probably need + * to specify -march, as you likely meant to compile for a newer architecture. + * + * Credit: large sections of the vectorial and asm source code paths + * have been contributed by @easyaspi314 + */ +#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) +# warning "XXH3 is highly inefficient without ARM or Thumb-2." +#endif + +/* ========================================== + * Vectorization detection + * ========================================== */ + +#ifdef XXH_DOXYGEN +/*! + * @ingroup tuning + * @brief Overrides the vectorization implementation chosen for XXH3. + * + * Can be defined to 0 to disable SIMD or any of the values mentioned in + * @ref XXH_VECTOR_TYPE. + * + * If this is not defined, it uses predefined macros to determine the best + * implementation. + */ +# define XXH_VECTOR XXH_SCALAR +/*! + * @ingroup tuning + * @brief Possible values for @ref XXH_VECTOR. + * + * Note that these are actually implemented as macros. + * + * If this is not defined, it is detected automatically. + * @ref XXH_X86DISPATCH overrides this. + */ +enum XXH_VECTOR_TYPE /* fake enum */ { + XXH_SCALAR = 0, /*!< Portable scalar version */ + XXH_SSE2 = 1, /*!< + * SSE2 for Pentium 4, Opteron, all x86_64. + * + * @note SSE2 is also guaranteed on Windows 10, macOS, and + * Android x86. + */ + XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ + XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ + XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */ + XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ +}; +/*! + * @ingroup tuning + * @brief Selects the minimum alignment for XXH3's accumulators. + * + * When using SIMD, this should match the alignment reqired for said vector + * type, so, for example, 32 for AVX2. + * + * Default: Auto detected. 
+ */ +# define XXH_ACC_ALIGN 8 +#endif + +/* Actual definition */ +#ifndef XXH_DOXYGEN +# define XXH_SCALAR 0 +# define XXH_SSE2 1 +# define XXH_AVX2 2 +# define XXH_AVX512 3 +# define XXH_NEON 4 +# define XXH_VSX 5 +#endif + +#ifndef XXH_VECTOR /* can be defined on command line */ +# if defined(__AVX512F__) +# define XXH_VECTOR XXH_AVX512 +# elif defined(__AVX2__) +# define XXH_VECTOR XXH_AVX2 +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define XXH_VECTOR XXH_SSE2 +# elif defined(__GNUC__) /* msvc support maybe later */ \ + && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \ + && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) +# define XXH_VECTOR XXH_NEON +# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \ + || (defined(__s390x__) && defined(__VEC__)) \ + && defined(__GNUC__) /* TODO: IBM XL */ +# define XXH_VECTOR XXH_VSX +# else +# define XXH_VECTOR XXH_SCALAR +# endif +#endif + +/* + * Controls the alignment of the accumulator, + * for compatibility with aligned vector loads, which are usually faster. + */ +#ifndef XXH_ACC_ALIGN +# if defined(XXH_X86DISPATCH) +# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ +# elif XXH_VECTOR == XXH_SCALAR /* scalar */ +# define XXH_ACC_ALIGN 8 +# elif XXH_VECTOR == XXH_SSE2 /* sse2 */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX2 /* avx2 */ +# define XXH_ACC_ALIGN 32 +# elif XXH_VECTOR == XXH_NEON /* neon */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_VSX /* vsx */ +# define XXH_ACC_ALIGN 16 +# elif XXH_VECTOR == XXH_AVX512 /* avx512 */ +# define XXH_ACC_ALIGN 64 +# endif +#endif + +#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \ + || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 +# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#else +# define XXH_SEC_ALIGN 8 +#endif + +/* + * UGLY HACK: + * GCC usually generates the best code with -O3 for xxHash. + * + * However, when targeting AVX2, it is overzealous in its unrolling resulting + * in code roughly 3/4 the speed of Clang. + * + * There are other issues, such as GCC splitting _mm256_loadu_si256 into + * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which + * only applies to Sandy and Ivy Bridge... which don't even support AVX2. + * + * That is why when compiling the AVX2 version, it is recommended to use either + * -O2 -mavx2 -march=haswell + * or + * -O2 -mavx2 -mno-avx256-split-unaligned-load + * for decent performance, or to use Clang instead. + * + * Fortunately, we can control the first one with a pragma that forces GCC into + * -O2, but the other one we can't control without "failed to inline always + * inline function due to target mismatch" warnings. + */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ +# pragma GCC push_options +# pragma GCC optimize("-O2") +#endif + + +#if XXH_VECTOR == XXH_NEON +/* + * NEON's setup for vmlal_u32 is a little more complicated than it is on + * SSE2, AVX2, and VSX. + * + * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast. + * + * To do the same operation, the 128-bit 'Q' register needs to be split into + * two 64-bit 'D' registers, performing this operation:: + * + * [ a | b ] + * | '---------. .--------' | + * | x | + * | .---------' '--------. 
| + * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ] + * + * Due to significant changes in aarch64, the fastest method for aarch64 is + * completely different than the fastest method for ARMv7-A. + * + * ARMv7-A treats D registers as unions overlaying Q registers, so modifying + * D11 will modify the high half of Q5. This is similar to how modifying AH + * will only affect bits 8-15 of AX on x86. + * + * VZIP takes two registers, and puts even lanes in one register and odd lanes + * in the other. + * + * On ARMv7-A, this strangely modifies both parameters in place instead of + * taking the usual 3-operand form. + * + * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the + * lower and upper halves of the Q register to end up with the high and low + * halves where we want - all in one instruction. + * + * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] } + * + * Unfortunately we need inline assembly for this: Instructions modifying two + * registers at once is not possible in GCC or Clang's IR, and they have to + * create a copy. + * + * aarch64 requires a different approach. + * + * In order to make it easier to write a decent compiler for aarch64, many + * quirks were removed, such as conditional execution. + * + * NEON was also affected by this. + * + * aarch64 cannot access the high bits of a Q-form register, and writes to a + * D-form register zero the high bits, similar to how writes to W-form scalar + * registers (or DWORD registers on x86_64) work. + * + * The formerly free vget_high intrinsics now require a vext (with a few + * exceptions) + * + * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent + * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one + * operand. + * + * The equivalent of the VZIP.32 on the lower and upper halves would be this + * mess: + * + * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] } + * zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } + * zip2 v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] } + * + * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN): + * + * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32); + * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF); + * + * This is available on ARMv7-A, but is less efficient than a single VZIP.32. + */ + +/*! + * Function-like macro: + * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi) + * { + * outLo = (uint32x2_t)(in & 0xFFFFFFFF); + * outHi = (uint32x2_t)(in >> 32); + * in = UNDEFINED; + * } + */ +# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \ + && defined(__GNUC__) \ + && !defined(__aarch64__) && !defined(__arm64__) +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \ + /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \ + /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \ + __asm__("vzip.32 %e0, %f0" : "+w" (in)); \ + (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \ + (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ + } while (0) +# else +# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + (outLo) = vmovn_u64 (in); \ + (outHi) = vshrn_n_u64 ((in), 32); \ + } while (0) +# endif +#endif /* XXH_VECTOR == XXH_NEON */ + +/* + * VSX and Z Vector helpers. + * + * This is very messy, and any pull requests to clean this up are welcome. 
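+ * (A scalar reference sketch for the NEON lane split described above
+ *  follows; the VSX notes continue after it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. It is the
+ * plain scalar equivalent of XXH_SPLIT_IN_PLACE(): each 64-bit lane is
+ * separated into its low and high 32-bit halves. The name
+ * XXH_example_split_lane is hypothetical.
+ */
+XXH_FORCE_INLINE void XXH_example_split_lane(xxh_u64 lane,
+                                             xxh_u32* outLo, xxh_u32* outHi)
+{
+    *outLo = (xxh_u32)(lane & 0xFFFFFFFFU);  /* like vmovn_u64 / XTN        */
+    *outHi = (xxh_u32)(lane >> 32);          /* like vshrn_n_u64(lane, 32)  */
+}
+
+/*
+ * (VSX and Z Vector notes, continued:)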
+ * + * There are a lot of problems with supporting VSX and s390x, due to + * inconsistent intrinsics, spotty coverage, and multiple endiannesses. + */ +#if XXH_VECTOR == XXH_VSX +# if defined(__s390x__) +# include +# else +/* gcc's altivec.h can have the unwanted consequence to unconditionally + * #define bool, vector, and pixel keywords, + * with bad consequences for programs already using these keywords for other purposes. + * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined. + * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler, + * but it seems that, in some cases, it isn't. + * Force the build macro to be defined, so that keywords are not altered. + */ +# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__) +# define __APPLE_ALTIVEC__ +# endif +# include +# endif + +typedef __vector unsigned long long xxh_u64x2; +typedef __vector unsigned char xxh_u8x16; +typedef __vector unsigned xxh_u32x4; + +# ifndef XXH_VSX_BE +# if defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_VSX_BE 1 +# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ +# warning "-maltivec=be is not recommended. Please use native endianness." +# define XXH_VSX_BE 1 +# else +# define XXH_VSX_BE 0 +# endif +# endif /* !defined(XXH_VSX_BE) */ + +# if XXH_VSX_BE +# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) +# define XXH_vec_revb vec_revb +# else +/*! + * A polyfill for POWER9's vec_revb(). + */ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) +{ + xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + return vec_perm(val, val, vByteSwap); +} +# endif +# endif /* XXH_VSX_BE */ + +/*! + * Performs an unaligned vector load and byte swaps it on big endian. + */ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) +{ + xxh_u64x2 ret; + memcpy(&ret, ptr, sizeof(xxh_u64x2)); +# if XXH_VSX_BE + ret = XXH_vec_revb(ret); +# endif + return ret; +} + +/* + * vec_mulo and vec_mule are very problematic intrinsics on PowerPC + * + * These intrinsics weren't added until GCC 8, despite existing for a while, + * and they are endian dependent. Also, their meaning swap depending on version. + * */ +# if defined(__s390x__) + /* s390x is always big endian, no issue on this platform */ +# define XXH_vec_mulo vec_mulo +# define XXH_vec_mule vec_mule +# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) +/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */ +# define XXH_vec_mulo __builtin_altivec_vmulouw +# define XXH_vec_mule __builtin_altivec_vmuleuw +# else +/* gcc needs inline assembly */ +/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. 
*/ +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) +{ + xxh_u64x2 result; + __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) +{ + xxh_u64x2 result; + __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +# endif /* XXH_vec_mulo, XXH_vec_mule */ +#endif /* XXH_VECTOR == XXH_VSX */ + + +/* prefetch + * can be disabled, by declaring XXH_NO_PREFETCH build macro */ +#if defined(XXH_NO_PREFETCH) +# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# else +# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* XXH_NO_PREFETCH */ + + +/* ========================================== + * XXH3 default settings + * ========================================== */ + +#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ + +#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) +# error "default keyset is not large enough" +#endif + +/*! Pseudorandom secret taken directly from FARSH. */ +XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + + +#ifdef XXH_OLD_NAMES +# define kSecret XXH3_kSecret +#endif + +#ifdef XXH_DOXYGEN +/*! + * @brief Calculates a 32-bit to 64-bit long multiply. + * + * Implemented as a macro. + * + * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't + * need to (but it shouldn't need to anyways, it is about 7 instructions to do + * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we + * use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better option, + * you may also want to write your own long multiply routine here. + * + * @param x, y Numbers to be multiplied + * @return 64-bit product of the low 32 bits of @p x and @p y. 
+ */ +XXH_FORCE_INLINE xxh_u64 +XXH_mult32to64(xxh_u64 x, xxh_u64 y) +{ + return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); +} +#elif defined(_MSC_VER) && defined(_M_IX86) +# include +# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) +#else +/* + * Downcast + upcast is usually better than masking on older compilers like + * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands + * and perform a full 64x64 multiply -- entirely redundant on 32-bit. + */ +# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) +#endif + +/*! + * @brief Calculates a 64->128-bit long multiply. + * + * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar + * version. + * + * @param lhs, rhs The 64-bit integers to be multiplied + * @return The 128-bit result represented in an @ref XXH128_hash_t. + */ +static XXH128_hash_t +XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this type + * despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if defined(__GNUC__) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; + XXH128_hash_t r128; + r128.low64 = (xxh_u64)(product); + r128.high64 = (xxh_u64)(product >> 64); + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ +#elif defined(_M_X64) || defined(_M_IA64) + +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXH128_hash_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; + +#else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown below + * with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 + * --------- + * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. + * This avoids a lot of complexity. + * + * 2. 
It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARM's Digital Signal Processing extension + * in 32-bit ARMv6 and later, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, and + * allows this to be calculated in only 4 instructions at speeds + * comparable to some 64-bit ALUs. + * + * 3. It isn't terrible on other platforms. Usually this will be a couple + * of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. */ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; +#endif +} + +/*! + * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. + * + * The reason for the separate function is to prevent passing too many structs + * around by value. This will hopefully inline the multiply, but we don't force it. + * + * @param lhs, rhs The 64-bit integers to multiply + * @return The low 64 bits of the product XOR'd by the high 64 bits. + * @see XXH_mult64to128() + */ +static xxh_u64 +XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + +/*! Seems to produce slightly better code on GCC for some reason. */ +XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) +{ + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); +} + +/* + * This is a fast avalanche stage, + * suitable when input bits are already partially mixed + */ +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) +{ + h64 = XXH_xorshift64(h64, 37); + h64 *= 0x165667919E3779F9ULL; + h64 = XXH_xorshift64(h64, 32); + return h64; +} + +/* + * This is a stronger avalanche, + * inspired by Pelle Evensen's rrmxmx + * preferable when input has not been previously mixed + */ +static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) +{ + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= 0x9FB21C651E98DF25ULL; + h64 ^= (h64 >> 35) + len ; + h64 *= 0x9FB21C651E98DF25ULL; + return XXH_xorshift64(h64, 28); +} + + +/* ========================================== + * Short keys + * ========================================== + * One of the shortcomings of XXH32 and XXH64 was that their performance was + * sub-optimal on short lengths. It used an iterative algorithm which strongly + * favored lengths that were a multiple of 4 or 8. + * + * Instead of iterating over individual inputs, we use a set of single shot + * functions which piece together a range of lengths and operate in constant time. + * + * Additionally, the number of multiplies has been significantly reduced. This + * reduces latency, especially when emulating 64-bit multiplies on 32-bit. 
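+ * (An illustrative sketch follows; the short-keys overview continues after
+ *  it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. It spells
+ * out what XXH_mult64to128() and XXH3_mul128_fold64() compute on a case that
+ * is easy to check by hand: 0xFFFFFFFFFFFFFFFF * 2 = 2^65 - 2, so the high
+ * word is 1, the low word is 0xFFFFFFFFFFFFFFFE, and the fold is the XOR of
+ * the two. The name XXH_example_check_mul128 is hypothetical.
+ */
+XXH_FORCE_INLINE int XXH_example_check_mul128(void)
+{
+    XXH128_hash_t const product = XXH_mult64to128(0xFFFFFFFFFFFFFFFFULL, 2);
+    return product.high64 == 1
+        && product.low64 == 0xFFFFFFFFFFFFFFFEULL
+        && XXH3_mul128_fold64(0xFFFFFFFFFFFFFFFFULL, 2)
+               == (product.low64 ^ product.high64);
+}
+
+/*
+ * (Short keys, continued:)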
+ * + * Depending on the platform, this may or may not be faster than XXH32, but it + * is almost guaranteed to be faster than XXH64. + */ + +/* + * At very short lengths, there isn't enough input to fully hide secrets, or use + * the entire secret. + * + * There is also only a limited amount of mixing we can do before significantly + * impacting performance. + * + * Therefore, we use different sections of the secret and always mix two secret + * samples with an XOR. This should have no effect on performance on the + * seedless or withSeed variants because everything _should_ be constant folded + * by modern compilers. + * + * The XOR mixing hides individual parts of the secret and increases entropy. + * + * This adds an extra layer of strength for custom secrets. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = len + + XXH_swap64(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); + } +} + +/* + * DISCLAIMER: There are known *seed-dependent* multicollisions here due to + * multiplication by zero, affecting hashes of lengths 17 to 240. + * + * However, they are very unlikely. 
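+ * (An illustrative usage sketch follows; this disclaimer continues after
+ *  it.)
+ */
+
+/*
+ * Editor's note: illustrative sketch, not part of upstream xxHash. The
+ * short-length kernels above back the public XXH3 entry points declared
+ * earlier in this header; the seeded variant mixes the seed into the
+ * secret-derived bitflips, as seen in XXH3_len_4to8_64b() and friends.
+ * The name XXH_example_xxh3_short is hypothetical.
+ */
+XXH_FORCE_INLINE int XXH_example_xxh3_short(void)
+{
+    const char msg[] = "xxh3";   /* 4 bytes: handled by the 4to8 kernel */
+    XXH64_hash_t const unseeded = XXH3_64bits(msg, sizeof(msg) - 1);
+    XXH64_hash_t const seeded   = XXH3_64bits_withSeed(msg, sizeof(msg) - 1, 2022);
+    /* Both calls use the same kernel, but the results almost surely differ. */
+    return unseeded != seeded;
+}
+
+/*
+ * (Multicollision disclaimer, continued:)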
+ * + * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all + * unseeded non-cryptographic hashes, it does not attempt to defend itself + * against specially crafted inputs, only random inputs. + * + * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes + * cancelling out the secret is taken an arbitrary number of times (addressed + * in XXH3_accumulate_512), this collision is very unlikely with random inputs + * and/or proper seeding: + * + * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a + * function that is only called up to 16 times per hash with up to 240 bytes of + * input. + * + * This is not too bad for a non-cryptographic hash function, especially with + * only 64 bit outputs. + * + * The 128-bit variant (which trades some speed for strength) is NOT affected + * by this, although it is always a good idea to use a proper seed if you care + * about strength. + */ +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) +{ +#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ + /* + * UGLY HACK: + * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in + * slower code. + * + * By forcing seed64 into a register, we disrupt the cost model and + * cause it to scalarize. See `XXH32_round()` + * + * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, + * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on + * GCC 9.2, despite both emitting scalar code. + * + * GCC generates much better scalar code than Clang for the rest of XXH3, + * which is why finding a more optimal codepath is an interest. + */ + XXH_COMPILER_GUARD(seed64); +#endif + { xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input+8); + return XXH3_mul128_fold64( + input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret+8) - seed64) + ); + } +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. 
*/ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { xxh_u64 acc = len * XXH_PRIME64_1; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += XXH3_mix16B(input+48, secret+96, seed); + acc += XXH3_mix16B(input+len-64, secret+112, seed); + } + acc += XXH3_mix16B(input+32, secret+64, seed); + acc += XXH3_mix16B(input+len-48, secret+80, seed); + } + acc += XXH3_mix16B(input+16, secret+32, seed); + acc += XXH3_mix16B(input+len-32, secret+48, seed); + } + acc += XXH3_mix16B(input+0, secret+0, seed); + acc += XXH3_mix16B(input+len-16, secret+16, seed); + + return XXH3_avalanche(acc); + } +} + +#define XXH3_MIDSIZE_MAX 240 + +XXH_NO_INLINE XXH64_hash_t +XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + #define XXH3_MIDSIZE_STARTOFFSET 3 + #define XXH3_MIDSIZE_LASTOFFSET 17 + + { xxh_u64 acc = len * XXH_PRIME64_1; + int const nbRounds = (int)len / 16; + int i; + for (i=0; i<8; i++) { + acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); + } + acc = XXH3_avalanche(acc); + XXH_ASSERT(nbRounds >= 8); +#if defined(__clang__) /* Clang */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. + * In everywhere else, it uses scalar code. + * + * For 64->128-bit multiplies, even if the NEON was 100% optimal, it + * would still be slower than UMAAL (see XXH_mult64to128). + * + * Unfortunately, Clang doesn't handle the long multiplies properly and + * converts them to the nonexistent "vmulq_u64" intrinsic, which is then + * scalarized into an ugly mess of VMOV.32 instructions. + * + * This mess is difficult to avoid without turning autovectorization + * off completely, but they are usually relatively minor and/or not + * worth it to fix. + * + * This loop is the easiest to fix, as unlike XXH32, this pragma + * _actually works_ because it is a loop vectorization instead of an + * SLP vectorization. + */ + #pragma clang loop vectorize(disable) +#endif + for (i=8 ; i < nbRounds; i++) { + acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + return XXH3_avalanche(acc); + } +} + + +/* ======= Long Keys ======= */ + +#define XXH_STRIPE_LEN 64 +#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ +#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) + +#ifdef XXH_OLD_NAMES +# define STRIPE_LEN XXH_STRIPE_LEN +# define ACC_NB XXH_ACC_NB +#endif + +XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) +{ + if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); + memcpy(dst, &v64, sizeof(v64)); +} + +/* Several intrinsic functions below are supposed to accept __int64 as argument, + * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . + * However, several environments do not define __int64 type, + * requiring a workaround. 
+ */ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) + typedef int64_t xxh_i64; +#else + /* the following type must have a width of 64-bit */ + typedef long long xxh_i64; +#endif + +/* + * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. + * + * It is a hardened version of UMAC, based off of FARSH's implementation. + * + * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD + * implementations, and it is ridiculously fast. + * + * We harden it by mixing the original input to the accumulators as well as the product. + * + * This means that in the (relatively likely) case of a multiply by zero, the + * original input is preserved. + * + * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve + * cross-pollination, as otherwise the upper and lower halves would be + * essentially independent. + * + * This doesn't matter on 64-bit hashes since they all get merged together in + * the end, so we skip the extra step. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +#if (XXH_VECTOR == XXH_AVX512) \ + || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) + +#ifndef XXH_TARGET_AVX512 +# define XXH_TARGET_AVX512 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(64) __m512i* const xacc = (__m512i *) acc; + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512 (input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = _mm512_add_epi64(product, sum); + } +} + +/* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. + * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. 
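+ *
+ * (Editor's note: every implementation computes the same per-lane recurrence,
+ * spelled out in XXH3_scrambleAcc_scalar() further below:
+ *     acc = (acc ^ (acc >> 47) ^ key) * XXH_PRIME32_1;
+ * where acc is one 64-bit accumulator lane and key the matching 64-bit secret
+ * word: an xorshift, a keyed XOR, then a multiply by a 32-bit prime.)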
+ */ + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { XXH_ALIGN(64) __m512i* const xacc = (__m512i*) acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); + __m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t)customSecret & 63) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64)); + + XXH_ALIGN(64) const __m512i* const src = (const __m512i*) XXH3_kSecret; + XXH_ALIGN(64) __m512i* const dest = ( __m512i*) customSecret; + int i; + for (i=0; i < nbRounds; ++i) { + /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*', + * this will warn "discards 'const' qualifier". */ + union { + XXH_ALIGN(64) const __m512i* cp; + XXH_ALIGN(64) void* p; + } remote_const_void; + remote_const_void.cp = src + i; + dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_AVX2) \ + || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) + +#ifndef XXH_TARGET_AVX2 +# define XXH_TARGET_AVX2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { XXH_ALIGN(32) __m256i* const xacc = (__m256i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xinput = (const __m256i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i* const xsecret = (const __m256i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* data_vec = xinput[i]; */ + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + /* key_vec = xsecret[i]; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm256_add_epi64(product, sum); + } } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xsecret = (const __m256i *) secret; + const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); + (void)(&XXH_writeLE64); + XXH_PREFETCH(customSecret); + { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); + + XXH_ALIGN(64) const __m256i* const src = (const __m256i*) XXH3_kSecret; + XXH_ALIGN(64) __m256i* dest = ( __m256i*) customSecret; + +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + XXH_COMPILER_GUARD(dest); +# endif + + /* GCC -O2 need unroll loop manually */ + dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed); + dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed); + dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed); + dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed); + dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed); + dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed); 
+ } +} + +#endif + +/* x86dispatch always generates SSE2 */ +#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_SSE2 +# define XXH_TARGET_SSE2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xinput = (const __m128i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xsecret = (const __m128i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ + const __m128i* const xsecret = (const __m128i *) secret; + const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); + +# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 + // MSVC 32bit mode does not support _mm_set_epi64x before 2015 + XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; + __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); +# else + __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); +# endif + int i; + + XXH_ALIGN(64) const float* const src = (float const*) XXH3_kSecret; + XXH_ALIGN(XXH_SEC_ALIGN) __m128i* dest = (__m128i*) customSecret; +# if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + XXH_COMPILER_GUARD(dest); +# endif + + for (i=0; i < nbRounds; ++i) { + dest[i] = _mm_add_epi64(_mm_castps_si128(_mm_load_ps(src+i*4)), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_NEON) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { + XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. 
*/ + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) { + /* data_vec = xinput[i]; */ + uint8x16_t data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); + uint64x2_t data_key; + uint32x2_t data_key_lo, data_key_hi; + /* xacc[i] += swap(data_vec); */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped = vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + /* data_key = data_vec ^ key_vec; */ + data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec)); + /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (data_key >> 32); + * data_key = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi); + + } + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { uint64x2_t* xacc = (uint64x2_t*) acc; + uint8_t const* xsecret = (uint8_t const*) secret; + uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1); + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47); + uint64x2_t data_vec = veorq_u64 (acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec)); + + /* xacc[i] *= XXH_PRIME32_1 */ + uint32x2_t data_key_lo, data_key_hi; + /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF); + * data_key_hi = (uint32x2_t) (xacc[i] >> 32); + * xacc[i] = UNDEFINED; */ + XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); + { /* + * prod_hi = (data_key >> 32) * XXH_PRIME32_1; + * + * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will + * incorrectly "optimize" this: + * tmp = vmul_u32(vmovn_u64(a), vmovn_u64(b)); + * shifted = vshll_n_u32(tmp, 32); + * to this: + * tmp = "vmulq_u64"(a, b); // no such thing! + * shifted = vshlq_n_u64(tmp, 32); + * + * However, unlike SSE, Clang lacks a 64-bit multiply routine + * for NEON, and it scalarizes two 64-bit multiplies instead. + * + * vmull_u32 has the same timing as vmul_u32, and it avoids + * this bug completely. 
+ * See https://bugs.llvm.org/show_bug.cgi?id=39967 + */ + uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime); + /* xacc[i] = prod_hi << 32; */ + xacc[i] = vshlq_n_u64(prod_hi, 32); + /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime); + } + } } +} + +#endif + +#if (XXH_VECTOR == XXH_VSX) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + xxh_u64x2* const xacc = (xxh_u64x2*) acc; /* presumed aligned */ + xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */ + xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */ + xxh_u64x2 const v32 = { 32, 32 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); + xacc[i] += product; + + /* swap high and low halves */ +#ifdef __s390x__ + xacc[i] += vec_permi(data_vec, data_vec, 2); +#else + xacc[i] += vec_xxpermdi(data_vec, data_vec, 2); +#endif + } +} + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { xxh_u64x2* const xacc = (xxh_u64x2*) acc; + const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret; + /* constants */ + xxh_u64x2 const v32 = { 32, 32 }; + xxh_u64x2 const v47 = { 47, 47 }; + xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } } +} + +#endif + +/* scalar variants - universal */ + +XXH_FORCE_INLINE void +XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); + for (i=0; i < XXH_ACC_NB; i++) { + xxh_u64 const data_val = XXH_readLE64(xinput + 8*i); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8); + xacc[i ^ 1] += data_val; /* swap adjacent lanes */ + xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + } +} + +XXH_FORCE_INLINE 
void +XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); + for (i=0; i < XXH_ACC_NB; i++) { + xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i); + xxh_u64 acc64 = xacc[i]; + acc64 = XXH_xorshift64(acc64, 47); + acc64 ^= key64; + acc64 *= XXH_PRIME32_1; + xacc[i] = acc64; + } +} + +XXH_FORCE_INLINE void +XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + /* + * We need a separate pointer for the hack below, + * which requires a non-const pointer. + * Any decent compiler will optimize this out otherwise. + */ + const xxh_u8* kSecretPtr = XXH3_kSecret; + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + +#if defined(__clang__) && defined(__aarch64__) + /* + * UGLY HACK: + * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are + * placed sequentially, in order, at the top of the unrolled loop. + * + * While MOVK is great for generating constants (2 cycles for a 64-bit + * constant compared to 4 cycles for LDR), long MOVK chains stall the + * integer pipelines: + * I L S + * MOVK + * MOVK + * MOVK + * MOVK + * ADD + * SUB STR + * STR + * By forcing loads from memory (as the asm line causes Clang to assume + * that XXH3_kSecretPtr has been changed), the pipelines are used more + * efficiently: + * I L S + * LDR + * ADD LDR + * SUB STR + * STR + * XXH3_64bits_withSeed, len == 256, Snapdragon 835 + * without hack: 2654.4 MB/s + * with hack: 3202.9 MB/s + */ + XXH_COMPILER_GUARD(kSecretPtr); +#endif + /* + * Note: in debug mode, this overrides the asm optimization + * and Clang will emit MOVK chains again. + */ + XXH_ASSERT(kSecretPtr == XXH3_kSecret); + + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; + int i; + for (i=0; i < nbRounds; i++) { + /* + * The asm hack causes Clang to assume that kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. 
+ */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; + XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); + XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); + } } +} + + +typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*); +typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); +typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); + + +#if (XXH_VECTOR == XXH_AVX512) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + +#elif (XXH_VECTOR == XXH_AVX2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + +#elif (XXH_VECTOR == XXH_SSE2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + +#elif (XXH_VECTOR == XXH_NEON) + +#define XXH3_accumulate_512 XXH3_accumulate_512_neon +#define XXH3_scrambleAcc XXH3_scrambleAcc_neon +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_VSX) + +#define XXH3_accumulate_512 XXH3_accumulate_512_vsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#else /* scalar */ + +#define XXH3_accumulate_512 XXH3_accumulate_512_scalar +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#endif + + + +#ifndef XXH_PREFETCH_DIST +# ifdef __clang__ +# define XXH_PREFETCH_DIST 320 +# else +# if (XXH_VECTOR == XXH_AVX512) +# define XXH_PREFETCH_DIST 512 +# else +# define XXH_PREFETCH_DIST 384 +# endif +# endif /* __clang__ */ +#endif /* XXH_PREFETCH_DIST */ + +/* + * XXH3_accumulate() + * Loops over XXH3_accumulate_512(). 
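+ * Stripe n reads the 64 input bytes at offset 64*n and the 64 secret bytes at
+ * offset 8*n (XXH_SECRET_CONSUME_RATE), so consecutive stripes use overlapping,
+ * gradually shifting slices of the secret.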
+ * Assumption: nbStripes will not overflow the secret size + */ +XXH_FORCE_INLINE void +XXH3_accumulate( xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + size_t nbStripes, + XXH3_f_accumulate_512 f_acc512) +{ + size_t n; + for (n = 0; n < nbStripes; n++ ) { + const xxh_u8* const in = input + n*XXH_STRIPE_LEN; + XXH_PREFETCH(in + XXH_PREFETCH_DIST); + f_acc512(acc, + in, + secret + n*XXH_SECRET_CONSUME_RATE); + } +} + +XXH_FORCE_INLINE void +XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512); + + /* last stripe */ + { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; +#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); + } } +} + +XXH_FORCE_INLINE xxh_u64 +XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) +{ + return XXH3_mul128_fold64( + acc[0] ^ XXH_readLE64(secret), + acc[1] ^ XXH_readLE64(secret+8) ); +} + +static XXH64_hash_t +XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) +{ + xxh_u64 result64 = start; + size_t i = 0; + + for (i = 0; i < 4; i++) { + result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); +#if defined(__clang__) /* Clang */ \ + && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Prevent autovectorization on Clang ARMv7-a. Exact same problem as + * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. 
+ * XXH3_64bits, len == 256, Snapdragon 835: + * without hack: 2063.7 MB/s + * with hack: 2560.7 MB/s + */ + XXH_COMPILER_GUARD(result64); +#endif + } + + return XXH3_avalanche(result64); +} + +#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ + XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 } + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, + const void* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + /* do not align on 8, so that the secret is different from the accumulator */ +#define XXH_SECRET_MERGEACCS_START 11 + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + * Since the function is not inlined, the compiler may not be able to understand that, + * in some scenarios, its `secret` argument is actually a compile time constant. + * This variant enforces that the compiler can detect that, + * and uses this opportunity to streamline the generated code for better performance. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * XXH3_hashLong_64b_withSeed(): + * Generate a custom key based on alteration of default XXH3_kSecret with the seed, + * and then use this key for long mode hashing. + * + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + * + * It's important for performance that XXH3_hashLong is not inlined. Not sure + * why (uop cache maybe?), but the difference is large and easily measurable. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc512, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), + f_acc512, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. 
+ */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed(const void* input, size_t len, + XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + + +typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong64_f f_hashLong) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that function signature doesn't offer room to return an error. + */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); +} + + +/* === Public entry point === */ + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len) +{ + return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); +} + + +/* === XXH3 streaming === */ + +/* + * Malloc's a pointer that is always aligned to align. + * + * This must be freed with `XXH_alignedFree()`. + * + * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte + * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2 + * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON. + * + * This underalignment previously caused a rather obvious crash which went + * completely unnoticed due to XXH3_createState() not actually being tested. + * Credit to RedSpah for noticing this bug. + * + * The alignment is done manually: Functions like posix_memalign or _mm_malloc + * are avoided: To maintain portability, we would have to write a fallback + * like this anyways, and besides, testing for the existence of library + * functions without relying on external build tools is impossible. + * + * The method is simple: Overallocate, manually align, and store the offset + * to the original behind the returned pointer. + * + * Align must be a power of 2 and 8 <= align <= 128. 
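+ *
+ * Illustrative example (editor's note, hypothetical addresses): with align == 64
+ * and XXH_malloc() returning a base address ending in 0x28, offset == 64 - 0x28
+ * == 0x18, so the caller receives base + 0x18 (which is 64-byte aligned) and the
+ * byte just before it stores 0x18, which XXH_alignedFree() reads back to recover
+ * the original allocation.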
+ */ +static void* XXH_alignedMalloc(size_t s, size_t align) +{ + XXH_ASSERT(align <= 128 && align >= 8); /* range check */ + XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ + XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ + { /* Overallocate to make room for manual realignment and an offset byte */ + xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); + if (base != NULL) { + /* + * Get the offset needed to align this pointer. + * + * Even if the returned pointer is aligned, there will always be + * at least one byte to store the offset to the original pointer. + */ + size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ + /* Add the offset for the now-aligned pointer */ + xxh_u8* ptr = base + offset; + + XXH_ASSERT((size_t)ptr % align == 0); + + /* Store the offset immediately before the returned pointer. */ + ptr[-1] = (xxh_u8)offset; + return ptr; + } + return NULL; + } +} +/* + * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass + * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout. + */ +static void XXH_alignedFree(void* p) +{ + if (p != NULL) { + xxh_u8* ptr = (xxh_u8*)p; + /* Get the offset byte we added in XXH_malloc. */ + xxh_u8 offset = ptr[-1]; + /* Free the original malloc'd pointer */ + xxh_u8* base = ptr - offset; + XXH_free(base); + } +} +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) +{ + XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state==NULL) return NULL; + XXH3_INITSTATE(state); + return state; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) +{ + XXH_alignedFree(statePtr); + return XXH_OK; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API void +XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state) +{ + memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void +XXH3_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + size_t const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char*)statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->extSecret = (const unsigned char*)secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset(XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! 
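+ * Editor's sketch (not upstream documentation; the buffers and length variables
+ * are hypothetical, and error codes are ignored for brevity). A typical
+ * streaming sequence with an external secret of at least XXH3_SECRET_SIZE_MIN
+ * bytes:
+ *
+ *     XXH3_state_t* const st = XXH3_createState();
+ *     XXH3_64bits_reset_withSecret(st, secret, secretSize);
+ *     XXH3_64bits_update(st, chunk1, chunk1Len);
+ *     XXH3_64bits_update(st, chunk2, chunk2Len);
+ *     XXH64_hash_t const h = XXH3_64bits_digest(st);
+ *     XXH3_freeState(st);
+ *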
@ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_64bits_reset(statePtr); + if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/* Note : when XXH3_consumeStripes() is invoked, + * there must be a guarantee that at least one more byte must be consumed from input + * so that the function can blindly consume all stripes using the "normal" secret segment */ +XXH_FORCE_INLINE void +XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, + size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, + const xxh_u8* XXH_RESTRICT input, size_t nbStripes, + const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */ + XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock); + if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) { + /* need a scrambling operation */ + size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr; + size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock; + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512); + f_scramble(acc, secret + secretLimit); + XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512); + *nbStripesSoFarPtr = nbStripesAfterBlock; + } else { + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512); + *nbStripesSoFarPtr += nbStripes; + } +} + +/* + * Both XXH3_64bits_update and XXH3_128bits_update use this routine. + */ +XXH_FORCE_INLINE XXH_errorcode +XXH3_update(XXH3_state_t* state, + const xxh_u8* input, size_t len, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { const xxh_u8* const bEnd = input + len; + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + + state->totalLen += len; + XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); + + if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */ + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + + #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ + + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. 
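+ *
+ * Worked example (editor's note, assuming the default 256-byte internal
+ * buffer): with bufferedSize == 100 and len == 300, loadSize == 156 completes
+ * the buffer, which is then consumed as XXH3_INTERNALBUFFER_STRIPES stripes;
+ * the remaining 144 bytes are fewer than a full buffer, so they are simply
+ * copied into state->buffer at the end.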
+ */
+ if (state->bufferedSize) {
+ size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
+ XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
+ input += loadSize;
+ XXH3_consumeStripes(state->acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ state->bufferedSize = 0;
+ }
+ XXH_ASSERT(input < bEnd);
+
+ /* Consume input by a multiple of internal buffer size */
+ if (input+XXH3_INTERNALBUFFER_SIZE < bEnd) {
+ const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+ do {
+ XXH3_consumeStripes(state->acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ input, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ input += XXH3_INTERNALBUFFER_SIZE;
+ } while (input < limit);
+ /* for last partial stripe */
+ memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ }
+ XXH_ASSERT(input < bEnd);
+
+ /* Some remaining input (always) : buffer it */
+ XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
+ state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+ }
+
+ return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+{
+ return XXH3_update(state, (const xxh_u8*)input, len,
+ XXH3_accumulate_512, XXH3_scrambleAcc);
+}
+
+
+XXH_FORCE_INLINE void
+XXH3_digest_long (XXH64_hash_t* acc,
+ const XXH3_state_t* state,
+ const unsigned char* secret)
+{
+ /*
+ * Digest on a local copy. This way, the state remains unaltered, and it can
+ * continue ingesting more input afterwards.
+ */
+ memcpy(acc, state->acc, sizeof(state->acc));
+ if (state->bufferedSize >= XXH_STRIPE_LEN) {
+ size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
+ size_t nbStripesSoFar = state->nbStripesSoFar;
+ XXH3_consumeStripes(acc,
+ &nbStripesSoFar, state->nbStripesPerBlock,
+ state->buffer, nbStripes,
+ secret, state->secretLimit,
+ XXH3_accumulate_512, XXH3_scrambleAcc);
+ /* last stripe */
+ XXH3_accumulate_512(acc,
+ state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ } else { /* bufferedSize < XXH_STRIPE_LEN */
+ xxh_u8 lastStripe[XXH_STRIPE_LEN];
+ size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
+ XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
+ memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+ memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+ XXH3_accumulate_512(acc,
+ lastStripe,
+ secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+ }
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+{
+ const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+ if (state->totalLen > XXH3_MIDSIZE_MAX) {
+ XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+ XXH3_digest_long(acc, state, secret);
+ return XXH3_mergeAccs(acc,
+ secret + XXH_SECRET_MERGEACCS_START,
+ (xxh_u64)state->totalLen * XXH_PRIME64_1);
+ }
+ /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
+ if (state->seed)
+ return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+ return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
+ secret, state->secretLimit + XXH_STRIPE_LEN);
+}
+
+
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+/*!
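+ * Editor's sketch (not upstream documentation; `mySeedData`, `data` and the
+ * size variables are hypothetical): derive a full-size secret from arbitrary
+ * seed material, then hash with it:
+ *
+ *     unsigned char secret[XXH_SECRET_DEFAULT_SIZE];
+ *     XXH3_generateSecret(secret, mySeedData, mySeedDataSize);
+ *     XXH64_hash_t  const h64  = XXH3_64bits_withSecret (data, dataSize, secret, sizeof(secret));
+ *     XXH128_hash_t const h128 = XXH3_128bits_withSecret(data, dataSize, secret, sizeof(secret));
+ *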
@ingroup xxh3_family */ +XXH_PUBLIC_API void +XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize) +{ + XXH_ASSERT(secretBuffer != NULL); + if (customSeedSize == 0) { + memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return; + } + XXH_ASSERT(customSeed != NULL); + + { size_t const segmentSize = sizeof(XXH128_hash_t); + size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize; + XXH128_canonical_t scrambler; + XXH64_hash_t seeds[12]; + size_t segnb; + XXH_ASSERT(nbSegments == 12); + XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */ + XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); + + /* + * Copy customSeed to seeds[], truncating or repeating as necessary. + */ + { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds)); + size_t filled = toFill; + memcpy(seeds, customSeed, toFill); + while (filled < sizeof(seeds)) { + toFill = XXH_MIN(filled, sizeof(seeds) - filled); + memcpy((char*)seeds + filled, seeds, toFill); + filled += toFill; + } } + + /* generate secret */ + memcpy(secretBuffer, &scrambler, sizeof(scrambler)); + for (segnb=1; segnb < nbSegments; segnb++) { + size_t const segmentStart = segnb * segmentSize; + XXH128_canonical_t segment; + XXH128_canonicalFromHash(&segment, + XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) ); + memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment)); + } } +} + + +/* ========================================== + * XXH3 128 bits (a.k.a XXH128) + * ========================================== + * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, + * even without counting the significantly larger output size. + * + * For example, extra steps are taken to avoid the seed-dependent collisions + * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). + * + * This strength naturally comes at the cost of some speed, especially on short + * lengths. Note that longer hashes are about as fast as the 64-bit version + * due to it using only a slight modification of the 64-bit loop. + * + * XXH128 is also more oriented towards 64-bit machines. It is still extremely + * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). + */ + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + /* A doubled version of 1to3_64b with different constants. 
*/ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. */ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= 0x9FB21C651E98DF25ULL; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + m128.low64 += (xxh_u64)(len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. + */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); + } else { + /* + * 64-bit optimized (albeit more confusing) version. 
+ * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); + } + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); + + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ + XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; + + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } } +} + +/* + * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) return XXH3_len_1to3_128b(input, len, secret, seed); + { XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); + xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 = XXH64_avalanche( seed ^ bitfliph); + return h128; + } } +} + +/* + * A bit slower than XXH3_mix16B, but handles multiply by zero better. 
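+ *
+ * (Editor's note: as the code below shows, each accumulator half also absorbs
+ * the raw words of the opposite 16-byte input via the XXH_readLE64() additions,
+ * so even when one XXH3_mix16B() product collapses to zero, the 32-byte block
+ * still influences the result.)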
+ */ +XXH_FORCE_INLINE XXH128_hash_t +XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, + const xxh_u8* secret, XXH64_hash_t seed) +{ + acc.low64 += XXH3_mix16B (input_1, secret+0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B (input_2, secret+16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); + } + acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); + } + acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); + } + acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_NO_INLINE XXH128_hash_t +XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + { XXH128_hash_t acc; + int const nbRounds = (int)len / 32; + int i; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + for (i=0; i<4; i++) { + acc = XXH128_mix32B(acc, + input + (32 * i), + input + (32 * i) + 16, + secret + (32 * i), + seed); + } + acc.low64 = XXH3_avalanche(acc.low64); + acc.high64 = XXH3_avalanche(acc.high64); + XXH_ASSERT(nbRounds >= 4); + for (i=4 ; i < nbRounds; i++) { + acc = XXH128_mix32B(acc, + input + (32 * i), + input + (32 * i) + 16, + secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)), + seed); + } + /* last bytes */ + acc = XXH128_mix32B(acc, + input + len - 16, + input + len - 32, + secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, + 0ULL - seed); + + { XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) +{ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; + + XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble); + + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)len * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + secretSize + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + 
~((xxh_u64)len * XXH_PRIME64_2)); + return h128; + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc512, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), + f_acc512, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, + XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const void* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_128bits_internal(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong128_f f_hl128) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hl128(input, len, seed64, secret, secretLen); +} + + +/* === Public XXH128 API === */ + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_default); +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + return XXH3_128bits_internal(input, len, 0, + (const xxh_u8*)secret, secretSize, + XXH3_hashLong_128b_withSecret); +} + +/*! 
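+ * Editorial usage sketch for the XXH128 one-shot API (added by the editor,
+ * not upstream documentation; `buf` and `bufLen` are placeholders):
+ *
+ *     XXH128_hash_t h = XXH3_128bits_withSeed(buf, bufLen, 1234);
+ *     // h.low64 and h.high64 hold the two 64-bit halves of the result.
+ *     // Compare results with XXH128_isEqual()/XXH128_cmp(), and use
+ *     // XXH128_canonicalFromHash() when a portable byte order is needed.
+ *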
@ingroup xxh3_family */ +XXH_PUBLIC_API XXH128_hash_t +XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_internal(input, len, seed, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_withSeed); +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH128_hash_t +XXH128(const void* input, size_t len, XXH64_hash_t seed) +{ + return XXH3_128bits_withSeed(input, len, seed); +} + + +/* === XXH3 128-bit streaming === */ + +/* + * All the functions are actually the same as for 64-bit streaming variant. + * The only difference is the finalization routine. + */ + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset(XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_128bits_reset(statePtr); + if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate_512, XXH3_scrambleAcc); +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + { XXH128_hash_t h128; + h128.low64 = XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + h128.high64 = XXH3_mergeAccs(acc, + secret + state->secretLimit + XXH_STRIPE_LEN + - sizeof(acc) - XXH_SECRET_MERGEACCS_START, + ~((xxh_u64)state->totalLen * XXH_PRIME64_2)); + return h128; + } + } + /* len <= XXH3_MIDSIZE_MAX : short code */ + if (state->seed) + return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} + +/* 128-bit utility functions */ + +#include /* memcmp, memcpy */ + +/* return : 1 is equal, 0 if different */ +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) +{ + /* note : XXH128_hash_t is compact, it has no padding byte */ + return !(memcmp(&h1, &h2, sizeof(h1))); +} + +/* This prototype is compatible with stdlib's qsort(). + * return : >0 if *h128_1 > *h128_2 + * <0 if *h128_1 < *h128_2 + * =0 if *h128_1 == *h128_2 */ +/*! 
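+ * Editorial usage sketch (added by the editor, not upstream documentation):
+ * because the prototype matches qsort()'s comparator, an array of hashes can
+ * be sorted directly (requires <stdlib.h>; `hashes` and `n` are placeholders):
+ *
+ *     qsort(hashes, n, sizeof(XXH128_hash_t), XXH128_cmp);
+ *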
@ingroup xxh3_family */ +XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2) +{ + XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1; + XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2; + int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); + /* note : bets that, in most cases, hash values are different */ + if (hcmp) return hcmp; + return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); +} + + +/*====== Canonical representation ======*/ +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API void +XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) { + hash.high64 = XXH_swap64(hash.high64); + hash.low64 = XXH_swap64(hash.low64); + } + memcpy(dst, &hash.high64, sizeof(hash.high64)); + memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); +} + +/*! @ingroup xxh3_family */ +XXH_PUBLIC_API XXH128_hash_t +XXH128_hashFromCanonical(const XXH128_canonical_t* src) +{ + XXH128_hash_t h; + h.high64 = XXH_readBE64(src); + h.low64 = XXH_readBE64(src->digest + 8); + return h; +} + +/* Pop our optimization override from above */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ +# pragma GCC pop_options +#endif + +#endif /* XXH_NO_LONG_LONG */ + +#endif /* XXH_NO_XXH3 */ + +/*! + * @} + */ +#endif /* XXH_IMPLEMENTATION */ + + +#if defined (__cplusplus) +} +#endif diff --git a/src/rocksdb/util/xxph3.h b/src/rocksdb/util/xxph3.h new file mode 100644 index 000000000..968000c3a --- /dev/null +++ b/src/rocksdb/util/xxph3.h @@ -0,0 +1,1764 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +// This is a fork of a preview version of xxHash, as RocksDB depends on +// this preview version of XXH3. To allow this to coexist with the +// standard xxHash, including in the "unity" build where all source files +// and headers go into a single translation unit, here "XXH" has been +// replaced with "XXPH" for XX Preview Hash. + +#ifndef XXPHASH_H_5627135585666179 +#define XXPHASH_H_5627135585666179 1 + +/* BEGIN RocksDB customizations */ +#ifndef XXPH_STATIC_LINKING_ONLY +// Access experimental APIs +#define XXPH_STATIC_LINKING_ONLY 1 +#endif +#define XXPH_NAMESPACE ROCKSDB_ +#define XXPH_INLINE_ALL +#include +/* END RocksDB customizations */ + +// clang-format off +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************** +* Definitions +******************************/ +#include /* size_t */ +typedef enum { XXPH_OK=0, XXPH_ERROR } XXPH_errorcode; + + +/* **************************** + * API modifier + ******************************/ +/** XXPH_INLINE_ALL (and XXPH_PRIVATE_API) + * This build macro includes xxhash functions in `static` mode + * in order to inline them, and remove their symbol from the public list. + * Inlining offers great performance improvement on small keys, + * and dramatic ones when length is expressed as a compile-time constant. + * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html . + * Methodology : + * #define XXPH_INLINE_ALL + * #include "xxhash.h" + * `xxhash.c` is automatically included. + * It's not useful to compile and link it as a separate object. + */ +#if defined(XXPH_INLINE_ALL) || defined(XXPH_PRIVATE_API) +# ifndef XXPH_STATIC_LINKING_ONLY +# define XXPH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXPH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXPH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXPH_PUBLIC_API static __inline +# else + /* this version may generate warnings for unused static functions */ +# define XXPH_PUBLIC_API static +# endif +#else +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXPH_IMPORT) || defined(XXPH_EXPORT)) +# ifdef XXPH_EXPORT +# define XXPH_PUBLIC_API __declspec(dllexport) +# elif XXPH_IMPORT +# define XXPH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXPH_PUBLIC_API /* do nothing */ +# endif +#endif /* XXPH_INLINE_ALL || XXPH_PRIVATE_API */ + +/*! XXPH_NAMESPACE, aka Namespace Emulation : + * + * If you want to include _and expose_ xxHash functions from within your own library, + * but also want to avoid symbol collisions with other libraries which may also include xxHash, + * + * you can use XXPH_NAMESPACE, to automatically prefix any public symbol from xxhash library + * with the value of XXPH_NAMESPACE (therefore, avoid NULL and numeric values). + * + * Note that no change is required within the calling program as long as it includes `xxhash.h` : + * regular symbol name will be automatically translated by this header. 
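+ *
+ * Editorial illustration (added by the editor, not upstream text): with the
+ * RocksDB customization `#define XXPH_NAMESPACE ROCKSDB_` above, the
+ * token-pasting macros below turn, for example,
+ *
+ *     XXPH_versionNumber  -->  XXPH_NAME2(ROCKSDB_, XXPH_versionNumber)
+ *                         -->  ROCKSDB_XXPH_versionNumber
+ *
+ * so callers keep writing the unprefixed name while the linker only ever
+ * sees the prefixed symbol.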
+ */ +#ifdef XXPH_NAMESPACE +# define XXPH_CAT(A,B) A##B +# define XXPH_NAME2(A,B) XXPH_CAT(A,B) +# define XXPH_versionNumber XXPH_NAME2(XXPH_NAMESPACE, XXPH_versionNumber) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXPH_VERSION_MAJOR 0 +#define XXPH_VERSION_MINOR 7 +#define XXPH_VERSION_RELEASE 2 +#define XXPH_VERSION_NUMBER (XXPH_VERSION_MAJOR *100*100 + XXPH_VERSION_MINOR *100 + XXPH_VERSION_RELEASE) +XXPH_PUBLIC_API unsigned XXPH_versionNumber (void); + + +/*-********************************************************************** +* 32-bit hash +************************************************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint32_t XXPH32_hash_t; +#else +# include +# if UINT_MAX == 0xFFFFFFFFUL + typedef unsigned int XXPH32_hash_t; +# else +# if ULONG_MAX == 0xFFFFFFFFUL + typedef unsigned long XXPH32_hash_t; +# else +# error "unsupported platform : need a 32-bit type" +# endif +# endif +#endif + +#ifndef XXPH_NO_LONG_LONG +/*-********************************************************************** +* 64-bit hash +************************************************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t XXPH64_hash_t; +#else + /* the following type must have a width of 64-bit */ + typedef unsigned long long XXPH64_hash_t; +#endif + +#endif /* XXPH_NO_LONG_LONG */ + + + +#ifdef XXPH_STATIC_LINKING_ONLY + +/* ================================================================================================ + This section contains declarations which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! +=================================================================================================== */ + + +/*-********************************************************************** +* XXPH3 +* New experimental hash +************************************************************************/ +#ifndef XXPH_NO_LONG_LONG + + +/* ============================================ + * XXPH3 is a new hash algorithm, + * featuring improved speed performance for both small and large inputs. + * See full speed analysis at : http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html + * In general, expect XXPH3 to run about ~2x faster on large inputs, + * and >3x faster on small ones, though exact differences depend on platform. + * + * The algorithm is portable, will generate the same hash on all platforms. + * It benefits greatly from vectorization units, but does not require it. + * + * XXPH3 offers 2 variants, _64bits and _128bits. + * When only 64 bits are needed, prefer calling the _64bits variant : + * it reduces the amount of mixing, resulting in faster speed on small inputs. + * It's also generally simpler to manipulate a scalar return type than a struct. + * + * The XXPH3 algorithm is still considered experimental. + * Produced results can still change between versions. + * Results produced by v0.7.x are not comparable with results from v0.7.y . 
+ * It's nonetheless possible to use XXPH3 for ephemeral data (local sessions), + * but avoid storing values in long-term storage for later reads. + * + * The API supports one-shot hashing, streaming mode, and custom secrets. + * + * There are still a number of opened questions that community can influence during the experimental period. + * I'm trying to list a few of them below, though don't consider this list as complete. + * + * - 128-bits output type : currently defined as a structure of two 64-bits fields. + * That's because 128-bit values do not exist in C standard. + * Note that it means that, at byte level, result is not identical depending on endianess. + * However, at field level, they are identical on all platforms. + * The canonical representation solves the issue of identical byte-level representation across platforms, + * which is necessary for serialization. + * Q1 : Would there be a better representation for a 128-bit hash result ? + * Q2 : Are the names of the inner 64-bit fields important ? Should they be changed ? + * + * - Prototype XXPH128() : XXPH128() uses the same arguments as XXPH64(), for consistency. + * It means it maps to XXPH3_128bits_withSeed(). + * This variant is slightly slower than XXPH3_128bits(), + * because the seed is now part of the algorithm, and can't be simplified. + * Is that a good idea ? + * + * - Seed type for XXPH128() : currently, it's a single 64-bit value, like the 64-bit variant. + * It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash. + * But 128-bit seed is more difficult to use, since it requires to pass a structure instead of a scalar value. + * Such a variant could either replace current one, or become an additional one. + * Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`). + * Follow up question : if both 64-bit and 128-bit seeds are allowed, which variant should be called XXPH128 ? + * + * - Result for len==0 : Currently, the result of hashing a zero-length input is always `0`. + * It seems okay as a return value when using "default" secret and seed. + * But is it still fine to return `0` when secret or seed are non-default ? + * Are there use cases which could depend on generating a different hash result for zero-length input when the secret is different ? + * + * - Consistency (1) : Streaming XXPH128 uses an XXPH3 state, which is the same state as XXPH3_64bits(). + * It means a 128bit streaming loop must invoke the following symbols : + * XXPH3_createState(), XXPH3_128bits_reset(), XXPH3_128bits_update() (loop), XXPH3_128bits_digest(), XXPH3_freeState(). + * Is that consistent enough ? + * + * - Consistency (2) : The canonical representation of `XXPH3_64bits` is provided by existing functions + * XXPH64_canonicalFromHash(), and reverse operation XXPH64_hashFromCanonical(). + * As a mirror, canonical functions for XXPH128_hash_t results generated by `XXPH3_128bits` + * are XXPH128_canonicalFromHash() and XXPH128_hashFromCanonical(). + * Which means, `XXPH3` doesn't appear in the names, because canonical functions operate on a type, + * independently of which algorithm was used to generate that type. + * Is that consistent enough ? 
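+ *
+ * Editorial usage sketch (added by the editor, not upstream text), limited to
+ * the three one-shot entry points declared just below; the include path is an
+ * assumption based on this file's location in the tree:
+ *
+ *     #include "util/xxph3.h"
+ *     const char msg[] = "hello";
+ *     XXPH64_hash_t h0 = XXPH3_64bits(msg, sizeof(msg) - 1);
+ *     XXPH64_hash_t h1 = XXPH3_64bits_withSeed(msg, sizeof(msg) - 1, 42);
+ *     // h0 equals XXPH3_64bits_withSeed(msg, sizeof(msg) - 1, 0), per the
+ *     // seed==0 note below; a custom secret needs >= XXPH3_SECRET_SIZE_MIN
+ *     // (136) bytes of good-quality random data.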
+ */ + +#ifdef XXPH_NAMESPACE +# define XXPH3_64bits XXPH_NAME2(XXPH_NAMESPACE, XXPH3_64bits) +# define XXPH3_64bits_withSecret XXPH_NAME2(XXPH_NAMESPACE, XXPH3_64bits_withSecret) +# define XXPH3_64bits_withSeed XXPH_NAME2(XXPH_NAMESPACE, XXPH3_64bits_withSeed) +#endif + +/* XXPH3_64bits() : + * default 64-bit variant, using default secret and default seed of 0. + * It's the fastest variant. */ +XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits(const void* data, size_t len); + +/* XXPH3_64bits_withSecret() : + * It's possible to provide any blob of bytes as a "secret" to generate the hash. + * This makes it more difficult for an external actor to prepare an intentional collision. + * The secret *must* be large enough (>= XXPH3_SECRET_SIZE_MIN). + * It should consist of random bytes. + * Avoid repeating same character, or sequences of bytes, + * and especially avoid swathes of \0. + * Failure to respect these conditions will result in a poor quality hash. + */ +#define XXPH3_SECRET_SIZE_MIN 136 +XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); + +/* XXPH3_64bits_withSeed() : + * This variant generates on the fly a custom secret, + * based on the default secret, altered using the `seed` value. + * While this operation is decently fast, note that it's not completely free. + * note : seed==0 produces same results as XXPH3_64bits() */ +XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits_withSeed(const void* data, size_t len, XXPH64_hash_t seed); + +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include +# define XXPH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define XXPH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define XXPH_ALIGN(n) __declspec(align(n)) +#else +# define XXPH_ALIGN(n) /* disabled */ +#endif + +#define XXPH3_SECRET_DEFAULT_SIZE 192 /* minimum XXPH3_SECRET_SIZE_MIN */ + +#endif /* XXPH_NO_LONG_LONG */ + + +/*-********************************************************************** +* XXPH_INLINE_ALL +************************************************************************/ +#if defined(XXPH_INLINE_ALL) || defined(XXPH_PRIVATE_API) + +/* === RocksDB modification: was #include here but permanently inlining === */ + +typedef struct { + XXPH64_hash_t low64; + XXPH64_hash_t high64; +} XXPH128_hash_t; + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXPH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXPH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if !defined(__clang__) && defined(__GNUC__) && defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && (__ARM_ARCH == 6) +# define XXPH_FORCE_MEMORY_ACCESS 2 +# elif !defined(__clang__) && ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7))) +# define XXPH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXPH_ACCEPT_NULL_INPUT_POINTER : + * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. + * When this macro is enabled, xxHash actively checks input for null pointer. + * It it is, result for null input pointers is the same as a null-length input. + */ +#ifndef XXPH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXPH_ACCEPT_NULL_INPUT_POINTER 0 +#endif + +/*!XXPH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; + * set it to 0 when the input is guaranteed to be aligned, + * or when alignment doesn't matter for performance. + */ +#ifndef XXPH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXPH_FORCE_ALIGN_CHECK 0 +# else +# define XXPH_FORCE_ALIGN_CHECK 1 +# endif +#endif + +/*!XXPH_REROLL: + * Whether to reroll XXPH32_finalize, and XXPH64_finalize, + * instead of using an unrolled jump table/if statement loop. + * + * This is automatically defined on -Os/-Oz on GCC and Clang. */ +#ifndef XXPH_REROLL +# if defined(__OPTIMIZE_SIZE__) +# define XXPH_REROLL 1 +# else +# define XXPH_REROLL 0 +# endif +#endif + +#include /* ULLONG_MAX */ + +#ifndef XXPH_STATIC_LINKING_ONLY +#define XXPH_STATIC_LINKING_ONLY +#endif + +/* BEGIN RocksDB customizations */ +#include "port/lang.h" /* for FALLTHROUGH_INTENDED, inserted as appropriate */ +/* END RocksDB customizations */ + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define XXPH_FORCE_INLINE static __forceinline +# define XXPH_NO_INLINE static __declspec(noinline) +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define XXPH_FORCE_INLINE static inline __attribute__((always_inline)) +# define XXPH_NO_INLINE static __attribute__((noinline)) +# else +# define XXPH_FORCE_INLINE static inline +# define XXPH_NO_INLINE static +# endif +# else +# define XXPH_FORCE_INLINE static +# define XXPH_NO_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + + +/* ************************************* +* Debug +***************************************/ +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. 
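+ *
+ * Editorial example (added by the editor, not upstream text): like the tuning
+ * macros above, this is normally set with a -D flag, e.g. a hypothetical
+ * invocation such as
+ *
+ *     cc -O2 -DDEBUGLEVEL=1 -DXXPH_FORCE_ALIGN_CHECK=0 -c some_unit.c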
*/ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + +#if (DEBUGLEVEL>=1) +# include /* note : can still be disabled with NDEBUG */ +# define XXPH_ASSERT(c) assert(c) +#else +# define XXPH_ASSERT(c) ((void)0) +#endif + +/* note : use after variable declarations */ +#define XXPH_STATIC_ASSERT(c) { enum { XXPH_sa = 1/(int)(!!(c)) }; } + + +/* ************************************* +* Basic Types +***************************************/ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t xxh_u8; +#else + typedef unsigned char xxh_u8; +#endif +typedef XXPH32_hash_t xxh_u32; + + +/* === Memory access === */ + +#if (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u32 XXPH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +static xxh_u32 XXPH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ +static xxh_u32 XXPH_read32(const void* memPtr) +{ + xxh_u32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXPH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* === Endianess === */ + +/* XXPH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXPH_CPU_LITTLE_ENDIAN +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXPH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXPH_CPU_LITTLE_ENDIAN 0 +# else +static int XXPH_isLittleEndian(void) +{ + const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} +# define XXPH_CPU_LITTLE_ENDIAN XXPH_isLittleEndian() +# endif +#endif + + + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXPH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +#if !defined(NO_CLANG_BUILTIN) && __has_builtin(__builtin_rotateleft32) && __has_builtin(__builtin_rotateleft64) +# define XXPH_rotl32 __builtin_rotateleft32 +# define XXPH_rotl64 __builtin_rotateleft64 +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#elif defined(_MSC_VER) +# define XXPH_rotl32(x,r) _rotl(x,r) +# define XXPH_rotl64(x,r) _rotl64(x,r) +#else +# define XXPH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# define XXPH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXPH_swap32 _byteswap_ulong +#elif XXPH_GCC_VERSION >= 403 +# define XXPH_swap32 __builtin_bswap32 +#else +static xxh_u32 XXPH_swap32 (xxh_u32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); 
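+    /* Editorial check (added by the editor): this fallback agrees with the
+     * builtin paths above, e.g. XXPH_swap32(0x12345678) == 0x78563412. */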
+} +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXPH_aligned, XXPH_unaligned } XXPH_alignment; + +XXPH_FORCE_INLINE xxh_u32 XXPH_readLE32(const void* ptr) +{ + return XXPH_CPU_LITTLE_ENDIAN ? XXPH_read32(ptr) : XXPH_swap32(XXPH_read32(ptr)); +} + +XXPH_FORCE_INLINE xxh_u32 +XXPH_readLE32_align(const void* ptr, XXPH_alignment align) +{ + if (align==XXPH_unaligned) { + return XXPH_readLE32(ptr); + } else { + return XXPH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXPH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +XXPH_PUBLIC_API unsigned XXPH_versionNumber (void) { return XXPH_VERSION_NUMBER; } + + +static const xxh_u32 PRIME32_1 = 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ +static const xxh_u32 PRIME32_2 = 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ +static const xxh_u32 PRIME32_3 = 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ +static const xxh_u32 PRIME32_4 = 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ +static const xxh_u32 PRIME32_5 = 0x165667B1U; /* 0b00010110010101100110011110110001 */ + +#ifndef XXPH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/*====== Memory access ======*/ + +typedef XXPH64_hash_t xxh_u64; + +#if (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXPH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + +#elif (defined(XXPH_FORCE_MEMORY_ACCESS) && (XXPH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +static xxh_u64 XXPH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static xxh_u64 XXPH_read64(const void* memPtr) +{ + xxh_u64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXPH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXPH_swap64 _byteswap_uint64 +#elif XXPH_GCC_VERSION >= 403 +# define XXPH_swap64 __builtin_bswap64 +#else +static xxh_u64 XXPH_swap64 (xxh_u64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +XXPH_FORCE_INLINE xxh_u64 XXPH_readLE64(const void* ptr) +{ + return XXPH_CPU_LITTLE_ENDIAN ? XXPH_read64(ptr) : XXPH_swap64(XXPH_read64(ptr)); +} + +XXPH_FORCE_INLINE xxh_u64 +XXPH_readLE64_align(const void* ptr, XXPH_alignment align) +{ + if (align==XXPH_unaligned) + return XXPH_readLE64(ptr); + else + return XXPH_CPU_LITTLE_ENDIAN ? 
*(const xxh_u64*)ptr : XXPH_swap64(*(const xxh_u64*)ptr); +} + + +/*====== xxh64 ======*/ + +static const xxh_u64 PRIME64_1 = 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */ +static const xxh_u64 PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */ +static const xxh_u64 PRIME64_3 = 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */ +static const xxh_u64 PRIME64_4 = 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */ +static const xxh_u64 PRIME64_5 = 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */ + + +/* ********************************************************************* +* XXPH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + +/*======== Was #include "xxh3.h", now inlined below ==========*/ + +/* + xxHash - Extremely Fast Hash algorithm + Development source file for `xxh3` + Copyright (C) 2019-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* RocksDB Note: This file contains a preview release (xxhash repository + version 0.7.2) of XXPH3 that is unlikely to be compatible with the final + version of XXPH3. We have therefore renamed this XXPH3 ("preview"), for + clarity so that we can continue to use this version even after + integrating a newer incompatible version. 
+*/
+
+/* === Dependencies === */
+
+#undef XXPH_INLINE_ALL   /* in case it's already defined */
+#define XXPH_INLINE_ALL
+
+
+/* === Compiler specifics === */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
+#  define XXPH_RESTRICT   restrict
+#else
+/* note : it might be useful to define __restrict or __restrict__ for some C++ compilers */
+#  define XXPH_RESTRICT   /* disable */
+#endif
+
+#if defined(__GNUC__)
+#  if defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
+#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#    define inline __inline__  /* clang bug */
+#    include <arm_neon.h>
+#    undef inline
+#  endif
+#elif defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+/*
+ * Sanity check.
+ *
+ * XXPH3 only requires these features to be efficient:
+ *
+ *  - Usable unaligned access
+ *  - A 32-bit or 64-bit ALU
+ *  - If 32-bit, a decent ADC instruction
+ *  - A 32 or 64-bit multiply with a 64-bit result
+ *
+ * Almost all 32-bit and 64-bit targets meet this, except for Thumb-1, the
+ * classic 16-bit only subset of ARM's instruction set.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ *
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
+ * Lo registers, and this shuffling results in thousands more MOVs than A32.
+ *
+ * A32 and T32 don't have this limitation. They can access all 14 registers,
+ * do a 32->64 multiply with UMULL, and the flexible operand is helpful too.
+ *
+ * If compiling Thumb-1 for a target which supports ARM instructions, we
+ * will give a warning.
+ *
+ * Usually, if this happens, it is because of an accident and you probably
+ * need to specify -march, as you probably meant to compile for a newer
+ * architecture.
+ */
+#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
+#  warning "XXPH3 is highly inefficient without ARM or Thumb-2."
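+/* Editorial example (added by the editor, not upstream text): the warning
+ * above is normally resolved by targeting a Thumb-2-capable core, e.g. a
+ * hypothetical invocation such as
+ *     cc -O2 -march=armv7-a -mthumb -c some_unit.c
+ */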
+#endif + +/* ========================================== + * Vectorization detection + * ========================================== */ +#define XXPH_SCALAR 0 +#define XXPH_SSE2 1 +#define XXPH_AVX2 2 +#define XXPH_NEON 3 +#define XXPH_VSX 4 + +#ifndef XXPH_VECTOR /* can be defined on command line */ +# if defined(__AVX2__) +# define XXPH_VECTOR XXPH_AVX2 +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define XXPH_VECTOR XXPH_SSE2 +# elif defined(__GNUC__) /* msvc support maybe later */ \ + && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \ + && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) +# define XXPH_VECTOR XXPH_NEON +# elif defined(__PPC64__) && defined(__POWER8_VECTOR__) && defined(__GNUC__) +# define XXPH_VECTOR XXPH_VSX +# else +# define XXPH_VECTOR XXPH_SCALAR +# endif +#endif + +/* control alignment of accumulator, + * for compatibility with fast vector loads */ +#ifndef XXPH_ACC_ALIGN +# if XXPH_VECTOR == 0 /* scalar */ +# define XXPH_ACC_ALIGN 8 +# elif XXPH_VECTOR == 1 /* sse2 */ +# define XXPH_ACC_ALIGN 16 +# elif XXPH_VECTOR == 2 /* avx2 */ +# define XXPH_ACC_ALIGN 32 +# elif XXPH_VECTOR == 3 /* neon */ +# define XXPH_ACC_ALIGN 16 +# elif XXPH_VECTOR == 4 /* vsx */ +# define XXPH_ACC_ALIGN 16 +# endif +#endif + +/* xxh_u64 XXPH_mult32to64(xxh_u32 a, xxh_u64 b) { return (xxh_u64)a * (xxh_u64)b; } */ +#if defined(_MSC_VER) && defined(_M_IX86) +# include +# define XXPH_mult32to64(x, y) __emulu(x, y) +#else +# define XXPH_mult32to64(x, y) ((xxh_u64)((x) & 0xFFFFFFFF) * (xxh_u64)((y) & 0xFFFFFFFF)) +#endif + +/* VSX stuff. It's a lot because VSX support is mediocre across compilers and + * there is a lot of mischief with endianness. */ +#if XXPH_VECTOR == XXPH_VSX +# include +# undef vector +typedef __vector unsigned long long U64x2; +typedef __vector unsigned char U8x16; +typedef __vector unsigned U32x4; + +#ifndef XXPH_VSX_BE +# if defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXPH_VSX_BE 1 +# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ +# warning "-maltivec=be is not recommended. Please use native endianness." +# define XXPH_VSX_BE 1 +# else +# define XXPH_VSX_BE 0 +# endif +#endif + +/* We need some helpers for big endian mode. */ +#if XXPH_VSX_BE +/* A wrapper for POWER9's vec_revb. */ +# ifdef __POWER9_VECTOR__ +# define XXPH_vec_revb vec_revb +# else +XXPH_FORCE_INLINE U64x2 XXPH_vec_revb(U64x2 val) +{ + U8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + return vec_perm(val, val, vByteSwap); +} +# endif + +/* Power8 Crypto gives us vpermxor which is very handy for + * PPC64EB. + * + * U8x16 vpermxor(U8x16 a, U8x16 b, U8x16 mask) + * { + * U8x16 ret; + * for (int i = 0; i < 16; i++) { + * ret[i] = a[mask[i] & 0xF] ^ b[mask[i] >> 4]; + * } + * return ret; + * } + * + * Because both of the main loops load the key, swap, and xor it with input, + * we can combine the key swap into this instruction. + */ +# ifdef vec_permxor +# define XXPH_vec_permxor vec_permxor +# else +# define XXPH_vec_permxor __builtin_crypto_vpermxor +# endif +#endif /* XXPH_VSX_BE */ +/* + * Because we reinterpret the multiply, there are endian memes: vec_mulo actually becomes + * vec_mule. 
+ * + * Additionally, the intrinsic wasn't added until GCC 8, despite existing for a while. + * Clang has an easy way to control this, we can just use the builtin which doesn't swap. + * GCC needs inline assembly. */ +#if __has_builtin(__builtin_altivec_vmuleuw) +# define XXPH_vec_mulo __builtin_altivec_vmulouw +# define XXPH_vec_mule __builtin_altivec_vmuleuw +#else +/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */ +XXPH_FORCE_INLINE U64x2 XXPH_vec_mulo(U32x4 a, U32x4 b) { + U64x2 result; + __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +XXPH_FORCE_INLINE U64x2 XXPH_vec_mule(U32x4 a, U32x4 b) { + U64x2 result; + __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + return result; +} +#endif /* __has_builtin(__builtin_altivec_vmuleuw) */ +#endif /* XXPH_VECTOR == XXPH_VSX */ + +/* prefetch + * can be disabled, by declaring XXPH_NO_PREFETCH build macro */ +#if defined(XXPH_NO_PREFETCH) +# define XXPH_PREFETCH(ptr) (void)(ptr) /* disabled */ +#else +#if defined(_MSC_VER) && \ + (defined(_M_X64) || \ + defined(_M_IX86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define XXPH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define XXPH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# else +# define XXPH_PREFETCH(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* XXPH_NO_PREFETCH */ + + +/* ========================================== + * XXPH3 default settings + * ========================================== */ + +#define XXPH_SECRET_DEFAULT_SIZE 192 /* minimum XXPH3_SECRET_SIZE_MIN */ + +#if (XXPH_SECRET_DEFAULT_SIZE < XXPH3_SECRET_SIZE_MIN) +# error "default keyset is not large enough" +#endif + +XXPH_ALIGN(64) static const xxh_u8 kSecret[XXPH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + +/* + * GCC for x86 has a tendency to use SSE in this loop. While it + * successfully avoids swapping (as MUL overwrites EAX and EDX), it + * slows it down because instead of free register swap shifts, it + * must use pshufd and punpckl/hd. + * + * To prevent this, we use this attribute to shut off SSE. 
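+ *
+ * Editorial note (added by the editor): the `target("no-sse")` attribute below
+ * is applied per function, so only XXPH_mult64to128 (and, further down,
+ * XXPH3_mul128_fold64) lose SSE code generation; the rest of the file is
+ * unaffected.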
+ */ +#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) +__attribute__((__target__("no-sse"))) +#endif +static XXPH128_hash_t +XXPH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this + * type despite not having the arithmetic for it. This results in a + * laggy compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if defined(__GNUC__) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; + XXPH128_hash_t const r128 = { (xxh_u64)(product), (xxh_u64)(product >> 64) }; + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ +#elif defined(_M_X64) || defined(_M_IA64) + +#ifndef _MSC_VER +# pragma intrinsic(_umul128) +#endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXPH128_hash_t const r128 = { product_low, product_high }; + return r128; + +#else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown + * below with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 + * --------- + * 6 9 7 5 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for + * UINT64_MAX. This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARMv6+ A32/T32, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, + * and allows this to be calculated in only 4 instructions which + * is comparable to some 64-bit ALUs. + * + * 3. It isn't terrible on other platforms. Usually this will be + * a couple of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXPH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXPH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXPH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXPH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. 
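+     *
+     * Editorial justification (added by the editor): with B = 0xFFFFFFFF,
+     * `cross` is at most B + B + B*B = 2^64 - 1, and `upper` is bounded the
+     * same way, so the 64-bit additions below cannot wrap.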
*/ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXPH128_hash_t r128 = { lower, upper }; + return r128; +#endif +} + +/* + * We want to keep the attribute here because a target switch + * disables inlining. + * + * Does a 64-bit to 128-bit multiply, then XOR folds it. + * The reason for the separate function is to prevent passing + * too many structs around by value. This will hopefully inline + * the multiply, but we don't force it. + */ +#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) +__attribute__((__target__("no-sse"))) +#endif +static xxh_u64 +XXPH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXPH128_hash_t product = XXPH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + + +static XXPH64_hash_t XXPH3_avalanche(xxh_u64 h64) +{ + h64 ^= h64 >> 37; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +/* ========================================== + * Short keys + * ========================================== */ + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(input != NULL); + XXPH_ASSERT(1 <= len && len <= 3); + XXPH_ASSERT(secret != NULL); + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1) | (((xxh_u32)c2) << 8) | (((xxh_u32)c3) << 16) | (((xxh_u32)len) << 24); + xxh_u64 const keyed = (xxh_u64)combined ^ (XXPH_readLE32(secret) + seed); + xxh_u64 const mixed = keyed * PRIME64_1; + return XXPH3_avalanche(mixed); + } +} + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(input != NULL); + XXPH_ASSERT(secret != NULL); + XXPH_ASSERT(4 <= len && len <= 8); + { xxh_u32 const input_lo = XXPH_readLE32(input); + xxh_u32 const input_hi = XXPH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo | ((xxh_u64)input_hi << 32); + xxh_u64 const keyed = input_64 ^ (XXPH_readLE64(secret) + seed); + xxh_u64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1); + return XXPH3_avalanche((mix64 ^ (mix64 >> 47)) * PRIME64_2); + } +} + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(input != NULL); + XXPH_ASSERT(secret != NULL); + XXPH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const input_lo = XXPH_readLE64(input) ^ (XXPH_readLE64(secret) + seed); + xxh_u64 const input_hi = XXPH_readLE64(input + len - 8) ^ (XXPH_readLE64(secret + 8) - seed); + xxh_u64 const acc = len + (input_lo + input_hi) + XXPH3_mul128_fold64(input_lo, input_hi); + return XXPH3_avalanche(acc); + } +} + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXPH64_hash_t seed) +{ + XXPH_ASSERT(len <= 16); + { if (len > 8) return XXPH3_len_9to16_64b(input, len, secret, seed); + if (len >= 4) return XXPH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXPH3_len_1to3_64b(input, len, secret, seed); + /* + * RocksDB modification from XXPH3 preview: zero result for empty + * string can be problematic for multiplication-based algorithms. + * Return a hash of the seed instead. 
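+     *
+     * Editorial note (added by the editor): as a consequence, a zero-length
+     * input no longer hashes to the constant 0; the result depends on the
+     * seed and on the first 8 bytes of the secret, so different seeds will
+     * (with overwhelming probability) give different empty-input hashes.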
+ */ + return XXPH3_mul128_fold64(seed + XXPH_readLE64(secret), PRIME64_2); + } +} + + +/* === Long Keys === */ + +#define STRIPE_LEN 64 +#define XXPH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ +#define ACC_NB (STRIPE_LEN / sizeof(xxh_u64)) + +typedef enum { XXPH3_acc_64bits, XXPH3_acc_128bits } XXPH3_accWidth_e; + +XXPH_FORCE_INLINE void +XXPH3_accumulate_512( void* XXPH_RESTRICT acc, + const void* XXPH_RESTRICT input, + const void* XXPH_RESTRICT secret, + XXPH3_accWidth_e accWidth) +{ +#if (XXPH_VECTOR == XXPH_AVX2) + + XXPH_ASSERT((((size_t)acc) & 31) == 0); + { XXPH_ALIGN(32) __m256i* const xacc = (__m256i *) acc; + const __m256i* const xinput = (const __m256i *) input; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ + const __m256i* const xsecret = (const __m256i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */ + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) { + __m256i const data_vec = _mm256_loadu_si256 (xinput+i); + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); /* uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */ + __m256i const product = _mm256_mul_epu32 (data_key, _mm256_shuffle_epi32 (data_key, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */ + if (accWidth == XXPH3_acc_128bits) { + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + xacc[i] = _mm256_add_epi64(product, sum); + } else { /* XXPH3_acc_64bits */ + __m256i const sum = _mm256_add_epi64(xacc[i], data_vec); + xacc[i] = _mm256_add_epi64(product, sum); + } + } } + +#elif (XXPH_VECTOR == XXPH_SSE2) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + { XXPH_ALIGN(16) __m128i* const xacc = (__m128i *) acc; + const __m128i* const xinput = (const __m128i *) input; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ + const __m128i* const xsecret = (const __m128i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */ + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) { + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); /* uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */ + __m128i const product = _mm_mul_epu32 (data_key, _mm_shuffle_epi32 (data_key, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */ + if (accWidth == XXPH3_acc_128bits) { + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + xacc[i] = _mm_add_epi64(product, sum); + } else { /* XXPH3_acc_64bits */ + __m128i const sum = _mm_add_epi64(xacc[i], data_vec); + xacc[i] = _mm_add_epi64(product, sum); + } + } } + +#elif (XXPH_VECTOR == XXPH_NEON) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + { + XXPH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. 
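+     *
+     * Editorial note (added by the editor): every SIMD path in this function
+     * implements the same per-64-bit-lane update, spelled out by the scalar
+     * variant at the end of the function:
+     *
+     *     dk        = data ^ key;
+     *     acc[i]   += (dk & 0xFFFFFFFF) * (dk >> 32);   // 32x32->64 multiply
+     *     acc[i]   += data;       // 64-bit mode
+     *     acc[i^1] += data;       // 128-bit mode adds the swapped lane instead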
*/ + uint8_t const* const xinput = (const uint8_t *) input; + uint8_t const* const xsecret = (const uint8_t *) secret; + + size_t i; + for (i=0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) { +#if !defined(__aarch64__) && !defined(__arm64__) && defined(__GNUC__) /* ARM32-specific hack */ + /* vzip on ARMv7 Clang generates a lot of vmovs (technically vorrs) without this. + * vzip on 32-bit ARM NEON will overwrite the original register, and I think that Clang + * assumes I don't want to destroy it and tries to make a copy. This slows down the code + * a lot. + * aarch64 not only uses an entirely different syntax, but it requires three + * instructions... + * ext v1.16B, v0.16B, #8 // select high bits because aarch64 can't address them directly + * zip1 v3.2s, v0.2s, v1.2s // first zip + * zip2 v2.2s, v0.2s, v1.2s // second zip + * ...to do what ARM does in one: + * vzip.32 d0, d1 // Interleave high and low bits and overwrite. */ + + /* data_vec = xsecret[i]; */ + uint8x16_t const data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t const key_vec = vld1q_u8(xsecret + (i * 16)); + /* data_key = data_vec ^ key_vec; */ + uint32x4_t data_key; + + if (accWidth == XXPH3_acc_64bits) { + /* Add first to prevent register swaps */ + /* xacc[i] += data_vec; */ + xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u8(data_vec)); + } else { /* XXPH3_acc_128bits */ + /* xacc[i] += swap(data_vec); */ + /* can probably be optimized better */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped= vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + } + + data_key = vreinterpretq_u32_u8(veorq_u8(data_vec, key_vec)); + + /* Here's the magic. We use the quirkiness of vzip to shuffle data_key in place. + * shuffle: data_key[0, 1, 2, 3] = data_key[0, 2, 1, 3] */ + __asm__("vzip.32 %e0, %f0" : "+w" (data_key)); + /* xacc[i] += (uint64x2_t) data_key[0, 1] * (uint64x2_t) data_key[2, 3]; */ + xacc[i] = vmlal_u32(xacc[i], vget_low_u32(data_key), vget_high_u32(data_key)); + +#else + /* On aarch64, vshrn/vmovn seems to be equivalent to, if not faster than, the vzip method. 
*/ + + /* data_vec = xsecret[i]; */ + uint8x16_t const data_vec = vld1q_u8(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint8x16_t const key_vec = vld1q_u8(xsecret + (i * 16)); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t const data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec)); + /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); */ + uint32x2_t const data_key_lo = vmovn_u64 (data_key); + /* data_key_hi = (uint32x2_t) (data_key >> 32); */ + uint32x2_t const data_key_hi = vshrn_n_u64 (data_key, 32); + if (accWidth == XXPH3_acc_64bits) { + /* xacc[i] += data_vec; */ + xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u8(data_vec)); + } else { /* XXPH3_acc_128bits */ + /* xacc[i] += swap(data_vec); */ + uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec); + uint64x2_t const swapped= vextq_u64(data64, data64, 1); + xacc[i] = vaddq_u64 (xacc[i], swapped); + } + /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ + xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi); + +#endif + } + } + +#elif (XXPH_VECTOR == XXPH_VSX) && /* work around a compiler bug */ (__GNUC__ > 5) + U64x2* const xacc = (U64x2*) acc; /* presumed aligned */ + U64x2 const* const xinput = (U64x2 const*) input; /* no alignment restriction */ + U64x2 const* const xsecret = (U64x2 const*) secret; /* no alignment restriction */ + U64x2 const v32 = { 32, 32 }; +#if XXPH_VSX_BE + U8x16 const vXorSwap = { 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70, + 0x8F, 0x9E, 0xAD, 0xBC, 0xCB, 0xDA, 0xE9, 0xF8 }; +#endif + size_t i; + for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) { + /* data_vec = xinput[i]; */ + /* key_vec = xsecret[i]; */ +#if XXPH_VSX_BE + /* byteswap */ + U64x2 const data_vec = XXPH_vec_revb(vec_vsx_ld(0, xinput + i)); + U64x2 const key_raw = vec_vsx_ld(0, xsecret + i); + /* See comment above. 
data_key = data_vec ^ swap(xsecret[i]); */ + U64x2 const data_key = (U64x2)XXPH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); +#else + U64x2 const data_vec = vec_vsx_ld(0, xinput + i); + U64x2 const key_vec = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = data_vec ^ key_vec; +#endif + /* shuffled = (data_key << 32) | (data_key >> 32); */ + U32x4 const shuffled = (U32x4)vec_rl(data_key, v32); + /* product = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)shuffled & 0xFFFFFFFF); */ + U64x2 const product = XXPH_vec_mulo((U32x4)data_key, shuffled); + xacc[i] += product; + + if (accWidth == XXPH3_acc_64bits) { + xacc[i] += data_vec; + } else { /* XXPH3_acc_128bits */ + /* swap high and low halves */ + U64x2 const data_swapped = vec_xxpermdi(data_vec, data_vec, 2); + xacc[i] += data_swapped; + } + } + +#else /* scalar variant of Accumulator - universal */ + + XXPH_ALIGN(XXPH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ + const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXPH_ASSERT(((size_t)acc & (XXPH_ACC_ALIGN-1)) == 0); + for (i=0; i < ACC_NB; i++) { + xxh_u64 const data_val = XXPH_readLE64(xinput + 8*i); + xxh_u64 const data_key = data_val ^ XXPH_readLE64(xsecret + i*8); + + if (accWidth == XXPH3_acc_64bits) { + xacc[i] += data_val; + } else { + xacc[i ^ 1] += data_val; /* swap adjacent lanes */ + } + xacc[i] += XXPH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + } +#endif +} + +XXPH_FORCE_INLINE void +XXPH3_scrambleAcc(void* XXPH_RESTRICT acc, const void* XXPH_RESTRICT secret) +{ +#if (XXPH_VECTOR == XXPH_AVX2) + + XXPH_ASSERT((((size_t)acc) & 31) == 0); + { XXPH_ALIGN(32) __m256i* const xacc = (__m256i*) acc; + const __m256i* const xsecret = (const __m256i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this argument type */ + const __m256i prime32 = _mm256_set1_epi32((int)PRIME32_1); + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); + __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + + /* xacc[i] *= PRIME32_1; */ + __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, 0x31); + __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } + +#elif (XXPH_VECTOR == XXPH_SSE2) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + { XXPH_ALIGN(16) __m128i* const xacc = (__m128i*) acc; + const __m128i* const xsecret = (const __m128i *) secret; /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this argument type */ + const __m128i prime32 = _mm_set1_epi32((int)PRIME32_1); + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); + __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + __m128i const data_key = 
_mm_xor_si128 (data_vec, key_vec); + + /* xacc[i] *= PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, 0x31); + __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); + } + } + +#elif (XXPH_VECTOR == XXPH_NEON) + + XXPH_ASSERT((((size_t)acc) & 15) == 0); + + { uint64x2_t* const xacc = (uint64x2_t*) acc; + uint8_t const* const xsecret = (uint8_t const*) secret; + uint32x2_t const prime = vdup_n_u32 (PRIME32_1); + + size_t i; + for (i=0; i < STRIPE_LEN/sizeof(uint64x2_t); i++) { + /* data_vec = xacc[i] ^ (xacc[i] >> 47); */ + uint64x2_t const acc_vec = xacc[i]; + uint64x2_t const shifted = vshrq_n_u64 (acc_vec, 47); + uint64x2_t const data_vec = veorq_u64 (acc_vec, shifted); + + /* key_vec = xsecret[i]; */ + uint32x4_t const key_vec = vreinterpretq_u32_u8(vld1q_u8(xsecret + (i * 16))); + /* data_key = data_vec ^ key_vec; */ + uint32x4_t const data_key = veorq_u32 (vreinterpretq_u32_u64(data_vec), key_vec); + /* shuffled = { data_key[0, 2], data_key[1, 3] }; */ + uint32x2x2_t const shuffled = vzip_u32 (vget_low_u32(data_key), vget_high_u32(data_key)); + + /* data_key *= PRIME32_1 */ + + /* prod_hi = (data_key >> 32) * PRIME32_1; */ + uint64x2_t const prod_hi = vmull_u32 (shuffled.val[1], prime); + /* xacc[i] = prod_hi << 32; */ + xacc[i] = vshlq_n_u64(prod_hi, 32); + /* xacc[i] += (prod_hi & 0xFFFFFFFF) * PRIME32_1; */ + xacc[i] = vmlal_u32(xacc[i], shuffled.val[0], prime); + } } + +#elif (XXPH_VECTOR == XXPH_VSX) && /* work around a compiler bug */ (__GNUC__ > 5) + + U64x2* const xacc = (U64x2*) acc; + const U64x2* const xsecret = (const U64x2*) secret; + /* constants */ + U64x2 const v32 = { 32, 32 }; + U64x2 const v47 = { 47, 47 }; + U32x4 const prime = { PRIME32_1, PRIME32_1, PRIME32_1, PRIME32_1 }; + size_t i; +#if XXPH_VSX_BE + /* endian swap */ + U8x16 const vXorSwap = { 0x07, 0x16, 0x25, 0x34, 0x43, 0x52, 0x61, 0x70, + 0x8F, 0x9E, 0xAD, 0xBC, 0xCB, 0xDA, 0xE9, 0xF8 }; +#endif + for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) { + U64x2 const acc_vec = xacc[i]; + U64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + /* key_vec = xsecret[i]; */ +#if XXPH_VSX_BE + /* swap bytes words */ + U64x2 const key_raw = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = (U64x2)XXPH_vec_permxor((U8x16)data_vec, (U8x16)key_raw, vXorSwap); +#else + U64x2 const key_vec = vec_vsx_ld(0, xsecret + i); + U64x2 const data_key = data_vec ^ key_vec; +#endif + + /* data_key *= PRIME32_1 */ + + /* prod_lo = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)prime & 0xFFFFFFFF); */ + U64x2 const prod_even = XXPH_vec_mule((U32x4)data_key, prime); + /* prod_hi = ((U64x2)data_key >> 32) * ((U64x2)prime >> 32); */ + U64x2 const prod_odd = XXPH_vec_mulo((U32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } + +#else /* scalar variant of Scrambler - universal */ + + XXPH_ALIGN(XXPH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */ + const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ + size_t i; + XXPH_ASSERT((((size_t)acc) & (XXPH_ACC_ALIGN-1)) == 0); + for (i=0; i < ACC_NB; i++) { + xxh_u64 const key64 = XXPH_readLE64(xsecret + 8*i); + xxh_u64 acc64 = xacc[i]; + acc64 ^= acc64 >> 47; + acc64 ^= key64; + acc64 *= PRIME32_1; + xacc[i] = acc64; + } + +#endif +} + +#define XXPH_PREFETCH_DIST 384 + +/* assumption : nbStripes will not overflow 
secret size */ +XXPH_FORCE_INLINE void +XXPH3_accumulate( xxh_u64* XXPH_RESTRICT acc, + const xxh_u8* XXPH_RESTRICT input, + const xxh_u8* XXPH_RESTRICT secret, + size_t nbStripes, + XXPH3_accWidth_e accWidth) +{ + size_t n; + for (n = 0; n < nbStripes; n++ ) { + const xxh_u8* const in = input + n*STRIPE_LEN; + XXPH_PREFETCH(in + XXPH_PREFETCH_DIST); + XXPH3_accumulate_512(acc, + in, + secret + n*XXPH_SECRET_CONSUME_RATE, + accWidth); + } +} + +/* note : clang auto-vectorizes well in SS2 mode _if_ this function is `static`, + * and doesn't auto-vectorize it at all if it is `FORCE_INLINE`. + * However, it auto-vectorizes better AVX2 if it is `FORCE_INLINE` + * Pretty much every other modes and compilers prefer `FORCE_INLINE`. + */ + +#if defined(__clang__) && (XXPH_VECTOR==0) && !defined(__AVX2__) && !defined(__arm__) && !defined(__thumb__) +static void +#else +XXPH_FORCE_INLINE void +#endif +XXPH3_hashLong_internal_loop( xxh_u64* XXPH_RESTRICT acc, + const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize, + XXPH3_accWidth_e accWidth) +{ + size_t const nb_rounds = (secretSize - STRIPE_LEN) / XXPH_SECRET_CONSUME_RATE; + size_t const block_len = STRIPE_LEN * nb_rounds; + size_t const nb_blocks = len / block_len; + + size_t n; + + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + XXPH3_accumulate(acc, input + n*block_len, secret, nb_rounds, accWidth); + XXPH3_scrambleAcc(acc, secret + secretSize - STRIPE_LEN); + } + + /* last partial block */ + XXPH_ASSERT(len > STRIPE_LEN); + { size_t const nbStripes = (len - (block_len * nb_blocks)) / STRIPE_LEN; + XXPH_ASSERT(nbStripes <= (secretSize / XXPH_SECRET_CONSUME_RATE)); + XXPH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, accWidth); + + /* last stripe */ + if (len & (STRIPE_LEN - 1)) { + const xxh_u8* const p = input + len - STRIPE_LEN; +#define XXPH_SECRET_LASTACC_START 7 /* do not align on 8, so that secret is different from scrambler */ + XXPH3_accumulate_512(acc, p, secret + secretSize - STRIPE_LEN - XXPH_SECRET_LASTACC_START, accWidth); + } } +} + +XXPH_FORCE_INLINE xxh_u64 +XXPH3_mix2Accs(const xxh_u64* XXPH_RESTRICT acc, const xxh_u8* XXPH_RESTRICT secret) +{ + return XXPH3_mul128_fold64( + acc[0] ^ XXPH_readLE64(secret), + acc[1] ^ XXPH_readLE64(secret+8) ); +} + +static XXPH64_hash_t +XXPH3_mergeAccs(const xxh_u64* XXPH_RESTRICT acc, const xxh_u8* XXPH_RESTRICT secret, xxh_u64 start) +{ + xxh_u64 result64 = start; + + result64 += XXPH3_mix2Accs(acc+0, secret + 0); + result64 += XXPH3_mix2Accs(acc+2, secret + 16); + result64 += XXPH3_mix2Accs(acc+4, secret + 32); + result64 += XXPH3_mix2Accs(acc+6, secret + 48); + + return XXPH3_avalanche(result64); +} + +#define XXPH3_INIT_ACC { PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, \ + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1 }; + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_hashLong_internal(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize) +{ + XXPH_ALIGN(XXPH_ACC_ALIGN) xxh_u64 acc[ACC_NB] = XXPH3_INIT_ACC; + + XXPH3_hashLong_internal_loop(acc, input, len, secret, secretSize, XXPH3_acc_64bits); + + /* converge into final hash */ + XXPH_STATIC_ASSERT(sizeof(acc) == 64); +#define XXPH_SECRET_MERGEACCS_START 11 /* do not align on 8, so that secret is different from accumulator */ + XXPH_ASSERT(secretSize >= sizeof(acc) + XXPH_SECRET_MERGEACCS_START); + return XXPH3_mergeAccs(acc, secret + XXPH_SECRET_MERGEACCS_START, (xxh_u64)len * PRIME64_1); 
+} + + +XXPH_NO_INLINE XXPH64_hash_t /* It's important for performance that XXPH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ +XXPH3_hashLong_64b_defaultSecret(const xxh_u8* XXPH_RESTRICT input, size_t len) +{ + return XXPH3_hashLong_internal(input, len, kSecret, sizeof(kSecret)); +} + +XXPH_NO_INLINE XXPH64_hash_t /* It's important for performance that XXPH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */ +XXPH3_hashLong_64b_withSecret(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize) +{ + return XXPH3_hashLong_internal(input, len, secret, secretSize); +} + + +XXPH_FORCE_INLINE void XXPH_writeLE64(void* dst, xxh_u64 v64) +{ + if (!XXPH_CPU_LITTLE_ENDIAN) v64 = XXPH_swap64(v64); + memcpy(dst, &v64, sizeof(v64)); +} + +/* XXPH3_initCustomSecret() : + * destination `customSecret` is presumed allocated and same size as `kSecret`. + */ +XXPH_FORCE_INLINE void XXPH3_initCustomSecret(xxh_u8* customSecret, xxh_u64 seed64) +{ + int const nbRounds = XXPH_SECRET_DEFAULT_SIZE / 16; + int i; + + XXPH_STATIC_ASSERT((XXPH_SECRET_DEFAULT_SIZE & 15) == 0); + + for (i=0; i < nbRounds; i++) { + XXPH_writeLE64(customSecret + 16*i, XXPH_readLE64(kSecret + 16*i) + seed64); + XXPH_writeLE64(customSecret + 16*i + 8, XXPH_readLE64(kSecret + 16*i + 8) - seed64); + } +} + + +/* XXPH3_hashLong_64b_withSeed() : + * Generate a custom key, + * based on alteration of default kSecret with the seed, + * and then use this key for long mode hashing. + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + */ +XXPH_NO_INLINE XXPH64_hash_t /* It's important for performance that XXPH3_hashLong is not inlined. 
Not sure why (uop cache maybe ?), but difference is large and easily measurable */ +XXPH3_hashLong_64b_withSeed(const xxh_u8* input, size_t len, XXPH64_hash_t seed) +{ + XXPH_ALIGN(8) xxh_u8 secret[XXPH_SECRET_DEFAULT_SIZE]; + if (seed==0) return XXPH3_hashLong_64b_defaultSecret(input, len); + XXPH3_initCustomSecret(secret, seed); + return XXPH3_hashLong_internal(input, len, secret, sizeof(secret)); +} + + +XXPH_FORCE_INLINE xxh_u64 XXPH3_mix16B(const xxh_u8* XXPH_RESTRICT input, + const xxh_u8* XXPH_RESTRICT secret, xxh_u64 seed64) +{ + xxh_u64 const input_lo = XXPH_readLE64(input); + xxh_u64 const input_hi = XXPH_readLE64(input+8); + return XXPH3_mul128_fold64( + input_lo ^ (XXPH_readLE64(secret) + seed64), + input_hi ^ (XXPH_readLE64(secret+8) - seed64) ); +} + + +XXPH_FORCE_INLINE XXPH64_hash_t +XXPH3_len_17to128_64b(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize, + XXPH64_hash_t seed) +{ + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); (void)secretSize; + XXPH_ASSERT(16 < len && len <= 128); + + { xxh_u64 acc = len * PRIME64_1; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += XXPH3_mix16B(input+48, secret+96, seed); + acc += XXPH3_mix16B(input+len-64, secret+112, seed); + } + acc += XXPH3_mix16B(input+32, secret+64, seed); + acc += XXPH3_mix16B(input+len-48, secret+80, seed); + } + acc += XXPH3_mix16B(input+16, secret+32, seed); + acc += XXPH3_mix16B(input+len-32, secret+48, seed); + } + acc += XXPH3_mix16B(input+0, secret+0, seed); + acc += XXPH3_mix16B(input+len-16, secret+16, seed); + + return XXPH3_avalanche(acc); + } +} + +#define XXPH3_MIDSIZE_MAX 240 + +XXPH_NO_INLINE XXPH64_hash_t +XXPH3_len_129to240_64b(const xxh_u8* XXPH_RESTRICT input, size_t len, + const xxh_u8* XXPH_RESTRICT secret, size_t secretSize, + XXPH64_hash_t seed) +{ + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); (void)secretSize; + XXPH_ASSERT(128 < len && len <= XXPH3_MIDSIZE_MAX); + + #define XXPH3_MIDSIZE_STARTOFFSET 3 + #define XXPH3_MIDSIZE_LASTOFFSET 17 + + { xxh_u64 acc = len * PRIME64_1; + int const nbRounds = (int)len / 16; + int i; + for (i=0; i<8; i++) { + acc += XXPH3_mix16B(input+(16*i), secret+(16*i), seed); + } + acc = XXPH3_avalanche(acc); + XXPH_ASSERT(nbRounds >= 8); + for (i=8 ; i < nbRounds; i++) { + acc += XXPH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXPH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += XXPH3_mix16B(input + len - 16, secret + XXPH3_SECRET_SIZE_MIN - XXPH3_MIDSIZE_LASTOFFSET, seed); + return XXPH3_avalanche(acc); + } +} + +/* === Public entry point === */ + +XXPH_PUBLIC_API XXPH64_hash_t XXPH3_64bits(const void* input, size_t len) +{ + if (len <= 16) return XXPH3_len_0to16_64b((const xxh_u8*)input, len, kSecret, 0); + if (len <= 128) return XXPH3_len_17to128_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + if (len <= XXPH3_MIDSIZE_MAX) return XXPH3_len_129to240_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), 0); + return XXPH3_hashLong_64b_defaultSecret((const xxh_u8*)input, len); +} + +XXPH_PUBLIC_API XXPH64_hash_t +XXPH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) +{ + XXPH_ASSERT(secretSize >= XXPH3_SECRET_SIZE_MIN); + /* if an action must be taken should `secret` conditions not be respected, + * it should be done here. + * For now, it's a contract pre-condition. 
+ * Adding a check and a branch here would cost performance at every hash */ + if (len <= 16) return XXPH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, 0); + if (len <= 128) return XXPH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + if (len <= XXPH3_MIDSIZE_MAX) return XXPH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, 0); + return XXPH3_hashLong_64b_withSecret((const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize); +} + +XXPH_PUBLIC_API XXPH64_hash_t +XXPH3_64bits_withSeed(const void* input, size_t len, XXPH64_hash_t seed) +{ + if (len <= 16) return XXPH3_len_0to16_64b((const xxh_u8*)input, len, kSecret, seed); + if (len <= 128) return XXPH3_len_17to128_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + if (len <= XXPH3_MIDSIZE_MAX) return XXPH3_len_129to240_64b((const xxh_u8*)input, len, kSecret, sizeof(kSecret), seed); + return XXPH3_hashLong_64b_withSeed((const xxh_u8*)input, len, seed); +} + +/* === XXPH3 streaming === */ + +/* RocksDB Note: unused & removed due to bug in preview version */ + +/*======== END #include "xxh3.h", now inlined above ==========*/ + +#endif /* XXPH_NO_LONG_LONG */ + +/* === END RocksDB modification of permanently inlining === */ + +#endif /* defined(XXPH_INLINE_ALL) || defined(XXPH_PRIVATE_API) */ + +#endif /* XXPH_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* XXPHASH_H_5627135585666179 */ -- cgit v1.2.3
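For reference, a minimal caller sketch for the 64-bit entry points added above, assuming the header is included as "util/xxph3.h" and consumed with XXPH_INLINE_ALL (both assumptions; adjust to the actual build). As the comment on XXPH3_hashLong_64b_withSeed notes, deriving a custom secret costs a little time, so seed==0 keeps long inputs on the default-secret fast path.

/* Illustrative caller sketch (not part of the upstream file).
 * Assumes the header path and XXPH_INLINE_ALL consumption model. */
#define XXPH_INLINE_ALL
#include "util/xxph3.h"   /* assumed include path */

#include <stdio.h>
#include <string.h>

int main(void) {
  const char msg[] = "rocksdb xxph3 demo";
  size_t len = strlen(msg);

  /* Default secret, seed 0: dispatches on length
   * (0-16, 17-128, 129-240, then the long-input path). */
  XXPH64_hash_t h_default = XXPH3_64bits(msg, len);

  /* Non-zero seed: short inputs mix the seed directly; inputs longer than
   * XXPH3_MIDSIZE_MAX first derive a custom secret from kSecret. */
  XXPH64_hash_t h_seeded = XXPH3_64bits_withSeed(msg, len, 42);

  printf("%016llx\n%016llx\n",
         (unsigned long long)h_default,
         (unsigned long long)h_seeded);
  return 0;
}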