From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 21 Apr 2024 13:54:28 +0200
Subject: Adding upstream version 18.2.2.

Signed-off-by: Daniel Baumann
---
 src/rocksdb/util/ribbon_test.cc | 1308 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 1308 insertions(+)
 create mode 100644 src/rocksdb/util/ribbon_test.cc

diff --git a/src/rocksdb/util/ribbon_test.cc b/src/rocksdb/util/ribbon_test.cc
new file mode 100644
index 000000000..6519df3d5
--- /dev/null
+++ b/src/rocksdb/util/ribbon_test.cc
@@ -0,0 +1,1308 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "rocksdb/system_clock.h"
+#include "test_util/testharness.h"
+#include "util/bloom_impl.h"
+#include "util/coding.h"
+#include "util/hash.h"
+#include "util/ribbon_config.h"
+#include "util/ribbon_impl.h"
+#include "util/stop_watch.h"
+#include "util/string_util.h"
+
+#ifndef GFLAGS
+uint32_t FLAGS_thoroughness = 5;
+uint32_t FLAGS_max_add = 0;
+uint32_t FLAGS_min_check = 4000;
+uint32_t FLAGS_max_check = 100000;
+bool FLAGS_verbose = false;
+
+bool FLAGS_find_occ = false;
+bool FLAGS_find_slot_occ = false;
+double FLAGS_find_next_factor = 1.618;
+uint32_t FLAGS_find_iters = 10000;
+uint32_t FLAGS_find_min_slots = 128;
+uint32_t FLAGS_find_max_slots = 1000000;
+
+bool FLAGS_optimize_homog = false;
+uint32_t FLAGS_optimize_homog_slots = 30000000;
+uint32_t FLAGS_optimize_homog_check = 200000;
+double FLAGS_optimize_homog_granularity = 0.002;
+#else
+#include "util/gflags_compat.h"
+using GFLAGS_NAMESPACE::ParseCommandLineFlags;
+// Using 500 is a good test when you have time to be thorough.
+// Default is for general RocksDB regression test runs.
+DEFINE_uint32(thoroughness, 5, "iterations per configuration");
+DEFINE_uint32(max_add, 0,
+              "Add up to this number of entries to a single filter in "
+              "CompactnessAndBacktrackAndFpRate; 0 == reasonable default");
+DEFINE_uint32(min_check, 4000,
+              "Minimum number of novel entries for testing FP rate");
+DEFINE_uint32(max_check, 100000,
+              "Maximum number of novel entries for testing FP rate");
+DEFINE_bool(verbose, false, "Print extra details");
+
+// Options for FindOccupancy, which is more of a tool than a test.
+DEFINE_bool(find_occ, false, "whether to run the FindOccupancy tool");
+DEFINE_bool(find_slot_occ, false,
+            "whether to show individual slot occupancies with "
+            "FindOccupancy tool");
+DEFINE_double(find_next_factor, 1.618,
+              "factor to next num_slots for FindOccupancy");
+DEFINE_uint32(find_iters, 10000, "number of samples for FindOccupancy");
+DEFINE_uint32(find_min_slots, 128, "number of slots for FindOccupancy");
+DEFINE_uint32(find_max_slots, 1000000, "number of slots for FindOccupancy");
+
+// Options for OptimizeHomogAtScale, which is more of a tool than a test.
+DEFINE_bool(optimize_homog, false,
+            "whether to run the OptimizeHomogAtScale tool");
+DEFINE_uint32(optimize_homog_slots, 30000000,
+              "number of slots for OptimizeHomogAtScale");
+DEFINE_uint32(optimize_homog_check, 200000,
+              "number of queries for checking FP rate in OptimizeHomogAtScale");
+DEFINE_double(
+    optimize_homog_granularity, 0.002,
+    "overhead change between FP rate checking in OptimizeHomogAtScale");
+
+#endif  // GFLAGS
+
+template <typename TypesAndSettings>
+class RibbonTypeParamTest : public ::testing::Test {};
+
+class RibbonTest : public ::testing::Test {};
+
+namespace {
+
+// Different ways of generating keys for testing
+
+// Generate semi-sequential keys
+struct StandardKeyGen {
+  StandardKeyGen(const std::string& prefix, uint64_t id)
+      : id_(id), str_(prefix) {
+    ROCKSDB_NAMESPACE::PutFixed64(&str_, /*placeholder*/ 0);
+  }
+
+  // Prefix increment (only one required)
+  StandardKeyGen& operator++() {
+    ++id_;
+    return *this;
+  }
+
+  StandardKeyGen& operator+=(uint64_t i) {
+    id_ += i;
+    return *this;
+  }
+
+  const std::string& operator*() {
+    // Use multiplication to mix things up a little in the key
+    ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8],
+                                     id_ * uint64_t{0x1500000001});
+    return str_;
+  }
+
+  bool operator==(const StandardKeyGen& other) {
+    // Same prefix is assumed
+    return id_ == other.id_;
+  }
+  bool operator!=(const StandardKeyGen& other) {
+    // Same prefix is assumed
+    return id_ != other.id_;
+  }
+
+  uint64_t id_;
+  std::string str_;
+};
+
+// Generate small sequential keys, that can misbehave with sequential seeds
+// as in https://github.com/Cyan4973/xxHash/issues/469.
+// These keys are only heuristically unique, but that's OK with 64 bits,
+// for testing purposes.
+struct SmallKeyGen {
+  SmallKeyGen(const std::string& prefix, uint64_t id) : id_(id) {
+    // Hash the prefix for a heuristically unique offset
+    id_ += ROCKSDB_NAMESPACE::GetSliceHash64(prefix);
+    ROCKSDB_NAMESPACE::PutFixed64(&str_, id_);
+  }
+
+  // Prefix increment (only one required)
+  SmallKeyGen& operator++() {
+    ++id_;
+    return *this;
+  }
+
+  SmallKeyGen& operator+=(uint64_t i) {
+    id_ += i;
+    return *this;
+  }
+
+  const std::string& operator*() {
+    ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], id_);
+    return str_;
+  }
+
+  bool operator==(const SmallKeyGen& other) { return id_ == other.id_; }
+  bool operator!=(const SmallKeyGen& other) { return id_ != other.id_; }
+
+  uint64_t id_;
+  std::string str_;
+};
+
+template <typename KeyGen>
+struct Hash32KeyGenWrapper : public KeyGen {
+  Hash32KeyGenWrapper(const std::string& prefix, uint64_t id)
+      : KeyGen(prefix, id) {}
+  uint32_t operator*() {
+    auto& key = *static_cast<KeyGen&>(*this);
+    // unseeded
+    return ROCKSDB_NAMESPACE::GetSliceHash(key);
+  }
+};
+
+template <typename KeyGen>
+struct Hash64KeyGenWrapper : public KeyGen {
+  Hash64KeyGenWrapper(const std::string& prefix, uint64_t id)
+      : KeyGen(prefix, id) {}
+  uint64_t operator*() {
+    auto& key = *static_cast<KeyGen&>(*this);
+    // unseeded
+    return ROCKSDB_NAMESPACE::GetSliceHash64(key);
+  }
+};
+
+using ROCKSDB_NAMESPACE::ribbon::ConstructionFailureChance;
+
+const std::vector<ConstructionFailureChance> kFailureOnly50Pct = {
+    ROCKSDB_NAMESPACE::ribbon::kOneIn2};
+
+const std::vector<ConstructionFailureChance> kFailureOnlyRare = {
+    ROCKSDB_NAMESPACE::ribbon::kOneIn1000};
+
+const std::vector<ConstructionFailureChance> kFailureAll = {
+    ROCKSDB_NAMESPACE::ribbon::kOneIn2, ROCKSDB_NAMESPACE::ribbon::kOneIn20,
+    ROCKSDB_NAMESPACE::ribbon::kOneIn1000};
+
+}  // namespace
+
+using ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate;
+using ROCKSDB_NAMESPACE::ribbon::StandardHasher;
+using ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter;
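+
+// Illustrative sketch (not part of the upstream file): the generators above
+// act like forward iterators over deterministic key sets, so a test's input
+// is just a [begin, end) pair built from a shared prefix:
+//
+//   StandardKeyGen cur("example", 0);
+//   StandardKeyGen end("example", 100);
+//   for (; cur != end; ++cur) {
+//     const std::string& key = *cur;  // 100 distinct keys, id mixed into key
+//   }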
+
+struct DefaultTypesAndSettings {
+  using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128;
+  using ResultRow = uint8_t;
+  using Index = uint32_t;
+  using Hash = uint64_t;
+  using Seed = uint32_t;
+  using Key = ROCKSDB_NAMESPACE::Slice;
+  static constexpr bool kIsFilter = true;
+  static constexpr bool kHomogeneous = false;
+  static constexpr bool kFirstCoeffAlwaysOne = true;
+  static constexpr bool kUseSmash = false;
+  static constexpr bool kAllowZeroStarts = false;
+  static Hash HashFn(const Key& key, uint64_t raw_seed) {
+    // This version 0.7.2 preview of XXH3 (a.k.a. XXPH3) function does
+    // not pass SmallKeyGen tests below without some seed premixing from
+    // StandardHasher. See https://github.com/Cyan4973/xxHash/issues/469
+    return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), raw_seed);
+  }
+  // For testing
+  using KeyGen = StandardKeyGen;
+  static const std::vector<ConstructionFailureChance>& FailureChanceToTest() {
+    return kFailureAll;
+  }
+};
+
+using TypesAndSettings_Coeff128 = DefaultTypesAndSettings;
+struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings {
+  static constexpr bool kUseSmash = true;
+};
+struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings {
+  using CoeffRow = uint64_t;
+};
+struct TypesAndSettings_Coeff64Smash : public TypesAndSettings_Coeff64 {
+  static constexpr bool kUseSmash = true;
+};
+struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash {
+  static constexpr bool kFirstCoeffAlwaysOne = false;
+};
+
+// Homogeneous Ribbon configurations
+struct TypesAndSettings_Coeff128_Homog : public DefaultTypesAndSettings {
+  static constexpr bool kHomogeneous = true;
+  // Since our best construction success setting still has 1/1000 failure
+  // rate, the best FP rate we test is 1/256
+  using ResultRow = uint8_t;
+  // Homogeneous only makes sense with sufficient slots for equivalent of
+  // almost sure construction success
+  static const std::vector<ConstructionFailureChance>& FailureChanceToTest() {
+    return kFailureOnlyRare;
+  }
+};
+struct TypesAndSettings_Coeff128Smash_Homog
+    : public TypesAndSettings_Coeff128_Homog {
+  // Smash (extra time to save space) + Homog (extra space to save time)
+  // doesn't make much sense in practice, but we minimally test it
+  static constexpr bool kUseSmash = true;
+};
+struct TypesAndSettings_Coeff64_Homog : public TypesAndSettings_Coeff128_Homog {
+  using CoeffRow = uint64_t;
+};
+struct TypesAndSettings_Coeff64Smash_Homog
+    : public TypesAndSettings_Coeff64_Homog {
+  // Smash (extra time to save space) + Homog (extra space to save time)
+  // doesn't make much sense in practice, but we minimally test it
+  static constexpr bool kUseSmash = true;
+};
+
+// Less exhaustive mix of coverage, but still covering the most stressful case
+// (only 50% construction success)
+struct AbridgedTypesAndSettings : public DefaultTypesAndSettings {
+  static const std::vector<ConstructionFailureChance>& FailureChanceToTest() {
+    return kFailureOnly50Pct;
+  }
+};
+struct TypesAndSettings_Result16 : public AbridgedTypesAndSettings {
+  using ResultRow = uint16_t;
+};
+struct TypesAndSettings_Result32 : public AbridgedTypesAndSettings {
+  using ResultRow = uint32_t;
+};
+struct TypesAndSettings_IndexSizeT : public AbridgedTypesAndSettings {
+  using Index = size_t;
+};
+struct TypesAndSettings_Hash32 : public AbridgedTypesAndSettings {
+  using Hash = uint32_t;
+  static Hash HashFn(const Key& key, Hash raw_seed) {
+    // This MurmurHash1 function does not pass tests below without the
+    // seed premixing from StandardHasher. In fact, it needs more than
+    // just a multiplication mixer on the ordinal seed.
+    return ROCKSDB_NAMESPACE::Hash(key.data(), key.size(), raw_seed);
+  }
+};
+struct TypesAndSettings_Hash32_Result16 : public TypesAndSettings_Hash32 {
+  using ResultRow = uint16_t;
+};
+struct TypesAndSettings_KeyString : public AbridgedTypesAndSettings {
+  using Key = std::string;
+};
+struct TypesAndSettings_Seed8 : public AbridgedTypesAndSettings {
+  // This is not a generally recommended configuration. With the configured
+  // hash function, it would fail with SmallKeyGen due to insufficient
+  // independence among the seeds.
+  using Seed = uint8_t;
+};
+struct TypesAndSettings_NoAlwaysOne : public AbridgedTypesAndSettings {
+  static constexpr bool kFirstCoeffAlwaysOne = false;
+};
+struct TypesAndSettings_AllowZeroStarts : public AbridgedTypesAndSettings {
+  static constexpr bool kAllowZeroStarts = true;
+};
+struct TypesAndSettings_Seed64 : public AbridgedTypesAndSettings {
+  using Seed = uint64_t;
+};
+struct TypesAndSettings_Rehasher
+    : public StandardRehasherAdapter<AbridgedTypesAndSettings> {
+  using KeyGen = Hash64KeyGenWrapper<StandardKeyGen>;
+};
+struct TypesAndSettings_Rehasher_Result16 : public TypesAndSettings_Rehasher {
+  using ResultRow = uint16_t;
+};
+struct TypesAndSettings_Rehasher_Result32 : public TypesAndSettings_Rehasher {
+  using ResultRow = uint32_t;
+};
+struct TypesAndSettings_Rehasher_Seed64
+    : public StandardRehasherAdapter<TypesAndSettings_Seed64> {
+  using KeyGen = Hash64KeyGenWrapper<StandardKeyGen>;
+  // Note: 64-bit seed with Rehasher gives slightly better average reseeds
+};
+struct TypesAndSettings_Rehasher32
+    : public StandardRehasherAdapter<TypesAndSettings_Hash32> {
+  using KeyGen = Hash32KeyGenWrapper<StandardKeyGen>;
+};
+struct TypesAndSettings_Rehasher32_Coeff64
+    : public TypesAndSettings_Rehasher32 {
+  using CoeffRow = uint64_t;
+};
+struct TypesAndSettings_SmallKeyGen : public AbridgedTypesAndSettings {
+  // SmallKeyGen stresses the independence of different hash seeds
+  using KeyGen = SmallKeyGen;
+};
+struct TypesAndSettings_Hash32_SmallKeyGen : public TypesAndSettings_Hash32 {
+  // SmallKeyGen stresses the independence of different hash seeds
+  using KeyGen = SmallKeyGen;
+};
+struct TypesAndSettings_Coeff32 : public DefaultTypesAndSettings {
+  using CoeffRow = uint32_t;
+};
+struct TypesAndSettings_Coeff32Smash : public TypesAndSettings_Coeff32 {
+  static constexpr bool kUseSmash = true;
+};
+struct TypesAndSettings_Coeff16 : public DefaultTypesAndSettings {
+  using CoeffRow = uint16_t;
+};
+struct TypesAndSettings_Coeff16Smash : public TypesAndSettings_Coeff16 {
+  static constexpr bool kUseSmash = true;
+};
+
+using TestTypesAndSettings = ::testing::Types<
+    TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash,
+    TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash,
+    TypesAndSettings_Coeff64Smash0, TypesAndSettings_Coeff128_Homog,
+    TypesAndSettings_Coeff128Smash_Homog, TypesAndSettings_Coeff64_Homog,
+    TypesAndSettings_Coeff64Smash_Homog, TypesAndSettings_Result16,
+    TypesAndSettings_Result32, TypesAndSettings_IndexSizeT,
+    TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16,
+    TypesAndSettings_KeyString, TypesAndSettings_Seed8,
+    TypesAndSettings_NoAlwaysOne, TypesAndSettings_AllowZeroStarts,
+    TypesAndSettings_Seed64, TypesAndSettings_Rehasher,
+    TypesAndSettings_Rehasher_Result16, TypesAndSettings_Rehasher_Result32,
+    TypesAndSettings_Rehasher_Seed64, TypesAndSettings_Rehasher32,
+    TypesAndSettings_Rehasher32_Coeff64, TypesAndSettings_SmallKeyGen,
+    TypesAndSettings_Hash32_SmallKeyGen, TypesAndSettings_Coeff32,
+    TypesAndSettings_Coeff32Smash, TypesAndSettings_Coeff16,
+    TypesAndSettings_Coeff16Smash>;
+TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings);
+
+namespace {
+
+// For testing Poisson-distributed (or similar) statistics, get value for
+// `stddevs_allowed` standard deviations above expected mean
+// `expected_count`.
+// (Poisson approximates Binomial only if probability of a trial being
+// in the count is low.)
+uint64_t PoissonUpperBound(double expected_count, double stddevs_allowed) {
+  return static_cast<uint64_t>(
+      expected_count + stddevs_allowed * std::sqrt(expected_count) + 1.0);
+}
+
+uint64_t PoissonLowerBound(double expected_count, double stddevs_allowed) {
+  return static_cast<uint64_t>(std::max(
+      0.0, expected_count - stddevs_allowed * std::sqrt(expected_count)));
+}
+
+uint64_t FrequentPoissonUpperBound(double expected_count) {
+  // Allow up to 5.0 standard deviations for frequently checked statistics
+  return PoissonUpperBound(expected_count, 5.0);
+}
+
+uint64_t FrequentPoissonLowerBound(double expected_count) {
+  return PoissonLowerBound(expected_count, 5.0);
+}
+
+uint64_t InfrequentPoissonUpperBound(double expected_count) {
+  // Allow up to 3 standard deviations for infrequently checked statistics
+  return PoissonUpperBound(expected_count, 3.0);
+}
+
+uint64_t InfrequentPoissonLowerBound(double expected_count) {
+  return PoissonLowerBound(expected_count, 3.0);
+}
+
+}  // namespace
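+
+// Worked example (sketch, not part of the upstream file): a Poisson count
+// has variance equal to its mean, so for expected_count = 100 the standard
+// deviation is sqrt(100) = 10. FrequentPoissonUpperBound(100) =
+// 100 + 5 * 10 + 1 = 151 and FrequentPoissonLowerBound(100) = 50 bound the
+// window within which a frequently checked statistic must fall.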
+
+TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) {
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
+  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
+  using KeyGen = typename TypeParam::KeyGen;
+  using ConfigHelper =
+      ROCKSDB_NAMESPACE::ribbon::BandingConfigHelper<TypeParam>;
+
+  if (sizeof(CoeffRow) < 8) {
+    ROCKSDB_GTEST_BYPASS("Not fully supported");
+    return;
+  }
+
+  const auto log2_thoroughness =
+      static_cast<Seed>(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness));
+
+  // We are going to choose num_to_add using an exponential distribution,
+  // so that we have good representation of small-to-medium filters.
+  // Here we just pick some reasonable, practical upper bound based on
+  // kCoeffBits or option.
+  const double log_max_add = std::log(
+      FLAGS_max_add > 0 ? FLAGS_max_add
+                        : static_cast<uint32_t>(kCoeffBits * kCoeffBits) *
+                              std::max(FLAGS_thoroughness, uint32_t{32}));
+
+  // This needs to be enough below the minimum number of slots to get a
+  // reasonable number of samples with the minimum number of slots.
+  const double log_min_add = std::log(0.66 * SimpleSoln::RoundUpNumSlots(1));
+
+  ASSERT_GT(log_max_add, log_min_add);
+
+  const double diff_log_add = log_max_add - log_min_add;
+
+  for (ConstructionFailureChance cs : TypeParam::FailureChanceToTest()) {
+    double expected_reseeds;
+    switch (cs) {
+      default:
+        assert(false);
+        FALLTHROUGH_INTENDED;
+      case ROCKSDB_NAMESPACE::ribbon::kOneIn2:
+        fprintf(stderr, "== Failure: 50 percent\n");
+        expected_reseeds = 1.0;
+        break;
+      case ROCKSDB_NAMESPACE::ribbon::kOneIn20:
+        fprintf(stderr, "== Failure: 5 percent\n");
+        expected_reseeds = 0.053;
+        break;
+      case ROCKSDB_NAMESPACE::ribbon::kOneIn1000:
+        fprintf(stderr, "== Failure: 1/1000\n");
+        expected_reseeds = 0.001;
+        break;
+    }
+
+    uint64_t total_reseeds = 0;
+    uint64_t total_singles = 0;
+    uint64_t total_single_failures = 0;
+    uint64_t total_batch = 0;
+    uint64_t total_batch_successes = 0;
+    uint64_t total_fp_count = 0;
+    uint64_t total_added = 0;
+    uint64_t total_expand_trials = 0;
+    uint64_t total_expand_failures = 0;
+    double total_expand_overhead = 0.0;
+
+    uint64_t soln_query_nanos = 0;
+    uint64_t soln_query_count = 0;
+    uint64_t bloom_query_nanos = 0;
+    uint64_t isoln_query_nanos = 0;
+    uint64_t isoln_query_count = 0;
+
+    // Take different samples if you change thoroughness
+    ROCKSDB_NAMESPACE::Random32 rnd(FLAGS_thoroughness);
+
+    for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) {
+      // We are going to choose num_to_add using an exponential distribution
+      // as noted above, but instead of randomly choosing them, we generate
+      // samples linearly using the golden ratio, which ensures a nice spread
+      // even for a small number of samples, and starting with the minimum
+      // number of slots to ensure it is tested.
+      double log_add =
+          std::fmod(0.6180339887498948482 * diff_log_add * i, diff_log_add) +
+          log_min_add;
+      uint32_t num_to_add = static_cast<uint32_t>(std::exp(log_add));
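+
+      // Worked example (sketch, not part of the upstream file): the golden
+      // ratio step visits fractions i * 0.618... mod 1 of the log range:
+      // i = 0, 1, 2, 3 -> 0.000, 0.618, 0.236, 0.854, a low-discrepancy
+      // sequence that spreads samples evenly without randomness.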
+
+      // Most of the time, test the Interleaved solution storage, but when
+      // we do we have to make num_slots a multiple of kCoeffBits. So
+      // sometimes we want to test without that limitation.
+      bool test_interleaved = (i % 7) != 6;
+
+      // Compute num_slots, and re-adjust num_to_add to get as close as
+      // possible to next num_slots, to stress that num_slots in terms of
+      // construction success. Ensure at least one iteration:
+      Index num_slots = Index{0} - 1;
+      --num_to_add;
+      for (;;) {
+        Index next_num_slots = SimpleSoln::RoundUpNumSlots(
+            ConfigHelper::GetNumSlots(num_to_add + 1, cs));
+        if (test_interleaved) {
+          next_num_slots = InterleavedSoln::RoundUpNumSlots(next_num_slots);
+          // assert idempotent
+          EXPECT_EQ(next_num_slots,
+                    InterleavedSoln::RoundUpNumSlots(next_num_slots));
+        }
+        // assert idempotent with InterleavedSoln::RoundUpNumSlots
+        EXPECT_EQ(next_num_slots, SimpleSoln::RoundUpNumSlots(next_num_slots));
+
+        if (next_num_slots > num_slots) {
+          break;
+        }
+        num_slots = next_num_slots;
+        ++num_to_add;
+      }
+      assert(num_slots < Index{0} - 1);
+
+      total_added += num_to_add;
+
+      std::string prefix;
+      ROCKSDB_NAMESPACE::PutFixed32(&prefix, rnd.Next());
+
+      // Batch that must be added
+      std::string added_str = prefix + "added";
+      KeyGen keys_begin(added_str, 0);
+      KeyGen keys_end(added_str, num_to_add);
+
+      // A couple more that will probably be added
+      KeyGen one_more(prefix + "more", 1);
+      KeyGen two_more(prefix + "more", 2);
+
+      // Batch that may or may not be added
+      uint32_t batch_size =
+          static_cast<uint32_t>(2.0 * std::sqrt(num_slots - num_to_add));
+      if (batch_size < 10U) {
+        batch_size = 0;
+      }
+      std::string batch_str = prefix + "batch";
+      KeyGen batch_begin(batch_str, 0);
+      KeyGen batch_end(batch_str, batch_size);
+
+      // Batch never (successfully) added, but used for querying FP rate
+      std::string not_str = prefix + "not";
+      KeyGen other_keys_begin(not_str, 0);
+      KeyGen other_keys_end(not_str, FLAGS_max_check);
+
+      double overhead_ratio = 1.0 * num_slots / num_to_add;
+      if (FLAGS_verbose) {
+        fprintf(stderr, "Adding(%s) %u / %u Overhead: %g Batch size: %u\n",
+                test_interleaved ? "i" : "s", (unsigned)num_to_add,
+                (unsigned)num_slots, overhead_ratio, (unsigned)batch_size);
+      }
+
+      // Vary bytes for InterleavedSoln to use number of solution columns
+      // from 0 to max allowed by ResultRow type (and used by SimpleSoln).
+      // Specifically include 0 and max, and otherwise skew toward max.
+      uint32_t max_ibytes =
+          static_cast<uint32_t>(sizeof(ResultRow) * num_slots);
+      size_t ibytes;
+      if (i == 0) {
+        ibytes = 0;
+      } else if (i == 1) {
+        ibytes = max_ibytes;
+      } else {
+        // Skewed
+        ibytes =
+            std::max(rnd.Uniformish(max_ibytes), rnd.Uniformish(max_ibytes));
+      }
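+      // Note (sketch, not part of the upstream file): taking the max of two
+      // Uniformish draws skews toward max_ibytes, with CDF
+      // P[max(U1, U2) <= x] = (x / max_ibytes)^2; e.g. half of the samples
+      // land above ~0.71 * max_ibytes.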
+      std::unique_ptr<char[]> idata(new char[ibytes]);
+      InterleavedSoln isoln(idata.get(), ibytes);
+
+      SimpleSoln soln;
+      Hasher hasher;
+      bool first_single;
+      bool second_single;
+      bool batch_success;
+      {
+        Banding banding;
+        // Traditional solve for a fixed set.
+        ASSERT_TRUE(
+            banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end));
+
+        Index occupied_count = banding.GetOccupiedCount();
+        Index more_added = 0;
+
+        if (TypeParam::kHomogeneous || overhead_ratio < 1.01 ||
+            batch_size == 0) {
+          // Homogeneous not compatible with backtracking because add
+          // doesn't fail. Small overhead ratio too packed to expect more
+          first_single = false;
+          second_single = false;
+          batch_success = false;
+        } else {
+          // Now to test backtracking, starting with guaranteed fail. By using
+          // the keys that will be used to test FP rate, we are then doing an
+          // extra check that after backtracking there are no remnants (e.g. in
+          // result side of banding) of these entries.
+          KeyGen other_keys_too_big_end = other_keys_begin;
+          other_keys_too_big_end += num_to_add;
+          banding.EnsureBacktrackSize(std::max(num_to_add, batch_size));
+          EXPECT_FALSE(banding.AddRangeOrRollBack(other_keys_begin,
+                                                  other_keys_too_big_end));
+          EXPECT_EQ(occupied_count, banding.GetOccupiedCount());
+
+          // Check that we still have a good chance of adding a couple more
+          // individually
+          first_single = banding.Add(*one_more);
+          second_single = banding.Add(*two_more);
+          more_added += (first_single ? 1 : 0) + (second_single ? 1 : 0);
+          total_singles += 2U;
+          total_single_failures += 2U - more_added;
+
+          // Or as a batch
+          batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end);
+          ++total_batch;
+          if (batch_success) {
+            more_added += batch_size;
+            ++total_batch_successes;
+          }
+          EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added);
+        }
+
+        // Also verify that redundant adds are OK (no effect)
+        ASSERT_TRUE(
+            banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8)));
+        EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added);
+
+        // Now back-substitution
+        soln.BackSubstFrom(banding);
+        if (test_interleaved) {
+          isoln.BackSubstFrom(banding);
+        }
+
+        Seed reseeds = banding.GetOrdinalSeed();
+        total_reseeds += reseeds;
+
+        EXPECT_LE(reseeds, 8 + log2_thoroughness);
+        if (reseeds > log2_thoroughness + 1) {
+          fprintf(
+              stderr, "%s high reseeds at %u, %u/%u: %u\n",
+              reseeds > log2_thoroughness + 8 ? "ERROR Extremely" : "Somewhat",
+              static_cast<unsigned>(i), static_cast<unsigned>(num_to_add),
+              static_cast<unsigned>(num_slots), static_cast<unsigned>(reseeds));
+        }
+
+        if (reseeds > 0) {
+          // "Expand" test: given a failed construction, how likely is it to
+          // pass with same seed and more slots. At each step, we increase
+          // enough to ensure there is at least one shift within each coeff
+          // block.
+          ++total_expand_trials;
+          Index expand_count = 0;
+          Index ex_slots = num_slots;
+          banding.SetOrdinalSeed(0);
+          for (;; ++expand_count) {
+            ASSERT_LE(expand_count, log2_thoroughness);
+            ex_slots += ex_slots / kCoeffBits;
+            if (test_interleaved) {
+              ex_slots = InterleavedSoln::RoundUpNumSlots(ex_slots);
+            }
+            banding.Reset(ex_slots);
+            bool success = banding.AddRange(keys_begin, keys_end);
+            if (success) {
+              break;
+            }
+          }
+          total_expand_failures += expand_count;
+          total_expand_overhead += 1.0 * (ex_slots - num_slots) / num_slots;
+        }
+
+        hasher.SetOrdinalSeed(reseeds);
+      }
+      // soln and hasher now independent of Banding object
+
+      // Verify keys added
+      KeyGen cur = keys_begin;
+      while (cur != keys_end) {
+        ASSERT_TRUE(soln.FilterQuery(*cur, hasher));
+        ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher));
+        ++cur;
+      }
+      // We (maybe) snuck these in!
+      if (first_single) {
+        ASSERT_TRUE(soln.FilterQuery(*one_more, hasher));
+        ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*one_more, hasher));
+      }
+      if (second_single) {
+        ASSERT_TRUE(soln.FilterQuery(*two_more, hasher));
+        ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*two_more, hasher));
+      }
+      if (batch_success) {
+        cur = batch_begin;
+        while (cur != batch_end) {
+          ASSERT_TRUE(soln.FilterQuery(*cur, hasher));
+          ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher));
+          ++cur;
+        }
+      }
+
+      // Check FP rate (depends only on number of result bits == solution
+      // columns)
+      Index fp_count = 0;
+      cur = other_keys_begin;
+      {
+        ROCKSDB_NAMESPACE::StopWatchNano timer(
+            ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);
+        while (cur != other_keys_end) {
+          bool fp = soln.FilterQuery(*cur, hasher);
+          fp_count += fp ? 1 : 0;
+          ++cur;
+        }
+        soln_query_nanos += timer.ElapsedNanos();
+        soln_query_count += FLAGS_max_check;
+      }
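+      // Note (sketch, not part of the upstream file): the correction below
+      // accounts for two distinct keys hashing to the same Hash value, which
+      // makes one an unavoidable false positive. The effect is roughly
+      // num_to_add / 2^64 per query with a 64-bit Hash (negligible), but
+      // with Hash = uint32_t and num_to_add = 1000000 it is about
+      // 1e6 / 2^32 ~= 0.0002, large enough to matter in these checks.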
+      {
+        double expected_fp_count = soln.ExpectedFpRate() * FLAGS_max_check;
+        // For expected FP rate, also include false positives due to collisions
+        // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
+        double correction =
+            FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add);
+
+        // NOTE: rare violations expected with kHomogeneous
+        EXPECT_LE(fp_count,
+                  FrequentPoissonUpperBound(expected_fp_count + correction));
+        EXPECT_GE(fp_count,
+                  FrequentPoissonLowerBound(expected_fp_count + correction));
+      }
+      total_fp_count += fp_count;
+
+      // And also check FP rate for isoln
+      if (test_interleaved) {
+        Index ifp_count = 0;
+        cur = other_keys_begin;
+        ROCKSDB_NAMESPACE::StopWatchNano timer(
+            ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);
+        while (cur != other_keys_end) {
+          ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0;
+          ++cur;
+        }
+        isoln_query_nanos += timer.ElapsedNanos();
+        isoln_query_count += FLAGS_max_check;
+        {
+          double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check;
+          // For expected FP rate, also include false positives due to
+          // collisions in Hash value. (Negligible for 64-bit, can matter for
+          // 32-bit.)
+          double correction =
+              FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add);
+
+          // NOTE: rare violations expected with kHomogeneous
+          EXPECT_LE(ifp_count,
+                    FrequentPoissonUpperBound(expected_fp_count + correction));
+
+          // FIXME: why sometimes can we slightly "beat the odds"?
+          // (0.95 factor should not be needed)
+          EXPECT_GE(ifp_count, FrequentPoissonLowerBound(
+                                   0.95 * expected_fp_count + correction));
+        }
+        // Since the bits used in isoln are a subset of the bits used in soln,
+        // it cannot have fewer FPs
+        EXPECT_GE(ifp_count, fp_count);
+      }
+
+      // And compare to Bloom time, for fun
+      if (ibytes >= /* minimum Bloom impl bytes*/ 64) {
+        Index bfp_count = 0;
+        cur = other_keys_begin;
+        ROCKSDB_NAMESPACE::StopWatchNano timer(
+            ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);
+        while (cur != other_keys_end) {
+          uint64_t h = hasher.GetHash(*cur);
+          uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h);
+          uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h)
+                                          : h1 * 0x9e3779b9;
+          bfp_count +=
+              ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch(
+                  h1, h2, static_cast<uint32_t>(ibytes), 6, idata.get())
+                  ? 1
+                  : 0;
+          ++cur;
+        }
+        bloom_query_nanos += timer.ElapsedNanos();
+        // ensure bfp_count is used
+        ASSERT_LT(bfp_count, FLAGS_max_check);
+      }
+    }
+
+    // "outside" == key not in original set so either negative or false positive
+    fprintf(stderr,
+            "Simple outside query, hot, incl hashing, ns/key: %g\n",
+            1.0 * soln_query_nanos / soln_query_count);
+    fprintf(stderr,
+            "Interleaved outside query, hot, incl hashing, ns/key: %g\n",
+            1.0 * isoln_query_nanos / isoln_query_count);
+    fprintf(stderr,
+            "Bloom outside query, hot, incl hashing, ns/key: %g\n",
+            1.0 * bloom_query_nanos / soln_query_count);
+
+    if (TypeParam::kHomogeneous) {
+      EXPECT_EQ(total_reseeds, 0U);
+    } else {
+      double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness;
+      fprintf(stderr, "Average re-seeds: %g\n", average_reseeds);
+      // Values above were chosen to target around 50% chance of encoding
+      // success rate (average of 1.0 re-seeds) or slightly better. But 1.15
+      // is also close enough.
+      EXPECT_LE(total_reseeds,
+                InfrequentPoissonUpperBound(1.15 * expected_reseeds *
+                                            FLAGS_thoroughness));
+      // Would use 0.85 here instead of 0.75, but
+      // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of
+      // sequential keys with a small, cheap hash function. We accept that
+      // there are surely inputs that are somewhat bad for this setup, but
+      // these somewhat good inputs are probably more likely.
+      EXPECT_GE(total_reseeds,
+                InfrequentPoissonLowerBound(0.75 * expected_reseeds *
+                                            FLAGS_thoroughness));
+    }
+
+    if (total_expand_trials > 0) {
+      double average_expand_failures =
+          1.0 * total_expand_failures / total_expand_trials;
+      fprintf(stderr, "Average expand failures, and overhead: %g, %g\n",
+              average_expand_failures,
+              total_expand_overhead / total_expand_trials);
+      // Seems to be a generous allowance
+      EXPECT_LE(total_expand_failures,
+                InfrequentPoissonUpperBound(1.0 * total_expand_trials));
+    } else {
+      fprintf(stderr, "Average expand failures: N/A\n");
+    }
+
+    if (total_singles > 0) {
+      double single_failure_rate = 1.0 * total_single_failures / total_singles;
+      fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate);
+      // A rough bound (one sided) based on nothing in particular
+      double expected_single_failures = 1.0 * total_singles /
+                                        (sizeof(CoeffRow) == 16 ? 128
+                                         : TypeParam::kUseSmash ? 64
+                                                                : 32);
+      EXPECT_LE(total_single_failures,
+                InfrequentPoissonUpperBound(expected_single_failures));
+    }
+
+    if (total_batch > 0) {
+      // Counting successes here for Poisson to approximate the Binomial
+      // distribution.
+      // A rough bound (one sided) based on nothing in particular.
+      double expected_batch_successes = 1.0 * total_batch / 2;
+      uint64_t lower_bound =
+          InfrequentPoissonLowerBound(expected_batch_successes);
+      fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n",
+              1.0 * total_batch_successes / total_batch,
+              1.0 * lower_bound / total_batch);
+      EXPECT_GE(total_batch_successes, lower_bound);
+    }
+
+    {
+      uint64_t total_checked = uint64_t{FLAGS_max_check} * FLAGS_thoroughness;
+      double expected_total_fp_count =
+          total_checked * std::pow(0.5, 8U * sizeof(ResultRow));
+      // For expected FP rate, also include false positives due to collisions
+      // in Hash value. (Negligible for 64-bit, can matter for 32-bit.)
+      double average_added = 1.0 * total_added / FLAGS_thoroughness;
+      expected_total_fp_count +=
+          total_checked * ExpectedCollisionFpRate(Hasher(), average_added);
+
+      uint64_t upper_bound =
+          InfrequentPoissonUpperBound(expected_total_fp_count);
+      uint64_t lower_bound =
+          InfrequentPoissonLowerBound(expected_total_fp_count);
+      fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n",
+              1.0 * total_fp_count / total_checked,
+              expected_total_fp_count / total_checked,
+              1.0 * upper_bound / total_checked,
+              1.0 * lower_bound / total_checked);
+      EXPECT_LE(total_fp_count, upper_bound);
+      EXPECT_GE(total_fp_count, lower_bound);
+    }
+  }
+}
+
+TYPED_TEST(RibbonTypeParamTest, Extremes) {
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
+  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
+  using KeyGen = typename TypeParam::KeyGen;
+
+  size_t bytes = 128 * 1024;
+  std::unique_ptr<char[]> buf(new char[bytes]);
+  InterleavedSoln isoln(buf.get(), bytes);
+  SimpleSoln soln;
+  Hasher hasher;
+  Banding banding;
+
+  // ########################################
+  // Add zero keys to minimal number of slots
+  KeyGen begin_and_end("foo", 123);
+  ASSERT_TRUE(banding.ResetAndFindSeedToSolve(
+      /*slots*/ kCoeffBits, begin_and_end, begin_and_end, /*first seed*/ 0,
+      /* seed mask*/ 0));
+
+  soln.BackSubstFrom(banding);
+  isoln.BackSubstFrom(banding);
+
+  // Because there's plenty of memory, we expect the interleaved solution to
+  // use maximum supported columns (same as simple solution)
+  ASSERT_EQ(isoln.GetUpperNumColumns(), 8U * sizeof(ResultRow));
+  ASSERT_EQ(isoln.GetUpperStartBlock(), 0U);
+
+  // Somewhat oddly, we expect same FP rate as if we had essentially filled
+  // up the slots.
+  KeyGen other_keys_begin("not", 0);
+  KeyGen other_keys_end("not", FLAGS_max_check);
+
+  Index fp_count = 0;
+  KeyGen cur = other_keys_begin;
+  while (cur != other_keys_end) {
+    bool isoln_query_result = isoln.FilterQuery(*cur, hasher);
+    bool soln_query_result = soln.FilterQuery(*cur, hasher);
+    // Solutions are equivalent
+    ASSERT_EQ(isoln_query_result, soln_query_result);
+    if (!TypeParam::kHomogeneous) {
+      // And in fact we only expect an FP when ResultRow is 0
+      // (except Homogeneous)
+      ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash(
+                                       hasher.GetHash(*cur)) == ResultRow{0});
+    }
+    fp_count += soln_query_result ? 1 : 0;
+    ++cur;
+  }
+  {
+    ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate());
+    double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check;
+    EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count));
+    if (TypeParam::kHomogeneous) {
+      // Pseudorandom garbage in Homogeneous filter can "beat the odds" if
+      // nothing added
+    } else {
+      EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count));
+    }
+  }
+
+  // ######################################################
+  // Use zero bytes for interleaved solution (key(s) added)
+
+  // Add one key
+  KeyGen key_begin("added", 0);
+  KeyGen key_end("added", 1);
+  ASSERT_TRUE(banding.ResetAndFindSeedToSolve(
+      /*slots*/ kCoeffBits, key_begin, key_end, /*first seed*/ 0,
+      /* seed mask*/ 0));
+
+  InterleavedSoln isoln2(nullptr, /*bytes*/ 0);
+
+  isoln2.BackSubstFrom(banding);
+
+  ASSERT_EQ(isoln2.GetUpperNumColumns(), 0U);
+  ASSERT_EQ(isoln2.GetUpperStartBlock(), 0U);
+
+  // All queries return true
+  ASSERT_TRUE(isoln2.FilterQuery(*other_keys_begin, hasher));
+  ASSERT_EQ(isoln2.ExpectedFpRate(), 1.0);
+}
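+
+// Note (sketch, not part of the upstream file): in the zero-keys case above,
+// back-substitution leaves every solution row zero, so a non-Homogeneous
+// query is a false positive exactly when its expected ResultRow is 0, i.e.
+// with probability 2^-(8 * sizeof(ResultRow)) (1/256 for uint8_t), matching
+// the "essentially filled up" FP rate asserted in Extremes.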
+
+TEST(RibbonTest, AllowZeroStarts) {
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts);
+  IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts);
+  using KeyGen = StandardKeyGen;
+
+  InterleavedSoln isoln(nullptr, /*bytes*/ 0);
+  SimpleSoln soln;
+  Hasher hasher;
+  Banding banding;
+
+  KeyGen begin("foo", 0);
+  KeyGen end("foo", 1);
+  // Can't add 1 entry
+  ASSERT_FALSE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin, end));
+
+  KeyGen begin_and_end("foo", 123);
+  // Can add 0 entries
+  ASSERT_TRUE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end,
+                                              begin_and_end));
+
+  Seed reseeds = banding.GetOrdinalSeed();
+  ASSERT_EQ(reseeds, 0U);
+  hasher.SetOrdinalSeed(reseeds);
+
+  // Can construct 0-slot solutions
+  isoln.BackSubstFrom(banding);
+  soln.BackSubstFrom(banding);
+
+  // Should always return false
+  ASSERT_FALSE(isoln.FilterQuery(*begin, hasher));
+  ASSERT_FALSE(soln.FilterQuery(*begin, hasher));
+
+  // And report that in FP rate
+  ASSERT_EQ(isoln.ExpectedFpRate(), 0.0);
+  ASSERT_EQ(soln.ExpectedFpRate(), 0.0);
+}
+
+TEST(RibbonTest, RawAndOrdinalSeeds) {
+  StandardHasher<TypesAndSettings_Seed64> hasher64;
+  StandardHasher<DefaultTypesAndSettings> hasher64_32;
+  StandardHasher<TypesAndSettings_Hash32> hasher32;
+  StandardHasher<TypesAndSettings_Seed8> hasher8;
+
+  for (uint32_t limit : {0xffU, 0xffffU}) {
+    std::vector<bool> seen(limit + 1);
+    for (uint32_t i = 0; i < limit; ++i) {
+      hasher64.SetOrdinalSeed(i);
+      auto raw64 = hasher64.GetRawSeed();
+      hasher32.SetOrdinalSeed(i);
+      auto raw32 = hasher32.GetRawSeed();
+      hasher8.SetOrdinalSeed(static_cast<uint8_t>(i));
+      auto raw8 = hasher8.GetRawSeed();
+      {
+        hasher64_32.SetOrdinalSeed(i);
+        auto raw64_32 = hasher64_32.GetRawSeed();
+        ASSERT_EQ(raw64_32, raw32);  // Same size seed
+      }
+      if (i == 0) {
+        // Documented that ordinal seed 0 == raw seed 0
+        ASSERT_EQ(raw64, 0U);
+        ASSERT_EQ(raw32, 0U);
+        ASSERT_EQ(raw8, 0U);
+      } else {
+        // Extremely likely that upper bits are set
+        ASSERT_GT(raw64, raw32);
+        ASSERT_GT(raw32, raw8);
+      }
+      // Hashers agree on lower bits
+      ASSERT_EQ(static_cast<uint32_t>(raw64), raw32);
+      ASSERT_EQ(static_cast<uint8_t>(raw32), raw8);
+
+      // The translation is one-to-one for this size prefix
+      uint32_t v = static_cast<uint32_t>(raw32 & limit);
+      ASSERT_EQ(raw64 & limit, v);
+      ASSERT_FALSE(seen[v]);
+      seen[v] = true;
+    }
+  }
+}
+
+namespace {
+
+struct PhsfInputGen {
+  PhsfInputGen(const std::string& prefix, uint64_t id) : id_(id) {
+    val_.first = prefix;
+    ROCKSDB_NAMESPACE::PutFixed64(&val_.first, /*placeholder*/ 0);
+  }
+
+  // Prefix increment (only one required)
+  PhsfInputGen& operator++() {
+    ++id_;
+    return *this;
+  }
+
+  const std::pair<std::string, uint8_t>& operator*() {
+    // Use multiplication to mix things up a little in the key
+    ROCKSDB_NAMESPACE::EncodeFixed64(&val_.first[val_.first.size() - 8],
+                                     id_ * uint64_t{0x1500000001});
+    // Occasionally repeat values etc.
+    val_.second = static_cast<uint8_t>(id_ * 7 / 8);
+    return val_;
+  }
+
+  const std::pair<std::string, uint8_t>* operator->() { return &**this; }
+
+  bool operator==(const PhsfInputGen& other) {
+    // Same prefix is assumed
+    return id_ == other.id_;
+  }
+  bool operator!=(const PhsfInputGen& other) {
+    // Same prefix is assumed
+    return id_ != other.id_;
+  }
+
+  uint64_t id_;
+  std::pair<std::string, uint8_t> val_;
+};
+
+struct PhsfTypesAndSettings : public DefaultTypesAndSettings {
+  static constexpr bool kIsFilter = false;
+};
+}  // namespace
+
+TEST(RibbonTest, PhsfBasic) {
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(PhsfTypesAndSettings);
+  IMPORT_RIBBON_IMPL_TYPES(PhsfTypesAndSettings);
+
+  Index num_slots = 12800;
+  Index num_to_add = static_cast<Index>(num_slots / 1.02);
+
+  PhsfInputGen begin("in", 0);
+  PhsfInputGen end("in", num_to_add);
+
+  std::unique_ptr<char[]> idata(new char[/*bytes*/ num_slots]);
+  InterleavedSoln isoln(idata.get(), /*bytes*/ num_slots);
+  SimpleSoln soln;
+  Hasher hasher;
+
+  {
+    Banding banding;
+    ASSERT_TRUE(banding.ResetAndFindSeedToSolve(num_slots, begin, end));
+
+    soln.BackSubstFrom(banding);
+    isoln.BackSubstFrom(banding);
+
+    hasher.SetOrdinalSeed(banding.GetOrdinalSeed());
+  }
+
+  for (PhsfInputGen cur = begin; cur != end; ++cur) {
+    ASSERT_EQ(cur->second, soln.PhsfQuery(cur->first, hasher));
+    ASSERT_EQ(cur->second, isoln.PhsfQuery(cur->first, hasher));
+  }
+}
+
+// Not a real test, but a tool used to build APIs in ribbon_config.h
+TYPED_TEST(RibbonTypeParamTest, FindOccupancy) {
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
+  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
+  using KeyGen = typename TypeParam::KeyGen;
+
+  if (!FLAGS_find_occ) {
+    ROCKSDB_GTEST_BYPASS("Tool disabled during unit test runs");
+    return;
+  }
+
+  KeyGen cur(std::to_string(testing::UnitTest::GetInstance()->random_seed()),
+             0);
+
+  Banding banding;
+  Index num_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_min_slots);
+  Index max_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_max_slots);
+  while (num_slots <= max_slots) {
+    std::map<int32_t, uint32_t> rem_histogram;
+    std::map<Index, uint32_t> slot_histogram;
+    if (FLAGS_find_slot_occ) {
+      for (Index i = 0; i < kCoeffBits; ++i) {
+        slot_histogram[i] = 0;
+        slot_histogram[num_slots - 1 - i] = 0;
+        slot_histogram[num_slots / 2 - kCoeffBits / 2 + i] = 0;
+      }
+    }
+    uint64_t total_added = 0;
+    for (uint32_t i = 0; i < FLAGS_find_iters; ++i) {
+      banding.Reset(num_slots);
+      uint32_t j = 0;
+      KeyGen end = cur;
+      end += num_slots + num_slots / 10;
+      for (; cur != end; ++cur) {
+        if (banding.Add(*cur)) {
+          ++j;
+        } else {
+          break;
+        }
+      }
+      total_added += j;
+      for (auto& slot : slot_histogram) {
+        slot.second += banding.IsOccupied(slot.first);
+      }
+
+      int32_t bucket =
+          static_cast<int32_t>(num_slots) - static_cast<int32_t>(j);
+      rem_histogram[bucket]++;
+      if (FLAGS_verbose) {
+        fprintf(stderr, "num_slots: %u i: %u / %u avg_overhead: %g\r",
+                static_cast<unsigned>(num_slots), static_cast<unsigned>(i),
+                static_cast<unsigned>(FLAGS_find_iters),
+                1.0 * (i + 1) * num_slots / total_added);
+      }
+    }
+    if (FLAGS_verbose) {
+      fprintf(stderr, "\n");
+    }
+
+    uint32_t cumulative = 0;
+
+    double p50_rem = 0;
+    double p95_rem = 0;
+    double p99_9_rem = 0;
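+
+    // Worked example (sketch, not part of the upstream file): the loop below
+    // finds percentiles by linear interpolation within a histogram bucket.
+    // If the bucket at h.first = 12 spans cumulative fractions before = 0.4
+    // to not_after = 0.6, the median lies portion = (0.5 - 0.4) / (0.6 - 0.4)
+    // = 0.5 of the way through it, giving p50_rem = 12.5.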
+    for (auto& h : rem_histogram) {
+      double before = 1.0 * cumulative / FLAGS_find_iters;
+      double not_after = 1.0 * (cumulative + h.second) / FLAGS_find_iters;
+      if (FLAGS_verbose) {
+        fprintf(stderr, "overhead: %g before: %g not_after: %g\n",
+                1.0 * num_slots / (num_slots - h.first), before, not_after);
+      }
+      cumulative += h.second;
+      if (before < 0.5 && 0.5 <= not_after) {
+        // fake it with linear interpolation
+        double portion = (0.5 - before) / (not_after - before);
+        p50_rem = h.first + portion;
+      } else if (before < 0.95 && 0.95 <= not_after) {
+        // fake it with linear interpolation
+        double portion = (0.95 - before) / (not_after - before);
+        p95_rem = h.first + portion;
+      } else if (before < 0.999 && 0.999 <= not_after) {
+        // fake it with linear interpolation
+        double portion = (0.999 - before) / (not_after - before);
+        p99_9_rem = h.first + portion;
+      }
+    }
+    for (auto& slot : slot_histogram) {
+      fprintf(stderr, "slot[%u] occupied: %g\n", (unsigned)slot.first,
+              1.0 * slot.second / FLAGS_find_iters);
+    }
+
+    double mean_rem =
+        (1.0 * FLAGS_find_iters * num_slots - total_added) / FLAGS_find_iters;
+    fprintf(
+        stderr,
+        "num_slots: %u iters: %u mean_ovr: %g p50_ovr: %g p95_ovr: %g "
+        "p99.9_ovr: %g mean_rem: %g p50_rem: %g p95_rem: %g p99.9_rem: %g\n",
+        static_cast<unsigned>(num_slots),
+        static_cast<unsigned>(FLAGS_find_iters),
+        1.0 * num_slots / (num_slots - mean_rem),
+        1.0 * num_slots / (num_slots - p50_rem),
+        1.0 * num_slots / (num_slots - p95_rem),
+        1.0 * num_slots / (num_slots - p99_9_rem), mean_rem, p50_rem, p95_rem,
+        p99_9_rem);
+
+    num_slots = std::max(
+        num_slots + 1, static_cast<Index>(num_slots * FLAGS_find_next_factor));
+    num_slots = InterleavedSoln::RoundUpNumSlots(num_slots);
+  }
+}
+
+// Not a real test, but a tool to understand Homogeneous Ribbon
+// behavior (TODO: configuration APIs & tests)
+TYPED_TEST(RibbonTypeParamTest, OptimizeHomogAtScale) {
+  IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam);
+  IMPORT_RIBBON_IMPL_TYPES(TypeParam);
+  using KeyGen = typename TypeParam::KeyGen;
+
+  if (!FLAGS_optimize_homog) {
+    ROCKSDB_GTEST_BYPASS("Tool disabled during unit test runs");
+    return;
+  }
+
+  if (!TypeParam::kHomogeneous) {
+    ROCKSDB_GTEST_BYPASS("Only for Homogeneous Ribbon");
+    return;
+  }
+
+  KeyGen cur(std::to_string(testing::UnitTest::GetInstance()->random_seed()),
+             0);
+
+  Banding banding;
+  Index num_slots = SimpleSoln::RoundUpNumSlots(FLAGS_optimize_homog_slots);
+  banding.Reset(num_slots);
+
+  // This and "band_ovr" is the "allocated overhead", or slots over added.
+  // It does not take into account FP rates.
+  double target_overhead = 1.20;
+  uint32_t num_added = 0;
+
+  do {
+    do {
+      (void)banding.Add(*cur);
+      ++cur;
+      ++num_added;
+    } while (1.0 * num_slots / num_added > target_overhead);
+
+    SimpleSoln soln;
+    soln.BackSubstFrom(banding);
+
+    std::array<uint32_t, 8U * sizeof(ResultRow)> fp_counts_by_cols;
+    fp_counts_by_cols.fill(0U);
+    for (uint32_t i = 0; i < FLAGS_optimize_homog_check; ++i) {
+      ResultRow r = soln.PhsfQuery(*cur, banding);
+      ++cur;
+      for (size_t j = 0; j < fp_counts_by_cols.size(); ++j) {
+        if ((r & 1) == 1) {
+          break;
+        }
+        fp_counts_by_cols[j]++;
+        r /= 2;
+      }
+    }
+    fprintf(stderr, "band_ovr: %g ", 1.0 * num_slots / num_added);
+    for (unsigned j = 0; j < fp_counts_by_cols.size(); ++j) {
+      double inv_fp_rate =
+          1.0 * FLAGS_optimize_homog_check / fp_counts_by_cols[j];
+      double equiv_cols = std::log(inv_fp_rate) * 1.4426950409;
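+      // Note (sketch, not part of the upstream file): 1.4426950409 ~= log2(e),
+      // so equiv_cols = log2(inv_fp_rate), the number of solution columns an
+      // ideal filter would need for the observed FP rate; e.g. an observed
+      // FP rate of 1/256 gives equiv_cols = log2(256) = 8.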
+      // Overhead vs. information-theoretic minimum based on observed
+      // FP rate (subject to sampling error, especially for low FP rates)
+      double actual_overhead =
+          1.0 * (j + 1) * num_slots / (equiv_cols * num_added);
+      fprintf(stderr, "ovr_%u: %g ", j + 1, actual_overhead);
+    }
+    fprintf(stderr, "\n");
+    target_overhead -= FLAGS_optimize_homog_granularity;
+  } while (target_overhead > 1.0);
+}
+
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+#ifdef GFLAGS
+  ParseCommandLineFlags(&argc, &argv, true);
+#endif  // GFLAGS
+  return RUN_ALL_TESTS();
+}
--
cgit v1.2.3