From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/rocksdb/util/ribbon_alg.h | 1225 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1225 insertions(+) create mode 100644 src/rocksdb/util/ribbon_alg.h (limited to 'src/rocksdb/util/ribbon_alg.h') diff --git a/src/rocksdb/util/ribbon_alg.h b/src/rocksdb/util/ribbon_alg.h new file mode 100644 index 000000000..f9afefc23 --- /dev/null +++ b/src/rocksdb/util/ribbon_alg.h @@ -0,0 +1,1225 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "util/math128.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_alg.h: generic versions of core algorithms. +// +// Ribbon is a Perfect Hash Static Function construction useful as a compact +// static Bloom filter alternative. It combines (a) a boolean (GF(2)) linear +// system construction that approximates a Band Matrix with hashing, +// (b) an incremental, on-the-fly Gaussian Elimination algorithm that is +// remarkably efficient and adaptable at constructing an upper-triangular +// band matrix from a set of band-approximating inputs from (a), and +// (c) a storage layout that is fast and adaptable as a filter. +// +// Footnotes: (a) "Efficient Gauss Elimination for Near-Quadratic Matrices +// with One Short Random Block per Row, with Applications" by Stefan +// Walzer and Martin Dietzfelbinger ("DW paper") +// (b) developed by Peter C. Dillinger, though not the first on-the-fly +// GE algorithm. See "On the fly Gaussian Elimination for LT codes" by +// Bioglio, Grangetto, Gaeta, and Sereno. +// (c) see "interleaved" solution storage below. +// +// See ribbon_impl.h for high-level behavioral summary. This file focuses +// on the core design details. +// +// ###################################################################### +// ################# PHSF -> static filter reduction #################### +// +// A Perfect Hash Static Function is a data structure representing a +// map from anything hashable (a "key") to values of some fixed size. +// Crucially, it is allowed to return garbage values for anything not in +// the original set of map keys, and it is a "static" structure: entries +// cannot be added or deleted after construction. PHSFs representing n +// mappings to b-bit values (assume uniformly distributed) require at least +// n * b bits to represent, or at least b bits per entry. We typically +// describe the compactness of a PHSF by typical bits per entry as some +// function of b. For example, the MWHC construction (k=3 "peeling") +// requires about 1.0222*b and a variant called Xor+ requires about +// 1.08*b + 0.5 bits per entry. +// +// With more hashing, a PHSF can over-approximate a set as a Bloom filter +// does, with no FN queries and predictable false positive (FP) query +// rate. Instead of the user providing a value to map each input key to, +// a hash function provides the value. Keys in the original set will +// return a positive membership query because the underlying PHSF returns +// the same value as hashing the key. When a key is not in the original set, +// the PHSF returns a "garbage" value, which is only equal to the key's +// hash with (false positive) probability 1 in 2^b. +// +// For a matching false positive rate, standard Bloom filters require +// 1.44*b bits per entry. Cache-local Bloom filters (like bloom_impl.h) +// require a bit more, around 1.5*b bits per entry. Thus, a Bloom +// alternative could save up to or nearly 1/3rd of memory and storage +// that RocksDB uses for SST (static) Bloom filters. (Memtable Bloom filter +// is dynamic.) +// +// Recommended reading: +// "Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters" +// by Graf and Lemire +// First three sections of "Fast Scalable Construction of (Minimal +// Perfect Hash) Functions" by Genuzio, Ottaviano, and Vigna +// +// ###################################################################### +// ################## PHSF vs. hash table vs. Bloom ##################### +// +// You can think of traditional hash tables and related filter variants +// such as Cuckoo filters as utilizing an "OR" construction: a hash +// function associates a key with some slots and the data is returned if +// the data is found in any one of those slots. The collision resolution +// is visible in the final data structure and requires extra information. +// For example, Cuckoo filter uses roughly 1.05b + 2 bits per entry, and +// Golomb-Rice code (aka "GCS") as little as b + 1.5. When the data +// structure associates each input key with data in one slot, the +// structure implicitly constructs a (near-)minimal (near-)perfect hash +// (MPH) of the keys, which requires at least 1.44 bits per key to +// represent. This is why approaches with visible collision resolution +// have a fixed + 1.5 or more in storage overhead per entry, often in +// addition to an overhead multiplier on b. +// +// By contrast Bloom filters utilize an "AND" construction: a query only +// returns true if all bit positions associated with a key are set to 1. +// There is no collision resolution, so Bloom filters do not suffer a +// fixed bits per entry overhead like the above structures. +// +// PHSFs typically use a bitwise XOR construction: the data you want is +// not in a single slot, but in a linear combination of several slots. +// For static data, this gives the best of "AND" and "OR" constructions: +// avoids the +1.44 or more fixed overhead by not approximating a MPH and +// can do much better than Bloom's 1.44 factor on b with collision +// resolution, which here is done ahead of time and invisible at query +// time. +// +// ###################################################################### +// ######################## PHSF construction ########################### +// +// For a typical PHSF, construction is solving a linear system of +// equations, typically in GF(2), which is to say that values are boolean +// and XOR serves both as addition and subtraction. We can use matrices to +// represent the problem: +// +// C * S = R +// (n x m) (m x b) (n x b) +// where C = coefficients, S = solution, R = results +// and solving for S given C and R. +// +// Note that C and R each have n rows, one for each input entry for the +// PHSF. A row in C is given by a hash function on the PHSF input key, +// and the corresponding row in R is the b-bit value to associate with +// that input key. (In a filter, rows of R are given by another hash +// function on the input key.) +// +// On solving, the matrix S (solution) is the final PHSF data, as it +// maps any row from the original C to its corresponding desired result +// in R. We just have to hash our query inputs and compute a linear +// combination of rows in S. +// +// In theory, we could chose m = n and let a hash function associate +// each input key with random rows in C. A solution exists with high +// probability, and uses essentially minimum space, b bits per entry +// (because we set m = n) but this has terrible scaling, something +// like O(n^2) space and O(n^3) time during construction (Gaussian +// elimination) and O(n) query time. But computational efficiency is +// key, and the core of this is avoiding scanning all of S to answer +// each query. +// +// The traditional approach (MWHC, aka Xor filter) starts with setting +// only some small fixed number of columns (typically k=3) to 1 for each +// row of C, with remaining entries implicitly 0. This is implemented as +// three hash functions over [0,m), and S can be implemented as a vector +// of b-bit values. Now, a query only involves looking up k rows +// (values) in S and computing their bitwise XOR. Additionally, this +// construction can use a linear time algorithm called "peeling" for +// finding a solution in many cases of one existing, but peeling +// generally requires a larger space overhead factor in the solution +// (m/n) than is required with Gaussian elimination. +// +// Recommended reading: +// "Peeling Close to the Orientability Threshold - Spatial Coupling in +// Hashing-Based Data Structures" by Stefan Walzer +// +// ###################################################################### +// ##################### Ribbon PHSF construction ####################### +// +// Ribbon constructs coefficient rows essentially the same as in the +// Walzer/Dietzfelbinger paper cited above: for some chosen fixed width +// r (kCoeffBits in code), each key is hashed to a starting column in +// [0, m - r] (GetStart() in code) and an r-bit sequence of boolean +// coefficients (GetCoeffRow() in code). If you sort the rows by start, +// the C matrix would look something like this: +// +// [####00000000000000000000] +// [####00000000000000000000] +// [000####00000000000000000] +// [0000####0000000000000000] +// [0000000####0000000000000] +// [000000000####00000000000] +// [000000000####00000000000] +// [0000000000000####0000000] +// [0000000000000000####0000] +// [00000000000000000####000] +// [00000000000000000000####] +// +// where each # could be a 0 or 1, chosen uniformly by a hash function. +// (Except we typically set the start column value to 1.) This scheme +// uses hashing to approximate a band matrix, and it has a solution iff +// it reduces to an upper-triangular boolean r-band matrix, like this: +// +// [1###00000000000000000000] +// [01##00000000000000000000] +// [000000000000000000000000] +// [0001###00000000000000000] +// [000000000000000000000000] +// [000001##0000000000000000] +// [000000000000000000000000] +// [00000001###0000000000000] +// [000000001###000000000000] +// [0000000001##000000000000] +// ... +// [00000000000000000000001#] +// [000000000000000000000001] +// +// where we have expanded to an m x m matrix by filling with rows of +// all zeros as needed. As in Gaussian elimination, this form is ready for +// generating a solution through back-substitution. +// +// The awesome thing about the Ribbon construction (from the DW paper) is +// how row reductions keep each row representable as a start column and +// r coefficients, because row reductions are only needed when two rows +// have the same number of leading zero columns. Thus, the combination +// of those rows, the bitwise XOR of the r-bit coefficient rows, cancels +// out the leading 1s, so starts (at least) one column later and only +// needs (at most) r - 1 coefficients. +// +// ###################################################################### +// ###################### Ribbon PHSF scalability ####################### +// +// Although more practical detail is in ribbon_impl.h, it's worth +// understanding some of the overall benefits and limitations of the +// Ribbon PHSFs. +// +// High-end scalability is a primary issue for Ribbon PHSFs, because in +// a single Ribbon linear system with fixed r and fixed m/n ratio, the +// solution probability approaches zero as n approaches infinity. +// For a given n, solution probability improves with larger r and larger +// m/n. +// +// By contrast, peeling-based PHSFs have somewhat worse storage ratio +// or solution probability for small n (less than ~1000). This is +// especially true with spatial-coupling, where benefits are only +// notable for n on the order of 100k or 1m or more. +// +// To make best use of current hardware, r=128 seems to be closest to +// a "generally good" choice for Ribbon, at least in RocksDB where SST +// Bloom filters typically hold around 10-100k keys, and almost always +// less than 10m keys. r=128 ribbon has a high chance of encoding success +// (with first hash seed) when storage overhead is around 5% (m/n ~ 1.05) +// for roughly 10k - 10m keys in a single linear system. r=64 only scales +// up to about 10k keys with the same storage overhead. Construction and +// access times for r=128 are similar to r=64. r=128 tracks nearly +// twice as much data during construction, but in most cases we expect +// the scalability benefits of r=128 vs. r=64 to make it preferred. +// +// A natural approach to scaling Ribbon beyond ~10m keys is splitting +// (or "sharding") the inputs into multiple linear systems with their +// own hash seeds. This can also help to control peak memory consumption. +// TODO: much more to come +// +// ###################################################################### +// #################### Ribbon on-the-fly banding ####################### +// +// "Banding" is what we call the process of reducing the inputs to an +// upper-triangular r-band matrix ready for finishing a solution with +// back-substitution. Although the DW paper presents an algorithm for +// this ("SGauss"), the awesome properties of their construction enable +// an even simpler, faster, and more backtrackable algorithm. In simplest +// terms, the SGauss algorithm requires sorting the inputs by start +// columns, but it's possible to make Gaussian elimination resemble hash +// table insertion! +// +// The enhanced algorithm is based on these observations: +// - When processing a coefficient row with first 1 in column j, +// - If it's the first at column j to be processed, it can be part of +// the banding at row j. (And that decision never overwritten, with +// no loss of generality!) +// - Else, it can be combined with existing row j and re-processed, +// which will look for a later "empty" row or reach "no solution". +// +// We call our banding algorithm "incremental" and "on-the-fly" because +// (like hash table insertion) we are "finished" after each input +// processed, with respect to all inputs processed so far. Although the +// band matrix is an intermediate step to the solution structure, we have +// eliminated intermediate steps and unnecessary data tracking for +// banding. +// +// Building on "incremental" and "on-the-fly", the banding algorithm is +// easily backtrackable because no (non-empty) rows are overwritten in +// the banding. Thus, if we want to "try" adding an additional set of +// inputs to the banding, we only have to record which rows were written +// in order to efficiently backtrack to our state before considering +// the additional set. (TODO: how this can mitigate scalability and +// reach sub-1% overheads) +// +// Like in a linear-probed hash table, as the occupancy approaches and +// surpasses 90-95%, collision resolution dominates the construction +// time. (Ribbon doesn't usually pay at query time; see solution +// storage below.) This means that we can speed up construction time +// by using a higher m/n ratio, up to negative returns around 1.2. +// At m/n ~= 1.2, which still saves memory substantially vs. Bloom +// filter's 1.5, construction speed (including back-substitution) is not +// far from sorting speed, but still a few times slower than cache-local +// Bloom construction speed. +// +// Back-substitution from an upper-triangular boolean band matrix is +// especially fast and easy. All the memory accesses are sequential or at +// least local, no random. If the number of result bits (b) is a +// compile-time constant, the back-substitution state can even be tracked +// in CPU registers. Regardless of the solution representation, we prefer +// column-major representation for tracking back-substitution state, as +// r (the band width) will typically be much larger than b (result bits +// or columns), so better to handle r-bit values b times (per solution +// row) than b-bit values r times. +// +// ###################################################################### +// ##################### Ribbon solution storage ######################## +// +// Row-major layout is typical for boolean (bit) matrices, including for +// MWHC (Xor) filters where a query combines k b-bit values, and k is +// typically smaller than b. Even for k=4 and b=2, at least k=4 random +// look-ups are required regardless of layout. +// +// Ribbon PHSFs are quite different, however, because +// (a) all of the solution rows relevant to a query are within a single +// range of r rows, and +// (b) the number of solution rows involved (r/2 on average, or r if +// avoiding conditional accesses) is typically much greater than +// b, the number of solution columns. +// +// Row-major for Ribbon PHSFs therefore tends to incur undue CPU overhead +// by processing (up to) r entries of b bits each, where b is typically +// less than 10 for filter applications. +// +// Column-major layout has poor locality because of accessing up to b +// memory locations in different pages (and obviously cache lines). Note +// that negative filter queries do not typically need to access all +// solution columns, as they can return when a mismatch is found in any +// result/solution column. This optimization doesn't always pay off on +// recent hardware, where the penalty for unpredictable conditional +// branching can exceed the penalty for unnecessary work, but the +// optimization is essentially unavailable with row-major layout. +// +// The best compromise seems to be interleaving column-major on the small +// scale with row-major on the large scale. For example, let a solution +// "block" be r rows column-major encoded as b r-bit values in sequence. +// Each query accesses (up to) 2 adjacent blocks, which will typically +// span 1-3 cache lines in adjacent memory. We get very close to the same +// locality as row-major, but with much faster reconstruction of each +// result column, at least for filter applications where b is relatively +// small and negative queries can return early. +// +// ###################################################################### +// ###################### Fractional result bits ######################## +// +// Bloom filters have great flexibility that alternatives mostly do not +// have. One of those flexibilities is in utilizing any ratio of data +// structure bits per key. With a typical memory allocator like jemalloc, +// this flexibility can save roughly 10% of the filters' footprint in +// DRAM by rounding up and down filter sizes to minimize memory internal +// fragmentation (see optimize_filters_for_memory RocksDB option). +// +// At first glance, PHSFs only offer a whole number of bits per "slot" +// (m rather than number of keys n), but coefficient locality in the +// Ribbon construction makes fractional bits/key quite possible and +// attractive for filter applications. This works by a prefix of the +// structure using b-1 solution columns and the rest using b solution +// columns. See InterleavedSolutionStorage below for more detail. +// +// Because false positive rates are non-linear in bits/key, this approach +// is not quite optimal in terms of information theory. In common cases, +// we see additional space overhead up to about 1.5% vs. theoretical +// optimal to achieve the same FP rate. We consider this a quite acceptable +// overhead for very efficiently utilizing space that might otherwise be +// wasted. +// +// This property of Ribbon even makes it "elastic." A Ribbon filter and +// its small metadata for answering queries can be adapted into another +// Ribbon filter filling any smaller multiple of r bits (plus small +// metadata), with a correspondingly higher FP rate. None of the data +// thrown away during construction needs to be recalled for this reduction. +// Similarly a single Ribbon construction can be separated (by solution +// column) into two or more structures (or "layers" or "levels") with +// independent filtering ability (no FP correlation, just as solution or +// result columns in a single structure) despite being constructed as part +// of a single linear system. (TODO: implement) +// See also "ElasticBF: Fine-grained and Elastic Bloom Filter Towards +// Efficient Read for LSM-tree-based KV Stores." +// + +// ###################################################################### +// ################### CODE: Ribbon core algorithms ##################### +// ###################################################################### +// +// These algorithms are templatized for genericity but near-maximum +// performance in a given application. The template parameters +// adhere to informal class/struct type concepts outlined below. (This +// code is written for C++11 so does not use formal C++ concepts.) + +// Rough architecture for these algorithms: +// +// +-----------+ +---+ +-----------------+ +// | AddInputs | --> | H | --> | BandingStorage | +// +-----------+ | a | +-----------------+ +// | s | | +// | h | Back substitution +// | e | V +// +-----------+ | r | +-----------------+ +// | Query Key | --> | | >+< | SolutionStorage | +// +-----------+ +---+ | +-----------------+ +// V +// Query result + +// Common to other concepts +// concept RibbonTypes { +// // An unsigned integer type for an r-bit subsequence of coefficients. +// // r (or kCoeffBits) is taken to be sizeof(CoeffRow) * 8, as it would +// // generally only hurt scalability to leave bits of CoeffRow unused. +// typename CoeffRow; +// // An unsigned integer type big enough to hold a result row (b bits, +// // or number of solution/result columns). +// // In many applications, especially filters, the number of result +// // columns is decided at run time, so ResultRow simply needs to be +// // big enough for the largest number of columns allowed. +// typename ResultRow; +// // An unsigned integer type sufficient for representing the number of +// // rows in the solution structure, and at least the arithmetic +// // promotion size (usually 32 bits). uint32_t recommended because a +// // single Ribbon construction doesn't really scale to billions of +// // entries. +// typename Index; +// }; + +// ###################################################################### +// ######################## Hashers and Banding ######################### + +// Hasher concepts abstract out hashing details. + +// concept PhsfQueryHasher extends RibbonTypes { +// // Type for a lookup key, which is hashable. +// typename Key; +// +// // Type for hashed summary of a Key. uint64_t is recommended. +// typename Hash; +// +// // Compute a hash value summarizing a Key +// Hash GetHash(const Key &) const; +// +// // Given a hash value and a number of columns that can start an +// // r-sequence of coefficients (== m - r + 1), return the start +// // column to associate with that hash value. (Starts can be chosen +// // uniformly or "smash" extra entries into the beginning and end for +// // better utilization at those extremes of the structure. Details in +// // ribbon.impl.h) +// Index GetStart(Hash, Index num_starts) const; +// +// // Given a hash value, return the r-bit sequence of coefficients to +// // associate with it. It's generally OK if +// // sizeof(CoeffRow) > sizeof(Hash) +// // as long as the hash itself is not too prone to collisions for the +// // applications and the CoeffRow is generated uniformly from +// // available hash data, but relatively independent of the start. +// // +// // Must be non-zero, because that's required for a solution to exist +// // when mapping to non-zero result row. (Note: BandingAdd could be +// // modified to allow 0 coeff row if that only occurs with 0 result +// // row, which really only makes sense for filter implementation, +// // where both values are hash-derived. Or BandingAdd could reject 0 +// // coeff row, forcing next seed, but that has potential problems with +// // generality/scalability.) +// CoeffRow GetCoeffRow(Hash) const; +// }; + +// concept FilterQueryHasher extends PhsfQueryHasher { +// // For building or querying a filter, this returns the expected +// // result row associated with a hashed input. For general PHSF, +// // this must return 0. +// // +// // Although not strictly required, there's a slightly better chance of +// // solver success if result row is masked down here to only the bits +// // actually needed. +// ResultRow GetResultRowFromHash(Hash) const; +// } + +// concept BandingHasher extends FilterQueryHasher { +// // For a filter, this will generally be the same as Key. +// // For a general PHSF, it must either +// // (a) include a key and a result it maps to (e.g. in a std::pair), or +// // (b) GetResultRowFromInput looks up the result somewhere rather than +// // extracting it. +// typename AddInput; +// +// // Instead of requiring a way to extract a Key from an +// // AddInput, we require getting the hash of the Key part +// // of an AddInput, which is trivial if AddInput == Key. +// Hash GetHash(const AddInput &) const; +// +// // For building a non-filter PHSF, this extracts or looks up the result +// // row to associate with an input. For filter PHSF, this must return 0. +// ResultRow GetResultRowFromInput(const AddInput &) const; +// +// // Whether the solver can assume the lowest bit of GetCoeffRow is +// // always 1. When true, it should improve solver efficiency slightly. +// static bool kFirstCoeffAlwaysOne; +// } + +// Abstract storage for the the result of "banding" the inputs (Gaussian +// elimination to an upper-triangular boolean band matrix). Because the +// banding is an incremental / on-the-fly algorithm, this also represents +// all the intermediate state between input entries. +// +// concept BandingStorage extends RibbonTypes { +// // Tells the banding algorithm to prefetch memory associated with +// // the next input before processing the current input. Generally +// // recommended iff the BandingStorage doesn't easily fit in CPU +// // cache. +// bool UsePrefetch() const; +// +// // Prefetches (e.g. __builtin_prefetch) memory associated with a +// // slot index i. +// void Prefetch(Index i) const; +// +// // Load or store CoeffRow and ResultRow for slot index i. +// // (Gaussian row operations involve both sides of the equation.) +// // Bool `for_back_subst` indicates that customizing values for +// // unconstrained solution rows (cr == 0) is allowed. +// void LoadRow(Index i, CoeffRow *cr, ResultRow *rr, bool for_back_subst) +// const; +// void StoreRow(Index i, CoeffRow cr, ResultRow rr); +// +// // Returns the number of columns that can start an r-sequence of +// // coefficients, which is the number of slots minus r (kCoeffBits) +// // plus one. (m - r + 1) +// Index GetNumStarts() const; +// }; + +// Optional storage for backtracking data in banding a set of input +// entries. It exposes an array structure which will generally be +// used as a stack. It must be able to accommodate as many entries +// as are passed in as inputs to `BandingAddRange`. +// +// concept BacktrackStorage extends RibbonTypes { +// // If false, backtracking support will be disabled in the algorithm. +// // This should preferably be an inline compile-time constant function. +// bool UseBacktrack() const; +// +// // Records `to_save` as the `i`th backtrack entry +// void BacktrackPut(Index i, Index to_save); +// +// // Recalls the `i`th backtrack entry +// Index BacktrackGet(Index i) const; +// } + +// Adds a single entry to BandingStorage (and optionally, BacktrackStorage), +// returning true if successful or false if solution is impossible with +// current hasher (and presumably its seed) and number of "slots" (solution +// or banding rows). (A solution is impossible when there is a linear +// dependence among the inputs that doesn't "cancel out".) +// +// Pre- and post-condition: the BandingStorage represents a band matrix +// ready for back substitution (row echelon form except for zero rows), +// augmented with result values such that back substitution would give a +// solution satisfying all the cr@start -> rr entries added. +template +bool BandingAdd(BandingStorage *bs, typename BandingStorage::Index start, + typename BandingStorage::ResultRow rr, + typename BandingStorage::CoeffRow cr, BacktrackStorage *bts, + typename BandingStorage::Index *backtrack_pos) { + using CoeffRow = typename BandingStorage::CoeffRow; + using ResultRow = typename BandingStorage::ResultRow; + using Index = typename BandingStorage::Index; + + Index i = start; + + if (!kFirstCoeffAlwaysOne) { + // Requires/asserts that cr != 0 + int tz = CountTrailingZeroBits(cr); + i += static_cast(tz); + cr >>= tz; + } + + for (;;) { + assert((cr & 1) == 1); + CoeffRow cr_at_i; + ResultRow rr_at_i; + bs->LoadRow(i, &cr_at_i, &rr_at_i, /* for_back_subst */ false); + if (cr_at_i == 0) { + bs->StoreRow(i, cr, rr); + bts->BacktrackPut(*backtrack_pos, i); + ++*backtrack_pos; + return true; + } + assert((cr_at_i & 1) == 1); + // Gaussian row reduction + cr ^= cr_at_i; + rr ^= rr_at_i; + if (cr == 0) { + // Inconsistency or (less likely) redundancy + break; + } + // Find relative offset of next non-zero coefficient. + int tz = CountTrailingZeroBits(cr); + i += static_cast(tz); + cr >>= tz; + } + + // Failed, unless result row == 0 because e.g. a duplicate input or a + // stock hash collision, with same result row. (For filter, stock hash + // collision implies same result row.) Or we could have a full equation + // equal to sum of other equations, which is very possible with + // small range of values for result row. + return rr == 0; +} + +// Adds a range of entries to BandingStorage returning true if successful +// or false if solution is impossible with current hasher (and presumably +// its seed) and number of "slots" (solution or banding rows). (A solution +// is impossible when there is a linear dependence among the inputs that +// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs. +// +// If UseBacktrack in the BacktrackStorage, this function call rolls back +// to prior state on failure. If !UseBacktrack, some subset of the entries +// will have been added to the BandingStorage, so best considered to be in +// an indeterminate state. +// +template +bool BandingAddRange(BandingStorage *bs, BacktrackStorage *bts, + const BandingHasher &bh, InputIterator begin, + InputIterator end) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + using Hash = typename BandingHasher::Hash; + + static_assert(IsUnsignedUpTo128::value, "must be unsigned"); + static_assert(IsUnsignedUpTo128::value, "must be unsigned"); + static_assert(IsUnsignedUpTo128::value, "must be unsigned"); + + constexpr bool kFCA1 = BandingHasher::kFirstCoeffAlwaysOne; + + if (begin == end) { + // trivial + return true; + } + + const Index num_starts = bs->GetNumStarts(); + + InputIterator cur = begin; + Index backtrack_pos = 0; + if (!bs->UsePrefetch()) { + // Simple version, no prefetch + for (;;) { + Hash h = bh.GetHash(*cur); + Index start = bh.GetStart(h, num_starts); + ResultRow rr = + bh.GetResultRowFromInput(*cur) | bh.GetResultRowFromHash(h); + CoeffRow cr = bh.GetCoeffRow(h); + + if (!BandingAdd(bs, start, rr, cr, bts, &backtrack_pos)) { + break; + } + if ((++cur) == end) { + return true; + } + } + } else { + // Pipelined w/prefetch + // Prime the pipeline + Hash h = bh.GetHash(*cur); + Index start = bh.GetStart(h, num_starts); + ResultRow rr = bh.GetResultRowFromInput(*cur); + bs->Prefetch(start); + + // Pipeline + for (;;) { + rr |= bh.GetResultRowFromHash(h); + CoeffRow cr = bh.GetCoeffRow(h); + if ((++cur) == end) { + if (!BandingAdd(bs, start, rr, cr, bts, &backtrack_pos)) { + break; + } + return true; + } + Hash next_h = bh.GetHash(*cur); + Index next_start = bh.GetStart(next_h, num_starts); + ResultRow next_rr = bh.GetResultRowFromInput(*cur); + bs->Prefetch(next_start); + if (!BandingAdd(bs, start, rr, cr, bts, &backtrack_pos)) { + break; + } + h = next_h; + start = next_start; + rr = next_rr; + } + } + // failed; backtrack (if implemented) + if (bts->UseBacktrack()) { + while (backtrack_pos > 0) { + --backtrack_pos; + Index i = bts->BacktrackGet(backtrack_pos); + // Clearing the ResultRow is not strictly required, but is required + // for good FP rate on inputs that might have been backtracked out. + // (We don't want anything we've backtracked on to leak into final + // result, as that might not be "harmless".) + bs->StoreRow(i, 0, 0); + } + } + return false; +} + +// Adds a range of entries to BandingStorage returning true if successful +// or false if solution is impossible with current hasher (and presumably +// its seed) and number of "slots" (solution or banding rows). (A solution +// is impossible when there is a linear dependence among the inputs that +// doesn't "cancel out".) Here "InputIterator" is an iterator over AddInputs. +// +// On failure, some subset of the entries will have been added to the +// BandingStorage, so best considered to be in an indeterminate state. +// +template +bool BandingAddRange(BandingStorage *bs, const BandingHasher &bh, + InputIterator begin, InputIterator end) { + using Index = typename BandingStorage::Index; + struct NoopBacktrackStorage { + bool UseBacktrack() { return false; } + void BacktrackPut(Index, Index) {} + Index BacktrackGet(Index) { + assert(false); + return 0; + } + } nbts; + return BandingAddRange(bs, &nbts, bh, begin, end); +} + +// ###################################################################### +// ######################### Solution Storage ########################### + +// Back-substitution and query algorithms unfortunately depend on some +// details of data layout in the final data structure ("solution"). Thus, +// there is no common SolutionStorage covering all the reasonable +// possibilities. + +// ###################### SimpleSolutionStorage ######################### + +// SimpleSolutionStorage is for a row-major storage, typically with no +// unused bits in each ResultRow. This is mostly for demonstration +// purposes as the simplest solution storage scheme. It is relatively slow +// for filter queries. + +// concept SimpleSolutionStorage extends RibbonTypes { +// // This is called at the beginning of back-substitution for the +// // solution storage to do any remaining configuration before data +// // is stored to it. If configuration is previously finalized, this +// // could be a simple assertion or even no-op. Ribbon algorithms +// // only call this from back-substitution, and only once per call, +// // before other functions here. +// void PrepareForNumStarts(Index num_starts) const; +// // Must return num_starts passed to PrepareForNumStarts, or the most +// // recent call to PrepareForNumStarts if this storage object can be +// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because +// // there must be a run of kCoeffBits slots starting from each start. +// Index GetNumStarts() const; +// // Load the solution row (type ResultRow) for a slot +// ResultRow Load(Index slot_num) const; +// // Store the solution row (type ResultRow) for a slot +// void Store(Index slot_num, ResultRow data); +// }; + +// Back-substitution for generating a solution from BandingStorage to +// SimpleSolutionStorage. +template +void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &bs) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename SimpleSolutionStorage::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename SimpleSolutionStorage::CoeffRow), + "must be same"); + static_assert( + sizeof(ResultRow) == sizeof(typename SimpleSolutionStorage::ResultRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + constexpr auto kResultBits = static_cast(sizeof(ResultRow) * 8U); + + // A column-major buffer of the solution matrix, containing enough + // recently-computed solution data to compute the next solution row + // (based also on banding data). + std::array state; + state.fill(0); + + const Index num_starts = bs.GetNumStarts(); + sss->PrepareForNumStarts(num_starts); + const Index num_slots = num_starts + kCoeffBits - 1; + + for (Index i = num_slots; i > 0;) { + --i; + CoeffRow cr; + ResultRow rr; + bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true); + // solution row + ResultRow sr = 0; + for (Index j = 0; j < kResultBits; ++j) { + // Compute next solution bit at row i, column j (see derivation below) + CoeffRow tmp = state[j] << 1; + bool bit = (BitParity(tmp & cr) ^ ((rr >> j) & 1)) != 0; + tmp |= bit ? CoeffRow{1} : CoeffRow{0}; + + // Now tmp is solution at column j from row i for next kCoeffBits + // more rows. Thus, for valid solution, the dot product of the + // solution column with the coefficient row has to equal the result + // at that column, + // BitParity(tmp & cr) == ((rr >> j) & 1) + + // Update state. + state[j] = tmp; + // add to solution row + sr |= (bit ? ResultRow{1} : ResultRow{0}) << j; + } + sss->Store(i, sr); + } +} + +// Common functionality for querying a key (already hashed) in +// SimpleSolutionStorage. +template +typename SimpleSolutionStorage::ResultRow SimpleQueryHelper( + typename SimpleSolutionStorage::Index start_slot, + typename SimpleSolutionStorage::CoeffRow cr, + const SimpleSolutionStorage &sss) { + using CoeffRow = typename SimpleSolutionStorage::CoeffRow; + using ResultRow = typename SimpleSolutionStorage::ResultRow; + + constexpr unsigned kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + ResultRow result = 0; + for (unsigned i = 0; i < kCoeffBits; ++i) { + // Bit masking whole value is generally faster here than 'if' + result ^= sss.Load(start_slot + i) & + (ResultRow{0} - (static_cast(cr >> i) & ResultRow{1})); + } + return result; +} + +// General PHSF query a key from SimpleSolutionStorage. +template +typename SimpleSolutionStorage::ResultRow SimplePhsfQuery( + const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher, + const SimpleSolutionStorage &sss) { + const typename PhsfQueryHasher::Hash hash = hasher.GetHash(key); + + static_assert(sizeof(typename SimpleSolutionStorage::Index) == + sizeof(typename PhsfQueryHasher::Index), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) == + sizeof(typename PhsfQueryHasher::CoeffRow), + "must be same"); + + return SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()), + hasher.GetCoeffRow(hash), sss); +} + +// Filter query a key from SimpleSolutionStorage. +template +bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key, + const FilterQueryHasher &hasher, + const SimpleSolutionStorage &sss) { + const typename FilterQueryHasher::Hash hash = hasher.GetHash(key); + const typename SimpleSolutionStorage::ResultRow expected = + hasher.GetResultRowFromHash(hash); + + static_assert(sizeof(typename SimpleSolutionStorage::Index) == + sizeof(typename FilterQueryHasher::Index), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) == + sizeof(typename FilterQueryHasher::CoeffRow), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::ResultRow) == + sizeof(typename FilterQueryHasher::ResultRow), + "must be same"); + + return expected == + SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()), + hasher.GetCoeffRow(hash), sss); +} + +// #################### InterleavedSolutionStorage ###################### + +// InterleavedSolutionStorage is row-major at a high level, for good +// locality, and column-major at a low level, for CPU efficiency +// especially in filter queries or relatively small number of result bits +// (== solution columns). The storage is a sequence of "blocks" where a +// block has one CoeffRow-sized segment for each solution column. Each +// query spans at most two blocks; the starting solution row is typically +// in the row-logical middle of a block and spans to the middle of the +// next block. (See diagram below.) +// +// InterleavedSolutionStorage supports choosing b (number of result or +// solution columns) at run time, and even supports mixing b and b-1 solution +// columns in a single linear system solution, for filters that can +// effectively utilize any size space (multiple of CoeffRow) for minimizing +// FP rate for any number of added keys. To simplify query implementation +// (with lower-index columns first), the b-bit portion comes after the b-1 +// portion of the structure. +// +// Diagram (=== marks logical block boundary; b=4; ### is data used by a +// query crossing the b-1 to b boundary, each Segment has type CoeffRow): +// ... +// +======================+ +// | S e g m e n t col=0 | +// +----------------------+ +// | S e g m e n t col=1 | +// +----------------------+ +// | S e g m e n t col=2 | +// +======================+ +// | S e g m e n #########| +// +----------------------+ +// | S e g m e n #########| +// +----------------------+ +// | S e g m e n #########| +// +======================+ Result/solution columns: above = 3, below = 4 +// |#############t col=0 | +// +----------------------+ +// |#############t col=1 | +// +----------------------+ +// |#############t col=2 | +// +----------------------+ +// | S e g m e n t col=3 | +// +======================+ +// | S e g m e n t col=0 | +// +----------------------+ +// | S e g m e n t col=1 | +// +----------------------+ +// | S e g m e n t col=2 | +// +----------------------+ +// | S e g m e n t col=3 | +// +======================+ +// ... +// +// InterleavedSolutionStorage will be adapted by the algorithms from +// simple array-like segment storage. That array-like storage is templatized +// in part so that an implementation may choose to handle byte ordering +// at access time. +// +// concept InterleavedSolutionStorage extends RibbonTypes { +// // This is called at the beginning of back-substitution for the +// // solution storage to do any remaining configuration before data +// // is stored to it. If configuration is previously finalized, this +// // could be a simple assertion or even no-op. Ribbon algorithms +// // only call this from back-substitution, and only once per call, +// // before other functions here. +// void PrepareForNumStarts(Index num_starts) const; +// // Must return num_starts passed to PrepareForNumStarts, or the most +// // recent call to PrepareForNumStarts if this storage object can be +// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because +// // there must be a run of kCoeffBits slots starting from each start. +// Index GetNumStarts() const; +// // The larger number of solution columns used (called "b" above). +// Index GetUpperNumColumns() const; +// // If returns > 0, then block numbers below that use +// // GetUpperNumColumns() - 1 columns per solution row, and the rest +// // use GetUpperNumColumns(). A block represents kCoeffBits "slots", +// // where all but the last kCoeffBits - 1 slots are also starts. And +// // a block contains a segment for each solution column. +// // An implementation may only support uniform columns per solution +// // row and return constant 0 here. +// Index GetUpperStartBlock() const; +// +// // ### "Array of segments" portion of API ### +// // The number of values of type CoeffRow used in this solution +// // representation. (This value can be inferred from the previous +// // three functions, but is expected at least for sanity / assertion +// // checking.) +// Index GetNumSegments() const; +// // Load an entry from the logical array of segments +// CoeffRow LoadSegment(Index segment_num) const; +// // Store an entry to the logical array of segments +// void StoreSegment(Index segment_num, CoeffRow data); +// }; + +// A helper for InterleavedBackSubst. +template +inline void BackSubstBlock(typename BandingStorage::CoeffRow *state, + typename BandingStorage::Index num_columns, + const BandingStorage &bs, + typename BandingStorage::Index start_slot) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + for (Index i = start_slot + kCoeffBits; i > start_slot;) { + --i; + CoeffRow cr; + ResultRow rr; + bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true); + for (Index j = 0; j < num_columns; ++j) { + // Compute next solution bit at row i, column j (see derivation below) + CoeffRow tmp = state[j] << 1; + int bit = BitParity(tmp & cr) ^ ((rr >> j) & 1); + tmp |= static_cast(bit); + + // Now tmp is solution at column j from row i for next kCoeffBits + // more rows. Thus, for valid solution, the dot product of the + // solution column with the coefficient row has to equal the result + // at that column, + // BitParity(tmp & cr) == ((rr >> j) & 1) + + // Update state. + state[j] = tmp; + } + } +} + +// Back-substitution for generating a solution from BandingStorage to +// InterleavedSolutionStorage. +template +void InterleavedBackSubst(InterleavedSolutionStorage *iss, + const BandingStorage &bs) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + + static_assert( + sizeof(Index) == sizeof(typename InterleavedSolutionStorage::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename InterleavedSolutionStorage::CoeffRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Index num_starts = bs.GetNumStarts(); + // Although it might be nice to have a filter that returns "always false" + // when no key is added, we aren't specifically supporting that here + // because it would require another condition branch in the query. + assert(num_starts > 0); + iss->PrepareForNumStarts(num_starts); + + const Index num_slots = num_starts + kCoeffBits - 1; + assert(num_slots % kCoeffBits == 0); + const Index num_blocks = num_slots / kCoeffBits; + const Index num_segments = iss->GetNumSegments(); + + // For now upper, then lower + Index num_columns = iss->GetUpperNumColumns(); + const Index upper_start_block = iss->GetUpperStartBlock(); + + if (num_columns == 0) { + // Nothing to do, presumably because there's not enough space for even + // a single segment. + assert(num_segments == 0); + // When num_columns == 0, a Ribbon filter query will always return true, + // or a PHSF query always 0. + return; + } + + // We should be utilizing all available segments + assert(num_segments == (upper_start_block * (num_columns - 1)) + + ((num_blocks - upper_start_block) * num_columns)); + + // TODO: consider fixed-column specializations with stack-allocated state + + // A column-major buffer of the solution matrix, containing enough + // recently-computed solution data to compute the next solution row + // (based also on banding data). + std::unique_ptr state{new CoeffRow[num_columns]()}; + + Index block = num_blocks; + Index segment_num = num_segments; + while (block > upper_start_block) { + --block; + BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits); + segment_num -= num_columns; + for (Index i = 0; i < num_columns; ++i) { + iss->StoreSegment(segment_num + i, state[i]); + } + } + // Now (if applicable), region using lower number of columns + // (This should be optimized away if GetUpperStartBlock() returns + // constant 0.) + --num_columns; + while (block > 0) { + --block; + BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits); + segment_num -= num_columns; + for (Index i = 0; i < num_columns; ++i) { + iss->StoreSegment(segment_num + i, state[i]); + } + } + // Verify everything processed + assert(block == 0); + assert(segment_num == 0); +} + +// Prefetch memory for a key in InterleavedSolutionStorage. +template +inline void InterleavedPrepareQuery( + const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher, + const InterleavedSolutionStorage &iss, + typename PhsfQueryHasher::Hash *saved_hash, + typename InterleavedSolutionStorage::Index *saved_segment_num, + typename InterleavedSolutionStorage::Index *saved_num_columns, + typename InterleavedSolutionStorage::Index *saved_start_bit) { + using Hash = typename PhsfQueryHasher::Hash; + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + + static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index), + "must be same"); + + const Hash hash = hasher.GetHash(key); + const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts()); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Index upper_start_block = iss.GetUpperStartBlock(); + Index num_columns = iss.GetUpperNumColumns(); + Index start_block_num = start_slot / kCoeffBits; + Index segment_num = start_block_num * num_columns - + std::min(start_block_num, upper_start_block); + // Change to lower num columns if applicable. + // (This should not compile to a conditional branch.) + num_columns -= (start_block_num < upper_start_block) ? 1 : 0; + + Index start_bit = start_slot % kCoeffBits; + + Index segment_count = num_columns + (start_bit == 0 ? 0 : num_columns); + + iss.PrefetchSegmentRange(segment_num, segment_num + segment_count); + + *saved_hash = hash; + *saved_segment_num = segment_num; + *saved_num_columns = num_columns; + *saved_start_bit = start_bit; +} + +// General PHSF query from InterleavedSolutionStorage, using data for +// the query key from InterleavedPrepareQuery +template +inline typename InterleavedSolutionStorage::ResultRow InterleavedPhsfQuery( + typename PhsfQueryHasher::Hash hash, + typename InterleavedSolutionStorage::Index segment_num, + typename InterleavedSolutionStorage::Index num_columns, + typename InterleavedSolutionStorage::Index start_bit, + const PhsfQueryHasher &hasher, const InterleavedSolutionStorage &iss) { + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + using ResultRow = typename InterleavedSolutionStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index), + "must be same"); + static_assert(sizeof(CoeffRow) == sizeof(typename PhsfQueryHasher::CoeffRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const CoeffRow cr = hasher.GetCoeffRow(hash); + + ResultRow sr = 0; + const CoeffRow cr_left = cr << static_cast(start_bit); + for (Index i = 0; i < num_columns; ++i) { + sr ^= BitParity(iss.LoadSegment(segment_num + i) & cr_left) << i; + } + + if (start_bit > 0) { + segment_num += num_columns; + const CoeffRow cr_right = + cr >> static_cast(kCoeffBits - start_bit); + for (Index i = 0; i < num_columns; ++i) { + sr ^= BitParity(iss.LoadSegment(segment_num + i) & cr_right) << i; + } + } + + return sr; +} + +// Filter query a key from InterleavedFilterQuery. +template +inline bool InterleavedFilterQuery( + typename FilterQueryHasher::Hash hash, + typename InterleavedSolutionStorage::Index segment_num, + typename InterleavedSolutionStorage::Index num_columns, + typename InterleavedSolutionStorage::Index start_bit, + const FilterQueryHasher &hasher, const InterleavedSolutionStorage &iss) { + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + using ResultRow = typename InterleavedSolutionStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename FilterQueryHasher::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename FilterQueryHasher::CoeffRow), + "must be same"); + static_assert( + sizeof(ResultRow) == sizeof(typename FilterQueryHasher::ResultRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const CoeffRow cr = hasher.GetCoeffRow(hash); + const ResultRow expected = hasher.GetResultRowFromHash(hash); + + // TODO: consider optimizations such as + // * get rid of start_bit == 0 condition with careful fetching & shifting + if (start_bit == 0) { + for (Index i = 0; i < num_columns; ++i) { + if (BitParity(iss.LoadSegment(segment_num + i) & cr) != + (static_cast(expected >> i) & 1)) { + return false; + } + } + } else { + const CoeffRow cr_left = cr << static_cast(start_bit); + const CoeffRow cr_right = + cr >> static_cast(kCoeffBits - start_bit); + + for (Index i = 0; i < num_columns; ++i) { + CoeffRow soln_data = + (iss.LoadSegment(segment_num + i) & cr_left) ^ + (iss.LoadSegment(segment_num + num_columns + i) & cr_right); + if (BitParity(soln_data) != (static_cast(expected >> i) & 1)) { + return false; + } + } + } + // otherwise, all match + return true; +} + +// TODO: refactor Interleaved*Query so that queries can be "prepared" by +// prefetching memory, to hide memory latency for multiple queries in a +// single thread. + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE -- cgit v1.2.3