summaryrefslogtreecommitdiffstats
path: root/third_party/rust/rust_cascade
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/rust_cascade')
-rw-r--r--third_party/rust/rust_cascade/.cargo-checksum.json1
-rw-r--r--third_party/rust/rust_cascade/Cargo.toml34
-rw-r--r--third_party/rust/rust_cascade/README.md12
-rw-r--r--third_party/rust/rust_cascade/license.txt373
-rw-r--r--third_party/rust/rust_cascade/src/lib.rs1129
-rw-r--r--third_party/rust/rust_cascade/test_data/make-sample-data.py106
-rw-r--r--third_party/rust/rust_cascade/test_data/requirements.txt1
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbfbin0 -> 15244 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbfbin0 -> 1024 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbfbin0 -> 830 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbfbin0 -> 786 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256ctr_salt_mlbfbin0 -> 842 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256l32_inverted_mlbfbin0 -> 833 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256l32_mlbfbin0 -> 795 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256l32_salt_mlbfbin0 -> 795 bytes
15 files changed, 1656 insertions, 0 deletions
diff --git a/third_party/rust/rust_cascade/.cargo-checksum.json b/third_party/rust/rust_cascade/.cargo-checksum.json
new file mode 100644
index 0000000000..963fd724ce
--- /dev/null
+++ b/third_party/rust/rust_cascade/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"c523c5156c5d1fe20facdf880a0bab82235bb36a0e60e89c04cfa8fcd4c8ed90","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"30f2fc8a98641d6382e0bf9990b62959d445eeb7f2418f04352109a85eaee555","test_data/make-sample-data.py":"7b9f3efda7d1043eaa32619d9bdc4904db5563b0132815a035211037ab1e028a","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"8f72bc1ca79194026fb2f7335a21f8c61636278a91291122148ad8f1aa8917a2","test_data/test_v2_murmur_mlbf":"83dce93d1147b38ca94548ff52552688caf2ece8388b2b5ea3fff1cb67e1396e","test_data/test_v2_sha256ctr_salt_mlbf":"31970e184563c31f39cd52410c6de1e49924d175df472af441a5aacb016240c3","test_data/test_v2_sha256l32_inverted_mlbf":"96399e30463a761a3f5ae0d0e1d57738b05f231cf6d5c2fe1db6bd9d33fab992","test_data/test_v2_sha256l32_mlbf":"0bd6630ef9a900861af419b496657648ab84b8c134a6fc2484d8e1cf11820ec1","test_data/test_v2_sha256l32_salt_mlbf":"bed34a636baca454f37f66d8314d7891722c8a8b029a0e4e0cf1f6ba176ee2c8"},"package":"ef248456c30c6607f1eb1e5d11025367b3340e235314dd33d2b31b41b35ac335"} \ No newline at end of file
diff --git a/third_party/rust/rust_cascade/Cargo.toml b/third_party/rust/rust_cascade/Cargo.toml
new file mode 100644
index 0000000000..a33da9b4c1
--- /dev/null
+++ b/third_party/rust/rust_cascade/Cargo.toml
@@ -0,0 +1,34 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+name = "rust_cascade"
+version = "1.4.0"
+authors = ["Mark Goodwin <mgoodwin@mozilla.com>", "Dana Keeler <dkeeler@mozilla.com>", "J.C. Jones <jc@mozilla.com>", "John Schanck <jschanck@mozilla.com>"]
+description = "A simple bloom filter cascade implementation in Rust."
+homepage = "https://github.com/mozilla/rust-cascade"
+documentation = "https://docs.rs/rust_cascade/"
+license = "MPL-2.0"
+repository = "https://github.com/mozilla/rust-cascade"
+[dependencies.byteorder]
+version = "1.3.1"
+
+[dependencies.murmurhash3]
+version = "0.0.5"
+
+[dependencies.rand]
+version = "0.7.3"
+
+[dependencies.sha2]
+version = "0.10.2"
+
+[features]
+builder = []
diff --git a/third_party/rust/rust_cascade/README.md b/third_party/rust/rust_cascade/README.md
new file mode 100644
index 0000000000..206bff9267
--- /dev/null
+++ b/third_party/rust/rust_cascade/README.md
@@ -0,0 +1,12 @@
+# rust-cascade
+A Bloom filter cascade implementation in Rust. This can use one of two hash
+functions:
+
+* MurmurHash3 (32-bit), or
+* SHA256, with an optional salt
+
+This implementation is designed to match up with the Python [filter-cascade
+project](https://pypi.org/project/filtercascade/)
+[[github](https://github.com/mozilla/filter-cascade)]
+
+See tests in src/lib.rs to get an idea of usage.
diff --git a/third_party/rust/rust_cascade/license.txt b/third_party/rust/rust_cascade/license.txt
new file mode 100644
index 0000000000..a612ad9813
--- /dev/null
+++ b/third_party/rust/rust_cascade/license.txt
@@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
diff --git a/third_party/rust/rust_cascade/src/lib.rs b/third_party/rust/rust_cascade/src/lib.rs
new file mode 100644
index 0000000000..eef8e1f97d
--- /dev/null
+++ b/third_party/rust/rust_cascade/src/lib.rs
@@ -0,0 +1,1129 @@
+//! # rust-cascade
+//!
+//! A library for creating and querying the cascading bloom filters described by
+//! Larisch, Choffnes, Levin, Maggs, Mislove, and Wilson in
+//! "CRLite: A Scalable System for Pushing All TLS Revocations to All Browsers"
+//! <https://www.ieee-security.org/TC/SP2017/papers/567.pdf>
+
+extern crate byteorder;
+extern crate murmurhash3;
+extern crate rand;
+extern crate sha2;
+
+use byteorder::{ByteOrder, LittleEndian, ReadBytesExt};
+use murmurhash3::murmurhash3_x86_32;
+#[cfg(feature = "builder")]
+use rand::rngs::OsRng;
+#[cfg(feature = "builder")]
+use rand::RngCore;
+use sha2::{Digest, Sha256};
+use std::convert::{TryFrom, TryInto};
+use std::fmt;
+use std::io::{ErrorKind, Read};
+use std::mem::size_of;
+
+/// Errors reported by this crate; the `Display` impl renders each one as a human-readable message.
+#[derive(Debug)]
+pub enum CascadeError {
+ /// The salt is too long to serialize (length >= 256).
+ LongSalt,
+ /// The filter has too many layers to serialize.
+ TooManyLayers,
+ /// A collision between the included and the excluded sets was detected.
+ Collision,
+ /// A hash algorithm identifier did not match any `HashAlgorithm` variant.
+ UnknownHashFunction,
+ /// A function was called unexpectedly; the payload names that function.
+ CapacityViolation(&'static str),
+ /// A serialized cascade could not be parsed; the payload gives the reason.
+ Parse(&'static str),
+}
+
+impl fmt::Display for CascadeError {
+    /// Writes the human-readable description of this error to `f`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Variants that carry a payload interpolate it and return immediately;
+        // every other variant maps to a fixed message written at the end.
+        let fixed_message = match self {
+            CascadeError::CapacityViolation(function) => {
+                return write!(f, "Unexpected call to {}", function);
+            }
+            CascadeError::Parse(reason) => {
+                return write!(f, "Cannot parse cascade: {}", reason);
+            }
+            CascadeError::LongSalt => "Cannot serialize a filter with a salt of length >= 256.",
+            CascadeError::TooManyLayers => "Cannot serialize a filter with >= 255 layers.",
+            CascadeError::Collision => "Collision between included and excluded sets.",
+            CascadeError::UnknownHashFunction => "Unknown hash function.",
+        };
+        f.write_str(fixed_message)
+    }
+}
+
+/// A Bloom filter representing a specific layer in a multi-layer cascading Bloom filter.
+/// The same hash function is used for all layers, so it is not encoded here.
+struct Bloom {
+ /// How many bit indices are generated and checked (or set) per element
+ n_hash_funcs: u32,
+ /// The bit length of the filter; every generated bit index is reduced below this value
+ size: u32,
+ /// The filter bits, packed eight per byte: bit `i` is `data[i / 8] & (1 << (i % 8))`
+ data: Vec<u8>,
+}
+
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq)]
+/// Identifies the construction used to derive bit indices for a cascade.
+/// These enumerations need to match the python filter-cascade project:
+/// <https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py>
+pub enum HashAlgorithm {
+ MurmurHash3 = 1, // 32-bit MurmurHash3 (murmurhash3_x86_32)
+ Sha256l32 = 2, // low 32 bits of sha256
+ Sha256 = 3, // all 256 bits of sha256, consumed byte by byte (CascadeIndexGenerator::Sha256Ctr)
+}
+
+impl fmt::Display for HashAlgorithm {
+    /// Formats the algorithm as its numeric identifier (for example `2` for `Sha256l32`).
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let discriminant: u8 = *self as u8;
+        write!(f, "{}", discriminant)
+    }
+}
+
+impl TryFrom<u8> for HashAlgorithm {
+    type Error = CascadeError;
+
+    /// Maps a numeric identifier back to its `HashAlgorithm`.
+    ///
+    /// Any value other than the discriminants declared on the enum (1, 2, 3) yields
+    /// `CascadeError::UnknownHashFunction`.
+    fn try_from(value: u8) -> Result<HashAlgorithm, CascadeError> {
+        // Naturally, these need to match the enum declaration
+        let algorithm = match value {
+            1 => HashAlgorithm::MurmurHash3,
+            2 => HashAlgorithm::Sha256l32,
+            3 => HashAlgorithm::Sha256,
+            _ => return Err(CascadeError::UnknownHashFunction),
+        };
+        Ok(algorithm)
+    }
+}
+
+/// A CascadeIndexGenerator provides one-time access to a table of pseudorandom functions H_ij
+/// in which each function is of the form
+/// H(s: &[u8], r: u32) -> usize
+/// and for which 0 <= H(s,r) < r for all s, r.
+/// The pseudorandom functions share a common key, represented as an octet string, and the table
+/// can be constructed from this key alone. The functions are pseudorandom with respect to s, but
+/// not r. For a uniformly random key/table, fixed r, and arbitrary strings m0 and m1,
+/// H_ij(m0, r) is computationally indistinguishable from H_ij(m1,r)
+/// for all i,j.
+///
+/// A call to next_layer() increments i and resets j.
+/// A call to next_index(s, r) increments j, and outputs some value H_ij(s, r) with
+/// 0 <= H_ij(s, r) < r.
+/// The Sha256Ctr variant stores no layer number: next_layer() leaves it unchanged and its
+/// single counter keeps advancing.
+
+#[derive(Debug)]
+enum CascadeIndexGenerator {
+ MurmurHash3 {
+ /// The key shared by every function in the table
+ key: Vec<u8>,
+ /// j: how many indices have been produced in the current layer
+ counter: u32,
+ /// i: the current layer, starting at 1
+ depth: u8,
+ },
+ Sha256l32 {
+ /// The key shared by every function in the table
+ key: Vec<u8>,
+ /// j: how many indices have been produced in the current layer
+ counter: u32,
+ /// i: the current layer, starting at 1
+ depth: u8,
+ },
+ Sha256Ctr {
+ /// The key shared by every function in the table
+ key: Vec<u8>,
+ /// How many SHA-256 outputs have been generated so far
+ counter: u32,
+ /// The most recently generated SHA-256 output
+ state: [u8; 32],
+ /// How many bytes at the end of `state` have not been consumed yet
+ state_available: u8,
+ },
+}
+
+impl PartialEq for CascadeIndexGenerator {
+    /// Two generators compare equal when they are the same variant and hold the same key.
+    /// Counters, layer depth and buffered hash state are not compared.
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Self::MurmurHash3 { key: lhs, .. }, Self::MurmurHash3 { key: rhs, .. }) => lhs == rhs,
+            (Self::Sha256l32 { key: lhs, .. }, Self::Sha256l32 { key: rhs, .. }) => lhs == rhs,
+            (Self::Sha256Ctr { key: lhs, .. }, Self::Sha256Ctr { key: rhs, .. }) => lhs == rhs,
+            // Different variants never compare equal.
+            _ => false,
+        }
+    }
+}
+
+impl CascadeIndexGenerator {
+    /// Creates a generator for `hash_alg` over `key`, positioned before the first index:
+    /// `counter` is 0, and the variants that track a layer start at `depth` 1.
+    fn new(hash_alg: HashAlgorithm, key: Vec<u8>) -> Self {
+        const FIRST_LAYER: u8 = 1;
+        let counter = 0;
+        match hash_alg {
+            HashAlgorithm::MurmurHash3 => CascadeIndexGenerator::MurmurHash3 {
+                key,
+                counter,
+                depth: FIRST_LAYER,
+            },
+            HashAlgorithm::Sha256l32 => CascadeIndexGenerator::Sha256l32 {
+                key,
+                counter,
+                depth: FIRST_LAYER,
+            },
+            HashAlgorithm::Sha256 => CascadeIndexGenerator::Sha256Ctr {
+                key,
+                counter,
+                // Nothing is buffered yet, so the first `next_index` call computes a hash.
+                state: [0; 32],
+                state_available: 0,
+            },
+        }
+    }
+
+    /// Moves the generator on to the next layer.
+    ///
+    /// For `MurmurHash3` and `Sha256l32` this resets `counter` to 0 and increments `depth`.
+    /// `Sha256Ctr` is left untouched.
+    fn next_layer(&mut self) {
+        match self {
+            Self::Sha256Ctr { .. } => {}
+            Self::MurmurHash3 { counter, depth, .. } | Self::Sha256l32 { counter, depth, .. } => {
+                *counter = 0;
+                *depth += 1;
+            }
+        }
+    }
+
+ /// Produces the next index in `[0, range)` and advances the generator's state.
+ ///
+ /// `salt` is mixed into the hash input of the two SHA-256 based variants; the
+ /// MurmurHash3 variant does not use it.
+ ///
+ /// Panics if `range` is 0, because the result is reduced with `index % range`.
+ fn next_index(&mut self, salt: &[u8], range: u32) -> usize {
+ let index = match self {
+ Self::MurmurHash3 {
+ key,
+ ref mut counter,
+ depth,
+ } => {
+ // The seed combines the counter (shifted left by 16 bits) with the layer depth.
+ let hash_seed = (*counter << 16) + *depth as u32;
+ *counter += 1;
+ murmurhash3_x86_32(key, hash_seed)
+ }
+
+ Self::Sha256l32 {
+ key,
+ ref mut counter,
+ depth,
+ } => {
+ // Hash input order: salt, counter (little endian), depth, key.
+ let mut hasher = Sha256::new();
+ hasher.update(salt);
+ hasher.update(counter.to_le_bytes());
+ hasher.update(depth.to_le_bytes());
+ hasher.update(&key);
+ *counter += 1;
+ // Only the first four digest bytes are used, read as a little-endian u32.
+ u32::from_le_bytes(
+ hasher.finalize()[0..4]
+ .try_into()
+ .expect("sha256 should have given enough bytes"),
+ )
+ }
+
+ Self::Sha256Ctr {
+ key,
+ ref mut counter,
+ ref mut state,
+ ref mut state_available,
+ } => {
+ // |bytes_needed| is the minimum number of bytes needed to represent a value in [0, range).
+ let bytes_needed = ((range.next_power_of_two().trailing_zeros() + 7) / 8) as usize;
+ // Bytes beyond |bytes_needed| stay zero, so they do not contribute to the index.
+ let mut index_arr = [0u8; 4];
+ for byte in index_arr.iter_mut().take(bytes_needed) {
+ if *state_available == 0 {
+ // The buffer is exhausted: refill it by hashing counter (little endian),
+ // salt, key, in that order, then advance the counter.
+ let mut hasher = Sha256::new();
+ hasher.update(counter.to_le_bytes());
+ hasher.update(salt);
+ hasher.update(&key);
+ hasher.finalize_into(state.into());
+ *state_available = state.len() as u8;
+ *counter += 1;
+ }
+ // Consume the buffered bytes front to back.
+ *byte = state[state.len() - *state_available as usize];
+ *state_available -= 1;
+ }
+ LittleEndian::read_u32(&index_arr)
+ }
+ };
+ // Reduce the raw 32-bit value into [0, range).
+ (index % range) as usize
+ }
+}
+
+impl Bloom {
+ /// `new_crlite_bloom` creates an empty bloom filter for a layer of a cascade with the
+ /// parameters specified in [LCL+17, Section III.C].
+ ///
+ /// # Arguments
+ /// * `include_capacity` - the number of elements that will be encoded at the new layer.
+ /// * `exclude_capacity` - the number of elements in the complement of the encoded set.
+ /// * `top_layer` - whether this is the top layer of the filter.
+ ///
+ /// # Panics
+ /// Panics if `include_capacity` or `exclude_capacity` is 0.
+ #[cfg(feature = "builder")]
+ pub fn new_crlite_bloom(
+ include_capacity: usize,
+ exclude_capacity: usize,
+ top_layer: bool,
+ ) -> Self {
+ assert!(include_capacity != 0 && exclude_capacity != 0);
+
+ let r = include_capacity as f64;
+ let s = exclude_capacity as f64;
+
+ // The desired false positive rate for the top layer is
+ // p = r/(sqrt(2)*s).
+ // With this setting, the number of false positives (which will need to be
+ // encoded at the second layer) is expected to be a factor of sqrt(2)
+ // smaller than the number of elements encoded at the top layer.
+ //
+ // At layer i > 1 we try to ensure that the number of elements to be
+ // encoded at layer i+1 is half the number of elements encoded at
+ // layer i. So we take p = 1/2.
+ let log2_fp_rate = match top_layer {
+ true => (r / s).log2() - 0.5f64,
+ false => -1f64,
+ };
+
+ // the number of hash functions (k) and the size of the bloom filter (m) are given in
+ // [LCL+17] as k = log2(1/p) and m = r log2(1/p) / ln(2).
+ //
+ // If this formula gives a value of m < 256, we take m=256 instead. This results in very
+ // slightly sub-optimal size, but gives us the added benefit of doing less hashing.
+ //
+ // NOTE(review): the float-to-u32 casts saturate, so a top layer with
+ // include_capacity > exclude_capacity gets n_hash_funcs == 0; presumably callers
+ // avoid that case -- confirm.
+ let n_hash_funcs = (-log2_fp_rate).round() as u32;
+ let size = match (r * (-log2_fp_rate) / (f64::ln(2f64))).round() as u32 {
+ size if size >= 256 => size,
+ _ => 256,
+ };
+
+ Bloom {
+ n_hash_funcs,
+ size,
+ // One byte per eight bits, rounded up.
+ // NOTE(review): `size + 7` overflows u32 if `size` is within 7 of u32::MAX.
+ data: vec![0u8; ((size + 7) / 8) as usize],
+ }
+ }
+
+    /// `read` attempts to decode the Bloom filter represented by the bytes in the given reader.
+    ///
+    /// # Arguments
+    /// * `reader` - The encoded representation of this Bloom filter. May be empty. May include
+    ///   additional data describing further Bloom filters.
+    ///   The format of an encoded Bloom filter is:
+    ///   [1 byte] - the hash algorithm to use in the filter
+    ///   [4 little endian bytes] - the length in bits of the filter
+    ///   [4 little endian bytes] - the number of hash functions to use in the filter
+    ///   [1 byte] - which layer in the cascade this filter is
+    ///   [variable length bytes] - the filter itself (must be of minimal length)
+    ///
+    /// # Returns
+    /// * `Ok(None)` - the reader was already at EOF, so there is no further filter.
+    /// * `Ok(Some((bloom, layer, hash_algorithm)))` - the decoded filter together with the
+    ///   layer number and the hash algorithm that were stored with it.
+    ///
+    /// # Errors
+    /// * `CascadeError::UnknownHashFunction` - the hash algorithm byte is not recognised.
+    /// * `CascadeError::Parse` - the input is truncated or unreadable, or it declares a
+    ///   zero-bit filter together with a non-zero number of hash functions.
+    pub fn read<R: Read>(
+        reader: &mut R,
+    ) -> Result<Option<(Bloom, usize, HashAlgorithm)>, CascadeError> {
+        let hash_algorithm_val = match reader.read_u8() {
+            Ok(val) => val,
+            // If reader is at EOF, there is no bloom filter.
+            Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(None),
+            Err(_) => return Err(CascadeError::Parse("read error")),
+        };
+        let hash_algorithm = HashAlgorithm::try_from(hash_algorithm_val)?;
+
+        let size = reader
+            .read_u32::<byteorder::LittleEndian>()
+            .or(Err(CascadeError::Parse("truncated at layer size")))?;
+        let n_hash_funcs = reader
+            .read_u32::<byteorder::LittleEndian>()
+            .or(Err(CascadeError::Parse("truncated at layer hash count")))?;
+        let layer = reader
+            .read_u8()
+            .or(Err(CascadeError::Parse("truncated at layer number")))?;
+
+        // Bit indices are reduced modulo `size`, so a zero-bit filter that is actually
+        // queried would panic with a division by zero. Reject it while parsing instead.
+        if size == 0 && n_hash_funcs != 0 {
+            return Err(CascadeError::Parse("layer size is zero"));
+        }
+
+        // ceil(size / 8), computed without the u32 overflow that `size + 7` hits when the
+        // encoded size is within 7 of u32::MAX.
+        let byte_count = (size / 8) as usize + usize::from(size % 8 != 0);
+        let mut data = vec![0; byte_count];
+        reader
+            .read_exact(&mut data)
+            .or(Err(CascadeError::Parse("truncated at layer data")))?;
+        let bloom = Bloom {
+            n_hash_funcs,
+            size,
+            data,
+        };
+        Ok(Some((bloom, layer as usize, hash_algorithm)))
+    }
+
+    /// Reports whether every bit selected by the next indices drawn from `generator` is set.
+    ///
+    /// Up to `n_hash_funcs` indices are drawn; the check stops at the first unset bit, so the
+    /// generator may be advanced fewer than `n_hash_funcs` times.
+    fn has(&self, generator: &mut CascadeIndexGenerator, salt: &[u8]) -> bool {
+        (0..self.n_hash_funcs).all(|_| {
+            let bit = generator.next_index(salt, self.size);
+            assert!(bit < self.size as usize);
+            let packed = self.data[bit / 8];
+            (packed & (1 << (bit % 8))) != 0
+        })
+    }
+
+    /// Sets the `n_hash_funcs` bits that `generator` selects for this filter.
+    #[cfg(feature = "builder")]
+    fn insert(&mut self, generator: &mut CascadeIndexGenerator, salt: &[u8]) {
+        let size = self.size;
+        let data = &mut self.data;
+        (0..self.n_hash_funcs).for_each(|_| {
+            let bit_index = generator.next_index(salt, size);
+            data[bit_index / 8] |= 1 << (bit_index % 8);
+        });
+    }
+
+    /// Approximate memory footprint in bytes: the size of the `Bloom` struct itself plus the
+    /// length of its bit array.
+    pub fn approximate_size_of(&self) -> usize {
+        let bit_array_bytes = self.data.len();
+        size_of::<Self>() + bit_array_bytes
+    }
+}
+
+/// Formats a `Bloom` as its hash-function count and its size in bits.
+impl fmt::Display for Bloom {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let Bloom {
+            n_hash_funcs, size, ..
+        } = self;
+        write!(f, "n_hash_funcs={} size={}", n_hash_funcs, size)
+    }
+}
+
+/// A multi-layer cascading Bloom filter.
+pub struct Cascade {
+    /// The Bloom filters of the cascade, one per layer, stored in layer order
+    /// (index 0 holds the first layer)
+    filters: Vec<Bloom>,
+    /// The salt in use, if any (empty when there is no salt)
+    salt: Vec<u8>,
+    /// The hash algorithm / index generating function to use
+    hash_algorithm: HashAlgorithm,
+    /// Whether the logic should be inverted, i.e. whether `has` negates its result
+    inverted: bool,
+}
+
+impl Cascade {
+    /// from_bytes attempts to decode and return a multi-layer cascading Bloom filter.
+    ///
+    /// # Arguments
+    /// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2
+    ///           little endian bytes indicating the version. The current version is 2. The Python
+    ///           filter-cascade project defines the formats, see
+    ///           <https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py>
+    ///
+    /// May be of length 0, in which case `None` is returned.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::Parse` if the input is truncated, the version is greater than 2,
+    /// the `inverted` byte is neither 0 nor 1, no layer is present, the layers are not numbered
+    /// 1, 2, 3, ... in order, or the layers do not all use the same hash algorithm. Errors
+    /// from decoding an individual layer are propagated unchanged.
+    pub fn from_bytes(bytes: Vec<u8>) -> Result<Option<Self>, CascadeError> {
+        if bytes.is_empty() {
+            return Ok(None);
+        }
+        let mut reader = bytes.as_slice();
+        let version = reader
+            .read_u16::<byteorder::LittleEndian>()
+            .or(Err(CascadeError::Parse("truncated at version")))?;
+
+        let mut filters = vec![];
+        let mut salt = vec![];
+        let mut top_hash_alg = None;
+        let mut inverted = false;
+
+        // NOTE(review): a version field of 0 passes this check and is then decoded like
+        // version 1 (no inverted byte, no salt) — confirm whether that is intended.
+        if version > 2 {
+            return Err(CascadeError::Parse("unknown version"));
+        }
+
+        if version == 2 {
+            let inverted_val = reader
+                .read_u8()
+                .or(Err(CascadeError::Parse("truncated at inverted")))?;
+            if inverted_val > 1 {
+                return Err(CascadeError::Parse("invalid value for inverted"));
+            }
+            inverted = 0 != inverted_val;
+            // The salt length is a single byte, so it is at most 255 and needs no upper-bound
+            // check.
+            let salt_len: usize = reader
+                .read_u8()
+                .or(Err(CascadeError::Parse("truncated at salt length")))?
+                .into();
+            if salt_len > 0 {
+                let mut salt_bytes = vec![0; salt_len];
+                reader
+                    .read_exact(&mut salt_bytes)
+                    .or(Err(CascadeError::Parse("truncated at salt")))?;
+                salt = salt_bytes;
+            }
+        }
+
+        // `Bloom::read` yields `None` once the reader is exhausted.
+        while let Some((filter, layer_number, layer_hash_alg)) = Bloom::read(&mut reader)? {
+            filters.push(filter);
+
+            // Layer numbers are 1-indexed, so after the push the number of the layer just read
+            // must equal the number of layers seen so far.
+            if layer_number != filters.len() {
+                return Err(CascadeError::Parse("irregular layer numbering"));
+            }
+
+            // The first layer fixes the hash algorithm; every later layer must agree with it.
+            if *top_hash_alg.get_or_insert(layer_hash_alg) != layer_hash_alg {
+                return Err(CascadeError::Parse("Inconsistent hash algorithms"));
+            }
+        }
+
+        if filters.is_empty() {
+            return Err(CascadeError::Parse("missing filters"));
+        }
+
+        let hash_algorithm = top_hash_alg.ok_or(CascadeError::Parse("missing hash algorithm"))?;
+
+        Ok(Some(Cascade {
+            filters,
+            salt,
+            hash_algorithm,
+            inverted,
+        }))
+    }
+
+    /// to_bytes encodes a cascade in the version 2 format.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::LongSalt` if the salt is longer than 255 bytes and
+    /// `CascadeError::TooManyLayers` if the cascade has more than 255 layers; both quantities
+    /// are encoded in a single byte.
+    pub fn to_bytes(&self) -> Result<Vec<u8>, CascadeError> {
+        if self.salt.len() >= 256 {
+            return Err(CascadeError::LongSalt);
+        }
+        // Layer numbers are 1-indexed and stored in one byte, so layers 1..=255 are
+        // representable. `from_bytes` accepts a 255-layer cascade, so it must round-trip.
+        if self.filters.len() > usize::from(u8::MAX) {
+            return Err(CascadeError::TooManyLayers);
+        }
+        let mut out = vec![];
+        let version: u16 = 2;
+        let inverted: u8 = self.inverted.into();
+        let salt_len: u8 = self.salt.len() as u8;
+        let hash_alg: u8 = self.hash_algorithm as u8;
+        out.extend_from_slice(&version.to_le_bytes());
+        out.push(inverted);
+        out.push(salt_len);
+        out.extend_from_slice(&self.salt);
+        for (layer, bloom) in self.filters.iter().enumerate() {
+            out.push(hash_alg);
+            out.extend_from_slice(&bloom.size.to_le_bytes());
+            out.extend_from_slice(&bloom.n_hash_funcs.to_le_bytes());
+            out.push((1 + layer) as u8); // 1-indexed
+            out.extend_from_slice(&bloom.data);
+        }
+        Ok(out)
+    }
+
+    /// has determines if the given sequence of bytes is in the cascade.
+    ///
+    /// # Arguments
+    /// `entry` - The bytes to query
+    pub fn has(&self, entry: Vec<u8>) -> bool {
+        // Query filters 0..self.filters.len() until we get a non-membership result.
+        // If this occurs at an even index filter, the element *is not* included.
+        // ... at an odd-index filter, the element *is* included.
+        // If every filter reports membership, the parity of the layer count decides.
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, entry);
+        let mut rv = false;
+        for filter in &self.filters {
+            if filter.has(&mut generator, &self.salt) {
+                rv = !rv;
+                generator.next_layer();
+            } else {
+                break;
+            }
+        }
+        if self.inverted {
+            rv = !rv;
+        }
+        rv
+    }
+
+    /// Toggles the `inverted` flag, which negates the result of every later `has` query.
+    pub fn invert(&mut self) {
+        self.inverted = !self.inverted;
+    }
+
+    /// Determine the approximate amount of memory in bytes used by this
+    /// Cascade. Because this implementation does not integrate with the
+    /// allocator, it can't get an accurate measurement of how much memory it
+    /// uses. However, it can make a reasonable guess, assuming the sizes of
+    /// the bloom filters are large enough to dominate the overall allocated
+    /// size.
+    pub fn approximate_size_of(&self) -> usize {
+        size_of::<Cascade>()
+            + self
+                .filters
+                .iter()
+                .map(|x| x.approximate_size_of())
+                .sum::<usize>()
+            + self.salt.len()
+    }
+}
+
+/// Formats a `Cascade` as one header line (salt, inversion flag, hash algorithm) followed by
+/// one tab-indented line per layer.
+impl fmt::Display for Cascade {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let Cascade {
+            filters,
+            salt,
+            hash_algorithm,
+            inverted,
+        } = self;
+        writeln!(
+            f,
+            "salt={:?} inverted={} hash_algorithm={}",
+            salt, inverted, hash_algorithm,
+        )?;
+        filters
+            .iter()
+            .try_for_each(|layer| writeln!(f, "\t[{}]", layer))
+    }
+}
+
+/// A CascadeBuilder creates a Cascade with layers given by `Bloom::new_crlite_bloom`.
+///
+/// A builder is initialized using [`CascadeBuilder::default`] or [`CascadeBuilder::new`]. Prefer `default`. The `new` constructor
+/// allows the user to specify sensitive internal details such as the hash function and the domain
+/// separation parameter.
+///
+/// Both constructors take an `include_capacity` and an `exclude_capacity` parameter. The
+/// `include_capacity` is the number of elements that will be encoded in the Cascade. The
+/// `exclude_capacity` is the size of the complement of the encoded set.
+///
+/// The encoded set is specified through calls to [`CascadeBuilder::include`]. Its complement is specified through
+/// calls to [`CascadeBuilder::exclude`]. The cascade is built with a call to [`CascadeBuilder::finalize`].
+///
+/// The builder keeps track of the number of calls to `include` and `exclude`.
+/// The caller is responsible for making *exactly* `include_capacity` calls to `include`
+/// followed by *exactly* `exclude_capacity` calls to `exclude`.
+/// Calling `include` or `exclude` more often than the respective capacity allows, calling
+/// `exclude` before all `include` calls have been made, or calling `finalize` before all
+/// `exclude` calls have been made returns `CascadeError::CapacityViolation`.
+///
+#[cfg(feature = "builder")]
+pub struct CascadeBuilder {
+    /// The layers built so far; the constructor creates the first layer and `finalize` adds
+    /// the rest
+    filters: Vec<Bloom>,
+    /// The salt passed to every index computation
+    salt: Vec<u8>,
+    /// The hash algorithm used to create index generators for items
+    hash_algorithm: HashAlgorithm,
+    /// Index generators for the included items
+    to_include: Vec<CascadeIndexGenerator>,
+    /// Index generators for the excluded items that the first layer reported as members
+    to_exclude: Vec<CascadeIndexGenerator>,
+    /// Remaining number of expected `include` and `exclude` calls
+    status: BuildStatus,
+}
+
+#[cfg(feature = "builder")]
+impl CascadeBuilder {
+    /// Creates a builder that uses `HashAlgorithm::Sha256` and a fresh 16-byte salt drawn from
+    /// `OsRng`.
+    ///
+    /// # Arguments
+    /// * `include_capacity` - the exact number of `include` calls that will be made
+    /// * `exclude_capacity` - the exact number of items that will be excluded
+    pub fn default(include_capacity: usize, exclude_capacity: usize) -> Self {
+        let mut salt = vec![0u8; 16];
+        OsRng.fill_bytes(&mut salt);
+        CascadeBuilder::new(
+            HashAlgorithm::Sha256,
+            salt,
+            include_capacity,
+            exclude_capacity,
+        )
+    }
+
+    /// Creates a builder with an explicit hash algorithm and salt. The first layer is
+    /// allocated immediately from the two capacities.
+    pub fn new(
+        hash_algorithm: HashAlgorithm,
+        salt: Vec<u8>,
+        include_capacity: usize,
+        exclude_capacity: usize,
+    ) -> Self {
+        CascadeBuilder {
+            filters: vec![Bloom::new_crlite_bloom(
+                include_capacity,
+                exclude_capacity,
+                true,
+            )],
+            salt,
+            to_include: vec![],
+            to_exclude: vec![],
+            hash_algorithm,
+            status: BuildStatus(include_capacity, exclude_capacity),
+        }
+    }
+
+    /// Adds `item` to the encoded set by inserting it into the first layer.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::CapacityViolation` once `include_capacity` items have already
+    /// been included.
+    pub fn include(&mut self, item: Vec<u8>) -> Result<(), CascadeError> {
+        match self.status {
+            BuildStatus(ref mut cap, _) if *cap > 0 => *cap -= 1,
+            _ => return Err(CascadeError::CapacityViolation("include")),
+        }
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
+        self.filters[0].insert(&mut generator, &self.salt);
+        self.to_include.push(generator);
+
+        Ok(())
+    }
+
+    /// Adds `item` to the complement of the encoded set. Only items that the first layer
+    /// reports as members (its false positives) are retained for later layers.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::CapacityViolation` if `include` calls are still outstanding or
+    /// if `exclude_capacity` items have already been excluded.
+    pub fn exclude(&mut self, item: Vec<u8>) -> Result<(), CascadeError> {
+        match self.status {
+            BuildStatus(0, ref mut cap) if *cap > 0 => *cap -= 1,
+            _ => return Err(CascadeError::CapacityViolation("exclude")),
+        }
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
+        if self.filters[0].has(&mut generator, &self.salt) {
+            self.to_exclude.push(generator);
+        }
+        Ok(())
+    }
+
+    /// `exclude_threaded` is like `exclude` but it stores false positives in a caller-owned
+    /// `ExcludeSet`. This allows the caller to exclude items in parallel.
+    ///
+    /// Unlike `exclude`, this method does not consult the build status: it queries the first
+    /// layer as it is at the time of the call. The caller must therefore finish all `include`
+    /// calls first; the ordering is only checked later, by `collect_exclude_set`.
+    pub fn exclude_threaded(&self, exclude_set: &mut ExcludeSet, item: Vec<u8>) {
+        exclude_set.size += 1;
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
+        if self.filters[0].has(&mut generator, &self.salt) {
+            exclude_set.set.push(generator);
+        }
+    }
+
+    /// `collect_exclude_set` merges an `ExcludeSet` into the internal storage of the CascadeBuilder.
+    ///
+    /// On success the `ExcludeSet` is left empty (both its items and its item count), so it
+    /// can be reused for further `exclude_threaded` calls without its earlier items being
+    /// counted against the capacity a second time. On error it is left untouched.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::CapacityViolation` if `include` calls are still outstanding or
+    /// if the set accounts for more items than the remaining exclude capacity.
+    pub fn collect_exclude_set(
+        &mut self,
+        exclude_set: &mut ExcludeSet,
+    ) -> Result<(), CascadeError> {
+        match self.status {
+            BuildStatus(0, ref mut cap) if *cap >= exclude_set.size => *cap -= exclude_set.size,
+            _ => return Err(CascadeError::CapacityViolation("exclude")),
+        }
+        self.to_exclude.append(&mut exclude_set.set);
+        // `append` drained the items; reset the counter so both fields stay in agreement.
+        exclude_set.size = 0;
+
+        Ok(())
+    }
+
+    /// Builds the next layer and appends it to `filters`.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::Collision` if the new layer removed no false positives and some
+    /// element is present in both the encoded and the filtered set.
+    fn push_layer(&mut self) -> Result<(), CascadeError> {
+        // At even layers we encode elements of to_include. At odd layers we encode elements of
+        // to_exclude. In both cases, we track false positives by filtering the complement of the
+        // encoded set through the newly produced bloom filter.
+        let at_even_layer = self.filters.len() % 2 == 0;
+        let (to_encode, to_filter) = match at_even_layer {
+            true => (&mut self.to_include, &mut self.to_exclude),
+            false => (&mut self.to_exclude, &mut self.to_include),
+        };
+
+        let mut bloom = Bloom::new_crlite_bloom(to_encode.len(), to_filter.len(), false);
+
+        // Borrow `salt` on its own, apart from `to_encode` and `to_filter`: the closures below
+        // need an immutable reference to it while those vectors are borrowed mutably.
+        let salt = self.salt.as_slice();
+
+        to_encode.iter_mut().for_each(|x| {
+            x.next_layer();
+            bloom.insert(x, salt)
+        });
+
+        // Keep only the elements the new layer still reports as members.
+        let before = to_filter.len();
+        to_filter.retain_mut(|x| {
+            x.next_layer();
+            bloom.has(x, salt)
+        });
+        let removed = before - to_filter.len();
+
+        // Check for collisions between the |to_encode| and |to_filter| sets, but only when the
+        // layer made no progress.
+        // The implementation of PartialEq for CascadeIndexGenerator will successfully
+        // identify cases where the user called |include(item)| and |exclude(item)| for the
+        // same item. It will not identify collisions in the underlying hash function.
+        if removed == 0 && to_encode.iter().any(|x| to_filter.contains(x)) {
+            return Err(CascadeError::Collision);
+        }
+
+        self.filters.push(bloom);
+        Ok(())
+    }
+
+    /// Consumes the builder and produces the finished, non-inverted `Cascade`.
+    ///
+    /// Layers are added alternately for the remaining excluded and included elements until
+    /// one of the two sets is empty.
+    ///
+    /// # Errors
+    /// Returns `CascadeError::CapacityViolation` if `include` or `exclude` calls are still
+    /// outstanding, and propagates `CascadeError::Collision` from layer construction.
+    pub fn finalize(mut self) -> Result<Box<Cascade>, CascadeError> {
+        match self.status {
+            BuildStatus(0, 0) => (),
+            _ => return Err(CascadeError::CapacityViolation("finalize")),
+        }
+
+        loop {
+            if self.to_exclude.is_empty() {
+                break;
+            }
+            self.push_layer()?;
+
+            if self.to_include.is_empty() {
+                break;
+            }
+            self.push_layer()?;
+        }
+
+        Ok(Box::new(Cascade {
+            filters: self.filters,
+            salt: self.salt,
+            hash_algorithm: self.hash_algorithm,
+            inverted: false,
+        }))
+    }
+}
+
+/// BuildStatus is used to ensure that the `include`, `exclude`, and `finalize` calls to
+/// CascadeBuilder are made in the right order. The (a,b) state indicates that the
+/// CascadeBuilder is waiting for `a` calls to `include` and `b` calls to `exclude`.
+/// Each successful `include` decrements `a`, each successful exclusion decrements `b`, and
+/// `finalize` only proceeds in state (0,0).
+#[cfg(feature = "builder")]
+struct BuildStatus(usize, usize);
+
+/// CascadeBuilder::exclude takes `&mut self` so that it can count exclusions and push items to
+/// self.to_exclude. The bulk of the work it does, however, can be done with an immutable reference
+/// to the top level bloom filter. An `ExcludeSet` is used by `CascadeBuilder::exclude_threaded` to
+/// track the changes to a `CascadeBuilder` that would be made with a call to
+/// `CascadeBuilder::exclude`.
+#[cfg(feature = "builder")]
+#[derive(Default)]
+pub struct ExcludeSet {
+    /// Number of items passed to `exclude_threaded` with this set, whether or not they were
+    /// kept in `set`
+    size: usize,
+    /// Index generators for the items that the top level bloom filter reported as members
+    set: Vec<CascadeIndexGenerator>,
+}
+
+#[cfg(test)]
+mod tests {
+    use Bloom;
+    use Cascade;
+    #[cfg(feature = "builder")]
+    use CascadeBuilder;
+    #[cfg(feature = "builder")]
+    use CascadeError;
+    use CascadeIndexGenerator;
+    #[cfg(feature = "builder")]
+    use ExcludeSet;
+    use HashAlgorithm;
+
+    /// A single encoded layer decodes, answers queries, and consumes its whole input; a
+    /// truncated layer is an error; empty input yields `None`.
+    #[test]
+    fn bloom_v1_test_from_bytes() {
+        let src: Vec<u8> = vec![
+            0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00,
+        ];
+        let mut reader = src.as_slice();
+
+        match Bloom::read(&mut reader) {
+            Ok(Some((bloom, 1, HashAlgorithm::MurmurHash3))) => {
+                assert!(bloom.has(
+                    &mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"this".to_vec()),
+                    &[]
+                ));
+                assert!(bloom.has(
+                    &mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"that".to_vec()),
+                    &[]
+                ));
+                assert!(!bloom.has(
+                    &mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"other".to_vec()),
+                    &[]
+                ));
+            }
+            Ok(_) => panic!("Parsing failed"),
+            Err(_) => panic!("Parsing failed"),
+        };
+        assert!(reader.is_empty());
+
+        // Same layer with its final data byte missing.
+        let short: Vec<u8> = vec![
+            0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41,
+        ];
+        assert!(Bloom::read(&mut short.as_slice()).is_err());
+
+        let empty: Vec<u8> = Vec::new();
+        let mut reader = empty.as_slice();
+        match Bloom::read(&mut reader) {
+            Ok(should_be_none) => assert!(should_be_none.is_none()),
+            Err(_) => panic!("Parsing failed"),
+        };
+    }
+
+    // NOTE(review): this input is also too short to hold a complete layer, so the test would
+    // still pass if the leading byte 0x03 were accepted — confirm what it is meant to pin.
+    #[test]
+    fn bloom_v3_unsupported() {
+        let src: Vec<u8> = vec![0x03, 0x01, 0x00];
+        assert!(Bloom::read(&mut src.as_slice()).is_err());
+    }
+
+    #[test]
+    fn cascade_v1_murmur_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v1_murmur_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+        // Key format is SHA256(issuer SPKI) + serial number
+        let key_for_revoked_cert_1 = vec![
+            0x2e, 0xb2, 0xd5, 0xa8, 0x60, 0xfe, 0x50, 0xe9, 0xc2, 0x42, 0x36, 0x85, 0x52, 0x98,
+            0x01, 0x50, 0xe4, 0x5d, 0xb5, 0x32, 0x1a, 0x5b, 0x00, 0x5e, 0x26, 0xd6, 0x76, 0x25,
+            0x3a, 0x40, 0x9b, 0xf5, 0x06, 0x2d, 0xf5, 0x68, 0xa0, 0x51, 0x31, 0x08, 0x20, 0xd7,
+            0xec, 0x43, 0x27, 0xe1, 0xba, 0xfd,
+        ];
+        assert!(cascade.has(key_for_revoked_cert_1));
+        let key_for_revoked_cert_2 = vec![
+            0xf1, 0x1c, 0x3d, 0xd0, 0x48, 0xf7, 0x4e, 0xdb, 0x7c, 0x45, 0x19, 0x2b, 0x83, 0xe5,
+            0x98, 0x0d, 0x2f, 0x67, 0xec, 0x84, 0xb4, 0xdd, 0xb9, 0x39, 0x6e, 0x33, 0xff, 0x51,
+            0x73, 0xed, 0x69, 0x8f, 0x00, 0xd2, 0xe8, 0xf6, 0xaa, 0x80, 0x48, 0x1c, 0xd4,
+        ];
+        assert!(cascade.has(key_for_revoked_cert_2));
+        let key_for_valid_cert = vec![
+            0x99, 0xfc, 0x9d, 0x40, 0xf1, 0xad, 0xb1, 0x63, 0x65, 0x61, 0xa6, 0x1d, 0x68, 0x3d,
+            0x9e, 0xa6, 0xb4, 0x60, 0xc5, 0x7d, 0x0c, 0x75, 0xea, 0x00, 0xc3, 0x41, 0xb9, 0xdf,
+            0xb9, 0x0b, 0x5f, 0x39, 0x0b, 0x77, 0x75, 0xf7, 0xaf, 0x9a, 0xe5, 0x42, 0x65, 0xc9,
+            0xcd, 0x32, 0x57, 0x10, 0x77, 0x8e,
+        ];
+        assert!(!cascade.has(key_for_valid_cert));
+
+        assert_eq!(cascade.approximate_size_of(), 15408);
+
+        let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf").to_vec();
+        assert!(Cascade::from_bytes(v).is_err());
+    }
+
+    #[test]
+    fn cascade_v2_sha256l32_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256l32_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.is_empty());
+        assert!(!cascade.inverted);
+        assert!(cascade.has(b"this".to_vec()));
+        assert!(cascade.has(b"that".to_vec()));
+        assert!(!cascade.has(b"other".to_vec()));
+        assert_eq!(cascade.approximate_size_of(), 1001);
+    }
+
+    #[test]
+    fn cascade_v2_sha256l32_with_salt_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256l32_salt_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert_eq!(cascade.salt, b"nacl".to_vec());
+        assert!(!cascade.inverted);
+        assert!(cascade.has(b"this".to_vec()));
+        assert!(cascade.has(b"that".to_vec()));
+        assert!(!cascade.has(b"other".to_vec()));
+        assert_eq!(cascade.approximate_size_of(), 1001);
+    }
+
+    #[test]
+    fn cascade_v2_murmur_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_murmur_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.is_empty());
+        assert!(!cascade.inverted);
+        assert!(cascade.has(b"this".to_vec()));
+        assert!(cascade.has(b"that".to_vec()));
+        assert!(!cascade.has(b"other".to_vec()));
+        assert_eq!(cascade.approximate_size_of(), 992);
+    }
+
+    #[test]
+    fn cascade_v2_murmur_inverted_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.is_empty());
+        assert!(cascade.inverted);
+        assert!(cascade.has(b"this".to_vec()));
+        assert!(cascade.has(b"that".to_vec()));
+        assert!(!cascade.has(b"other".to_vec()));
+        assert_eq!(cascade.approximate_size_of(), 1058);
+    }
+
+    #[test]
+    fn cascade_v2_sha256l32_inverted_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256l32_inverted_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.is_empty());
+        assert!(cascade.inverted);
+        assert!(cascade.has(b"this".to_vec()));
+        assert!(cascade.has(b"that".to_vec()));
+        assert!(!cascade.has(b"other".to_vec()));
+        assert_eq!(cascade.approximate_size_of(), 1061);
+    }
+
+    #[test]
+    fn cascade_v2_sha256ctr_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256ctr_salt_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert_eq!(cascade.salt, b"nacl".to_vec());
+        assert!(!cascade.inverted);
+        assert!(cascade.has(b"this".to_vec()));
+        assert!(cascade.has(b"that".to_vec()));
+        assert!(!cascade.has(b"other".to_vec()));
+        assert_eq!(cascade.approximate_size_of(), 1070);
+    }
+
+    #[test]
+    fn cascade_empty() {
+        let cascade = Cascade::from_bytes(Vec::new()).expect("parsing Cascade should succeed");
+        assert!(cascade.is_none());
+    }
+
+    /// Malformed cascades are rejected: unknown version, a first layer numbered 0, and a
+    /// second layer numbered 3.
+    #[test]
+    fn cascade_test_from_bytes() {
+        let unknown_version: Vec<u8> = vec![0xff, 0xff, 0x00, 0x00];
+        assert!(
+            Cascade::from_bytes(unknown_version).is_err(),
+            "Cascade::from_bytes allows unknown version."
+        );
+
+        let first_layer_is_zero: Vec<u8> = vec![
+            0x01, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        ];
+        assert!(
+            Cascade::from_bytes(first_layer_is_zero).is_err(),
+            "Cascade::from_bytes allows zero indexed layers."
+        );
+
+        let second_layer_is_three: Vec<u8> = vec![
+            0x01, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01,
+            0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00,
+        ];
+        assert!(
+            Cascade::from_bytes(second_layer_is_three).is_err(),
+            "Cascade::from_bytes allows non-sequential layers."
+        );
+    }
+
+    // In the builder tests below, setup calls that must succeed use `expect` rather than
+    // discarding their `Result`, so that a broken setup cannot hide behind the final assertion.
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_collision() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder
+            .include(b"collision!".to_vec())
+            .expect("include within capacity should succeed");
+        builder
+            .exclude(b"collision!".to_vec())
+            .expect("exclude within capacity should succeed");
+        assert!(matches!(builder.finalize(), Err(CascadeError::Collision)));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_too_few() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder
+            .include(b"1".to_vec())
+            .expect("include within capacity should succeed");
+        assert!(matches!(
+            builder.finalize(),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_include_too_few() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        assert!(matches!(
+            builder.exclude(b"1".to_vec()),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_include_too_many() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder
+            .include(b"1".to_vec())
+            .expect("include within capacity should succeed");
+        assert!(matches!(
+            builder.include(b"2".to_vec()),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_too_many() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder
+            .include(b"1".to_vec())
+            .expect("include within capacity should succeed");
+        builder
+            .exclude(b"2".to_vec())
+            .expect("exclude within capacity should succeed");
+        assert!(matches!(
+            builder.exclude(b"3".to_vec()),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_threaded_no_collect() {
+        let mut builder = CascadeBuilder::default(1, 3);
+        let mut exclude_set = ExcludeSet::default();
+        builder
+            .include(b"1".to_vec())
+            .expect("include within capacity should succeed");
+        builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
+        assert!(matches!(
+            builder.finalize(),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_threaded_too_many() {
+        let mut builder = CascadeBuilder::default(1, 3);
+        let mut exclude_set = ExcludeSet::default();
+        builder
+            .include(b"1".to_vec())
+            .expect("include within capacity should succeed");
+        builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"5".to_vec());
+        assert!(matches!(
+            builder.collect_exclude_set(&mut exclude_set),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_threaded() {
+        let mut builder = CascadeBuilder::default(1, 3);
+        let mut exclude_set = ExcludeSet::default();
+        builder
+            .include(b"1".to_vec())
+            .expect("include within capacity should succeed");
+        builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
+        builder
+            .collect_exclude_set(&mut exclude_set)
+            .expect("collecting exactly exclude_capacity items should succeed");
+        builder
+            .finalize()
+            .expect("finalize should succeed once all items are accounted for");
+    }
+
+    /// Builds a cascade over 10,000 integers (the first 100 included), optionally inverts it,
+    /// round-trips it through `to_bytes` / `from_bytes`, and checks every query.
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate(hash_alg: HashAlgorithm, inverted: bool) {
+        let total = 10_000_usize;
+        let included = 100_usize;
+
+        let salt = vec![0u8; 16];
+        let mut builder = CascadeBuilder::new(hash_alg, salt, included, total - included);
+        for i in 0..included {
+            builder
+                .include(i.to_le_bytes().to_vec())
+                .expect("include within capacity should succeed");
+        }
+        for i in included..total {
+            builder
+                .exclude(i.to_le_bytes().to_vec())
+                .expect("exclude within capacity should succeed");
+        }
+        let mut cascade = builder.finalize().unwrap();
+
+        if inverted {
+            cascade.invert()
+        }
+
+        // Ensure we can serialize / deserialize
+        let cascade_bytes = cascade.to_bytes().expect("failed to serialize cascade");
+
+        let cascade = Cascade::from_bytes(cascade_bytes)
+            .expect("failed to deserialize cascade")
+            .expect("cascade should not be None here");
+
+        // Ensure each query gives the correct result
+        for i in 0..included {
+            assert_eq!(cascade.has(i.to_le_bytes().to_vec()), !inverted);
+        }
+        for i in included..total {
+            assert_eq!(cascade.has(i.to_le_bytes().to_vec()), inverted);
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_murmurhash3_inverted() {
+        cascade_builder_test_generate(HashAlgorithm::MurmurHash3, true);
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_murmurhash3() {
+        cascade_builder_test_generate(HashAlgorithm::MurmurHash3, false);
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_sha256l32() {
+        cascade_builder_test_generate(HashAlgorithm::Sha256l32, false);
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_sha256() {
+        cascade_builder_test_generate(HashAlgorithm::Sha256, false);
+    }
+}
diff --git a/third_party/rust/rust_cascade/test_data/make-sample-data.py b/third_party/rust/rust_cascade/test_data/make-sample-data.py
new file mode 100644
index 0000000000..c9f117da5f
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/make-sample-data.py
@@ -0,0 +1,106 @@
+import filtercascade
+import hashlib
+from pathlib import Path
+
+import sys
+import logging
+
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+
+
+def predictable_serial_gen(start, end):
+    """Yield SHA-256 hex digests of the integers start+1 .. end, in ascending order.
+
+    Each integer is hashed as a 4-byte big-endian value. Nothing is yielded when
+    ``start >= end``.
+    """
+    for value in range(start + 1, end + 1):
+        yield hashlib.sha256(value.to_bytes(4, byteorder="big")).hexdigest()
+
+
+def store(fc, path):
+    """Write the filter cascade ``fc`` to ``path``, replacing any existing file.
+
+    ``path`` is a ``pathlib.Path``; ``fc`` must provide ``tofile(file_object)``.
+    """
+    already_there = path.exists()
+    if already_there:
+        path.unlink()
+    with path.open("wb") as out_file:
+        fc.tofile(out_file)
+
+
+# Input data shared by every sample below: digests for the counters 1..500 form the small
+# set, digests for the counters 501..10000 form the large set.
+small_set = list(set(predictable_serial_gen(0, 500)))
+large_set = set(predictable_serial_gen(500, 10_000))
+
+# filter parameters
+growth_factor = 1.0
+min_filter_length = 177  # 177 * 1.44 ~ 256, so smallest filter will have 256 bits
+
+# Each section below builds one cascade and writes it to a file in the current directory.
+# The non-inverted samples include b"this", b"that" and the small set, and exclude b"other"
+# and the large set.
+
+# SHA256 hash algorithm with the salt b"nacl".
+print("--- v2_sha256l32_with_salt ---")
+v2_sha256l32_with_salt = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
+    salt=b"nacl",
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256l32_with_salt.initialize(
+    include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
+)
+store(v2_sha256l32_with_salt, Path("test_v2_sha256l32_salt_mlbf"))
+
+# SHA256 hash algorithm, no salt.
+print("--- v2_sha256l32 ---")
+v2_sha256l32 = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256l32.initialize(
+    include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
+)
+store(v2_sha256l32, Path("test_v2_sha256l32_mlbf"))
+
+# MURMUR3 hash algorithm, no salt.
+print("--- v2_murmur ---")
+v2_murmur = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_murmur.initialize(
+    include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
+)
+store(v2_murmur, Path("test_v2_murmur_mlbf"))
+
+# The two "inverted" samples swap the roles of the sets: the large set is included together
+# with b"this" and b"that", and the small set is excluded together with b"other".
+print("--- v2_murmur_inverted ---")
+v2_murmur_inverted = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_murmur_inverted.initialize(
+    include=large_set | set([b"this", b"that"]), exclude=[b"other"] + small_set
+)
+store(v2_murmur_inverted, Path("test_v2_murmur_inverted_mlbf"))
+
+print("--- v2_sha256l32_inverted ---")
+v2_sha256l32_inverted = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256,
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256l32_inverted.initialize(
+    include=large_set | set([b"this", b"that"]), exclude=[b"other"] + small_set
+)
+store(v2_sha256l32_inverted, Path("test_v2_sha256l32_inverted_mlbf"))
+
+# SHA256CTR hash algorithm with the salt b"nacl".
+print("--- v2_sha256ctr_with_salt ---")
+v2_sha256ctr_with_salt = filtercascade.FilterCascade(
+    [],
+    defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256CTR,
+    salt=b"nacl",
+    growth_factor=growth_factor,
+    min_filter_length=min_filter_length,
+)
+v2_sha256ctr_with_salt.initialize(
+    include=[b"this", b"that"] + small_set, exclude=large_set | set([b"other"])
+)
+store(v2_sha256ctr_with_salt, Path("test_v2_sha256ctr_salt_mlbf"))
diff --git a/third_party/rust/rust_cascade/test_data/requirements.txt b/third_party/rust/rust_cascade/test_data/requirements.txt
new file mode 100644
index 0000000000..f97bd4328f
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/requirements.txt
@@ -0,0 +1 @@
+filtercascade >= 0.3.0
diff --git a/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf b/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf
new file mode 100644
index 0000000000..34ced4b840
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf b/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf
new file mode 100644
index 0000000000..d0bb7071ab
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf
new file mode 100644
index 0000000000..19acee150e
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf
new file mode 100644
index 0000000000..78f527ebf3
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256ctr_salt_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256ctr_salt_mlbf
new file mode 100644
index 0000000000..fca48498d6
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256ctr_salt_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_inverted_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_inverted_mlbf
new file mode 100644
index 0000000000..f9e892ac70
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_inverted_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_mlbf
new file mode 100644
index 0000000000..c0d7266fc4
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_salt_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_salt_mlbf
new file mode 100644
index 0000000000..4f9c2fff16
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256l32_salt_mlbf
Binary files differ