summaryrefslogtreecommitdiffstats
path: root/third_party/rust/rust_cascade
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/rust_cascade
parentInitial commit. (diff)
downloadfirefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz
firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/rust_cascade')
-rw-r--r--third_party/rust/rust_cascade/.cargo-checksum.json1
-rw-r--r--third_party/rust/rust_cascade/Cargo.toml32
-rw-r--r--third_party/rust/rust_cascade/README.md12
-rw-r--r--third_party/rust/rust_cascade/license.txt373
-rw-r--r--third_party/rust/rust_cascade/src/lib.rs477
-rw-r--r--third_party/rust/rust_cascade/test_data/make-sample-data.py59
-rw-r--r--third_party/rust/rust_cascade/test_data/requirements.txt1
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbfbin0 -> 15244 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbfbin0 -> 1024 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbfbin0 -> 10173 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbfbin0 -> 10173 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbfbin0 -> 10173 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbfbin0 -> 10173 bytes
-rw-r--r--third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbfbin0 -> 10177 bytes
14 files changed, 955 insertions, 0 deletions
diff --git a/third_party/rust/rust_cascade/.cargo-checksum.json b/third_party/rust/rust_cascade/.cargo-checksum.json
new file mode 100644
index 0000000000..3224196f6f
--- /dev/null
+++ b/third_party/rust/rust_cascade/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"411cb740d6be8346206164df646ac9df304e9a84bb9f10eb4b07d2ef2f6566ec","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"2c6d1e01ae3a39baad99cd4567b0164dec4dcf77688bc2c3b43798215c857943","test_data/make-sample-data.py":"68bcb106c3ac1929da52e1abb71cd2a6d59eb79549f6e40042368161baa920e0","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"efdd0ab309883f6a3148ec2ddaf0dcb768790e6f130e4e0556994202b1fd7cc4","test_data/test_v2_murmur_mlbf":"80e8e148fbf95aed39783f1fcc2d4576074f8c487656ca2d53571da4b17e20a9","test_data/test_v2_sha256_inverted_mlbf":"e5148cabb45c4899f8220ca51f96a6c76c688e39dfd340ae56bf9dc5226eada2","test_data/test_v2_sha256_mlbf":"08986847b8b2f3bdf4d2df51e465938f88f7a7c401b1740094fc40b033e80b51","test_data/test_v2_sha256_salt_mlbf":"d7b9bf88872162a1917eb14d0340a88b61b574fb1a7120fa54d061e43a9f5460"},"package":"9a5b9bba8f5b985e4923dadd273a987f83669083f3355d65c699e02b9d3d854d"} \ No newline at end of file
diff --git a/third_party/rust/rust_cascade/Cargo.toml b/third_party/rust/rust_cascade/Cargo.toml
new file mode 100644
index 0000000000..d1621fe71c
--- /dev/null
+++ b/third_party/rust/rust_cascade/Cargo.toml
@@ -0,0 +1,32 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "rust_cascade"
+version = "0.6.0"
+authors = ["Mark Goodwin <mgoodwin@mozilla.com>", "Dana Keeler <dkeeler@mozilla.com>", "J.C. Jones <jc@mozilla.com>"]
+description = "A simple mmh3 based bloom filter cascade implementation in Rust."
+homepage = "https://github.com/mozilla/rust-cascade"
+documentation = "https://docs.rs/rust_cascade/"
+license = "MPL-2.0"
+repository = "https://github.com/mozilla/rust-cascade"
+[dependencies.byteorder]
+version = "1.3.1"
+
+[dependencies.digest]
+version = "0.8.0"
+
+[dependencies.murmurhash3]
+version = "0.0.5"
+
+[dependencies.sha2]
+version = "^0.8"
diff --git a/third_party/rust/rust_cascade/README.md b/third_party/rust/rust_cascade/README.md
new file mode 100644
index 0000000000..206bff9267
--- /dev/null
+++ b/third_party/rust/rust_cascade/README.md
@@ -0,0 +1,12 @@
+# rust-cascade
+A Bloom filter cascade implementation in Rust. This can utilize one of two hash
+functions:
+
+* MurmurHash3 (32-bit), or
+* SHA256, with an optional salt
+
+This implementation is designed to match up with the Python [filter-cascade
+project](https://pypi.org/project/filtercascade/)
+[[github](https://github.com/mozilla/filter-cascade)]
+
+See tests in src/lib.rs to get an idea of usage.
diff --git a/third_party/rust/rust_cascade/license.txt b/third_party/rust/rust_cascade/license.txt
new file mode 100644
index 0000000000..a612ad9813
--- /dev/null
+++ b/third_party/rust/rust_cascade/license.txt
@@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
diff --git a/third_party/rust/rust_cascade/src/lib.rs b/third_party/rust/rust_cascade/src/lib.rs
new file mode 100644
index 0000000000..77c3933158
--- /dev/null
+++ b/third_party/rust/rust_cascade/src/lib.rs
@@ -0,0 +1,477 @@
+extern crate byteorder;
+extern crate digest;
+extern crate murmurhash3;
+extern crate sha2;
+
+use byteorder::ReadBytesExt;
+use murmurhash3::murmurhash3_x86_32;
+use sha2::{Digest, Sha256};
+use std::convert::{TryFrom, TryInto};
+use std::fmt;
+use std::io::{Error, ErrorKind};
+
+/// Helper struct to provide read-only bit access to a slice of bytes.
+///
+/// Bits are addressed least-significant-bit first within each byte: bit index `i` lives in
+/// byte `i / 8` under the mask `1 << (i % 8)`.
+struct BitSlice<'a> {
+ /// The slice of bytes we're interested in. Borrowed, never copied.
+ bytes: &'a [u8],
+ /// The number of bits that are valid to access in the slice.
+ /// Not necessarily equal to `bytes.len() * 8`, but it will not be greater than that
+ /// (`BitSlice::new` panics otherwise).
+ bit_len: usize,
+}
+
+impl<'a> BitSlice<'a> {
+    /// Wraps `bytes` in a `BitSlice` that exposes its first `bit_len` bits.
+    ///
+    /// # Arguments
+    /// * `bytes` - The backing bytes to read bits from
+    /// * `bit_len` - How many bits of `bytes` may be accessed
+    ///
+    /// # Panics
+    /// If `bit_len` is larger than the number of bits available in `bytes`.
+    fn new(bytes: &'a [u8], bit_len: usize) -> BitSlice<'a> {
+        let available_bits = bytes.len() * 8;
+        assert!(
+            bit_len <= available_bits,
+            "bit_len too large for given data: {} > {} * 8",
+            bit_len,
+            bytes.len()
+        );
+        BitSlice { bytes, bit_len }
+    }
+
+    /// Reads a single bit.
+    ///
+    /// # Arguments
+    /// * `bit_index` - The bit index to access
+    ///
+    /// # Panics
+    /// If `bit_index` is not below the bit length this instance was created with.
+    fn get(&self, bit_index: usize) -> bool {
+        assert!(
+            bit_index < self.bit_len,
+            "bit index out of range for bit slice: {} >= {}",
+            bit_index,
+            self.bit_len
+        );
+        // Bits are numbered starting from the least significant bit of each byte.
+        let mask = 1u8 << (bit_index % 8);
+        (self.bytes[bit_index / 8] & mask) != 0
+    }
+}
+
+/// A Bloom filter representing a specific level in a multi-level cascading Bloom filter.
+struct Bloom<'a> {
+ /// What level this filter is in. Mixed into every hash computation together with the
+ /// hash function index.
+ level: u8,
+ /// How many hash functions this filter uses, i.e. how many bits `has` probes per item
+ n_hash_funcs: u32,
+ /// The bit length of the filter. Hash values are reduced modulo this value.
+ size: u32,
+ /// The data of the filter
+ bit_slice: BitSlice<'a>,
+ /// The hash algorithm enumeration in use
+ hash_algorithm: HashAlgorithm,
+}
+
+#[repr(u8)]
+#[derive(Copy, Clone)]
+/// The hash algorithms a Bloom filter layer can use. The discriminant is the byte that
+/// identifies the algorithm in an encoded filter.
+///
+/// These enumerations need to match the python filter-cascade project:
+/// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py
+enum HashAlgorithm {
+ /// 32-bit MurmurHash3 (`murmurhash3_x86_32`); cannot be combined with a salt
+ MurmurHash3 = 1,
+ /// SHA-256 over the optional salt, the hash function index, the level and the key
+ Sha256 = 2,
+}
+
+impl fmt::Display for HashAlgorithm {
+    /// Formats the algorithm as its numeric identifier (the `repr(u8)` discriminant).
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let discriminant = *self as u8;
+        write!(f, "{}", discriminant)
+    }
+}
+
+impl TryFrom<u8> for HashAlgorithm {
+    type Error = ();
+
+    /// Maps the numeric identifier found in an encoded filter to a `HashAlgorithm`.
+    /// Returns `Err(())` for any value that is not a known identifier.
+    fn try_from(value: u8) -> Result<HashAlgorithm, ()> {
+        // Comparing against the discriminants keeps this in step with the enum declaration
+        let algorithm = if value == HashAlgorithm::MurmurHash3 as u8 {
+            HashAlgorithm::MurmurHash3
+        } else if value == HashAlgorithm::Sha256 as u8 {
+            HashAlgorithm::Sha256
+        } else {
+            return Err(());
+        };
+        Ok(algorithm)
+    }
+}
+
+impl<'a> Bloom<'a> {
+    /// Attempts to decode and return a pair that consists of the Bloom filter represented by the
+    /// given bytes and any remaining unprocessed bytes in the given bytes.
+    ///
+    /// # Arguments
+    /// * `bytes` - The encoded representation of this Bloom filter. May include additional data
+    ///   describing further Bloom filters. Any additional data is returned unconsumed.
+    ///   The format of an encoded Bloom filter is:
+    ///   [1 byte] - the hash algorithm to use in the filter
+    ///   [4 little endian bytes] - the length in bits of the filter
+    ///   [4 little endian bytes] - the number of hash functions to use in the filter
+    ///   [1 byte] - which level in the cascade this filter is
+    ///   [variable length bytes] - the filter itself (the length is determined by
+    ///   Ceiling(bit length / 8))
+    ///
+    /// # Errors
+    /// If the header is truncated, the I/O error from the failed read is returned. An
+    /// `ErrorKind::InvalidData` error is returned if the hash algorithm is unknown, if the
+    /// filter declares a bit length of zero together with a non-zero number of hash functions,
+    /// or if fewer filter bytes are present than the bit length requires.
+    pub fn from_bytes(bytes: &'a [u8]) -> Result<(Bloom<'a>, &'a [u8]), Error> {
+        let mut cursor = bytes;
+        // Load the layer metadata. bloomer.py writes size, nHashFuncs and level as little-endian
+        // unsigned ints.
+        let hash_algorithm = HashAlgorithm::try_from(cursor.read_u8()?)
+            .map_err(|()| Error::new(ErrorKind::InvalidData, "Unexpected hash algorithm"))?;
+
+        let size = cursor.read_u32::<byteorder::LittleEndian>()?;
+        let n_hash_funcs = cursor.read_u32::<byteorder::LittleEndian>()?;
+        let level = cursor.read_u8()?;
+
+        // `hash` reduces every hash value modulo `size`. With `size == 0` that is a remainder by
+        // zero, which panics, so a filter that would actually hash must not have zero bits.
+        // (A zero-size filter with zero hash functions never hashes and is left accepted.)
+        if size == 0 && n_hash_funcs > 0 {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "Invalid Bloom filter: zero size",
+            ));
+        }
+
+        // Number of bytes needed to hold `size` bits, i.e. Ceiling(size / 8).
+        let byte_count = (size / 8) as usize + if size % 8 != 0 { 1 } else { 0 };
+        if byte_count > cursor.len() {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                "Invalid Bloom filter: too short",
+            ));
+        }
+        let (bits_bytes, rest_of_bytes) = cursor.split_at(byte_count);
+        let bloom = Bloom {
+            level,
+            n_hash_funcs,
+            size,
+            bit_slice: BitSlice::new(bits_bytes, size as usize),
+            hash_algorithm,
+        };
+        Ok((bloom, rest_of_bytes))
+    }
+
+    /// Computes the bit index that hash function number `n_fn` assigns to `key`.
+    /// The result is always less than `self.size`.
+    ///
+    /// # Arguments
+    /// * `n_fn` - The index of the hash function, in `0..n_hash_funcs`
+    /// * `key` - The bytes to hash
+    /// * `salt` - Optional salt, hashed ahead of everything else (SHA-256 only)
+    ///
+    /// # Panics
+    /// If the filter uses MurmurHash3 and a salt is supplied, or if `self.size` is zero.
+    fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&[u8]>) -> u32 {
+        match self.hash_algorithm {
+            HashAlgorithm::MurmurHash3 => {
+                if salt.is_some() {
+                    panic!("murmur does not support salts")
+                }
+                // The seed carries the hash function index in the upper bits and the level in
+                // the low byte.
+                let hash_seed = (n_fn << 16) + self.level as u32;
+                murmurhash3_x86_32(key, hash_seed) % self.size
+            }
+            HashAlgorithm::Sha256 => {
+                let mut hasher = Sha256::new();
+                if let Some(salt_bytes) = salt {
+                    hasher.input(salt_bytes);
+                }
+                hasher.input(n_fn.to_le_bytes());
+                hasher.input(self.level.to_le_bytes());
+                hasher.input(key);
+
+                // Only the first four digest bytes are used, read as a little-endian u32.
+                u32::from_le_bytes(
+                    hasher.result()[0..4]
+                        .try_into()
+                        .expect("sha256 should have given enough bytes"),
+                ) % self.size
+            }
+        }
+    }
+
+    /// Test for the presence of a given sequence of bytes in this Bloom filter.
+    /// Returns `true` only if the bit selected by every one of the filter's hash functions is
+    /// set; probing stops at the first unset bit.
+    ///
+    /// # Arguments
+    /// * `item` - The slice of bytes to test for
+    /// * `salt` - Optional salt passed through to the hash computation
+    pub fn has(&self, item: &[u8], salt: Option<&[u8]>) -> bool {
+        (0..self.n_hash_funcs).all(|i| self.bit_slice.get(self.hash(i, item, salt) as usize))
+    }
+}
+
+impl<'a> fmt::Display for Bloom<'a> {
+    /// Writes the layer's metadata (level, hash function count, hash algorithm identifier and
+    /// bit length). The filter bits themselves are not printed.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_fmt(format_args!(
+            "level={} n_hash_funcs={} hash_algorithm={} size={}",
+            self.level, self.n_hash_funcs, self.hash_algorithm, self.size
+        ))
+    }
+}
+
+/// A multi-level cascading Bloom filter.
+///
+/// Stored as a singly linked chain: each `Cascade` holds the filter for one level plus the
+/// remaining lower levels, if any.
+pub struct Cascade<'a> {
+ /// The Bloom filter for this level in the cascade
+ filter: Bloom<'a>,
+ /// The next (lower) level in the cascade
+ child_layer: Option<Box<Cascade<'a>>>,
+ /// The salt in use, if any. Borrowed from the encoded input; the same salt is handed to
+ /// every level when the cascade is decoded.
+ salt: Option<&'a [u8]>,
+ /// Whether the logic should be inverted, i.e. whether `has` negates the cascade's raw answer
+ inverted: bool,
+}
+
+impl<'a> Cascade<'a> {
+    /// Attempts to decode and return a multi-level cascading Bloom filter. NB: `Cascade` does not
+    /// take ownership of the given data. This is to facilitate decoding cascading filters
+    /// backed by memory-mapped files.
+    ///
+    /// # Arguments
+    /// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2
+    /// little endian bytes indicating the version. The current version is 2. The Python
+    /// filter-cascade project defines the formats, see
+    /// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py
+    ///
+    /// May be of length 0, in which case `None` is returned.
+    ///
+    /// # Errors
+    /// Returns an `ErrorKind::InvalidData` error if the version is greater than 2, if the salt
+    /// is longer than the remaining data, if a salt is present together with a MurmurHash3
+    /// layer, or if any layer is malformed. Truncated headers yield the I/O error of the read
+    /// that failed.
+    pub fn from_bytes(bytes: &'a [u8]) -> Result<Option<Box<Cascade<'a>>>, Error> {
+        if bytes.is_empty() {
+            return Ok(None);
+        }
+        let mut cursor = bytes;
+        let version = cursor.read_u16::<byteorder::LittleEndian>()?;
+
+        // Reject unknown versions before interpreting any further bytes: their layout is not
+        // known, and parsing them as version 2 would report a misleading error instead.
+        if version > 2 {
+            return Err(Error::new(
+                ErrorKind::InvalidData,
+                format!("Invalid version: {}", version),
+            ));
+        }
+
+        let mut salt = None;
+        let mut inverted = false;
+
+        // Version 2 adds an inversion flag and a length-prefixed salt ahead of the layers.
+        if version >= 2 {
+            inverted = cursor.read_u8()? != 0;
+            let salt_len = cursor.read_u8()? as usize;
+
+            if salt_len > cursor.len() {
+                return Err(Error::new(
+                    ErrorKind::InvalidData,
+                    "Invalid Bloom filter: too short",
+                ));
+            }
+
+            let (salt_bytes, remaining_bytes) = cursor.split_at(salt_len);
+            if salt_len > 0 {
+                salt = Some(salt_bytes);
+            }
+            cursor = remaining_bytes;
+        }
+
+        Cascade::child_layer_from_bytes(cursor, salt, inverted)
+    }
+
+    /// Decodes the layer at the start of `bytes` and, recursively, every layer after it.
+    /// Returns `None` once `bytes` is exhausted. `salt` and `inverted` are copied into every
+    /// layer.
+    fn child_layer_from_bytes(
+        bytes: &'a [u8],
+        salt: Option<&'a [u8]>,
+        inverted: bool,
+    ) -> Result<Option<Box<Cascade<'a>>>, Error> {
+        if bytes.is_empty() {
+            return Ok(None);
+        }
+        let (filter, rest_of_bytes) = Bloom::from_bytes(bytes)?;
+
+        // `Bloom::hash` panics when asked to combine MurmurHash3 with a salt, so refuse such a
+        // cascade while decoding rather than crashing on the first lookup.
+        if salt.is_some() {
+            if let HashAlgorithm::MurmurHash3 = filter.hash_algorithm {
+                return Err(Error::new(
+                    ErrorKind::InvalidData,
+                    "Invalid Bloom filter: murmur does not support salts",
+                ));
+            }
+        }
+
+        Ok(Some(Box::new(Cascade {
+            filter,
+            child_layer: Cascade::child_layer_from_bytes(rest_of_bytes, salt, inverted)?,
+            salt,
+            inverted,
+        })))
+    }
+
+    /// Determine if the given sequence of bytes is in the cascade. The raw cascade answer is
+    /// negated when the cascade is marked as inverted.
+    ///
+    /// # Arguments
+    /// `entry` - The slice of bytes to test for
+    pub fn has(&self, entry: &[u8]) -> bool {
+        let result = self.has_internal(entry);
+        if self.inverted {
+            !result
+        } else {
+            result
+        }
+    }
+
+    /// Evaluates the cascade from this layer downwards, ignoring the inversion flag.
+    ///
+    /// A miss in this layer's filter means "not present". A hit is final if this is the last
+    /// layer; otherwise the answer is the negation of what the next layer reports.
+    ///
+    /// # Arguments
+    /// `entry` - The slice of bytes to test for
+    pub fn has_internal(&self, entry: &[u8]) -> bool {
+        if !self.filter.has(entry, self.salt) {
+            return false;
+        }
+        match self.child_layer {
+            Some(ref child) => !child.has_internal(entry),
+            None => true,
+        }
+    }
+}
+
+impl<'a> fmt::Display for Cascade<'a> {
+    /// Prints this layer's salt, inversion flag and filter metadata, followed by the nested
+    /// description of the lower layers, if there are any.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_fmt(format_args!(
+            "salt={:?} inverted={} filter=[{}] ",
+            self.salt, self.inverted, self.filter
+        ))?;
+        if let Some(ref layer) = self.child_layer {
+            write!(f, "[child={}]", layer)?;
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use Bloom;
+ use Cascade;
+
+ /// Decodes a single hand-written MurmurHash3 layer and checks lookups against it, then
+ /// checks that the same layer with its last data byte removed is rejected.
+ #[test]
+ fn bloom_v1_test_from_bytes() {
+ // Layout, as read by `Bloom::from_bytes`:
+ // 0x01 - hash algorithm (1 = MurmurHash3)
+ // 0x09 0x00 0x00 0x00 - filter length in bits (9, little endian)
+ // 0x01 0x00 0x00 0x00 - number of hash functions (1, little endian)
+ // 0x01 - level
+ // 0x41 0x00 - filter data, Ceiling(9 / 8) = 2 bytes
+ let src: Vec<u8> = vec![
+ 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00,
+ ];
+
+ match Bloom::from_bytes(&src) {
+ Ok((bloom, rest_of_bytes)) => {
+ // The layer consumes the whole input, so nothing may be left over.
+ assert!(rest_of_bytes.len() == 0);
+ assert!(bloom.has(b"this", None) == true);
+ assert!(bloom.has(b"that", None) == true);
+ assert!(bloom.has(b"other", None) == false);
+ }
+ Err(_) => {
+ panic!("Parsing failed");
+ }
+ };
+
+ // Same header, but only one of the two required data bytes is present.
+ let short: Vec<u8> = vec![
+ 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41,
+ ];
+ assert!(Bloom::from_bytes(&short).is_err());
+ }
+
+ #[test]
+ fn bloom_v3_unsupported() {
+ let src: Vec<u8> = vec![0x03, 0x01, 0x00];
+ assert!(Bloom::from_bytes(&src).is_err());
+ }
+
+ #[test]
+ fn cascade_v1_murmur_from_file_bytes_test() {
+ let v = include_bytes!("../test_data/test_v1_murmur_mlbf");
+ let cascade = Cascade::from_bytes(v)
+ .expect("parsing Cascade should succeed")
+ .expect("Cascade should be Some");
+ // Key format is SHA256(issuer SPKI) + serial number
+ #[rustfmt::skip]
+ let key_for_revoked_cert_1 =
+ [ 0x2e, 0xb2, 0xd5, 0xa8, 0x60, 0xfe, 0x50, 0xe9, 0xc2, 0x42, 0x36, 0x85, 0x52, 0x98,
+ 0x01, 0x50, 0xe4, 0x5d, 0xb5, 0x32, 0x1a, 0x5b, 0x00, 0x5e, 0x26, 0xd6, 0x76, 0x25,
+ 0x3a, 0x40, 0x9b, 0xf5,
+ 0x06, 0x2d, 0xf5, 0x68, 0xa0, 0x51, 0x31, 0x08, 0x20, 0xd7, 0xec, 0x43, 0x27, 0xe1,
+ 0xba, 0xfd ];
+ assert!(cascade.has(&key_for_revoked_cert_1));
+ #[rustfmt::skip]
+ let key_for_revoked_cert_2 =
+ [ 0xf1, 0x1c, 0x3d, 0xd0, 0x48, 0xf7, 0x4e, 0xdb, 0x7c, 0x45, 0x19, 0x2b, 0x83, 0xe5,
+ 0x98, 0x0d, 0x2f, 0x67, 0xec, 0x84, 0xb4, 0xdd, 0xb9, 0x39, 0x6e, 0x33, 0xff, 0x51,
+ 0x73, 0xed, 0x69, 0x8f,
+ 0x00, 0xd2, 0xe8, 0xf6, 0xaa, 0x80, 0x48, 0x1c, 0xd4 ];
+ assert!(cascade.has(&key_for_revoked_cert_2));
+ #[rustfmt::skip]
+ let key_for_valid_cert =
+ [ 0x99, 0xfc, 0x9d, 0x40, 0xf1, 0xad, 0xb1, 0x63, 0x65, 0x61, 0xa6, 0x1d, 0x68, 0x3d,
+ 0x9e, 0xa6, 0xb4, 0x60, 0xc5, 0x7d, 0x0c, 0x75, 0xea, 0x00, 0xc3, 0x41, 0xb9, 0xdf,
+ 0xb9, 0x0b, 0x5f, 0x39,
+ 0x0b, 0x77, 0x75, 0xf7, 0xaf, 0x9a, 0xe5, 0x42, 0x65, 0xc9, 0xcd, 0x32, 0x57, 0x10,
+ 0x77, 0x8e ];
+ assert!(!cascade.has(&key_for_valid_cert));
+
+ let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf");
+ assert!(Cascade::from_bytes(v).is_err());
+ }
+
+ #[test]
+ fn cascade_v2_sha256_from_file_bytes_test() {
+ let v = include_bytes!("../test_data/test_v2_sha256_mlbf");
+ let cascade = Cascade::from_bytes(v)
+ .expect("parsing Cascade should succeed")
+ .expect("Cascade should be Some");
+
+ assert!(cascade.salt == None);
+ assert!(cascade.inverted == false);
+ assert!(cascade.has(b"this") == true);
+ assert!(cascade.has(b"that") == true);
+ assert!(cascade.has(b"other") == false);
+ }
+
+ #[test]
+ fn cascade_v2_sha256_with_salt_from_file_bytes_test() {
+ let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf");
+ let cascade = Cascade::from_bytes(v)
+ .expect("parsing Cascade should succeed")
+ .expect("Cascade should be Some");
+
+ assert!(cascade.salt == Some(b"nacl"));
+ assert!(cascade.inverted == false);
+ assert!(cascade.has(b"this") == true);
+ assert!(cascade.has(b"that") == true);
+ assert!(cascade.has(b"other") == false);
+ }
+
+ #[test]
+ fn cascade_v2_murmur_from_file_bytes_test() {
+ let v = include_bytes!("../test_data/test_v2_murmur_mlbf");
+ let cascade = Cascade::from_bytes(v)
+ .expect("parsing Cascade should succeed")
+ .expect("Cascade should be Some");
+
+ assert!(cascade.salt == None);
+ assert!(cascade.inverted == false);
+ assert!(cascade.has(b"this") == true);
+ assert!(cascade.has(b"that") == true);
+ assert!(cascade.has(b"other") == false);
+ }
+
+ #[test]
+ fn cascade_v2_murmur_inverted_from_file_bytes_test() {
+ let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf");
+ let cascade = Cascade::from_bytes(v)
+ .expect("parsing Cascade should succeed")
+ .expect("Cascade should be Some");
+
+ assert!(cascade.salt == None);
+ assert!(cascade.inverted == true);
+ assert!(cascade.has(b"this") == true);
+ assert!(cascade.has(b"that") == true);
+ assert!(cascade.has(b"other") == false);
+ }
+
+ #[test]
+ fn cascade_v2_sha256_inverted_from_file_bytes_test() {
+ let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf");
+ let cascade = Cascade::from_bytes(v)
+ .expect("parsing Cascade should succeed")
+ .expect("Cascade should be Some");
+
+ assert!(cascade.salt == None);
+ assert!(cascade.inverted == true);
+ assert!(cascade.has(b"this") == true);
+ assert!(cascade.has(b"that") == true);
+ assert!(cascade.has(b"other") == false);
+ }
+}
diff --git a/third_party/rust/rust_cascade/test_data/make-sample-data.py b/third_party/rust/rust_cascade/test_data/make-sample-data.py
new file mode 100644
index 0000000000..bbb73ec4e6
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/make-sample-data.py
@@ -0,0 +1,59 @@
+import filtercascade
+import hashlib
+from pathlib import Path
+
+
+def predictable_serial_gen(end):
+ counter = 0
+ while counter < end:
+ counter += 1
+ m = hashlib.sha256()
+ m.update(counter.to_bytes(4, byteorder="big"))
+ yield m.hexdigest()
+
+
+def store(fc, path):
+ if path.exists():
+ path.unlink()
+ with open(path, "wb") as f:
+ fc.tofile(f)
+
+
+large_set = set(predictable_serial_gen(100_000))
+
+v2_sha256_with_salt = filtercascade.FilterCascade(
+ [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256, salt=b"nacl"
+)
+v2_sha256_with_salt.initialize(
+ include=[b"this", b"that"], exclude=large_set | set([b"other"])
+)
+store(v2_sha256_with_salt, Path("test_v2_sha256_salt_mlbf"))
+
+v2_sha256 = filtercascade.FilterCascade(
+ [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256
+)
+v2_sha256.initialize(include=[b"this", b"that"], exclude=large_set | set([b"other"]))
+store(v2_sha256, Path("test_v2_sha256_mlbf"))
+
+v2_murmur = filtercascade.FilterCascade(
+ [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3
+)
+v2_murmur.initialize(include=[b"this", b"that"], exclude=large_set | set([b"other"]))
+store(v2_murmur, Path("test_v2_murmur_mlbf"))
+
+v2_murmur_inverted = filtercascade.FilterCascade(
+ [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3
+)
+v2_murmur_inverted.initialize(
+ include=large_set | set([b"this", b"that"]), exclude=[b"other"]
+)
+store(v2_murmur_inverted, Path("test_v2_murmur_inverted_mlbf"))
+
+
+v2_sha256_inverted = filtercascade.FilterCascade(
+ [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256
+)
+v2_sha256_inverted.initialize(
+ include=large_set | set([b"this", b"that"]), exclude=[b"other"]
+)
+store(v2_sha256_inverted, Path("test_v2_sha256_inverted_mlbf"))
diff --git a/third_party/rust/rust_cascade/test_data/requirements.txt b/third_party/rust/rust_cascade/test_data/requirements.txt
new file mode 100644
index 0000000000..f97bd4328f
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/requirements.txt
@@ -0,0 +1 @@
+filtercascade >= 0.3.0
diff --git a/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf b/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf
new file mode 100644
index 0000000000..34ced4b840
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf b/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf
new file mode 100644
index 0000000000..d0bb7071ab
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf
new file mode 100644
index 0000000000..0c0aecd5f0
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf
new file mode 100644
index 0000000000..f994ac7183
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbf
new file mode 100644
index 0000000000..3e1e7c169a
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbf
new file mode 100644
index 0000000000..e662a325d2
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbf
Binary files differ
diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbf
new file mode 100644
index 0000000000..330c487faf
--- /dev/null
+++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbf
Binary files differ