diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/rust_cascade | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1. [upstream/86.0.1] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/rust_cascade')
14 files changed, 955 insertions, 0 deletions
diff --git a/third_party/rust/rust_cascade/.cargo-checksum.json b/third_party/rust/rust_cascade/.cargo-checksum.json new file mode 100644 index 0000000000..3224196f6f --- /dev/null +++ b/third_party/rust/rust_cascade/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"411cb740d6be8346206164df646ac9df304e9a84bb9f10eb4b07d2ef2f6566ec","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"2c6d1e01ae3a39baad99cd4567b0164dec4dcf77688bc2c3b43798215c857943","test_data/make-sample-data.py":"68bcb106c3ac1929da52e1abb71cd2a6d59eb79549f6e40042368161baa920e0","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"efdd0ab309883f6a3148ec2ddaf0dcb768790e6f130e4e0556994202b1fd7cc4","test_data/test_v2_murmur_mlbf":"80e8e148fbf95aed39783f1fcc2d4576074f8c487656ca2d53571da4b17e20a9","test_data/test_v2_sha256_inverted_mlbf":"e5148cabb45c4899f8220ca51f96a6c76c688e39dfd340ae56bf9dc5226eada2","test_data/test_v2_sha256_mlbf":"08986847b8b2f3bdf4d2df51e465938f88f7a7c401b1740094fc40b033e80b51","test_data/test_v2_sha256_salt_mlbf":"d7b9bf88872162a1917eb14d0340a88b61b574fb1a7120fa54d061e43a9f5460"},"package":"9a5b9bba8f5b985e4923dadd273a987f83669083f3355d65c699e02b9d3d854d"}
\ No newline at end of file diff --git a/third_party/rust/rust_cascade/Cargo.toml b/third_party/rust/rust_cascade/Cargo.toml new file mode 100644 index 0000000000..d1621fe71c --- /dev/null +++ b/third_party/rust/rust_cascade/Cargo.toml @@ -0,0 +1,32 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +name = "rust_cascade" +version = "0.6.0" +authors = ["Mark Goodwin <mgoodwin@mozilla.com>", "Dana Keeler <dkeeler@mozilla.com>", "J.C. Jones <jc@mozilla.com>"] +description = "A simple mmh3 based bloom filter cascade implementation in Rust." +homepage = "https://github.com/mozilla/rust-cascade" +documentation = "https://docs.rs/rust_cascade/" +license = "MPL-2.0" +repository = "https://github.com/mozilla/rust-cascade" +[dependencies.byteorder] +version = "1.3.1" + +[dependencies.digest] +version = "0.8.0" + +[dependencies.murmurhash3] +version = "0.0.5" + +[dependencies.sha2] +version = "^0.8" diff --git a/third_party/rust/rust_cascade/README.md b/third_party/rust/rust_cascade/README.md new file mode 100644 index 0000000000..206bff9267 --- /dev/null +++ b/third_party/rust/rust_cascade/README.md @@ -0,0 +1,12 @@ +# rust-cascade +A Bloom filter cascade implementation in rust. 
This can utilize one of two hash +functions: + +* MurmurHash32, or +* SHA256, with an optional salt + +This implementation is designed to match up with the Python [filter-cascade +project](https://pypi.org/project/filtercascade/) +[[github](https://github.com/mozilla/filter-cascade)] + +See tests in src/lib.rs to get an idea of usage. diff --git a/third_party/rust/rust_cascade/license.txt b/third_party/rust/rust_cascade/license.txt new file mode 100644 index 0000000000..a612ad9813 --- /dev/null +++ b/third_party/rust/rust_cascade/license.txt @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/third_party/rust/rust_cascade/src/lib.rs b/third_party/rust/rust_cascade/src/lib.rs new file mode 100644 index 0000000000..77c3933158 --- /dev/null +++ b/third_party/rust/rust_cascade/src/lib.rs @@ -0,0 +1,477 @@ +extern crate byteorder; +extern crate digest; +extern crate murmurhash3; +extern crate sha2; + +use byteorder::ReadBytesExt; +use murmurhash3::murmurhash3_x86_32; +use sha2::{Digest, Sha256}; +use std::convert::{TryFrom, TryInto}; +use std::fmt; +use std::io::{Error, ErrorKind}; + +/// Helper struct to provide read-only bit access to a slice of bytes. +struct BitSlice<'a> { + /// The slice of bytes we're interested in. + bytes: &'a [u8], + /// The number of bits that are valid to access in the slice. + /// Not necessarily equal to `bytes.len() * 8`, but it will not be greater than that. + bit_len: usize, +} + +impl<'a> BitSlice<'a> { + /// Creates a new `BitSlice` of the given bit length over the given slice of data. + /// Panics if the indicated bit length is larger than fits in the slice. + /// + /// # Arguments + /// * `bytes` - The slice of bytes we need bit-access to + /// * `bit_len` - The number of bits that are valid to access in the slice + fn new(bytes: &'a [u8], bit_len: usize) -> BitSlice<'a> { + if bit_len > bytes.len() * 8 { + panic!( + "bit_len too large for given data: {} > {} * 8", + bit_len, + bytes.len() + ); + } + BitSlice { bytes, bit_len } + } + + /// Get the value of the specified bit. + /// Panics if the specified bit is out of range for the number of bits in this instance. 
+ /// + /// # Arguments + /// * `bit_index` - The bit index to access + fn get(&self, bit_index: usize) -> bool { + if bit_index >= self.bit_len { + panic!( + "bit index out of range for bit slice: {} >= {}", + bit_index, self.bit_len + ); + } + let byte_index = bit_index / 8; + let final_bit_index = bit_index % 8; + let byte = self.bytes[byte_index]; + let test_value = match final_bit_index { + 0 => byte & 0b0000_0001u8, + 1 => byte & 0b0000_0010u8, + 2 => byte & 0b0000_0100u8, + 3 => byte & 0b0000_1000u8, + 4 => byte & 0b0001_0000u8, + 5 => byte & 0b0010_0000u8, + 6 => byte & 0b0100_0000u8, + 7 => byte & 0b1000_0000u8, + _ => panic!("impossible final_bit_index value: {}", final_bit_index), + }; + test_value > 0 + } +} + +/// A Bloom filter representing a specific level in a multi-level cascading Bloom filter. +struct Bloom<'a> { + /// What level this filter is in + level: u8, + /// How many hash functions this filter uses + n_hash_funcs: u32, + /// The bit length of the filter + size: u32, + /// The data of the filter + bit_slice: BitSlice<'a>, + /// The hash algorithm enumeration in use + hash_algorithm: HashAlgorithm, +} + +#[repr(u8)] +#[derive(Copy, Clone)] +/// These enumerations need to match the python filter-cascade project: +/// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py +enum HashAlgorithm { + MurmurHash3 = 1, + Sha256 = 2, +} + +impl fmt::Display for HashAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", *self as u8) + } +} + +impl TryFrom<u8> for HashAlgorithm { + type Error = (); + fn try_from(value: u8) -> Result<HashAlgorithm, ()> { + match value { + // Naturally, these need to match the enum declaration + 1 => Ok(Self::MurmurHash3), + 2 => Ok(Self::Sha256), + _ => Err(()), + } + } +} + +impl<'a> Bloom<'a> { + /// Attempts to decode and return a pair that consists of the Bloom filter represented by the + /// given bytes and any remaining unprocessed bytes in the given 
bytes. + /// + /// # Arguments + /// * `bytes` - The encoded representation of this Bloom filter. May include additional data + /// describing further Bloom filters. Any additional data is returned unconsumed. + /// The format of an encoded Bloom filter is: + /// [1 byte] - the hash algorithm to use in the filter + /// [4 little endian bytes] - the length in bits of the filter + /// [4 little endian bytes] - the number of hash functions to use in the filter + /// [1 byte] - which level in the cascade this filter is + /// [variable length bytes] - the filter itself (the length is determined by Ceiling(bit length + /// / 8) + pub fn from_bytes(bytes: &'a [u8]) -> Result<(Bloom<'a>, &'a [u8]), Error> { + let mut cursor = bytes; + // Load the layer metadata. bloomer.py writes size, nHashFuncs and level as little-endian + // unsigned ints. + let hash_algorithm_val = cursor.read_u8()?; + let hash_algorithm = match HashAlgorithm::try_from(hash_algorithm_val) { + Ok(algo) => algo, + Err(()) => { + return Err(Error::new( + ErrorKind::InvalidData, + "Unexpected hash algorithm", + )) + } + }; + + let size = cursor.read_u32::<byteorder::LittleEndian>()?; + let n_hash_funcs = cursor.read_u32::<byteorder::LittleEndian>()?; + let level = cursor.read_u8()?; + + let shifted_size = size.wrapping_shr(3) as usize; + let byte_count = if size % 8 != 0 { + shifted_size + 1 + } else { + shifted_size + }; + if byte_count > cursor.len() { + return Err(Error::new( + ErrorKind::InvalidData, + "Invalid Bloom filter: too short", + )); + } + let (bits_bytes, rest_of_bytes) = cursor.split_at(byte_count); + let bloom = Bloom { + level, + n_hash_funcs, + size, + bit_slice: BitSlice::new(bits_bytes, size as usize), + hash_algorithm, + }; + Ok((bloom, rest_of_bytes)) + } + + fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&[u8]>) -> u32 { + match self.hash_algorithm { + HashAlgorithm::MurmurHash3 => { + if salt.is_some() { + panic!("murmur does not support salts") + } + let hash_seed = (n_fn << 
16) + self.level as u32; + murmurhash3_x86_32(key, hash_seed) % self.size + } + HashAlgorithm::Sha256 => { + let mut hasher = Sha256::new(); + if let Some(salt_bytes) = salt { + hasher.input(salt_bytes) + } + hasher.input(n_fn.to_le_bytes()); + hasher.input(self.level.to_le_bytes()); + hasher.input(key); + + u32::from_le_bytes( + hasher.result()[0..4] + .try_into() + .expect("sha256 should have given enough bytes"), + ) % self.size + } + } + } + + /// Test for the presence of a given sequence of bytes in this Bloom filter. + /// + /// # Arguments + /// `item` - The slice of bytes to test for + pub fn has(&self, item: &[u8], salt: Option<&[u8]>) -> bool { + for i in 0..self.n_hash_funcs { + if !self.bit_slice.get(self.hash(i, item, salt) as usize) { + return false; + } + } + true + } +} + +impl<'a> fmt::Display for Bloom<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "level={} n_hash_funcs={} hash_algorithm={} size={}", + self.level, self.n_hash_funcs, self.hash_algorithm, self.size + ) + } +} + +/// A multi-level cascading Bloom filter. +pub struct Cascade<'a> { + /// The Bloom filter for this level in the cascade + filter: Bloom<'a>, + /// The next (lower) level in the cascade + child_layer: Option<Box<Cascade<'a>>>, + /// The salt in use, if any + salt: Option<&'a [u8]>, + /// Whether the logic should be inverted + inverted: bool, +} + +impl<'a> Cascade<'a> { + /// Attempts to decode and return a multi-level cascading Bloom filter. NB: `Cascade` does not + /// take ownership of the given data. This is to facilitate decoding cascading filters + /// backed by memory-mapped files. + /// + /// # Arguments + /// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2 + /// little endian bytes indicating the version. The current version is 2. 
The Python + /// filter-cascade project defines the formats, see + /// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py + /// + /// May be of length 0, in which case `None` is returned. + pub fn from_bytes(bytes: &'a [u8]) -> Result<Option<Box<Cascade<'a>>>, Error> { + if bytes.is_empty() { + return Ok(None); + } + let mut cursor = bytes; + let version = cursor.read_u16::<byteorder::LittleEndian>()?; + let mut salt = None; + let mut inverted = false; + + if version >= 2 { + inverted = cursor.read_u8()? != 0; + let salt_len = cursor.read_u8()? as usize; + + if salt_len > cursor.len() { + return Err(Error::new( + ErrorKind::InvalidData, + "Invalid Bloom filter: too short", + )); + } + + let (salt_bytes, remaining_bytes) = cursor.split_at(salt_len); + if salt_len > 0 { + salt = Some(salt_bytes) + } + cursor = remaining_bytes; + } + + if version > 2 { + return Err(Error::new( + ErrorKind::InvalidData, + format!("Invalid version: {}", version), + )); + } + + Cascade::child_layer_from_bytes(cursor, salt, inverted) + } + + fn child_layer_from_bytes( + bytes: &'a [u8], + salt: Option<&'a [u8]>, + inverted: bool, + ) -> Result<Option<Box<Cascade<'a>>>, Error> { + if bytes.is_empty() { + return Ok(None); + } + let (filter, rest_of_bytes) = Bloom::from_bytes(bytes)?; + Ok(Some(Box::new(Cascade { + filter, + child_layer: Cascade::child_layer_from_bytes(rest_of_bytes, salt, inverted)?, + salt, + inverted, + }))) + } + + /// Determine if the given sequence of bytes is in the cascade. 
+ /// + /// # Arguments + /// `entry` - The slice of bytes to test for + pub fn has(&self, entry: &[u8]) -> bool { + let result = self.has_internal(entry); + if self.inverted { + return !result; + } + result + } + + pub fn has_internal(&self, entry: &[u8]) -> bool { + if self.filter.has(&entry, self.salt) { + match self.child_layer { + Some(ref child) => { + let child_value = !child.has_internal(entry); + return child_value; + } + None => { + return true; + } + } + } + false + } +} + +impl<'a> fmt::Display for Cascade<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "salt={:?} inverted={} filter=[{}] ", + self.salt, self.inverted, self.filter + )?; + match &self.child_layer { + Some(layer) => write!(f, "[child={}]", layer), + None => Ok(()), + } + } +} + +#[cfg(test)] +mod tests { + use Bloom; + use Cascade; + + #[test] + fn bloom_v1_test_from_bytes() { + let src: Vec<u8> = vec![ + 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00, + ]; + + match Bloom::from_bytes(&src) { + Ok((bloom, rest_of_bytes)) => { + assert!(rest_of_bytes.len() == 0); + assert!(bloom.has(b"this", None) == true); + assert!(bloom.has(b"that", None) == true); + assert!(bloom.has(b"other", None) == false); + } + Err(_) => { + panic!("Parsing failed"); + } + }; + + let short: Vec<u8> = vec![ + 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, + ]; + assert!(Bloom::from_bytes(&short).is_err()); + } + + #[test] + fn bloom_v3_unsupported() { + let src: Vec<u8> = vec![0x03, 0x01, 0x00]; + assert!(Bloom::from_bytes(&src).is_err()); + } + + #[test] + fn cascade_v1_murmur_from_file_bytes_test() { + let v = include_bytes!("../test_data/test_v1_murmur_mlbf"); + let cascade = Cascade::from_bytes(v) + .expect("parsing Cascade should succeed") + .expect("Cascade should be Some"); + // Key format is SHA256(issuer SPKI) + serial number + #[rustfmt::skip] + let key_for_revoked_cert_1 = + [ 0x2e, 0xb2, 0xd5, 0xa8, 0x60, 0xfe, 0x50, 0xe9, 0xc2, 
0x42, 0x36, 0x85, 0x52, 0x98, + 0x01, 0x50, 0xe4, 0x5d, 0xb5, 0x32, 0x1a, 0x5b, 0x00, 0x5e, 0x26, 0xd6, 0x76, 0x25, + 0x3a, 0x40, 0x9b, 0xf5, + 0x06, 0x2d, 0xf5, 0x68, 0xa0, 0x51, 0x31, 0x08, 0x20, 0xd7, 0xec, 0x43, 0x27, 0xe1, + 0xba, 0xfd ]; + assert!(cascade.has(&key_for_revoked_cert_1)); + #[rustfmt::skip] + let key_for_revoked_cert_2 = + [ 0xf1, 0x1c, 0x3d, 0xd0, 0x48, 0xf7, 0x4e, 0xdb, 0x7c, 0x45, 0x19, 0x2b, 0x83, 0xe5, + 0x98, 0x0d, 0x2f, 0x67, 0xec, 0x84, 0xb4, 0xdd, 0xb9, 0x39, 0x6e, 0x33, 0xff, 0x51, + 0x73, 0xed, 0x69, 0x8f, + 0x00, 0xd2, 0xe8, 0xf6, 0xaa, 0x80, 0x48, 0x1c, 0xd4 ]; + assert!(cascade.has(&key_for_revoked_cert_2)); + #[rustfmt::skip] + let key_for_valid_cert = + [ 0x99, 0xfc, 0x9d, 0x40, 0xf1, 0xad, 0xb1, 0x63, 0x65, 0x61, 0xa6, 0x1d, 0x68, 0x3d, + 0x9e, 0xa6, 0xb4, 0x60, 0xc5, 0x7d, 0x0c, 0x75, 0xea, 0x00, 0xc3, 0x41, 0xb9, 0xdf, + 0xb9, 0x0b, 0x5f, 0x39, + 0x0b, 0x77, 0x75, 0xf7, 0xaf, 0x9a, 0xe5, 0x42, 0x65, 0xc9, 0xcd, 0x32, 0x57, 0x10, + 0x77, 0x8e ]; + assert!(!cascade.has(&key_for_valid_cert)); + + let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf"); + assert!(Cascade::from_bytes(v).is_err()); + } + + #[test] + fn cascade_v2_sha256_from_file_bytes_test() { + let v = include_bytes!("../test_data/test_v2_sha256_mlbf"); + let cascade = Cascade::from_bytes(v) + .expect("parsing Cascade should succeed") + .expect("Cascade should be Some"); + + assert!(cascade.salt == None); + assert!(cascade.inverted == false); + assert!(cascade.has(b"this") == true); + assert!(cascade.has(b"that") == true); + assert!(cascade.has(b"other") == false); + } + + #[test] + fn cascade_v2_sha256_with_salt_from_file_bytes_test() { + let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf"); + let cascade = Cascade::from_bytes(v) + .expect("parsing Cascade should succeed") + .expect("Cascade should be Some"); + + assert!(cascade.salt == Some(b"nacl")); + assert!(cascade.inverted == false); + assert!(cascade.has(b"this") == true); + 
assert!(cascade.has(b"that") == true); + assert!(cascade.has(b"other") == false); + } + + #[test] + fn cascade_v2_murmur_from_file_bytes_test() { + let v = include_bytes!("../test_data/test_v2_murmur_mlbf"); + let cascade = Cascade::from_bytes(v) + .expect("parsing Cascade should succeed") + .expect("Cascade should be Some"); + + assert!(cascade.salt == None); + assert!(cascade.inverted == false); + assert!(cascade.has(b"this") == true); + assert!(cascade.has(b"that") == true); + assert!(cascade.has(b"other") == false); + } + + #[test] + fn cascade_v2_murmur_inverted_from_file_bytes_test() { + let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf"); + let cascade = Cascade::from_bytes(v) + .expect("parsing Cascade should succeed") + .expect("Cascade should be Some"); + + assert!(cascade.salt == None); + assert!(cascade.inverted == true); + assert!(cascade.has(b"this") == true); + assert!(cascade.has(b"that") == true); + assert!(cascade.has(b"other") == false); + } + + #[test] + fn cascade_v2_sha256_inverted_from_file_bytes_test() { + let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf"); + let cascade = Cascade::from_bytes(v) + .expect("parsing Cascade should succeed") + .expect("Cascade should be Some"); + + assert!(cascade.salt == None); + assert!(cascade.inverted == true); + assert!(cascade.has(b"this") == true); + assert!(cascade.has(b"that") == true); + assert!(cascade.has(b"other") == false); + } +} diff --git a/third_party/rust/rust_cascade/test_data/make-sample-data.py b/third_party/rust/rust_cascade/test_data/make-sample-data.py new file mode 100644 index 0000000000..bbb73ec4e6 --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/make-sample-data.py @@ -0,0 +1,59 @@ +import filtercascade +import hashlib +from pathlib import Path + + +def predictable_serial_gen(end): + counter = 0 + while counter < end: + counter += 1 + m = hashlib.sha256() + m.update(counter.to_bytes(4, byteorder="big")) + yield m.hexdigest() + + +def 
store(fc, path): + if path.exists(): + path.unlink() + with open(path, "wb") as f: + fc.tofile(f) + + +large_set = set(predictable_serial_gen(100_000)) + +v2_sha256_with_salt = filtercascade.FilterCascade( + [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256, salt=b"nacl" +) +v2_sha256_with_salt.initialize( + include=[b"this", b"that"], exclude=large_set | set([b"other"]) +) +store(v2_sha256_with_salt, Path("test_v2_sha256_salt_mlbf")) + +v2_sha256 = filtercascade.FilterCascade( + [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256 +) +v2_sha256.initialize(include=[b"this", b"that"], exclude=large_set | set([b"other"])) +store(v2_sha256, Path("test_v2_sha256_mlbf")) + +v2_murmur = filtercascade.FilterCascade( + [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3 +) +v2_murmur.initialize(include=[b"this", b"that"], exclude=large_set | set([b"other"])) +store(v2_murmur, Path("test_v2_murmur_mlbf")) + +v2_murmur_inverted = filtercascade.FilterCascade( + [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.MURMUR3 +) +v2_murmur_inverted.initialize( + include=large_set | set([b"this", b"that"]), exclude=[b"other"] +) +store(v2_murmur_inverted, Path("test_v2_murmur_inverted_mlbf")) + + +v2_sha256_inverted = filtercascade.FilterCascade( + [], defaultHashAlg=filtercascade.fileformats.HashAlgorithm.SHA256 +) +v2_sha256_inverted.initialize( + include=large_set | set([b"this", b"that"]), exclude=[b"other"] +) +store(v2_sha256_inverted, Path("test_v2_sha256_inverted_mlbf")) diff --git a/third_party/rust/rust_cascade/test_data/requirements.txt b/third_party/rust/rust_cascade/test_data/requirements.txt new file mode 100644 index 0000000000..f97bd4328f --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/requirements.txt @@ -0,0 +1 @@ +filtercascade >= 0.3.0 diff --git a/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf b/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf Binary files differnew file mode 
100644 index 0000000000..34ced4b840 --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v1_murmur_mlbf diff --git a/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf b/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf Binary files differ new file mode 100644 index 0000000000..d0bb7071ab --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v1_murmur_short_mlbf diff --git a/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf Binary files differ new file mode 100644 index 0000000000..0c0aecd5f0 --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v2_murmur_inverted_mlbf diff --git a/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf Binary files differ new file mode 100644 index 0000000000..f994ac7183 --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v2_murmur_mlbf diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbf Binary files differ new file mode 100644 index 0000000000..3e1e7c169a --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256_inverted_mlbf diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbf Binary files differ new file mode 100644 index 0000000000..e662a325d2 --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256_mlbf diff --git a/third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbf b/third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbf Binary files differ new file mode 100644 index 0000000000..330c487faf --- /dev/null +++ b/third_party/rust/rust_cascade/test_data/test_v2_sha256_salt_mlbf |