Adding upstream version 115.7.0esr.upstream/115.7.0esr

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/rust_cascade/src
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
1 files changed, 1129 insertions, 0 deletions
diff --git a/third_party/rust/rust_cascade/src/lib.rs b/third_party/rust/rust_cascade/src/lib.rs
new file mode 100644
index 0000000000..eef8e1f97d
--- /dev/null
+++ b/third_party/rust/rust_cascade/src/lib.rs
@@ -0,0 +1,1129 @@
+//! # rust-cascade
+//!
+//! A library for creating and querying the cascading bloom filters described by
+//! Larisch, Choffnes, Levin, Maggs, Mislove, and Wilson in
+//! "CRLite: A Scalable System for Pushing All TLS Revocations to All Browsers"
+//! <https://www.ieee-security.org/TC/SP2017/papers/567.pdf>
+
+extern crate byteorder;
+extern crate murmurhash3;
+extern crate rand;
+extern crate sha2;
+
+use byteorder::{ByteOrder, LittleEndian, ReadBytesExt};
+use murmurhash3::murmurhash3_x86_32;
+#[cfg(feature = "builder")]
+use rand::rngs::OsRng;
+#[cfg(feature = "builder")]
+use rand::RngCore;
+use sha2::{Digest, Sha256};
+use std::convert::{TryFrom, TryInto};
+use std::fmt;
+use std::io::{ErrorKind, Read};
+use std::mem::size_of;
+
+#[derive(Debug)]
+pub enum CascadeError {
+    LongSalt,
+    TooManyLayers,
+    Collision,
+    UnknownHashFunction,
+    CapacityViolation(&'static str),
+    Parse(&'static str),
+}
+
+impl fmt::Display for CascadeError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            CascadeError::LongSalt => {
+                write!(f, "Cannot serialize a filter with a salt of length >= 256.")
+            }
+            CascadeError::TooManyLayers => {
+                write!(f, "Cannot serialize a filter with >= 255 layers.")
+            }
+            CascadeError::Collision => {
+                write!(f, "Collision between included and excluded sets.")
+            }
+            CascadeError::UnknownHashFunction => {
+                write!(f, "Unknown hash function.")
+            }
+            CascadeError::CapacityViolation(function) => {
+                write!(f, "Unexpected call to {}", function)
+            }
+            CascadeError::Parse(reason) => {
+                write!(f, "Cannot parse cascade: {}", reason)
+            }
+        }
+    }
+}
+
+/// A Bloom filter representing a specific layer in a multi-layer cascading Bloom filter.
+/// The same hash function is used for all layers, so it is not encoded here.
+struct Bloom {
+    /// How many hash functions this filter uses
+    n_hash_funcs: u32,
+    /// The bit length of the filter
+    size: u32,
+    /// The data of the filter
+    data: Vec<u8>,
+}
+
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq)]
+/// These enumerations need to match the python filter-cascade project:
+/// <https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py>
+pub enum HashAlgorithm {
+    MurmurHash3 = 1,
+    Sha256l32 = 2, // low 32 bits of sha256
+    Sha256 = 3,    // all 256 bits of sha256
+}
+
+impl fmt::Display for HashAlgorithm {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", *self as u8)
+    }
+}
+
+impl TryFrom<u8> for HashAlgorithm {
+    type Error = CascadeError;
+    fn try_from(value: u8) -> Result<HashAlgorithm, CascadeError> {
+        match value {
+            // Naturally, these need to match the enum declaration
+            1 => Ok(Self::MurmurHash3),
+            2 => Ok(Self::Sha256l32),
+            3 => Ok(Self::Sha256),
+            _ => Err(CascadeError::UnknownHashFunction),
+        }
+    }
+}
+
+/// A CascadeIndexGenerator provides one-time access to a table of pseudorandom functions H_ij
+/// in which each function is of the form
+///     H(s: &[u8], r: u32) -> usize
+/// and for which 0 <= H(s,r) < r for all s, r.
+/// The pseudorandom functions share a common key, represented as a octet string, and the table can
+/// be constructed from this key alone. The functions are pseudorandom with respect to s, but not
+/// r. For a uniformly random key/table, fixed r, and arbitrary strings m0 and m1,
+///      H_ij(m0, r) is computationally indistinguishable from H_ij(m1,r)
+/// for all i,j.
+///
+/// A call to next_layer() increments i and resets j.
+/// A call to next_index(s, r) increments j, and outputs some value H_ij(s) with 0 <= H_ij(s) < r.
+
+#[derive(Debug)]
+enum CascadeIndexGenerator {
+    MurmurHash3 {
+        key: Vec<u8>,
+        counter: u32,
+        depth: u8,
+    },
+    Sha256l32 {
+        key: Vec<u8>,
+        counter: u32,
+        depth: u8,
+    },
+    Sha256Ctr {
+        key: Vec<u8>,
+        counter: u32,
+        state: [u8; 32],
+        state_available: u8,
+    },
+}
+
+impl PartialEq for CascadeIndexGenerator {
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (
+                CascadeIndexGenerator::MurmurHash3 { key: ref a, .. },
+                CascadeIndexGenerator::MurmurHash3 { key: ref b, .. },
+            )
+            | (
+                CascadeIndexGenerator::Sha256l32 { key: ref a, .. },
+                CascadeIndexGenerator::Sha256l32 { key: ref b, .. },
+            )
+            | (
+                CascadeIndexGenerator::Sha256Ctr { key: ref a, .. },
+                CascadeIndexGenerator::Sha256Ctr { key: ref b, .. },
+            ) => a == b,
+            _ => false,
+        }
+    }
+}
+
+impl CascadeIndexGenerator {
+    fn new(hash_alg: HashAlgorithm, key: Vec<u8>) -> Self {
+        match hash_alg {
+            HashAlgorithm::MurmurHash3 => Self::MurmurHash3 {
+                key,
+                counter: 0,
+                depth: 1,
+            },
+            HashAlgorithm::Sha256l32 => Self::Sha256l32 {
+                key,
+                counter: 0,
+                depth: 1,
+            },
+            HashAlgorithm::Sha256 => Self::Sha256Ctr {
+                key,
+                counter: 0,
+                state: [0; 32],
+                state_available: 0,
+            },
+        }
+    }
+
+    fn next_layer(&mut self) {
+        match self {
+            Self::MurmurHash3 {
+                ref mut counter,
+                ref mut depth,
+                ..
+            }
+            | Self::Sha256l32 {
+                ref mut counter,
+                ref mut depth,
+                ..
+            } => {
+                *counter = 0;
+                *depth += 1;
+            }
+            Self::Sha256Ctr { .. } => (),
+        }
+    }
+
+    fn next_index(&mut self, salt: &[u8], range: u32) -> usize {
+        let index = match self {
+            Self::MurmurHash3 {
+                key,
+                ref mut counter,
+                depth,
+            } => {
+                let hash_seed = (*counter << 16) + *depth as u32;
+                *counter += 1;
+                murmurhash3_x86_32(key, hash_seed)
+            }
+
+            Self::Sha256l32 {
+                key,
+                ref mut counter,
+                depth,
+            } => {
+                let mut hasher = Sha256::new();
+                hasher.update(salt);
+                hasher.update(counter.to_le_bytes());
+                hasher.update(depth.to_le_bytes());
+                hasher.update(&key);
+                *counter += 1;
+                u32::from_le_bytes(
+                    hasher.finalize()[0..4]
+                        .try_into()
+                        .expect("sha256 should have given enough bytes"),
+                )
+            }
+
+            Self::Sha256Ctr {
+                key,
+                ref mut counter,
+                ref mut state,
+                ref mut state_available,
+            } => {
+                // |bytes_needed| is the minimum number of bytes needed to represent a value in [0, range).
+                let bytes_needed = ((range.next_power_of_two().trailing_zeros() + 7) / 8) as usize;
+                let mut index_arr = [0u8; 4];
+                for byte in index_arr.iter_mut().take(bytes_needed) {
+                    if *state_available == 0 {
+                        let mut hasher = Sha256::new();
+                        hasher.update(counter.to_le_bytes());
+                        hasher.update(salt);
+                        hasher.update(&key);
+                        hasher.finalize_into(state.into());
+                        *state_available = state.len() as u8;
+                        *counter += 1;
+                    }
+                    *byte = state[state.len() - *state_available as usize];
+                    *state_available -= 1;
+                }
+                LittleEndian::read_u32(&index_arr)
+            }
+        };
+        (index % range) as usize
+    }
+}
+
+impl Bloom {
+    /// `new_crlite_bloom` creates an empty bloom filter for a layer of a cascade with the
+    /// parameters specified in [LCL+17, Section III.C].
+    ///
+    /// # Arguments
+    /// * `include_capacity` - the number of elements that will be encoded at the new layer.
+    /// * `exclude_capacity` - the number of elements in the complement of the encoded set.
+    /// * `top_layer`        - whether this is the top layer of the filter.
+    #[cfg(feature = "builder")]
+    pub fn new_crlite_bloom(
+        include_capacity: usize,
+        exclude_capacity: usize,
+        top_layer: bool,
+    ) -> Self {
+        assert!(include_capacity != 0 && exclude_capacity != 0);
+
+        let r = include_capacity as f64;
+        let s = exclude_capacity as f64;
+
+        // The desired false positive rate for the top layer is
+        //   p = r/(sqrt(2)*s).
+        // With this setting, the number of false positives (which will need to be
+        // encoded at the second layer) is expected to be a factor of sqrt(2)
+        // smaller than the number of elements encoded at the top layer.
+        //
+        // At layer i > 1 we try to ensure that the number of elements to be
+        // encoded at layer i+1 is half the number of elements encoded at
+        // layer i. So we take p = 1/2.
+        let log2_fp_rate = match top_layer {
+            true => (r / s).log2() - 0.5f64,
+            false => -1f64,
+        };
+
+        // the number of hash functions (k) and the size of the bloom filter (m) are given in
+        // [LCL+17] as k = log2(1/p)  and  m = r log2(1/p) / ln(2).
+        //
+        // If this formula gives a value of m < 256, we take m=256 instead. This results in very
+        // slightly sub-optimal size, but gives us the added benefit of doing less hashing.
+        let n_hash_funcs = (-log2_fp_rate).round() as u32;
+        let size = match (r * (-log2_fp_rate) / (f64::ln(2f64))).round() as u32 {
+            size if size >= 256 => size,
+            _ => 256,
+        };
+
+        Bloom {
+            n_hash_funcs,
+            size,
+            data: vec![0u8; ((size + 7) / 8) as usize],
+        }
+    }
+
+    /// `read` attempts to decode the Bloom filter represented by the bytes in the given reader.
+    ///
+    /// # Arguments
+    /// * `reader` - The encoded representation of this Bloom filter. May be empty. May include
+    /// additional data describing further Bloom filters.
+    /// The format of an encoded Bloom filter is:
+    /// [1 byte] - the hash algorithm to use in the filter
+    /// [4 little endian bytes] - the length in bits of the filter
+    /// [4 little endian bytes] - the number of hash functions to use in the filter
+    /// [1 byte] - which layer in the cascade this filter is
+    /// [variable length bytes] - the filter itself (must be of minimal length)
+    pub fn read<R: Read>(
+        reader: &mut R,
+    ) -> Result<Option<(Bloom, usize, HashAlgorithm)>, CascadeError> {
+        let hash_algorithm_val = match reader.read_u8() {
+            Ok(val) => val,
+            // If reader is at EOF, there is no bloom filter.
+            Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(None),
+            Err(_) => return Err(CascadeError::Parse("read error")),
+        };
+        let hash_algorithm = HashAlgorithm::try_from(hash_algorithm_val)?;
+
+        let size = reader
+            .read_u32::<byteorder::LittleEndian>()
+            .or(Err(CascadeError::Parse("truncated at layer size")))?;
+        let n_hash_funcs = reader
+            .read_u32::<byteorder::LittleEndian>()
+            .or(Err(CascadeError::Parse("truncated at layer hash count")))?;
+        let layer = reader
+            .read_u8()
+            .or(Err(CascadeError::Parse("truncated at layer number")))?;
+
+        let byte_count = ((size + 7) / 8) as usize;
+        let mut data = vec![0; byte_count];
+        reader
+            .read_exact(&mut data)
+            .or(Err(CascadeError::Parse("truncated at layer data")))?;
+        let bloom = Bloom {
+            n_hash_funcs,
+            size,
+            data,
+        };
+        Ok(Some((bloom, layer as usize, hash_algorithm)))
+    }
+
+    fn has(&self, generator: &mut CascadeIndexGenerator, salt: &[u8]) -> bool {
+        for _ in 0..self.n_hash_funcs {
+            let bit_index = generator.next_index(salt, self.size);
+            assert!(bit_index < self.size as usize);
+            let byte_index = bit_index / 8;
+            let mask = 1 << (bit_index % 8);
+            if self.data[byte_index] & mask == 0 {
+                return false;
+            }
+        }
+        true
+    }
+
+    #[cfg(feature = "builder")]
+    fn insert(&mut self, generator: &mut CascadeIndexGenerator, salt: &[u8]) {
+        for _ in 0..self.n_hash_funcs {
+            let bit_index = generator.next_index(salt, self.size);
+            let byte_index = bit_index / 8;
+            let mask = 1 << (bit_index % 8);
+            self.data[byte_index] |= mask;
+        }
+    }
+
+    pub fn approximate_size_of(&self) -> usize {
+        size_of::<Bloom>() + self.data.len()
+    }
+}
+
+impl fmt::Display for Bloom {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "n_hash_funcs={} size={}", self.n_hash_funcs, self.size)
+    }
+}
+
+/// A multi-layer cascading Bloom filter.
+pub struct Cascade {
+    /// The Bloom filter for this layer in the cascade
+    filters: Vec<Bloom>,
+    /// The salt in use, if any
+    salt: Vec<u8>,
+    /// The hash algorithm / index generating function to use
+    hash_algorithm: HashAlgorithm,
+    /// Whether the logic should be inverted
+    inverted: bool,
+}
+
+impl Cascade {
+    /// from_bytes attempts to decode and return a multi-layer cascading Bloom filter.
+    ///
+    /// # Arguments
+    /// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2
+    /// little endian bytes indicating the version. The current version is 2. The Python
+    /// filter-cascade project defines the formats, see
+    /// <https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py>
+    ///
+    /// May be of length 0, in which case `None` is returned.
+    pub fn from_bytes(bytes: Vec<u8>) -> Result<Option<Self>, CascadeError> {
+        if bytes.is_empty() {
+            return Ok(None);
+        }
+        let mut reader = bytes.as_slice();
+        let version = reader
+            .read_u16::<byteorder::LittleEndian>()
+            .or(Err(CascadeError::Parse("truncated at version")))?;
+
+        let mut filters = vec![];
+        let mut salt = vec![];
+        let mut top_hash_alg = None;
+        let mut inverted = false;
+
+        if version > 2 {
+            return Err(CascadeError::Parse("unknown version"));
+        }
+
+        if version == 2 {
+            let inverted_val = reader
+                .read_u8()
+                .or(Err(CascadeError::Parse("truncated at inverted")))?;
+            if inverted_val > 1 {
+                return Err(CascadeError::Parse("invalid value for inverted"));
+            }
+            inverted = 0 != inverted_val;
+            let salt_len: usize = reader
+                .read_u8()
+                .or(Err(CascadeError::Parse("truncated at salt length")))?
+                .into();
+            if salt_len >= 256 {
+                return Err(CascadeError::Parse("salt too long"));
+            }
+            if salt_len > 0 {
+                let mut salt_bytes = vec![0; salt_len];
+                reader
+                    .read_exact(&mut salt_bytes)
+                    .or(Err(CascadeError::Parse("truncated at salt")))?;
+                salt = salt_bytes;
+            }
+        }
+
+        while let Some((filter, layer_number, layer_hash_alg)) = Bloom::read(&mut reader)? {
+            filters.push(filter);
+
+            if layer_number != filters.len() {
+                return Err(CascadeError::Parse("irregular layer numbering"));
+            }
+
+            if *top_hash_alg.get_or_insert(layer_hash_alg) != layer_hash_alg {
+                return Err(CascadeError::Parse("Inconsistent hash algorithms"));
+            }
+        }
+
+        if filters.is_empty() {
+            return Err(CascadeError::Parse("missing filters"));
+        }
+
+        let hash_algorithm = top_hash_alg.ok_or(CascadeError::Parse("missing hash algorithm"))?;
+
+        Ok(Some(Cascade {
+            filters,
+            salt,
+            hash_algorithm,
+            inverted,
+        }))
+    }
+
+    /// to_bytes encodes a cascade in the version 2 format.
+    pub fn to_bytes(&self) -> Result<Vec<u8>, CascadeError> {
+        if self.salt.len() >= 256 {
+            return Err(CascadeError::LongSalt);
+        }
+        if self.filters.len() >= 255 {
+            return Err(CascadeError::TooManyLayers);
+        }
+        let mut out = vec![];
+        let version: u16 = 2;
+        let inverted: u8 = self.inverted.into();
+        let salt_len: u8 = self.salt.len() as u8;
+        let hash_alg: u8 = self.hash_algorithm as u8;
+        out.extend_from_slice(&version.to_le_bytes());
+        out.push(inverted);
+        out.push(salt_len);
+        out.extend_from_slice(&self.salt);
+        for (layer, bloom) in self.filters.iter().enumerate() {
+            out.push(hash_alg);
+            out.extend_from_slice(&bloom.size.to_le_bytes());
+            out.extend_from_slice(&bloom.n_hash_funcs.to_le_bytes());
+            out.push((1 + layer) as u8); // 1-indexed
+            out.extend_from_slice(&bloom.data);
+        }
+        Ok(out)
+    }
+
+    /// has determines if the given sequence of bytes is in the cascade.
+    ///
+    /// # Arguments
+    /// `entry` - The bytes to query
+    pub fn has(&self, entry: Vec<u8>) -> bool {
+        // Query filters 0..self.filters.len() until we get a non-membership result.
+        // If this occurs at an even index filter, the element *is not* included.
+        // ... at an odd-index filter, the element *is* included.
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, entry);
+        let mut rv = false;
+        for filter in &self.filters {
+            if filter.has(&mut generator, &self.salt) {
+                rv = !rv;
+                generator.next_layer();
+            } else {
+                break;
+            }
+        }
+        if self.inverted {
+            rv = !rv;
+        }
+        rv
+    }
+
+    pub fn invert(&mut self) {
+        self.inverted = !self.inverted;
+    }
+
+    /// Determine the approximate amount of memory in bytes used by this
+    /// Cascade. Because this implementation does not integrate with the
+    /// allocator, it can't get an accurate measurement of how much memory it
+    /// uses. However, it can make a reasonable guess, assuming the sizes of
+    /// the bloom filters are large enough to dominate the overall allocated
+    /// size.
+    pub fn approximate_size_of(&self) -> usize {
+        size_of::<Cascade>()
+            + self
+                .filters
+                .iter()
+                .map(|x| x.approximate_size_of())
+                .sum::<usize>()
+            + self.salt.len()
+    }
+}
+
+impl fmt::Display for Cascade {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(
+            f,
+            "salt={:?} inverted={} hash_algorithm={}",
+            self.salt, self.inverted, self.hash_algorithm,
+        )?;
+        for filter in &self.filters {
+            writeln!(f, "\t[{}]", filter)?;
+        }
+        Ok(())
+    }
+}
+
+/// A CascadeBuilder creates a Cascade with layers given by `Bloom::new_crlite_bloom`.
+///
+/// A builder is initialized using [`CascadeBuilder::default`] or [`CascadeBuilder::new`]. Prefer `default`. The `new` constructor
+/// allows the user to specify sensitive internal details such as the hash function and the domain
+/// separation parameter.
+///
+/// Both constructors take `include_capacity` and an `exclude_capacity` parameters. The
+/// `include_capacity` is the number of elements that will be encoded in the Cascade. The
+/// `exclude_capacity` is size of the complement of the encoded set.
+///
+/// The encoded set is specified through calls to [`CascadeBuilder::include`]. Its complement is specified through
+/// calls to [`CascadeBuilder::exclude`]. The cascade is built with a call to [`CascadeBuilder::finalize`].
+///
+/// The builder will track of the number of calls to `include` and `exclude`.
+/// The caller is responsible for making *exactly* `include_capacity` calls to `include`
+/// followed by *exactly* `exclude_capacity` calls to `exclude`.
+/// Calling `exclude` before all `include` calls have been made will result in a panic!().
+/// Calling `finalize` before all `exclude` calls have been made will result in a panic!().
+///
+#[cfg(feature = "builder")]
+pub struct CascadeBuilder {
+    filters: Vec<Bloom>,
+    salt: Vec<u8>,
+    hash_algorithm: HashAlgorithm,
+    to_include: Vec<CascadeIndexGenerator>,
+    to_exclude: Vec<CascadeIndexGenerator>,
+    status: BuildStatus,
+}
+
+#[cfg(feature = "builder")]
+impl CascadeBuilder {
+    pub fn default(include_capacity: usize, exclude_capacity: usize) -> Self {
+        let mut salt = vec![0u8; 16];
+        OsRng.fill_bytes(&mut salt);
+        CascadeBuilder::new(
+            HashAlgorithm::Sha256,
+            salt,
+            include_capacity,
+            exclude_capacity,
+        )
+    }
+
+    pub fn new(
+        hash_algorithm: HashAlgorithm,
+        salt: Vec<u8>,
+        include_capacity: usize,
+        exclude_capacity: usize,
+    ) -> Self {
+        CascadeBuilder {
+            filters: vec![Bloom::new_crlite_bloom(
+                include_capacity,
+                exclude_capacity,
+                true,
+            )],
+            salt,
+            to_include: vec![],
+            to_exclude: vec![],
+            hash_algorithm,
+            status: BuildStatus(include_capacity, exclude_capacity),
+        }
+    }
+
+    pub fn include(&mut self, item: Vec<u8>) -> Result<(), CascadeError> {
+        match self.status {
+            BuildStatus(ref mut cap, _) if *cap > 0 => *cap -= 1,
+            _ => return Err(CascadeError::CapacityViolation("include")),
+        }
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
+        self.filters[0].insert(&mut generator, &self.salt);
+        self.to_include.push(generator);
+
+        Ok(())
+    }
+
+    pub fn exclude(&mut self, item: Vec<u8>) -> Result<(), CascadeError> {
+        match self.status {
+            BuildStatus(0, ref mut cap) if *cap > 0 => *cap -= 1,
+            _ => return Err(CascadeError::CapacityViolation("exclude")),
+        }
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
+        if self.filters[0].has(&mut generator, &self.salt) {
+            self.to_exclude.push(generator);
+        }
+        Ok(())
+    }
+
+    /// `exclude_threaded` is like `exclude` but it stores false positives in a caller-owned
+    /// `ExcludeSet`. This allows the caller to exclude items in parallel.
+    pub fn exclude_threaded(&self, exclude_set: &mut ExcludeSet, item: Vec<u8>) {
+        exclude_set.size += 1;
+        let mut generator = CascadeIndexGenerator::new(self.hash_algorithm, item);
+        if self.filters[0].has(&mut generator, &self.salt) {
+            exclude_set.set.push(generator);
+        }
+    }
+
+    /// `collect_exclude_set` merges an `ExcludeSet` into the internal storage of the CascadeBuilder.
+    pub fn collect_exclude_set(
+        &mut self,
+        exclude_set: &mut ExcludeSet,
+    ) -> Result<(), CascadeError> {
+        match self.status {
+            BuildStatus(0, ref mut cap) if *cap >= exclude_set.size => *cap -= exclude_set.size,
+            _ => return Err(CascadeError::CapacityViolation("exclude")),
+        }
+        self.to_exclude.append(&mut exclude_set.set);
+
+        Ok(())
+    }
+
+    fn push_layer(&mut self) -> Result<(), CascadeError> {
+        // At even layers we encode elements of to_include. At odd layers we encode elements of
+        // to_exclude. In both cases, we track false positives by filtering the complement of the
+        // encoded set through the newly produced bloom filter.
+        let at_even_layer = self.filters.len() % 2 == 0;
+        let (to_encode, to_filter) = match at_even_layer {
+            true => (&mut self.to_include, &mut self.to_exclude),
+            false => (&mut self.to_exclude, &mut self.to_include),
+        };
+
+        // split ownership of `salt` away from `to_encode` and `to_filter`
+        // We need an immutable reference to salt during `to_encode.iter_mut()`
+        let mut bloom = Bloom::new_crlite_bloom(to_encode.len(), to_filter.len(), false);
+
+        let salt = self.salt.as_slice();
+
+        to_encode.iter_mut().for_each(|x| {
+            x.next_layer();
+            bloom.insert(x, salt)
+        });
+
+        let mut delta = to_filter.len();
+        to_filter.retain_mut(|x| {
+            x.next_layer();
+            bloom.has(x, salt)
+        });
+        delta -= to_filter.len();
+
+        if delta == 0 {
+            // Check for collisions between the |to_encode| and |to_filter| sets.
+            // The implementation of PartialEq for CascadeIndexGenerator will successfully
+            // identify cases where the user called |include(item)| and |exclude(item)| for the
+            // same item. It will not identify collisions in the underlying hash function.
+            for x in to_encode.iter_mut() {
+                if to_filter.contains(x) {
+                    return Err(CascadeError::Collision);
+                }
+            }
+        }
+
+        self.filters.push(bloom);
+        Ok(())
+    }
+
+    pub fn finalize(mut self) -> Result<Box<Cascade>, CascadeError> {
+        match self.status {
+            BuildStatus(0, 0) => (),
+            _ => return Err(CascadeError::CapacityViolation("finalize")),
+        }
+
+        loop {
+            if self.to_exclude.is_empty() {
+                break;
+            }
+            self.push_layer()?;
+
+            if self.to_include.is_empty() {
+                break;
+            }
+            self.push_layer()?;
+        }
+
+        Ok(Box::new(Cascade {
+            filters: self.filters,
+            salt: self.salt,
+            hash_algorithm: self.hash_algorithm,
+            inverted: false,
+        }))
+    }
+}
+
+/// BuildStatus is used to ensure that the `include`, `exclude`, and `finalize` calls to
+/// CascadeBuilder are made in the right order. The (a,b) state indicates that the
+/// CascadeBuilder is waiting for `a` calls to `include` and `b` calls to `exclude`.
+#[cfg(feature = "builder")]
+struct BuildStatus(usize, usize);
+
+/// CascadeBuilder::exclude takes `&mut self` so that it can count exclusions and push items to
+/// self.to_exclude. The bulk of the work it does, however, can be done with an immutable reference
+/// to the top level bloom filter. An `ExcludeSet` is used by `CascadeBuilder::exclude_threaded` to
+/// track the changes to a `CascadeBuilder` that would be made with a call to
+/// `CascadeBuilder::exclude`.
+#[cfg(feature = "builder")]
+#[derive(Default)]
+pub struct ExcludeSet {
+    size: usize,
+    set: Vec<CascadeIndexGenerator>,
+}
+
+#[cfg(test)]
+mod tests {
+    use Bloom;
+    use Cascade;
+    #[cfg(feature = "builder")]
+    use CascadeBuilder;
+    #[cfg(feature = "builder")]
+    use CascadeError;
+    use CascadeIndexGenerator;
+    #[cfg(feature = "builder")]
+    use ExcludeSet;
+    use HashAlgorithm;
+
+    #[test]
+    fn bloom_v1_test_from_bytes() {
+        let src: Vec<u8> = vec![
+            0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00,
+        ];
+        let mut reader = src.as_slice();
+
+        match Bloom::read(&mut reader) {
+            Ok(Some((bloom, 1, HashAlgorithm::MurmurHash3))) => {
+                assert!(bloom.has(
+                    &mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"this".to_vec()),
+                    &vec![]
+                ));
+                assert!(bloom.has(
+                    &mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"that".to_vec()),
+                    &vec![]
+                ));
+                assert!(!bloom.has(
+                    &mut CascadeIndexGenerator::new(HashAlgorithm::MurmurHash3, b"other".to_vec()),
+                    &vec![]
+                ));
+            }
+            Ok(_) => panic!("Parsing failed"),
+            Err(_) => panic!("Parsing failed"),
+        };
+        assert!(reader.is_empty());
+
+        let short: Vec<u8> = vec![
+            0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41,
+        ];
+        assert!(Bloom::read(&mut short.as_slice()).is_err());
+
+        let empty: Vec<u8> = Vec::new();
+        let mut reader = empty.as_slice();
+        match Bloom::read(&mut reader) {
+            Ok(should_be_none) => assert!(should_be_none.is_none()),
+            Err(_) => panic!("Parsing failed"),
+        };
+    }
+
+    #[test]
+    fn bloom_v3_unsupported() {
+        let src: Vec<u8> = vec![0x03, 0x01, 0x00];
+        assert!(Bloom::read(&mut src.as_slice()).is_err());
+    }
+
+    #[test]
+    fn cascade_v1_murmur_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v1_murmur_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+        // Key format is SHA256(issuer SPKI) + serial number
+        let key_for_revoked_cert_1 = vec![
+            0x2e, 0xb2, 0xd5, 0xa8, 0x60, 0xfe, 0x50, 0xe9, 0xc2, 0x42, 0x36, 0x85, 0x52, 0x98,
+            0x01, 0x50, 0xe4, 0x5d, 0xb5, 0x32, 0x1a, 0x5b, 0x00, 0x5e, 0x26, 0xd6, 0x76, 0x25,
+            0x3a, 0x40, 0x9b, 0xf5, 0x06, 0x2d, 0xf5, 0x68, 0xa0, 0x51, 0x31, 0x08, 0x20, 0xd7,
+            0xec, 0x43, 0x27, 0xe1, 0xba, 0xfd,
+        ];
+        assert!(cascade.has(key_for_revoked_cert_1));
+        let key_for_revoked_cert_2 = vec![
+            0xf1, 0x1c, 0x3d, 0xd0, 0x48, 0xf7, 0x4e, 0xdb, 0x7c, 0x45, 0x19, 0x2b, 0x83, 0xe5,
+            0x98, 0x0d, 0x2f, 0x67, 0xec, 0x84, 0xb4, 0xdd, 0xb9, 0x39, 0x6e, 0x33, 0xff, 0x51,
+            0x73, 0xed, 0x69, 0x8f, 0x00, 0xd2, 0xe8, 0xf6, 0xaa, 0x80, 0x48, 0x1c, 0xd4,
+        ];
+        assert!(cascade.has(key_for_revoked_cert_2));
+        let key_for_valid_cert = vec![
+            0x99, 0xfc, 0x9d, 0x40, 0xf1, 0xad, 0xb1, 0x63, 0x65, 0x61, 0xa6, 0x1d, 0x68, 0x3d,
+            0x9e, 0xa6, 0xb4, 0x60, 0xc5, 0x7d, 0x0c, 0x75, 0xea, 0x00, 0xc3, 0x41, 0xb9, 0xdf,
+            0xb9, 0x0b, 0x5f, 0x39, 0x0b, 0x77, 0x75, 0xf7, 0xaf, 0x9a, 0xe5, 0x42, 0x65, 0xc9,
+            0xcd, 0x32, 0x57, 0x10, 0x77, 0x8e,
+        ];
+        assert!(!cascade.has(key_for_valid_cert));
+
+        assert_eq!(cascade.approximate_size_of(), 15408);
+
+        let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf").to_vec();
+        assert!(Cascade::from_bytes(v).is_err());
+    }
+
+    #[test]
+    fn cascade_v2_sha256l32_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256l32_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.len() == 0);
+        assert!(cascade.inverted == false);
+        assert!(cascade.has(b"this".to_vec()) == true);
+        assert!(cascade.has(b"that".to_vec()) == true);
+        assert!(cascade.has(b"other".to_vec()) == false);
+        assert_eq!(cascade.approximate_size_of(), 1001);
+    }
+
+    #[test]
+    fn cascade_v2_sha256l32_with_salt_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256l32_salt_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt == b"nacl".to_vec());
+        assert!(cascade.inverted == false);
+        assert!(cascade.has(b"this".to_vec()) == true);
+        assert!(cascade.has(b"that".to_vec()) == true);
+        assert!(cascade.has(b"other".to_vec()) == false);
+        assert_eq!(cascade.approximate_size_of(), 1001);
+    }
+
+    #[test]
+    fn cascade_v2_murmur_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_murmur_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.len() == 0);
+        assert!(cascade.inverted == false);
+        assert!(cascade.has(b"this".to_vec()) == true);
+        assert!(cascade.has(b"that".to_vec()) == true);
+        assert!(cascade.has(b"other".to_vec()) == false);
+        assert_eq!(cascade.approximate_size_of(), 992);
+    }
+
+    #[test]
+    fn cascade_v2_murmur_inverted_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.len() == 0);
+        assert!(cascade.inverted == true);
+        assert!(cascade.has(b"this".to_vec()) == true);
+        assert!(cascade.has(b"that".to_vec()) == true);
+        assert!(cascade.has(b"other".to_vec()) == false);
+        assert_eq!(cascade.approximate_size_of(), 1058);
+    }
+
+    #[test]
+    fn cascade_v2_sha256l32_inverted_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256l32_inverted_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt.len() == 0);
+        assert!(cascade.inverted == true);
+        assert!(cascade.has(b"this".to_vec()) == true);
+        assert!(cascade.has(b"that".to_vec()) == true);
+        assert!(cascade.has(b"other".to_vec()) == false);
+        assert_eq!(cascade.approximate_size_of(), 1061);
+    }
+
+    #[test]
+    fn cascade_v2_sha256ctr_from_file_bytes_test() {
+        let v = include_bytes!("../test_data/test_v2_sha256ctr_salt_mlbf").to_vec();
+        let cascade = Cascade::from_bytes(v)
+            .expect("parsing Cascade should succeed")
+            .expect("Cascade should be Some");
+
+        assert!(cascade.salt == b"nacl".to_vec());
+        assert!(cascade.inverted == false);
+        assert!(cascade.has(b"this".to_vec()) == true);
+        assert!(cascade.has(b"that".to_vec()) == true);
+        assert!(cascade.has(b"other".to_vec()) == false);
+        assert_eq!(cascade.approximate_size_of(), 1070);
+    }
+
+    #[test]
+    fn cascade_empty() {
+        let cascade = Cascade::from_bytes(Vec::new()).expect("parsing Cascade should succeed");
+        assert!(cascade.is_none());
+    }
+
+    #[test]
+    fn cascade_test_from_bytes() {
+        let unknown_version: Vec<u8> = vec![0xff, 0xff, 0x00, 0x00];
+        match Cascade::from_bytes(unknown_version) {
+            Ok(_) => panic!("Cascade::from_bytes allows unknown version."),
+            Err(_) => (),
+        }
+
+        let first_layer_is_zero: Vec<u8> = vec![
+            0x01, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        ];
+        match Cascade::from_bytes(first_layer_is_zero) {
+            Ok(_) => panic!("Cascade::from_bytes allows zero indexed layers."),
+            Err(_) => (),
+        }
+
+        let second_layer_is_three: Vec<u8> = vec![
+            0x01, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01,
+            0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00,
+        ];
+        match Cascade::from_bytes(second_layer_is_three) {
+            Ok(_) => panic!("Cascade::from_bytes allows non-sequential layers."),
+            Err(_) => (),
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_collision() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder.include(b"collision!".to_vec()).ok();
+        builder.exclude(b"collision!".to_vec()).ok();
+        assert!(matches!(builder.finalize(), Err(CascadeError::Collision)));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_too_few() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder.include(b"1".to_vec()).ok();
+        assert!(matches!(
+            builder.finalize(),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_include_too_few() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        assert!(matches!(
+            builder.exclude(b"1".to_vec()),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_include_too_many() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder.include(b"1".to_vec()).ok();
+        assert!(matches!(
+            builder.include(b"2".to_vec()),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_too_many() {
+        let mut builder = CascadeBuilder::default(1, 1);
+        builder.include(b"1".to_vec()).ok();
+        builder.exclude(b"2".to_vec()).ok();
+        assert!(matches!(
+            builder.exclude(b"3".to_vec()),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_threaded_no_collect() {
+        let mut builder = CascadeBuilder::default(1, 3);
+        let mut exclude_set = ExcludeSet::default();
+        builder.include(b"1".to_vec()).ok();
+        builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
+        assert!(matches!(
+            builder.finalize(),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_threaded_too_many() {
+        let mut builder = CascadeBuilder::default(1, 3);
+        let mut exclude_set = ExcludeSet::default();
+        builder.include(b"1".to_vec()).ok();
+        builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"5".to_vec());
+        assert!(matches!(
+            builder.collect_exclude_set(&mut exclude_set),
+            Err(CascadeError::CapacityViolation(_))
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_exclude_threaded() {
+        let mut builder = CascadeBuilder::default(1, 3);
+        let mut exclude_set = ExcludeSet::default();
+        builder.include(b"1".to_vec()).ok();
+        builder.exclude_threaded(&mut exclude_set, b"2".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"3".to_vec());
+        builder.exclude_threaded(&mut exclude_set, b"4".to_vec());
+        builder.collect_exclude_set(&mut exclude_set).ok();
+        builder.finalize().ok();
+    }
+
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate(hash_alg: HashAlgorithm, inverted: bool) {
+        let total = 10_000_usize;
+        let included = 100_usize;
+
+        let salt = vec![0u8; 16];
+        let mut builder =
+            CascadeBuilder::new(hash_alg, salt, included, (total - included) as usize);
+        for i in 0..included {
+            builder.include(i.to_le_bytes().to_vec()).ok();
+        }
+        for i in included..total {
+            builder.exclude(i.to_le_bytes().to_vec()).ok();
+        }
+        let mut cascade = builder.finalize().unwrap();
+
+        if inverted {
+            cascade.invert()
+        }
+
+        // Ensure we can serialize / deserialize
+        let cascade_bytes = cascade.to_bytes().expect("failed to serialize cascade");
+
+        let cascade = Cascade::from_bytes(cascade_bytes)
+            .expect("failed to deserialize cascade")
+            .expect("cascade should not be None here");
+
+        // Ensure each query gives the correct result
+        for i in 0..included {
+            assert!(cascade.has(i.to_le_bytes().to_vec()) == true ^ inverted)
+        }
+        for i in included..total {
+            assert!(cascade.has(i.to_le_bytes().to_vec()) == false ^ inverted)
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_murmurhash3_inverted() {
+        cascade_builder_test_generate(HashAlgorithm::MurmurHash3, true);
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_murmurhash3() {
+        cascade_builder_test_generate(HashAlgorithm::MurmurHash3, false);
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_sha256l32() {
+        cascade_builder_test_generate(HashAlgorithm::Sha256l32, false);
+    }
+
+    #[test]
+    #[cfg(feature = "builder")]
+    fn cascade_builder_test_generate_sha256() {
+        cascade_builder_test_generate(HashAlgorithm::Sha256, false);
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
commit	36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree	105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/rust_cascade/src
parent	Initial commit. (diff)
download	firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip