Adding upstream version 86.0.1.upstream/86.0.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 14:29:10 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 14:29:10 +0000
commit: 2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree: b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/memchr/src/tests/mod.rs
parent: Initial commit. (diff)
download: firefox-upstream.tar.xz
firefox-upstream.zip
1 files changed, 362 insertions, 0 deletions
diff --git a/third_party/rust/memchr/src/tests/mod.rs b/third_party/rust/memchr/src/tests/mod.rs
new file mode 100644
index 0000000000..82c1a248e9
--- /dev/null
+++ b/third_party/rust/memchr/src/tests/mod.rs
@@ -0,0 +1,362 @@
+use std::iter::repeat;
+
+mod iter;
+mod memchr;
+
+#[cfg(target_endian = "little")]
+#[test]
+fn byte_order() {
+    eprintln!("LITTLE ENDIAN");
+}
+
+#[cfg(target_endian = "big")]
+#[test]
+fn byte_order() {
+    eprintln!("BIG ENDIAN");
+}
+
+/// Create a sequence of tests that should be run by memchr implementations.
+fn memchr_tests() -> Vec<MemchrTest> {
+    let mut tests = Vec::new();
+    for statict in MEMCHR_TESTS {
+        assert!(!statict.corpus.contains("%"), "% is not allowed in corpora");
+        assert!(!statict.corpus.contains("#"), "# is not allowed in corpora");
+        assert!(!statict.needles.contains(&b'%'), "% is an invalid needle");
+        assert!(!statict.needles.contains(&b'#'), "# is an invalid needle");
+
+        let t = MemchrTest {
+            corpus: statict.corpus.to_string(),
+            needles: statict.needles.to_vec(),
+            positions: statict.positions.to_vec(),
+        };
+        tests.push(t.clone());
+        tests.extend(t.expand());
+    }
+    tests
+}
+
+/// A set of tests for memchr-like functions.
+///
+/// These tests mostly try to cover the short string cases. We cover the longer
+/// string cases via the benchmarks (which are tests themselves), via
+/// quickcheck tests and via automatic expansion of each test case (by
+/// increasing the corpus size). Finally, we cover different alignment cases
+/// in the tests by varying the starting point of the slice.
+const MEMCHR_TESTS: &[MemchrTestStatic] = &[
+    // one needle (applied to memchr + memchr2 + memchr3)
+    MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] },
+    MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] },
+    MemchrTestStatic {
+        corpus: "aaa",
+        needles: &[b'a'],
+        positions: &[0, 1, 2],
+    },
+    MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] },
+    MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] },
+    MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] },
+    MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] },
+    MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] },
+    MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] },
+    MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] },
+    MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] },
+    MemchrTestStatic {
+        corpus: "\x00\x00",
+        needles: &[b'\x00'],
+        positions: &[0, 1],
+    },
+    MemchrTestStatic {
+        corpus: "\x00a\x00",
+        needles: &[b'\x00'],
+        positions: &[0, 2],
+    },
+    MemchrTestStatic {
+        corpus: "zzzzzzzzzzzzzzzza",
+        needles: &[b'a'],
+        positions: &[16],
+    },
+    MemchrTestStatic {
+        corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+        needles: &[b'a'],
+        positions: &[32],
+    },
+    // two needles (applied to memchr2 + memchr3)
+    MemchrTestStatic {
+        corpus: "az",
+        needles: &[b'a', b'z'],
+        positions: &[0, 1],
+    },
+    MemchrTestStatic {
+        corpus: "az",
+        needles: &[b'a', b'z'],
+        positions: &[0, 1],
+    },
+    MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] },
+    MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] },
+    MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] },
+    MemchrTestStatic {
+        corpus: "yyyyaz",
+        needles: &[b'a', b'z'],
+        positions: &[4, 5],
+    },
+    MemchrTestStatic {
+        corpus: "yyyyaz",
+        needles: &[b'z', b'a'],
+        positions: &[4, 5],
+    },
+    // three needles (applied to memchr3)
+    MemchrTestStatic {
+        corpus: "xyz",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    MemchrTestStatic {
+        corpus: "zxy",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    MemchrTestStatic {
+        corpus: "zxy",
+        needles: &[b'x', b'a', b'z'],
+        positions: &[0, 1],
+    },
+    MemchrTestStatic {
+        corpus: "zxy",
+        needles: &[b't', b'a', b'z'],
+        positions: &[0],
+    },
+    MemchrTestStatic {
+        corpus: "yxz",
+        needles: &[b't', b'a', b'z'],
+        positions: &[2],
+    },
+];
+
+/// A description of a test on a memchr like function.
+#[derive(Clone, Debug)]
+struct MemchrTest {
+    /// The thing to search. We use `&str` instead of `&[u8]` because they
+    /// are nicer to write in tests, and we don't miss much since memchr
+    /// doesn't care about UTF-8.
+    ///
+    /// Corpora cannot contain either '%' or '#'. We use these bytes when
+    /// expanding test cases into many test cases, and we assume they are not
+    /// used. If they are used, `memchr_tests` will panic.
+    corpus: String,
+    /// The needles to search for. This is intended to be an "alternation" of
+    /// needles. The number of needles may cause this test to be skipped for
+    /// some memchr variants. For example, a test with 2 needles cannot be used
+    /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
+    /// However, a test with only 1 needle can be used to test all of `memchr`,
+    /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
+    /// bytes that we never used in the corpus (such as '#').
+    needles: Vec<u8>,
+    /// The positions expected to match for all of the needles.
+    positions: Vec<usize>,
+}
+
+/// Like MemchrTest, but easier to define as a constant.
+#[derive(Clone, Debug)]
+struct MemchrTestStatic {
+    corpus: &'static str,
+    needles: &'static [u8],
+    positions: &'static [usize],
+}
+
+impl MemchrTest {
+    fn one<F: Fn(u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+        let needles = match self.needles(1) {
+            None => return,
+            Some(needles) => needles,
+        };
+        // We test different alignments here. Since some implementations use
+        // AVX2, which can read 32 bytes at a time, we test at least that.
+        // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128
+        // (avx) bytes at a time, so we include that in our offsets as well.
+        //
+        // You might think this would cause most needles to not be found, but
+        // we actually expand our tests to include corpus sizes all the way up
+        // to >500 bytes, so we should exericse most branches.
+        for align in 0..130 {
+            let corpus = self.corpus(align);
+            assert_eq!(
+                self.positions(align, reverse).get(0).cloned(),
+                f(needles[0], corpus.as_bytes()),
+                "search for {:?} failed in: {:?} (len: {}, alignment: {})",
+                needles[0] as char,
+                corpus,
+                corpus.len(),
+                align
+            );
+        }
+    }
+
+    fn two<F: Fn(u8, u8, &[u8]) -> Option<usize>>(&self, reverse: bool, f: F) {
+        let needles = match self.needles(2) {
+            None => return,
+            Some(needles) => needles,
+        };
+        for align in 0..130 {
+            let corpus = self.corpus(align);
+            assert_eq!(
+                self.positions(align, reverse).get(0).cloned(),
+                f(needles[0], needles[1], corpus.as_bytes()),
+                "search for {:?}|{:?} failed in: {:?} \
+                 (len: {}, alignment: {})",
+                needles[0] as char,
+                needles[1] as char,
+                corpus,
+                corpus.len(),
+                align
+            );
+        }
+    }
+
+    fn three<F: Fn(u8, u8, u8, &[u8]) -> Option<usize>>(
+        &self,
+        reverse: bool,
+        f: F,
+    ) {
+        let needles = match self.needles(3) {
+            None => return,
+            Some(needles) => needles,
+        };
+        for align in 0..130 {
+            let corpus = self.corpus(align);
+            assert_eq!(
+                self.positions(align, reverse).get(0).cloned(),
+                f(needles[0], needles[1], needles[2], corpus.as_bytes()),
+                "search for {:?}|{:?}|{:?} failed in: {:?} \
+                 (len: {}, alignment: {})",
+                needles[0] as char,
+                needles[1] as char,
+                needles[2] as char,
+                corpus,
+                corpus.len(),
+                align
+            );
+        }
+    }
+
+    fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F)
+    where
+        F: FnOnce(u8, &'a [u8]) -> I,
+        I: Iterator<Item = usize>,
+    {
+        if let Some(ns) = self.needles(1) {
+            self.iter(reverse, f(ns[0], self.corpus.as_bytes()));
+        }
+    }
+
+    fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F)
+    where
+        F: FnOnce(u8, u8, &'a [u8]) -> I,
+        I: Iterator<Item = usize>,
+    {
+        if let Some(ns) = self.needles(2) {
+            self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes()));
+        }
+    }
+
+    fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F)
+    where
+        F: FnOnce(u8, u8, u8, &'a [u8]) -> I,
+        I: Iterator<Item = usize>,
+    {
+        if let Some(ns) = self.needles(3) {
+            self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes()));
+        }
+    }
+
+    /// Test that the positions yielded by the given iterator match the
+    /// positions in this test. If reverse is true, then reverse the positions
+    /// before comparing them.
+    fn iter<I: Iterator<Item = usize>>(&self, reverse: bool, it: I) {
+        assert_eq!(
+            self.positions(0, reverse),
+            it.collect::<Vec<usize>>(),
+            r"search for {:?} failed in: {:?}",
+            self.needles.iter().map(|&b| b as char).collect::<Vec<char>>(),
+            self.corpus
+        );
+    }
+
+    /// Expand this test into many variations of the same test.
+    ///
+    /// In particular, this will generate more tests with larger corpus sizes.
+    /// The expected positions are updated to maintain the integrity of the
+    /// test.
+    ///
+    /// This is important in testing a memchr implementation, because there are
+    /// often different cases depending on the length of the corpus.
+    ///
+    /// Note that we extend the corpus by adding `%` bytes, which we
+    /// don't otherwise use as a needle.
+    fn expand(&self) -> Vec<MemchrTest> {
+        let mut more = Vec::new();
+
+        // Add bytes to the start of the corpus.
+        for i in 1..515 {
+            let mut t = self.clone();
+            let mut new_corpus: String = repeat('%').take(i).collect();
+            new_corpus.push_str(&t.corpus);
+            t.corpus = new_corpus;
+            t.positions = t.positions.into_iter().map(|p| p + i).collect();
+            more.push(t);
+        }
+        // Add bytes to the end of the corpus.
+        for i in 1..515 {
+            let mut t = self.clone();
+            let padding: String = repeat('%').take(i).collect();
+            t.corpus.push_str(&padding);
+            more.push(t);
+        }
+
+        more
+    }
+
+    /// Return the corpus at the given alignment.
+    ///
+    /// If the alignment exceeds the length of the corpus, then this returns
+    /// an empty slice.
+    fn corpus(&self, align: usize) -> &str {
+        self.corpus.get(align..).unwrap_or("")
+    }
+
+    /// Return exactly `count` needles from this test. If this test has less
+    /// than `count` needles, then add `#` until the number of needles
+    /// matches `count`. If this test has more than `count` needles, then
+    /// return `None` (because there is no way to use this test data for a
+    /// search using fewer needles).
+    fn needles(&self, count: usize) -> Option<Vec<u8>> {
+        if self.needles.len() > count {
+            return None;
+        }
+
+        let mut needles = self.needles.to_vec();
+        for _ in needles.len()..count {
+            // we assume # is never used in tests.
+            needles.push(b'#');
+        }
+        Some(needles)
+    }
+
+    /// Return the positions in this test, reversed if `reverse` is true.
+    ///
+    /// If alignment is given, then all positions greater than or equal to that
+    /// alignment are offset by the alignment. Positions less than the
+    /// alignment are dropped.
+    fn positions(&self, align: usize, reverse: bool) -> Vec<usize> {
+        let positions = if reverse {
+            let mut positions = self.positions.to_vec();
+            positions.reverse();
+            positions
+        } else {
+            self.positions.to_vec()
+        };
+        positions
+            .into_iter()
+            .filter(|&p| p >= align)
+            .map(|p| p - align)
+            .collect()
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 14:29:10 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 14:29:10 +0000
commit	2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree	b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/memchr/src/tests/mod.rs
parent	Initial commit. (diff)
download	firefox-upstream.tar.xz firefox-upstream.zip