summaryrefslogtreecommitdiffstats
path: root/vendor/aho-corasick/src/packed/tests.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/aho-corasick/src/packed/tests.rs')
-rw-r--r--vendor/aho-corasick/src/packed/tests.rs568
1 files changed, 568 insertions, 0 deletions
diff --git a/vendor/aho-corasick/src/packed/tests.rs b/vendor/aho-corasick/src/packed/tests.rs
new file mode 100644
index 000000000..91410cb02
--- /dev/null
+++ b/vendor/aho-corasick/src/packed/tests.rs
@@ -0,0 +1,568 @@
+use std::collections::HashMap;
+use std::usize;
+
+use crate::packed::{Config, MatchKind};
+use crate::Match;
+
+/// A description of a single test against a multi-pattern searcher.
+///
+/// A single test may not necessarily pass on every configuration of a
+/// searcher. The tests are categorized and grouped appropriately below.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct SearchTest {
+ /// The name of this test, for debugging.
+ name: &'static str,
+ /// The patterns to search for.
+ patterns: &'static [&'static str],
+ /// The text to search.
+ haystack: &'static str,
+ /// Each match is a triple of (pattern_index, start, end), where
+ /// pattern_index is an index into `patterns` and `start`/`end` are indices
+ /// into `haystack`.
+ matches: &'static [(usize, usize, usize)],
+}
+
+struct SearchTestOwned {
+ offset: usize,
+ name: String,
+ patterns: Vec<String>,
+ haystack: String,
+ matches: Vec<(usize, usize, usize)>,
+}
+
+impl SearchTest {
+ fn variations(&self) -> Vec<SearchTestOwned> {
+ let mut tests = vec![];
+ for i in 0..=260 {
+ tests.push(self.offset_prefix(i));
+ tests.push(self.offset_suffix(i));
+ tests.push(self.offset_both(i));
+ }
+ tests
+ }
+
+ fn offset_both(&self, off: usize) -> SearchTestOwned {
+ SearchTestOwned {
+ offset: off,
+ name: self.name.to_string(),
+ patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ haystack: format!(
+ "{}{}{}",
+ "Z".repeat(off),
+ self.haystack,
+ "Z".repeat(off)
+ ),
+ matches: self
+ .matches
+ .iter()
+ .map(|&(id, s, e)| (id, s + off, e + off))
+ .collect(),
+ }
+ }
+
+ fn offset_prefix(&self, off: usize) -> SearchTestOwned {
+ SearchTestOwned {
+ offset: off,
+ name: self.name.to_string(),
+ patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ haystack: format!("{}{}", "Z".repeat(off), self.haystack),
+ matches: self
+ .matches
+ .iter()
+ .map(|&(id, s, e)| (id, s + off, e + off))
+ .collect(),
+ }
+ }
+
+ fn offset_suffix(&self, off: usize) -> SearchTestOwned {
+ SearchTestOwned {
+ offset: off,
+ name: self.name.to_string(),
+ patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
+ matches: self.matches.to_vec(),
+ }
+ }
+
+ // fn to_owned(&self) -> SearchTestOwned {
+ // SearchTestOwned {
+ // name: self.name.to_string(),
+ // patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ // haystack: self.haystack.to_string(),
+ // matches: self.matches.iter().cloned().collect(),
+ // }
+ // }
+}
+
+/// Short-hand constructor for SearchTest. We use it a lot below.
+macro_rules! t {
+ ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
+ SearchTest {
+ name: stringify!($name),
+ patterns: $patterns,
+ haystack: $haystack,
+ matches: $matches,
+ }
+ };
+}
+
+/// A collection of test groups.
+type TestCollection = &'static [&'static [SearchTest]];
+
+// Define several collections corresponding to the different type of match
+// semantics supported. These collections have some overlap, but each
+// collection should have some tests that no other collection has.
+
+/// Tests for leftmost-first match semantics.
+const PACKED_LEFTMOST_FIRST: TestCollection =
+ &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
+
+/// Tests for leftmost-longest match semantics.
+const PACKED_LEFTMOST_LONGEST: TestCollection =
+ &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
+
+// Now define the individual tests that make up the collections above.
+
+/// A collection of tests for the that should always be true regardless of
+/// match semantics. That is, all combinations of leftmost-{first, longest}
+/// should produce the same answer.
+const BASICS: &'static [SearchTest] = &[
+ t!(basic001, &["a"], "", &[]),
+ t!(basic010, &["a"], "a", &[(0, 0, 1)]),
+ t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
+ t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
+ t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
+ t!(basic060, &["a"], "bbb", &[]),
+ t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
+ t!(basic100, &["aa"], "", &[]),
+ t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
+ t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
+ t!(basic130, &["aa"], "abbab", &[]),
+ t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
+ t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
+ t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
+ t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
+ t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
+ t!(basic300, &["a", "b"], "", &[]),
+ t!(basic310, &["a", "b"], "z", &[]),
+ t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
+ t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
+ t!(
+ basic340,
+ &["a", "b"],
+ "abba",
+ &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
+ ),
+ t!(
+ basic350,
+ &["b", "a"],
+ "abba",
+ &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
+ ),
+ t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
+ t!(basic400, &["foo", "bar"], "", &[]),
+ t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
+ t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
+ t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
+ t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
+ t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
+ t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
+ t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
+ t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
+ t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
+ t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
+ t!(
+ basic720,
+ &["yabcdef", "bcdeyabc", "abcdezghi"],
+ "yabcdezghi",
+ &[(2, 1, 10),]
+ ),
+ t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
+ t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
+ t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
+ t!(
+ basic840,
+ &["ab", "ba"],
+ "abababa",
+ &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
+ ),
+ t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
+];
+
+/// Tests for leftmost match semantics. These should pass for both
+/// leftmost-first and leftmost-longest match kinds. Stated differently, among
+/// ambiguous matches, the longest match and the match that appeared first when
+/// constructing the automaton should always be the same.
+const LEFTMOST: &'static [SearchTest] = &[
+ t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
+ t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
+ t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
+ t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
+ t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
+ t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
+ t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
+ t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
+ t!(
+ leftmost360,
+ &["abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ leftmost370,
+ &["abcdefghi", "cde", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost380,
+ &["abcdefghi", "hz", "abcdefgh", "a"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ leftmost390,
+ &["b", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost400,
+ &["h", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost410,
+ &["z", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8), (0, 8, 9),]
+ ),
+];
+
+/// Tests for non-overlapping leftmost-first match semantics. These tests
+/// should generally be specific to leftmost-first, which means they should
+/// generally fail under leftmost-longest semantics.
+const LEFTMOST_FIRST: &'static [SearchTest] = &[
+ t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
+ t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
+ t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
+ t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
+ t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
+ t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
+ t!(
+ leftfirst310,
+ &["abcd", "b", "bce", "ce"],
+ "abce",
+ &[(1, 1, 2), (3, 2, 4),]
+ ),
+ t!(
+ leftfirst320,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(0, 0, 1), (2, 7, 9),]
+ ),
+ t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
+ t!(
+ leftfirst340,
+ &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
+ "abcdef",
+ &[(0, 0, 6)]
+ ),
+];
+
+/// Tests for non-overlapping leftmost-longest match semantics. These tests
+/// should generally be specific to leftmost-longest, which means they should
+/// generally fail under leftmost-first semantics.
+const LEFTMOST_LONGEST: &'static [SearchTest] = &[
+ t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
+ t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
+ t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
+ t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
+ t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
+ t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
+ t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
+ t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
+ t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
+ t!(
+ leftlong310,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
+ t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
+ t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
+];
+
+/// Regression tests that are applied to all combinations.
+///
+/// If regression tests are needed for specific match semantics, then add them
+/// to the appropriate group above.
+const REGRESSION: &'static [SearchTest] = &[
+ t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
+ t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
+ t!(
+ regression030,
+ &["libcore/", "libstd/"],
+ "libcore/char/methods.rs",
+ &[(0, 0, 8),]
+ ),
+ t!(
+ regression040,
+ &["libstd/", "libcore/"],
+ "libcore/char/methods.rs",
+ &[(1, 0, 8),]
+ ),
+ t!(
+ regression050,
+ &["\x00\x00\x01", "\x00\x00\x00"],
+ "\x00\x00\x00",
+ &[(1, 0, 3),]
+ ),
+ t!(
+ regression060,
+ &["\x00\x00\x00", "\x00\x00\x01"],
+ "\x00\x00\x00",
+ &[(0, 0, 3),]
+ ),
+];
+
+const TEDDY: &'static [SearchTest] = &[
+ t!(
+ teddy010,
+ &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
+ "abcdefghijk",
+ &[
+ (0, 0, 1),
+ (1, 1, 2),
+ (2, 2, 3),
+ (3, 3, 4),
+ (4, 4, 5),
+ (5, 5, 6),
+ (6, 6, 7),
+ (7, 7, 8),
+ (8, 8, 9),
+ (9, 9, 10),
+ (10, 10, 11)
+ ]
+ ),
+ t!(
+ teddy020,
+ &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
+ "abcdefghijk",
+ &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
+ ),
+ t!(
+ teddy030,
+ &["abc"],
+ "abcdefghijklmnopqrstuvwxyzabcdefghijk",
+ &[(0, 0, 3), (0, 26, 29)]
+ ),
+];
+
+// Now define a test for each combination of things above that we want to run.
+// Since there are a few different combinations for each collection of tests,
+// we define a couple of macros to avoid repetition drudgery. The testconfig
+// macro constructs the automaton from a given match kind, and runs the search
+// tests one-by-one over the given collection. The `with` parameter allows one
+// to configure the config with additional parameters. The testcombo macro
+// invokes testconfig in precisely this way: it sets up several tests where
+// each one turns a different knob on Config.
+
+macro_rules! testconfig {
+ ($name:ident, $collection:expr, $with:expr) => {
+ #[test]
+ fn $name() {
+ run_search_tests($collection, |test| {
+ let mut config = Config::new();
+ $with(&mut config);
+ config
+ .builder()
+ .extend(test.patterns.iter().map(|p| p.as_bytes()))
+ .build()
+ .unwrap()
+ .find_iter(&test.haystack)
+ .collect()
+ });
+ }
+ };
+}
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_default_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |_: &mut Config| {}
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_default_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.match_kind(MatchKind::LeftmostLongest);
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.force_teddy(true);
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_ssse3_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.force_teddy(true);
+ if is_x86_feature_detected!("ssse3") {
+ c.force_avx(Some(false));
+ }
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_ssse3_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ if is_x86_feature_detected!("ssse3") {
+ c.force_avx(Some(false));
+ }
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_avx2_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.force_teddy(true);
+ if is_x86_feature_detected!("avx2") {
+ c.force_avx(Some(true));
+ }
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_avx2_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ if is_x86_feature_detected!("avx2") {
+ c.force_avx(Some(true));
+ }
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_fat_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.force_teddy(true);
+ if is_x86_feature_detected!("avx2") {
+ c.force_teddy_fat(Some(true));
+ }
+ }
+);
+
+#[cfg(target_arch = "x86_64")]
+testconfig!(
+ search_teddy_fat_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ if is_x86_feature_detected!("avx2") {
+ c.force_teddy_fat(Some(true));
+ }
+ }
+);
+
+testconfig!(
+ search_rabinkarp_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.force_rabin_karp(true);
+ }
+);
+
+testconfig!(
+ search_rabinkarp_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
+ }
+);
+
+#[test]
+fn search_tests_have_unique_names() {
+ let assert = |constname, tests: &[SearchTest]| {
+ let mut seen = HashMap::new(); // map from test name to position
+ for (i, test) in tests.iter().enumerate() {
+ if !seen.contains_key(test.name) {
+ seen.insert(test.name, i);
+ } else {
+ let last = seen[test.name];
+ panic!(
+ "{} tests have duplicate names at positions {} and {}",
+ constname, last, i
+ );
+ }
+ }
+ };
+ assert("BASICS", BASICS);
+ assert("LEFTMOST", LEFTMOST);
+ assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
+ assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
+ assert("REGRESSION", REGRESSION);
+ assert("TEDDY", TEDDY);
+}
+
+fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
+ which: TestCollection,
+ mut f: F,
+) {
+ let get_match_triples =
+ |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
+ matches
+ .into_iter()
+ .map(|m| (m.pattern(), m.start(), m.end()))
+ .collect()
+ };
+ for &tests in which {
+ for spec in tests {
+ for test in spec.variations() {
+ assert_eq!(
+ test.matches,
+ get_match_triples(f(&test)).as_slice(),
+ "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
+ test.name,
+ test.patterns,
+ test.haystack,
+ test.offset,
+ );
+ }
+ }
+ }
+}