diff options
Diffstat (limited to 'vendor/aho-corasick/src/packed/tests.rs')
-rw-r--r-- | vendor/aho-corasick/src/packed/tests.rs | 568 |
1 files changed, 568 insertions, 0 deletions
diff --git a/vendor/aho-corasick/src/packed/tests.rs b/vendor/aho-corasick/src/packed/tests.rs new file mode 100644 index 000000000..91410cb02 --- /dev/null +++ b/vendor/aho-corasick/src/packed/tests.rs @@ -0,0 +1,568 @@ +use std::collections::HashMap; +use std::usize; + +use crate::packed::{Config, MatchKind}; +use crate::Match; + +/// A description of a single test against a multi-pattern searcher. +/// +/// A single test may not necessarily pass on every configuration of a +/// searcher. The tests are categorized and grouped appropriately below. +#[derive(Clone, Debug, Eq, PartialEq)] +struct SearchTest { + /// The name of this test, for debugging. + name: &'static str, + /// The patterns to search for. + patterns: &'static [&'static str], + /// The text to search. + haystack: &'static str, + /// Each match is a triple of (pattern_index, start, end), where + /// pattern_index is an index into `patterns` and `start`/`end` are indices + /// into `haystack`. + matches: &'static [(usize, usize, usize)], +} + +struct SearchTestOwned { + offset: usize, + name: String, + patterns: Vec<String>, + haystack: String, + matches: Vec<(usize, usize, usize)>, +} + +impl SearchTest { + fn variations(&self) -> Vec<SearchTestOwned> { + let mut tests = vec![]; + for i in 0..=260 { + tests.push(self.offset_prefix(i)); + tests.push(self.offset_suffix(i)); + tests.push(self.offset_both(i)); + } + tests + } + + fn offset_both(&self, off: usize) -> SearchTestOwned { + SearchTestOwned { + offset: off, + name: self.name.to_string(), + patterns: self.patterns.iter().map(|s| s.to_string()).collect(), + haystack: format!( + "{}{}{}", + "Z".repeat(off), + self.haystack, + "Z".repeat(off) + ), + matches: self + .matches + .iter() + .map(|&(id, s, e)| (id, s + off, e + off)) + .collect(), + } + } + + fn offset_prefix(&self, off: usize) -> SearchTestOwned { + SearchTestOwned { + offset: off, + name: self.name.to_string(), + patterns: self.patterns.iter().map(|s| s.to_string()).collect(), + haystack: format!("{}{}", "Z".repeat(off), self.haystack), + matches: self + .matches + .iter() + .map(|&(id, s, e)| (id, s + off, e + off)) + .collect(), + } + } + + fn offset_suffix(&self, off: usize) -> SearchTestOwned { + SearchTestOwned { + offset: off, + name: self.name.to_string(), + patterns: self.patterns.iter().map(|s| s.to_string()).collect(), + haystack: format!("{}{}", self.haystack, "Z".repeat(off)), + matches: self.matches.to_vec(), + } + } + + // fn to_owned(&self) -> SearchTestOwned { + // SearchTestOwned { + // name: self.name.to_string(), + // patterns: self.patterns.iter().map(|s| s.to_string()).collect(), + // haystack: self.haystack.to_string(), + // matches: self.matches.iter().cloned().collect(), + // } + // } +} + +/// Short-hand constructor for SearchTest. We use it a lot below. +macro_rules! t { + ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { + SearchTest { + name: stringify!($name), + patterns: $patterns, + haystack: $haystack, + matches: $matches, + } + }; +} + +/// A collection of test groups. +type TestCollection = &'static [&'static [SearchTest]]; + +// Define several collections corresponding to the different type of match +// semantics supported. These collections have some overlap, but each +// collection should have some tests that no other collection has. + +/// Tests for leftmost-first match semantics. +const PACKED_LEFTMOST_FIRST: TestCollection = + &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY]; + +/// Tests for leftmost-longest match semantics. +const PACKED_LEFTMOST_LONGEST: TestCollection = + &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY]; + +// Now define the individual tests that make up the collections above. + +/// A collection of tests for the that should always be true regardless of +/// match semantics. That is, all combinations of leftmost-{first, longest} +/// should produce the same answer. +const BASICS: &'static [SearchTest] = &[ + t!(basic001, &["a"], "", &[]), + t!(basic010, &["a"], "a", &[(0, 0, 1)]), + t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]), + t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]), + t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]), + t!(basic050, &["a"], "bba", &[(0, 2, 3)]), + t!(basic060, &["a"], "bbb", &[]), + t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), + t!(basic100, &["aa"], "", &[]), + t!(basic110, &["aa"], "aa", &[(0, 0, 2)]), + t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]), + t!(basic130, &["aa"], "abbab", &[]), + t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]), + t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]), + t!(basic200, &["abc"], "abc", &[(0, 0, 3)]), + t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]), + t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]), + t!(basic300, &["a", "b"], "", &[]), + t!(basic310, &["a", "b"], "z", &[]), + t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]), + t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]), + t!( + basic340, + &["a", "b"], + "abba", + &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] + ), + t!( + basic350, + &["b", "a"], + "abba", + &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] + ), + t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]), + t!(basic400, &["foo", "bar"], "", &[]), + t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]), + t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]), + t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]), + t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]), + t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]), + t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]), + t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]), + t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]), + t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]), + t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]), + t!( + basic720, + &["yabcdef", "bcdeyabc", "abcdezghi"], + "yabcdezghi", + &[(2, 1, 10),] + ), + t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]), + t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]), + t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]), + t!( + basic840, + &["ab", "ba"], + "abababa", + &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] + ), + t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]), +]; + +/// Tests for leftmost match semantics. These should pass for both +/// leftmost-first and leftmost-longest match kinds. Stated differently, among +/// ambiguous matches, the longest match and the match that appeared first when +/// constructing the automaton should always be the same. +const LEFTMOST: &'static [SearchTest] = &[ + t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]), + t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]), + t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]), + t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]), + t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]), + t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]), + t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]), + t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]), + t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]), + t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]), + t!( + leftmost360, + &["abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(2, 0, 8),] + ), + t!( + leftmost370, + &["abcdefghi", "cde", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!( + leftmost380, + &["abcdefghi", "hz", "abcdefgh", "a"], + "abcdefghz", + &[(2, 0, 8),] + ), + t!( + leftmost390, + &["b", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!( + leftmost400, + &["h", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!( + leftmost410, + &["z", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8), (0, 8, 9),] + ), +]; + +/// Tests for non-overlapping leftmost-first match semantics. These tests +/// should generally be specific to leftmost-first, which means they should +/// generally fail under leftmost-longest semantics. +const LEFTMOST_FIRST: &'static [SearchTest] = &[ + t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]), + t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]), + t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]), + t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]), + t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]), + t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), + t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]), + t!( + leftfirst310, + &["abcd", "b", "bce", "ce"], + "abce", + &[(1, 1, 2), (3, 2, 4),] + ), + t!( + leftfirst320, + &["a", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(0, 0, 1), (2, 7, 9),] + ), + t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]), + t!( + leftfirst340, + &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"], + "abcdef", + &[(0, 0, 6)] + ), +]; + +/// Tests for non-overlapping leftmost-longest match semantics. These tests +/// should generally be specific to leftmost-longest, which means they should +/// generally fail under leftmost-first semantics. +const LEFTMOST_LONGEST: &'static [SearchTest] = &[ + t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]), + t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]), + t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]), + t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]), + t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]), + t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]), + t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]), + t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]), + t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]), + t!( + leftlong310, + &["a", "abcdefghi", "hz", "abcdefgh"], + "abcdefghz", + &[(3, 0, 8),] + ), + t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]), + t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]), + t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]), +]; + +/// Regression tests that are applied to all combinations. +/// +/// If regression tests are needed for specific match semantics, then add them +/// to the appropriate group above. +const REGRESSION: &'static [SearchTest] = &[ + t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]), + t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]), + t!( + regression030, + &["libcore/", "libstd/"], + "libcore/char/methods.rs", + &[(0, 0, 8),] + ), + t!( + regression040, + &["libstd/", "libcore/"], + "libcore/char/methods.rs", + &[(1, 0, 8),] + ), + t!( + regression050, + &["\x00\x00\x01", "\x00\x00\x00"], + "\x00\x00\x00", + &[(1, 0, 3),] + ), + t!( + regression060, + &["\x00\x00\x00", "\x00\x00\x01"], + "\x00\x00\x00", + &[(0, 0, 3),] + ), +]; + +const TEDDY: &'static [SearchTest] = &[ + t!( + teddy010, + &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], + "abcdefghijk", + &[ + (0, 0, 1), + (1, 1, 2), + (2, 2, 3), + (3, 3, 4), + (4, 4, 5), + (5, 5, 6), + (6, 6, 7), + (7, 7, 8), + (8, 8, 9), + (9, 9, 10), + (10, 10, 11) + ] + ), + t!( + teddy020, + &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"], + "abcdefghijk", + &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),] + ), + t!( + teddy030, + &["abc"], + "abcdefghijklmnopqrstuvwxyzabcdefghijk", + &[(0, 0, 3), (0, 26, 29)] + ), +]; + +// Now define a test for each combination of things above that we want to run. +// Since there are a few different combinations for each collection of tests, +// we define a couple of macros to avoid repetition drudgery. The testconfig +// macro constructs the automaton from a given match kind, and runs the search +// tests one-by-one over the given collection. The `with` parameter allows one +// to configure the config with additional parameters. The testcombo macro +// invokes testconfig in precisely this way: it sets up several tests where +// each one turns a different knob on Config. + +macro_rules! testconfig { + ($name:ident, $collection:expr, $with:expr) => { + #[test] + fn $name() { + run_search_tests($collection, |test| { + let mut config = Config::new(); + $with(&mut config); + config + .builder() + .extend(test.patterns.iter().map(|p| p.as_bytes())) + .build() + .unwrap() + .find_iter(&test.haystack) + .collect() + }); + } + }; +} + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_default_leftmost_first, + PACKED_LEFTMOST_FIRST, + |_: &mut Config| {} +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_default_leftmost_longest, + PACKED_LEFTMOST_LONGEST, + |c: &mut Config| { + c.match_kind(MatchKind::LeftmostLongest); + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_leftmost_first, + PACKED_LEFTMOST_FIRST, + |c: &mut Config| { + c.force_teddy(true); + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_leftmost_longest, + PACKED_LEFTMOST_LONGEST, + |c: &mut Config| { + c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_ssse3_leftmost_first, + PACKED_LEFTMOST_FIRST, + |c: &mut Config| { + c.force_teddy(true); + if is_x86_feature_detected!("ssse3") { + c.force_avx(Some(false)); + } + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_ssse3_leftmost_longest, + PACKED_LEFTMOST_LONGEST, + |c: &mut Config| { + c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + if is_x86_feature_detected!("ssse3") { + c.force_avx(Some(false)); + } + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_avx2_leftmost_first, + PACKED_LEFTMOST_FIRST, + |c: &mut Config| { + c.force_teddy(true); + if is_x86_feature_detected!("avx2") { + c.force_avx(Some(true)); + } + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_avx2_leftmost_longest, + PACKED_LEFTMOST_LONGEST, + |c: &mut Config| { + c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + if is_x86_feature_detected!("avx2") { + c.force_avx(Some(true)); + } + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_fat_leftmost_first, + PACKED_LEFTMOST_FIRST, + |c: &mut Config| { + c.force_teddy(true); + if is_x86_feature_detected!("avx2") { + c.force_teddy_fat(Some(true)); + } + } +); + +#[cfg(target_arch = "x86_64")] +testconfig!( + search_teddy_fat_leftmost_longest, + PACKED_LEFTMOST_LONGEST, + |c: &mut Config| { + c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + if is_x86_feature_detected!("avx2") { + c.force_teddy_fat(Some(true)); + } + } +); + +testconfig!( + search_rabinkarp_leftmost_first, + PACKED_LEFTMOST_FIRST, + |c: &mut Config| { + c.force_rabin_karp(true); + } +); + +testconfig!( + search_rabinkarp_leftmost_longest, + PACKED_LEFTMOST_LONGEST, + |c: &mut Config| { + c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest); + } +); + +#[test] +fn search_tests_have_unique_names() { + let assert = |constname, tests: &[SearchTest]| { + let mut seen = HashMap::new(); // map from test name to position + for (i, test) in tests.iter().enumerate() { + if !seen.contains_key(test.name) { + seen.insert(test.name, i); + } else { + let last = seen[test.name]; + panic!( + "{} tests have duplicate names at positions {} and {}", + constname, last, i + ); + } + } + }; + assert("BASICS", BASICS); + assert("LEFTMOST", LEFTMOST); + assert("LEFTMOST_FIRST", LEFTMOST_FIRST); + assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST); + assert("REGRESSION", REGRESSION); + assert("TEDDY", TEDDY); +} + +fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>( + which: TestCollection, + mut f: F, +) { + let get_match_triples = + |matches: Vec<Match>| -> Vec<(usize, usize, usize)> { + matches + .into_iter() + .map(|m| (m.pattern(), m.start(), m.end())) + .collect() + }; + for &tests in which { + for spec in tests { + for test in spec.variations() { + assert_eq!( + test.matches, + get_match_triples(f(&test)).as_slice(), + "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}", + test.name, + test.patterns, + test.haystack, + test.offset, + ); + } + } + } +} |