From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- library/core/tests/pattern.rs | 503 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 library/core/tests/pattern.rs (limited to 'library/core/tests/pattern.rs') diff --git a/library/core/tests/pattern.rs b/library/core/tests/pattern.rs new file mode 100644 index 000000000..d4bec996d --- /dev/null +++ b/library/core/tests/pattern.rs @@ -0,0 +1,503 @@ +use std::str::pattern::*; + +// This macro makes it easier to write +// tests that do a series of iterations +macro_rules! search_asserts { + ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => { + let mut searcher = $needle.into_searcher($haystack); + let arr = [$( Step::from(searcher.$func()) ),*]; + assert_eq!(&arr[..], &$result, $testname); + } +} + +/// Combined enum for the results of next() and next_match()/next_reject() +#[derive(Debug, PartialEq, Eq)] +enum Step { + // variant names purposely chosen to + // be the same length for easy alignment + Matches(usize, usize), + Rejects(usize, usize), + InRange(usize, usize), + Done, +} + +use self::Step::*; + +impl From for Step { + fn from(x: SearchStep) -> Self { + match x { + SearchStep::Match(a, b) => Matches(a, b), + SearchStep::Reject(a, b) => Rejects(a, b), + SearchStep::Done => Done, + } + } +} + +impl From> for Step { + fn from(x: Option<(usize, usize)>) -> Self { + match x { + Some((a, b)) => InRange(a, b), + None => Done, + } + } +} + +// FIXME(Manishearth) these tests focus on single-character searching (CharSearcher) +// and on next()/next_match(), not next_reject(). This is because +// the memchr changes make next_match() for single chars complex, but next_reject() +// continues to use next() under the hood. We should add more test cases for all +// of these, as well as tests for StrSearcher and higher level tests for str::find() (etc) + +#[test] +fn test_simple_iteration() { + search_asserts!( + "abcdeabcd", + 'a', + "forward iteration for ASCII string", + // a b c d e a b c d EOF + [next, next, next, next, next, next, next, next, next, next], + [ + Matches(0, 1), + Rejects(1, 2), + Rejects(2, 3), + Rejects(3, 4), + Rejects(4, 5), + Matches(5, 6), + Rejects(6, 7), + Rejects(7, 8), + Rejects(8, 9), + Done + ] + ); + + search_asserts!( + "abcdeabcd", + 'a', + "reverse iteration for ASCII string", + // d c b a e d c b a EOF + [ + next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back, + next_back, next_back + ], + [ + Rejects(8, 9), + Rejects(7, 8), + Rejects(6, 7), + Matches(5, 6), + Rejects(4, 5), + Rejects(3, 4), + Rejects(2, 3), + Rejects(1, 2), + Matches(0, 1), + Done + ] + ); + + search_asserts!( + "我爱我的猫", + '我', + "forward iteration for Chinese string", + // 我 愛 我 的 貓 EOF + [next, next, next, next, next, next], + [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done] + ); + + search_asserts!( + "我的猫说meow", + 'm', + "forward iteration for mixed string", + // 我 的 猫 说 m e o w EOF + [next, next, next, next, next, next, next, next, next], + [ + Rejects(0, 3), + Rejects(3, 6), + Rejects(6, 9), + Rejects(9, 12), + Matches(12, 13), + Rejects(13, 14), + Rejects(14, 15), + Rejects(15, 16), + Done + ] + ); + + search_asserts!( + "我的猫说meow", + '猫', + "reverse iteration for mixed string", + // w o e m 说 猫 的 我 EOF + [ + next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back, + next_back + ], + [ + Rejects(15, 16), + Rejects(14, 15), + Rejects(13, 14), + Rejects(12, 13), + Rejects(9, 12), + Matches(6, 9), + Rejects(3, 6), + Rejects(0, 3), + Done + ] + ); +} + +#[test] +fn test_simple_search() { + search_asserts!( + "abcdeabcdeabcde", + 'a', + "next_match for ASCII string", + [next_match, next_match, next_match, next_match], + [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done] + ); + + search_asserts!( + "abcdeabcdeabcde", + 'a', + "next_match_back for ASCII string", + [next_match_back, next_match_back, next_match_back, next_match_back], + [InRange(10, 11), InRange(5, 6), InRange(0, 1), Done] + ); + + search_asserts!( + "abcdeab", + 'a', + "next_reject for ASCII string", + [next_reject, next_reject, next_match, next_reject, next_reject], + [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done] + ); + + search_asserts!( + "abcdeabcdeabcde", + 'a', + "next_reject_back for ASCII string", + [ + next_reject_back, + next_reject_back, + next_match_back, + next_reject_back, + next_reject_back, + next_reject_back + ], + [ + InRange(14, 15), + InRange(13, 14), + InRange(10, 11), + InRange(9, 10), + InRange(8, 9), + InRange(7, 8) + ] + ); +} + +// Á, 각, ก, 😀 all end in 0x81 +// 🁀, ᘀ do not end in 0x81 but contain the byte +// ꁁ has 0x81 as its second and third bytes. +// +// The memchr-using implementation of next_match +// and next_match_back temporarily violate +// the property that the search is always on a unicode boundary, +// which is fine as long as this never reaches next() or next_back(). +// So we test if next() is correct after each next_match() as well. +const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a"; + +#[test] +fn test_stress_indices() { + // this isn't really a test, more of documentation on the indices of each character in the stresstest string + + search_asserts!( + STRESS, + 'x', + "Indices of characters in stress test", + [ + next, next, next, next, next, next, next, next, next, next, next, next, next, next, + next, next, next, next, next, next, next + ], + [ + Rejects(0, 2), // Á + Rejects(2, 3), // a + Rejects(3, 7), // 🁀 + Rejects(7, 8), // b + Rejects(8, 10), // Á + Rejects(10, 13), // ꁁ + Rejects(13, 14), // f + Rejects(14, 15), // g + Rejects(15, 19), // 😀 + Rejects(19, 22), // 각 + Rejects(22, 25), // ก + Rejects(25, 28), // ᘀ + Rejects(28, 31), // 각 + Rejects(31, 32), // a + Rejects(32, 34), // Á + Rejects(34, 37), // 각 + Rejects(37, 40), // ꁁ + Rejects(40, 43), // ก + Rejects(43, 47), // 😀 + Rejects(47, 48), // a + Done + ] + ); +} + +#[test] +fn test_forward_search_shared_bytes() { + search_asserts!( + STRESS, + 'Á', + "Forward search for two-byte Latin character", + [next_match, next_match, next_match, next_match], + [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done] + ); + + search_asserts!( + STRESS, + 'Á', + "Forward search for two-byte Latin character; check if next() still works", + [next_match, next, next_match, next, next_match, next, next_match], + [ + InRange(0, 2), + Rejects(2, 3), + InRange(8, 10), + Rejects(10, 13), + InRange(32, 34), + Rejects(34, 37), + Done + ] + ); + + search_asserts!( + STRESS, + '각', + "Forward search for three-byte Hangul character", + [next_match, next, next_match, next_match, next_match], + [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done] + ); + + search_asserts!( + STRESS, + '각', + "Forward search for three-byte Hangul character; check if next() still works", + [next_match, next, next_match, next, next_match, next, next_match], + [ + InRange(19, 22), + Rejects(22, 25), + InRange(28, 31), + Rejects(31, 32), + InRange(34, 37), + Rejects(37, 40), + Done + ] + ); + + search_asserts!( + STRESS, + 'ก', + "Forward search for three-byte Thai character", + [next_match, next, next_match, next, next_match], + [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done] + ); + + search_asserts!( + STRESS, + 'ก', + "Forward search for three-byte Thai character; check if next() still works", + [next_match, next, next_match, next, next_match], + [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done] + ); + + search_asserts!( + STRESS, + '😁', + "Forward search for four-byte emoji", + [next_match, next, next_match, next, next_match], + [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done] + ); + + search_asserts!( + STRESS, + '😁', + "Forward search for four-byte emoji; check if next() still works", + [next_match, next, next_match, next, next_match], + [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done] + ); + + search_asserts!( + STRESS, + 'ꁁ', + "Forward search for three-byte Yi character with repeated bytes", + [next_match, next, next_match, next, next_match], + [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done] + ); + + search_asserts!( + STRESS, + 'ꁁ', + "Forward search for three-byte Yi character with repeated bytes; check if next() still works", + [next_match, next, next_match, next, next_match], + [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done] + ); +} + +#[test] +fn test_reverse_search_shared_bytes() { + search_asserts!( + STRESS, + 'Á', + "Reverse search for two-byte Latin character", + [next_match_back, next_match_back, next_match_back, next_match_back], + [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done] + ); + + search_asserts!( + STRESS, + 'Á', + "Reverse search for two-byte Latin character; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back], + [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done] + ); + + search_asserts!( + STRESS, + '각', + "Reverse search for three-byte Hangul character", + [next_match_back, next_back, next_match_back, next_match_back, next_match_back], + [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done] + ); + + search_asserts!( + STRESS, + '각', + "Reverse search for three-byte Hangul character; check if next_back() still works", + [ + next_match_back, + next_back, + next_match_back, + next_back, + next_match_back, + next_back, + next_match_back + ], + [ + InRange(34, 37), + Rejects(32, 34), + InRange(28, 31), + Rejects(25, 28), + InRange(19, 22), + Rejects(15, 19), + Done + ] + ); + + search_asserts!( + STRESS, + 'ก', + "Reverse search for three-byte Thai character", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done] + ); + + search_asserts!( + STRESS, + 'ก', + "Reverse search for three-byte Thai character; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done] + ); + + search_asserts!( + STRESS, + '😁', + "Reverse search for four-byte emoji", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done] + ); + + search_asserts!( + STRESS, + '😁', + "Reverse search for four-byte emoji; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done] + ); + + search_asserts!( + STRESS, + 'ꁁ', + "Reverse search for three-byte Yi character with repeated bytes", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] + ); + + search_asserts!( + STRESS, + 'ꁁ', + "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works", + [next_match_back, next_back, next_match_back, next_back, next_match_back], + [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done] + ); +} + +#[test] +fn double_ended_regression_test() { + // https://github.com/rust-lang/rust/issues/47175 + // Ensures that double ended searching comes to a convergence + search_asserts!( + "abcdeabcdeabcde", + 'a', + "alternating double ended search", + [next_match, next_match_back, next_match, next_match_back], + [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done] + ); + search_asserts!( + "abcdeabcdeabcde", + 'a', + "triple double ended search for a", + [next_match, next_match_back, next_match_back, next_match_back], + [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done] + ); + search_asserts!( + "abcdeabcdeabcde", + 'd', + "triple double ended search for d", + [next_match, next_match_back, next_match_back, next_match_back], + [InRange(3, 4), InRange(13, 14), InRange(8, 9), Done] + ); + search_asserts!( + STRESS, + 'Á', + "Double ended search for two-byte Latin character", + [next_match, next_match_back, next_match, next_match_back], + [InRange(0, 2), InRange(32, 34), InRange(8, 10), Done] + ); + search_asserts!( + STRESS, + '각', + "Reverse double ended search for three-byte Hangul character", + [next_match_back, next_back, next_match, next, next_match_back, next_match], + [InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done] + ); + search_asserts!( + STRESS, + 'ก', + "Double ended search for three-byte Thai character", + [next_match, next_back, next, next_match_back, next_match], + [InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done] + ); + search_asserts!( + STRESS, + '😁', + "Double ended search for four-byte emoji", + [next_match_back, next, next_match, next_back, next_match], + [InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done] + ); + search_asserts!( + STRESS, + 'ꁁ', + "Double ended search for three-byte Yi character with repeated bytes", + [next_match, next, next_match_back, next_back, next_match], + [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done] + ); +} -- cgit v1.2.3