diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/regex-automata/tests | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1. (ref: upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/tests')
-rw-r--r-- | vendor/regex-automata/tests/collection.rs | 461 | ||||
-rw-r--r-- | vendor/regex-automata/tests/regression.rs | 42 | ||||
-rw-r--r-- | vendor/regex-automata/tests/suite.rs | 250 | ||||
-rw-r--r-- | vendor/regex-automata/tests/tests.rs | 25 | ||||
-rw-r--r-- | vendor/regex-automata/tests/unescape.rs | 84 |
5 files changed, 862 insertions, 0 deletions
diff --git a/vendor/regex-automata/tests/collection.rs b/vendor/regex-automata/tests/collection.rs new file mode 100644 index 000000000..68b03229e --- /dev/null +++ b/vendor/regex-automata/tests/collection.rs @@ -0,0 +1,461 @@ +use std::collections::BTreeMap; +use std::env; +use std::fmt::{self, Write}; +use std::thread; + +use regex; +use regex_automata::{DenseDFA, ErrorKind, Regex, RegexBuilder, StateID, DFA}; +use serde_bytes; +use toml; + +macro_rules! load { + ($col:ident, $path:expr) => { + $col.extend(RegexTests::load( + concat!("../data/tests/", $path), + include_bytes!(concat!("../data/tests/", $path)), + )); + }; +} + +lazy_static! { + pub static ref SUITE: RegexTestCollection = { + let mut col = RegexTestCollection::new(); + load!(col, "fowler/basic.toml"); + load!(col, "fowler/nullsubexpr.toml"); + load!(col, "fowler/repetition.toml"); + load!(col, "fowler/repetition-long.toml"); + load!(col, "crazy.toml"); + load!(col, "flags.toml"); + load!(col, "iter.toml"); + load!(col, "no-unicode.toml"); + load!(col, "unicode.toml"); + col + }; +} + +#[derive(Clone, Debug)] +pub struct RegexTestCollection { + pub by_name: BTreeMap<String, RegexTest>, +} + +#[derive(Clone, Debug, Deserialize)] +pub struct RegexTests { + pub tests: Vec<RegexTest>, +} + +#[derive(Clone, Debug, Deserialize)] +pub struct RegexTest { + pub name: String, + #[serde(default)] + pub options: Vec<RegexTestOption>, + pub pattern: String, + #[serde(with = "serde_bytes")] + pub input: Vec<u8>, + #[serde(rename = "matches")] + pub matches: Vec<Match>, + #[serde(default)] + pub captures: Vec<Option<Match>>, + #[serde(default)] + pub fowler_line_number: Option<u64>, +} + +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)] +#[serde(rename_all = "kebab-case")] +pub enum RegexTestOption { + Anchored, + CaseInsensitive, + NoUnicode, + Escaped, + #[serde(rename = "invalid-utf8")] + InvalidUTF8, +} + +#[derive(Clone, Copy, Deserialize, Eq, PartialEq)] +pub struct Match { + pub start: usize, + pub 
end: usize, +} + +impl RegexTestCollection { + fn new() -> RegexTestCollection { + RegexTestCollection { by_name: BTreeMap::new() } + } + + fn extend(&mut self, tests: RegexTests) { + for test in tests.tests { + let name = test.name.clone(); + if self.by_name.contains_key(&name) { + panic!("found duplicate test {}", name); + } + self.by_name.insert(name, test); + } + } + + pub fn tests(&self) -> Vec<&RegexTest> { + self.by_name.values().collect() + } +} + +impl RegexTests { + fn load(path: &str, slice: &[u8]) -> RegexTests { + let mut data: RegexTests = toml::from_slice(slice) + .expect(&format!("failed to load {}", path)); + for test in &mut data.tests { + if test.options.contains(&RegexTestOption::Escaped) { + test.input = unescape_bytes(&test.input); + } + } + data + } +} + +#[derive(Debug)] +pub struct RegexTester { + asserted: bool, + results: RegexTestResults, + skip_expensive: bool, + whitelist: Vec<regex::Regex>, + blacklist: Vec<regex::Regex>, +} + +impl Drop for RegexTester { + fn drop(&mut self) { + // If we haven't asserted yet, then the test is probably buggy, so + // fail it. But if we're already panicking (e.g., a bug in the regex + // engine), then don't double-panic, which causes an immediate abort. 
+ if !thread::panicking() && !self.asserted { + panic!("must call RegexTester::assert at end of test"); + } + } +} + +impl RegexTester { + pub fn new() -> RegexTester { + let mut tester = RegexTester { + asserted: false, + results: RegexTestResults::default(), + skip_expensive: false, + whitelist: vec![], + blacklist: vec![], + }; + for x in env::var("REGEX_TEST").unwrap_or("".to_string()).split(",") { + let x = x.trim(); + if x.is_empty() { + continue; + } + if x.starts_with("-") { + tester = tester.blacklist(&x[1..]); + } else { + tester = tester.whitelist(x); + } + } + tester + } + + pub fn skip_expensive(mut self) -> RegexTester { + self.skip_expensive = true; + self + } + + pub fn whitelist(mut self, name: &str) -> RegexTester { + self.whitelist.push(regex::Regex::new(name).unwrap()); + self + } + + pub fn blacklist(mut self, name: &str) -> RegexTester { + self.blacklist.push(regex::Regex::new(name).unwrap()); + self + } + + pub fn assert(&mut self) { + self.asserted = true; + self.results.assert(); + } + + pub fn build_regex<S: StateID>( + &self, + mut builder: RegexBuilder, + test: &RegexTest, + ) -> Option<Regex<DenseDFA<Vec<S>, S>>> { + if self.skip(test) { + return None; + } + self.apply_options(test, &mut builder); + + match builder.build_with_size::<S>(&test.pattern) { + Ok(re) => Some(re), + Err(err) => { + if let ErrorKind::Unsupported(_) = *err.kind() { + None + } else { + panic!( + "failed to build {:?} with pattern '{:?}': {}", + test.name, test.pattern, err + ); + } + } + } + } + + pub fn test_all<'a, I, T>(&mut self, builder: RegexBuilder, tests: I) + where + I: IntoIterator<IntoIter = T, Item = &'a RegexTest>, + T: Iterator<Item = &'a RegexTest>, + { + for test in tests { + let builder = builder.clone(); + let re: Regex = match self.build_regex(builder, test) { + None => continue, + Some(re) => re, + }; + self.test(test, &re); + } + } + + pub fn test<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) { + self.test_is_match(test, re); + 
self.test_find(test, re); + // Some tests (namely, fowler) are designed only to detect the + // first match even if there are more subsequent matches. To that + // end, we only test match iteration when the number of matches + // expected is not 1, or if the test name has 'iter' in it. + if test.name.contains("iter") || test.matches.len() != 1 { + self.test_find_iter(test, re); + } + } + + pub fn test_is_match<'a, D: DFA>( + &mut self, + test: &RegexTest, + re: &Regex<D>, + ) { + self.asserted = false; + + let got = re.is_match(&test.input); + let expected = test.matches.len() >= 1; + if got == expected { + self.results.succeeded.push(test.clone()); + return; + } + self.results.failed.push(RegexTestFailure { + test: test.clone(), + kind: RegexTestFailureKind::IsMatch, + }); + } + + pub fn test_find<'a, D: DFA>(&mut self, test: &RegexTest, re: &Regex<D>) { + self.asserted = false; + + let got = + re.find(&test.input).map(|(start, end)| Match { start, end }); + if got == test.matches.get(0).map(|&m| m) { + self.results.succeeded.push(test.clone()); + return; + } + self.results.failed.push(RegexTestFailure { + test: test.clone(), + kind: RegexTestFailureKind::Find { got }, + }); + } + + pub fn test_find_iter<'a, D: DFA>( + &mut self, + test: &RegexTest, + re: &Regex<D>, + ) { + self.asserted = false; + + let got: Vec<Match> = re + .find_iter(&test.input) + .map(|(start, end)| Match { start, end }) + .collect(); + if got == test.matches { + self.results.succeeded.push(test.clone()); + return; + } + self.results.failed.push(RegexTestFailure { + test: test.clone(), + kind: RegexTestFailureKind::FindIter { got }, + }); + } + + fn skip(&self, test: &RegexTest) -> bool { + if self.skip_expensive { + if test.name.starts_with("repetition-long") { + return true; + } + } + if !self.blacklist.is_empty() { + if self.blacklist.iter().any(|re| re.is_match(&test.name)) { + return true; + } + } + if !self.whitelist.is_empty() { + if !self.whitelist.iter().any(|re| 
re.is_match(&test.name)) { + return true; + } + } + false + } + + fn apply_options(&self, test: &RegexTest, builder: &mut RegexBuilder) { + for opt in &test.options { + match *opt { + RegexTestOption::Anchored => { + builder.anchored(true); + } + RegexTestOption::CaseInsensitive => { + builder.case_insensitive(true); + } + RegexTestOption::NoUnicode => { + builder.unicode(false); + } + RegexTestOption::Escaped => {} + RegexTestOption::InvalidUTF8 => { + builder.allow_invalid_utf8(true); + } + } + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct RegexTestResults { + /// Tests that succeeded. + pub succeeded: Vec<RegexTest>, + /// Failed tests, indexed by group name. + pub failed: Vec<RegexTestFailure>, +} + +#[derive(Clone, Debug)] +pub struct RegexTestFailure { + test: RegexTest, + kind: RegexTestFailureKind, +} + +#[derive(Clone, Debug)] +pub enum RegexTestFailureKind { + IsMatch, + Find { got: Option<Match> }, + FindIter { got: Vec<Match> }, +} + +impl RegexTestResults { + pub fn assert(&self) { + if self.failed.is_empty() { + return; + } + let failures = self + .failed + .iter() + .map(|f| f.to_string()) + .collect::<Vec<String>>() + .join("\n\n"); + panic!( + "found {} failures:\n{}\n{}\n{}\n\n\ + Set the REGEX_TEST environment variable to filter tests, \n\ + e.g., REGEX_TEST=crazy-misc,-crazy-misc2 runs every test \n\ + whose name contains crazy-misc but not crazy-misc2\n\n", + self.failed.len(), + "~".repeat(79), + failures.trim(), + "~".repeat(79) + ) + } +} + +impl fmt::Display for RegexTestFailure { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}: {}\n \ + options: {:?}\n \ + pattern: {}\n \ + pattern (escape): {}\n \ + input: {}\n \ + input (escape): {}\n \ + input (hex): {}", + self.test.name, + self.kind.fmt(&self.test)?, + self.test.options, + self.test.pattern, + escape_default(&self.test.pattern), + nice_raw_bytes(&self.test.input), + escape_bytes(&self.test.input), + hex_bytes(&self.test.input) + ) + } +} + 
+impl RegexTestFailureKind { + fn fmt(&self, test: &RegexTest) -> Result<String, fmt::Error> { + let mut buf = String::new(); + match *self { + RegexTestFailureKind::IsMatch => { + if let Some(&m) = test.matches.get(0) { + write!(buf, "expected match (at {}), but none found", m)? + } else { + write!(buf, "expected no match, but found a match")? + } + } + RegexTestFailureKind::Find { got } => write!( + buf, + "expected {:?}, but found {:?}", + test.matches.get(0), + got + )?, + RegexTestFailureKind::FindIter { ref got } => write!( + buf, + "expected {:?}, but found {:?}", + test.matches, got + )?, + } + Ok(buf) + } +} + +impl fmt::Display for Match { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "({}, {})", self.start, self.end) + } +} + +impl fmt::Debug for Match { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "({}, {})", self.start, self.end) + } +} + +fn nice_raw_bytes(bytes: &[u8]) -> String { + use std::str; + + match str::from_utf8(bytes) { + Ok(s) => s.to_string(), + Err(_) => escape_bytes(bytes), + } +} + +fn escape_bytes(bytes: &[u8]) -> String { + use std::ascii; + + let escaped = bytes + .iter() + .flat_map(|&b| ascii::escape_default(b)) + .collect::<Vec<u8>>(); + String::from_utf8(escaped).unwrap() +} + +fn hex_bytes(bytes: &[u8]) -> String { + bytes.iter().map(|&b| format!(r"\x{:02X}", b)).collect() +} + +fn escape_default(s: &str) -> String { + s.chars().flat_map(|c| c.escape_default()).collect() +} + +fn unescape_bytes(bytes: &[u8]) -> Vec<u8> { + use std::str; + use unescape::unescape; + + unescape(&str::from_utf8(bytes).expect("all input must be valid UTF-8")) +} diff --git a/vendor/regex-automata/tests/regression.rs b/vendor/regex-automata/tests/regression.rs new file mode 100644 index 000000000..c2d2c1226 --- /dev/null +++ b/vendor/regex-automata/tests/regression.rs @@ -0,0 +1,42 @@ +use regex_automata::{dense, DFA}; + +// A regression test for checking that minimization correctly translates +// 
whether a state is a match state or not. Previously, it was possible for +// minimization to mark a non-matching state as matching. +#[test] +fn minimize_sets_correct_match_states() { + let pattern = + // This is a subset of the grapheme matching regex. I couldn't seem + // to get a repro any smaller than this unfortunately. + r"(?x) + (?: + \p{gcb=Prepend}* + (?: + (?: + (?: + \p{gcb=L}* + (?:\p{gcb=V}+|\p{gcb=LV}\p{gcb=V}*|\p{gcb=LVT}) + \p{gcb=T}* + ) + | + \p{gcb=L}+ + | + \p{gcb=T}+ + ) + | + \p{Extended_Pictographic} + (?:\p{gcb=Extend}*\p{gcb=ZWJ}\p{Extended_Pictographic})* + | + [^\p{gcb=Control}\p{gcb=CR}\p{gcb=LF}] + ) + [\p{gcb=Extend}\p{gcb=ZWJ}\p{gcb=SpacingMark}]* + ) + "; + + let dfa = dense::Builder::new() + .minimize(true) + .anchored(true) + .build(pattern) + .unwrap(); + assert_eq!(None, dfa.find(b"\xE2")); +} diff --git a/vendor/regex-automata/tests/suite.rs b/vendor/regex-automata/tests/suite.rs new file mode 100644 index 000000000..839719403 --- /dev/null +++ b/vendor/regex-automata/tests/suite.rs @@ -0,0 +1,250 @@ +use regex_automata::{DenseDFA, Regex, RegexBuilder, SparseDFA}; + +use collection::{RegexTester, SUITE}; + +#[test] +fn unminimized_standard() { + let mut builder = RegexBuilder::new(); + builder.minimize(false).premultiply(false).byte_classes(false); + + let mut tester = RegexTester::new().skip_expensive(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn unminimized_premultiply() { + let mut builder = RegexBuilder::new(); + builder.minimize(false).premultiply(true).byte_classes(false); + + let mut tester = RegexTester::new().skip_expensive(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn unminimized_byte_class() { + let mut builder = RegexBuilder::new(); + builder.minimize(false).premultiply(false).byte_classes(true); + + let mut tester = RegexTester::new(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn 
unminimized_premultiply_byte_class() { + let mut builder = RegexBuilder::new(); + builder.minimize(false).premultiply(true).byte_classes(true); + + let mut tester = RegexTester::new(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn unminimized_standard_no_nfa_shrink() { + let mut builder = RegexBuilder::new(); + builder + .minimize(false) + .premultiply(false) + .byte_classes(false) + .shrink(false); + + let mut tester = RegexTester::new().skip_expensive(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn minimized_standard() { + let mut builder = RegexBuilder::new(); + builder.minimize(true).premultiply(false).byte_classes(false); + + let mut tester = RegexTester::new().skip_expensive(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn minimized_premultiply() { + let mut builder = RegexBuilder::new(); + builder.minimize(true).premultiply(true).byte_classes(false); + + let mut tester = RegexTester::new().skip_expensive(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn minimized_byte_class() { + let mut builder = RegexBuilder::new(); + builder.minimize(true).premultiply(false).byte_classes(true); + + let mut tester = RegexTester::new(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn minimized_premultiply_byte_class() { + let mut builder = RegexBuilder::new(); + builder.minimize(true).premultiply(true).byte_classes(true); + + let mut tester = RegexTester::new(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +#[test] +fn minimized_standard_no_nfa_shrink() { + let mut builder = RegexBuilder::new(); + builder + .minimize(true) + .premultiply(false) + .byte_classes(false) + .shrink(false); + + let mut tester = RegexTester::new().skip_expensive(); + tester.test_all(builder, SUITE.tests()); + tester.assert(); +} + +// A basic sanity test that checks we can convert a regex to a smaller +// 
representation and that the resulting regex still passes our tests. +// +// If tests grow minimal regexes that cannot be represented in 16 bits, then +// we'll either want to skip those or increase the size to test to u32. +#[test] +fn u16() { + let mut builder = RegexBuilder::new(); + builder.minimize(true).premultiply(false).byte_classes(true); + + let mut tester = RegexTester::new().skip_expensive(); + for test in SUITE.tests() { + let builder = builder.clone(); + let re: Regex = match tester.build_regex(builder, test) { + None => continue, + Some(re) => re, + }; + let small_re = Regex::from_dfas( + re.forward().to_u16().unwrap(), + re.reverse().to_u16().unwrap(), + ); + + tester.test(test, &small_re); + } + tester.assert(); +} + +// Test that sparse DFAs work using the standard configuration. +#[test] +fn sparse_unminimized_standard() { + let mut builder = RegexBuilder::new(); + builder.minimize(false).premultiply(false).byte_classes(false); + + let mut tester = RegexTester::new().skip_expensive(); + for test in SUITE.tests() { + let builder = builder.clone(); + let re: Regex = match tester.build_regex(builder, test) { + None => continue, + Some(re) => re, + }; + let fwd = re.forward().to_sparse().unwrap(); + let rev = re.reverse().to_sparse().unwrap(); + let sparse_re = Regex::from_dfas(fwd, rev); + + tester.test(test, &sparse_re); + } + tester.assert(); +} + +// Test that sparse DFAs work after converting them to a different state ID +// representation. 
+#[test] +fn sparse_u16() { + let mut builder = RegexBuilder::new(); + builder.minimize(true).premultiply(false).byte_classes(false); + + let mut tester = RegexTester::new().skip_expensive(); + for test in SUITE.tests() { + let builder = builder.clone(); + let re: Regex = match tester.build_regex(builder, test) { + None => continue, + Some(re) => re, + }; + let fwd = re.forward().to_sparse().unwrap().to_u16().unwrap(); + let rev = re.reverse().to_sparse().unwrap().to_u16().unwrap(); + let sparse_re = Regex::from_dfas(fwd, rev); + + tester.test(test, &sparse_re); + } + tester.assert(); +} + +// Another basic sanity test that checks we can serialize and then deserialize +// a regex, and that the resulting regex can be used for searching correctly. +#[test] +fn serialization_roundtrip() { + let mut builder = RegexBuilder::new(); + builder.premultiply(false).byte_classes(true); + + let mut tester = RegexTester::new().skip_expensive(); + for test in SUITE.tests() { + let builder = builder.clone(); + let re: Regex = match tester.build_regex(builder, test) { + None => continue, + Some(re) => re, + }; + + let fwd_bytes = re.forward().to_bytes_native_endian().unwrap(); + let rev_bytes = re.reverse().to_bytes_native_endian().unwrap(); + let fwd: DenseDFA<&[usize], usize> = + unsafe { DenseDFA::from_bytes(&fwd_bytes) }; + let rev: DenseDFA<&[usize], usize> = + unsafe { DenseDFA::from_bytes(&rev_bytes) }; + let re = Regex::from_dfas(fwd, rev); + + tester.test(test, &re); + } + tester.assert(); +} + +// A basic sanity test that checks we can serialize and then deserialize a +// regex using sparse DFAs, and that the resulting regex can be used for +// searching correctly. 
+#[test] +fn sparse_serialization_roundtrip() { + let mut builder = RegexBuilder::new(); + builder.byte_classes(true); + + let mut tester = RegexTester::new().skip_expensive(); + for test in SUITE.tests() { + let builder = builder.clone(); + let re: Regex = match tester.build_regex(builder, test) { + None => continue, + Some(re) => re, + }; + + let fwd_bytes = re + .forward() + .to_sparse() + .unwrap() + .to_bytes_native_endian() + .unwrap(); + let rev_bytes = re + .reverse() + .to_sparse() + .unwrap() + .to_bytes_native_endian() + .unwrap(); + let fwd: SparseDFA<&[u8], usize> = + unsafe { SparseDFA::from_bytes(&fwd_bytes) }; + let rev: SparseDFA<&[u8], usize> = + unsafe { SparseDFA::from_bytes(&rev_bytes) }; + let re = Regex::from_dfas(fwd, rev); + + tester.test(test, &re); + } + tester.assert(); +} diff --git a/vendor/regex-automata/tests/tests.rs b/vendor/regex-automata/tests/tests.rs new file mode 100644 index 000000000..fb4cd7717 --- /dev/null +++ b/vendor/regex-automata/tests/tests.rs @@ -0,0 +1,25 @@ +#[cfg(feature = "std")] +#[macro_use] +extern crate lazy_static; +#[cfg(feature = "std")] +extern crate regex; +#[cfg(feature = "std")] +extern crate regex_automata; +#[cfg(feature = "std")] +extern crate serde; +#[cfg(feature = "std")] +extern crate serde_bytes; +#[cfg(feature = "std")] +#[macro_use] +extern crate serde_derive; +#[cfg(feature = "std")] +extern crate toml; + +#[cfg(feature = "std")] +mod collection; +#[cfg(feature = "std")] +mod regression; +#[cfg(feature = "std")] +mod suite; +#[cfg(feature = "std")] +mod unescape; diff --git a/vendor/regex-automata/tests/unescape.rs b/vendor/regex-automata/tests/unescape.rs new file mode 100644 index 000000000..43fe04e71 --- /dev/null +++ b/vendor/regex-automata/tests/unescape.rs @@ -0,0 +1,84 @@ +#[derive(Clone, Copy, Eq, PartialEq)] +enum State { + /// The state after seeing a `\`. + Escape, + /// The state after seeing a `\x`. + HexFirst, + /// The state after seeing a `\x[0-9A-Fa-f]`. 
+ HexSecond(char), + /// Default state. + Literal, +} + +pub fn unescape(s: &str) -> Vec<u8> { + use self::State::*; + + let mut bytes = vec![]; + let mut state = Literal; + for c in s.chars() { + match state { + Escape => match c { + '\\' => { + bytes.push(b'\\'); + state = Literal; + } + 'n' => { + bytes.push(b'\n'); + state = Literal; + } + 'r' => { + bytes.push(b'\r'); + state = Literal; + } + 't' => { + bytes.push(b'\t'); + state = Literal; + } + 'x' => { + state = HexFirst; + } + c => { + bytes.extend(format!(r"\{}", c).into_bytes()); + state = Literal; + } + }, + HexFirst => match c { + '0'..='9' | 'A'..='F' | 'a'..='f' => { + state = HexSecond(c); + } + c => { + bytes.extend(format!(r"\x{}", c).into_bytes()); + state = Literal; + } + }, + HexSecond(first) => match c { + '0'..='9' | 'A'..='F' | 'a'..='f' => { + let ordinal = format!("{}{}", first, c); + let byte = u8::from_str_radix(&ordinal, 16).unwrap(); + bytes.push(byte); + state = Literal; + } + c => { + let original = format!(r"\x{}{}", first, c); + bytes.extend(original.into_bytes()); + state = Literal; + } + }, + Literal => match c { + '\\' => { + state = Escape; + } + c => { + bytes.extend(c.to_string().as_bytes()); + } + }, + } + } + match state { + Escape => bytes.push(b'\\'), + HexFirst => bytes.extend(b"\\x"), + HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()), + Literal => {} + } + bytes +} |