summaryrefslogtreecommitdiffstats
path: root/vendor/icu_list/src/string_matcher.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/icu_list/src/string_matcher.rs')
-rw-r--r--vendor/icu_list/src/string_matcher.rs213
1 files changed, 0 insertions, 213 deletions
diff --git a/vendor/icu_list/src/string_matcher.rs b/vendor/icu_list/src/string_matcher.rs
deleted file mode 100644
index ba4833605..000000000
--- a/vendor/icu_list/src/string_matcher.rs
+++ /dev/null
@@ -1,213 +0,0 @@
-// This file is part of ICU4X. For terms of use, please see the file
-// called LICENSE at the top level of the ICU4X source tree
-// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
-
-use alloc::borrow::Cow;
-#[cfg(any(feature = "serde_human", feature = "datagen"))]
-use alloc::string::ToString;
-use icu_provider::{yoke, zerofrom};
-use regex_automata::dfa::sparse::DFA;
-use regex_automata::dfa::Automaton;
-
-/// A precompiled regex
-#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[allow(clippy::exhaustive_structs)] // not a public API
-pub struct StringMatcher<'data> {
- // Safety: These always represent a valid DFA (DFA::from_bytes(dfa_bytes).is_ok())
- dfa_bytes: Cow<'data, [u8]>,
- pattern: Option<Cow<'data, str>>,
-}
-
-impl PartialEq for StringMatcher<'_> {
- fn eq(&self, other: &Self) -> bool {
- self.dfa_bytes == other.dfa_bytes
- }
-}
-
-#[cfg(feature = "datagen")]
-impl databake::Bake for StringMatcher<'_> {
- fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
- env.insert("icu_list");
- let bytes = (&&*self.dfa_bytes).bake(env);
- // Safe because our own data is safe
- databake::quote! {
- unsafe { ::icu_list::provider::StringMatcher::from_dfa_bytes_unchecked(#bytes) }
- }
- }
-}
-
-#[cfg(feature = "datagen")]
-impl serde::Serialize for StringMatcher<'_> {
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::ser::Serializer,
- {
- if serializer.is_human_readable() {
- self.pattern
- .as_ref()
- .map(|pattern| pattern.serialize(serializer))
- .unwrap_or_else(|| {
- use serde::ser::Error;
- Err(S::Error::custom(
- "cannot serialize a deserialized bincode StringMatcher to JSON",
- ))
- })
- } else {
- self.dfa_bytes.serialize(serializer)
- }
- }
-}
-
-#[cfg(feature = "serde")]
-impl<'de: 'data, 'data> serde::Deserialize<'de> for StringMatcher<'data> {
- fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
- where
- D: serde::de::Deserializer<'de>,
- {
- use icu_provider::serde::borrow_de_utils::CowBytesWrap;
-
- #[cfg(feature = "serde_human")]
- if deserializer.is_human_readable() {
- use serde::de::Error;
- return StringMatcher::new(<&str>::deserialize(deserializer)?)
- .map_err(|e| D::Error::custom(e.to_string()));
- }
-
- if cfg!(target_endian = "big") {
- // TODO: Convert LE to BE. For now we just behave like the
- // accept-nothing DFA on BE systems.
- return Ok(StringMatcher {
- dfa_bytes: Cow::Borrowed(&[]),
- pattern: None,
- });
- }
-
- let dfa_bytes = <CowBytesWrap<'de>>::deserialize(deserializer)?.0;
-
- // Verify safety invariant
- DFA::from_bytes(&dfa_bytes).map_err(|e| {
- use serde::de::Error;
- D::Error::custom(alloc::format!("Invalid DFA bytes: {}", e))
- })?;
-
- Ok(StringMatcher {
- dfa_bytes,
- pattern: None,
- })
- }
-}
-
-impl<'data> StringMatcher<'data> {
- /// Creates a `StringMatcher` from a serialized DFA. Used internally by databake.
- ///
- /// # Safety
- ///
- /// `dfa_bytes` has to be a valid DFA (regex_automata::dfa::sparse::DFA::from_bytes(dfa_bytes).is_ok())
- pub const unsafe fn from_dfa_bytes_unchecked(dfa_bytes: &'data [u8]) -> Self {
- Self {
- dfa_bytes: Cow::Borrowed(dfa_bytes),
- pattern: None,
- }
- }
-
- /// Creates a `StringMatcher` from regex.
- #[cfg(any(feature = "datagen", feature = "serde_human",))]
- pub fn new(pattern: &str) -> Result<Self, icu_provider::DataError> {
- use regex_automata::{
- dfa::dense::{Builder, Config},
- SyntaxConfig,
- };
-
- let mut builder = Builder::new();
- let dfa = builder
- .syntax(SyntaxConfig::new().case_insensitive(true))
- .configure(Config::new().anchored(true).minimize(true))
- .build(pattern)
- .map_err(|_| {
- icu_provider::DataError::custom("Cannot build DFA").with_display_context(&pattern)
- })?
- .to_sparse()
- .map_err(|_| {
- icu_provider::DataError::custom("Cannot sparsify DFA")
- .with_display_context(&pattern)
- })?;
-
- Ok(Self {
- dfa_bytes: dfa.to_bytes_little_endian().into(),
- pattern: Some(pattern.to_string().into()),
- })
- }
-
- #[allow(clippy::unwrap_used)] // by invariant
- pub(crate) fn test(&self, string: &str) -> bool {
- cfg!(target_endian = "little")
- && matches!(
- // Safe due to struct invariant.
- unsafe { DFA::from_bytes_unchecked(&self.dfa_bytes).unwrap().0 }
- .find_earliest_fwd(string.as_bytes()),
- Ok(Some(_))
- )
- }
-}
-
-#[cfg(all(test, feature = "datagen"))]
-mod test {
- use super::*;
-
- #[test]
- fn test_string_matcher() {
- let matcher = StringMatcher::new("abc.*").unwrap();
- assert!(!matcher.test("ab"));
- assert!(matcher.test("abc"));
- assert!(matcher.test("abcde"));
- }
-
- #[test]
- fn test_postcard_serialization() {
- let matcher = StringMatcher::new("abc*").unwrap();
-
- let mut bytes = postcard::to_stdvec(&matcher).unwrap();
- assert_eq!(
- postcard::from_bytes::<StringMatcher>(&bytes).unwrap(),
- matcher
- );
-
- // A corrupted byte leads to an error
- bytes[17] ^= 255;
- assert!(postcard::from_bytes::<StringMatcher>(&bytes).is_err());
- bytes[17] ^= 255;
-
- // An extra byte leads to an error
- bytes.insert(123, 40);
- assert!(postcard::from_bytes::<StringMatcher>(&bytes).is_err());
- bytes.remove(123);
-
- // Missing bytes lead to an error
- assert!(postcard::from_bytes::<StringMatcher>(&bytes[0..bytes.len() - 5]).is_err());
- }
-
- #[test]
- #[cfg(feature = "serde_human")]
- fn test_json_serialization() {
- let matcher = StringMatcher::new("abc*").unwrap();
-
- let json = serde_json::to_string(&matcher).unwrap();
- assert_eq!(
- serde_json::from_str::<StringMatcher>(&json).unwrap(),
- matcher
- );
- assert!(serde_json::from_str::<StringMatcher>(".*[").is_err());
- }
-
- #[test]
- #[ignore] // https://github.com/rust-lang/rust/issues/98906
- fn databake() {
- databake::test_bake!(
- StringMatcher,
- const: unsafe {
- crate::provider::StringMatcher::from_dfa_bytes_unchecked(&[49u8, 50u8, 51u8, ])
- },
- icu_list
- );
- }
-}