summaryrefslogtreecommitdiffstats
path: root/vendor/icu_list/src/provider
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--vendor/icu_list/src/provider.rs465
-rw-r--r--vendor/icu_list/src/provider/mod.rs261
-rw-r--r--vendor/icu_list/src/provider/serde_dfa.rs244
3 files changed, 505 insertions, 465 deletions
diff --git a/vendor/icu_list/src/provider.rs b/vendor/icu_list/src/provider.rs
deleted file mode 100644
index 27f3e4fec..000000000
--- a/vendor/icu_list/src/provider.rs
+++ /dev/null
@@ -1,465 +0,0 @@
-// This file is part of ICU4X. For terms of use, please see the file
-// called LICENSE at the top level of the ICU4X source tree
-// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
-
-// Provider structs must be stable
-#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
-
-//! Data provider struct definitions for this ICU4X component.
-//!
-//! Read more about data providers: [`icu_provider`]
-
-use crate::ListLength;
-use alloc::borrow::Cow;
-use icu_provider::DataMarker;
-use icu_provider::{yoke, zerofrom};
-use writeable::{LengthHint, Writeable};
-
-pub use crate::string_matcher::StringMatcher;
-
-/// Symbols and metadata required for [`ListFormatter`](crate::ListFormatter).
-#[icu_provider::data_struct(
- AndListV1Marker = "list/and@1",
- OrListV1Marker = "list/or@1",
- UnitListV1Marker = "list/unit@1"
-)]
-#[derive(Clone, Debug)]
-#[cfg_attr(
- feature = "datagen",
- derive(serde::Serialize, databake::Bake),
- databake(path = icu_list::provider),
-)]
-pub struct ListFormatterPatternsV1<'data>(
- #[cfg_attr(feature = "datagen", serde(with = "deduplicating_array"))]
- /// The patterns in the order start, middle, end, pair, short_start, short_middle,
- /// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair,
- pub [ConditionalListJoinerPattern<'data>; 12],
-);
-
-#[cfg(feature = "serde")]
-impl<'de> serde::Deserialize<'de> for ListFormatterPatternsV1<'de> {
- fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
- where
- D: serde::de::Deserializer<'de>,
- {
- #[cfg(not(feature = "serde_human"))]
- if deserializer.is_human_readable() {
- use serde::de::Error;
- return Err(D::Error::custom(
- "Deserializing human-readable ListFormatter data requires the 'serde_human' feature",
- ));
- }
-
- Ok(ListFormatterPatternsV1(deduplicating_array::deserialize(
- deserializer,
- )?))
- }
-}
-
-pub(crate) struct ErasedListV1Marker;
-
-impl DataMarker for ErasedListV1Marker {
- type Yokeable = ListFormatterPatternsV1<'static>;
-}
-
-impl<'data> ListFormatterPatternsV1<'data> {
- pub(crate) fn start(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
- #![allow(clippy::indexing_slicing)] // style as usize < 3
- &self.0[4 * (style as usize)]
- }
-
- pub(crate) fn middle(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
- #![allow(clippy::indexing_slicing)] // style as usize < 3
- &self.0[4 * (style as usize) + 1]
- }
-
- pub(crate) fn end(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
- #![allow(clippy::indexing_slicing)] // style as usize < 3
- &self.0[4 * (style as usize) + 2]
- }
-
- pub(crate) fn pair(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
- #![allow(clippy::indexing_slicing)] // style as usize < 3
- &self.0[4 * (style as usize) + 3]
- }
-
- /// The range of the number of bytes required by the list literals to join a
- /// list of length `len`. If none of the patterns are conditional, this is exact.
- pub(crate) fn size_hint(&self, style: ListLength, len: usize) -> LengthHint {
- match len {
- 0 | 1 => LengthHint::exact(0),
- 2 => self.pair(style).size_hint(),
- n => {
- self.start(style).size_hint()
- + self.middle(style).size_hint() * (n - 3)
- + self.end(style).size_hint()
- }
- }
- }
-}
-
-/// A pattern that can behave conditionally on the next element.
-#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[cfg_attr(
- feature = "datagen",
- derive(serde::Serialize, databake::Bake),
- databake(path = icu_list::provider),
-)]
-#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
-pub struct ConditionalListJoinerPattern<'data> {
- /// The default pattern
- #[cfg_attr(feature = "serde", serde(borrow))]
- pub default: ListJoinerPattern<'data>,
- /// And optional special case
- #[cfg_attr(feature = "serde", serde(borrow))]
- pub special_case: Option<SpecialCasePattern<'data>>,
-}
-
-/// The special case of a [`ConditionalListJoinerPattern`]
-#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[cfg_attr(
- feature = "datagen",
- derive(serde::Serialize, databake::Bake),
- databake(path = icu_list::provider),
-)]
-#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
-pub struct SpecialCasePattern<'data> {
- /// The condition on the following element
- #[cfg_attr(feature = "serde", serde(borrow))]
- pub condition: StringMatcher<'data>,
- /// The pattern if the condition matches
- #[cfg_attr(feature = "serde", serde(borrow))]
- pub pattern: ListJoinerPattern<'data>,
-}
-
-/// A pattern containing two numeric placeholders ("{0}, and {1}.")
-#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
-pub struct ListJoinerPattern<'data> {
- /// The pattern string without the placeholders
- string: Cow<'data, str>,
- /// The index of the first placeholder. Always <= index_1.
- // Always 0 for CLDR data, so we don't need to serialize it.
- // In-memory we have free space for it as index_1 doesn't
- // fill a word.
- #[cfg_attr(feature = "datagen", serde(skip))]
- index_0: u8,
- /// The index of the second placeholder. Always < string.len().
- index_1: u8,
-}
-
-#[cfg(feature = "serde")]
-impl<'de: 'data, 'data> serde::Deserialize<'de> for ListJoinerPattern<'data> {
- fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
- where
- D: serde::Deserializer<'de>,
- {
- #[derive(serde::Deserialize)]
- struct Dummy<'data> {
- #[cfg_attr(feature = "serde", serde(borrow))]
- string: Cow<'data, str>,
- index_1: u8,
- }
- let Dummy { string, index_1 } = Dummy::deserialize(deserializer)?;
-
- if index_1 as usize > string.len() {
- use serde::de::Error;
- Err(D::Error::custom("invalid index_1"))
- } else {
- Ok(ListJoinerPattern {
- string,
- index_0: 0,
- index_1,
- })
- }
- }
-}
-
-impl<'a> ListJoinerPattern<'a> {
- /// Constructs a [`ListJoinerPattern`] from raw parts. Used by databake.
- ///
- /// # Safety
- /// index_1 may be at most string.len()
- pub const unsafe fn from_parts_unchecked(string: &'a str, index_1: u8) -> Self {
- Self {
- string: Cow::Borrowed(string),
- index_0: 0,
- index_1,
- }
- }
-}
-
-pub(crate) type PatternParts<'a> = (&'a str, &'a str, &'a str);
-
-impl<'a> ConditionalListJoinerPattern<'a> {
- pub(crate) fn parts<'b, W: Writeable + ?Sized>(
- &'a self,
- following_value: &'b W,
- ) -> PatternParts<'a> {
- match &self.special_case {
- Some(SpecialCasePattern { condition, pattern })
- // TODO: Implement lookahead instead of materializing here.
- if condition.test(&*following_value.write_to_string()) =>
- {
- pattern.borrow_tuple()
- }
- _ => self.default.borrow_tuple(),
- }
- }
-
- /// The expected length of this pattern
- pub fn size_hint(&'a self) -> LengthHint {
- let mut hint = self.default.size_hint();
- if let Some(special_case) = &self.special_case {
- hint |= special_case.pattern.size_hint()
- }
- hint
- }
-}
-
-impl<'data> ListJoinerPattern<'data> {
- fn borrow_tuple(&'data self) -> PatternParts<'data> {
- #![allow(clippy::indexing_slicing)] // by invariant
- let index_0 = self.index_0 as usize;
- let index_1 = self.index_1 as usize;
- (
- &self.string[0..index_0],
- &self.string[index_0..index_1],
- &self.string[index_1..],
- )
- }
-
- fn size_hint(&self) -> LengthHint {
- LengthHint::exact(self.string.len())
- }
-}
-
-#[cfg(feature = "datagen")]
-mod datagen {
- #![allow(clippy::indexing_slicing)] // datagen
-
- use super::*;
- use icu_provider::DataError;
-
- impl<'data> ListFormatterPatternsV1<'data> {
- /// The patterns in the order start, middle, end, pair, short_start, short_middle,
- /// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair,
- pub fn try_new(patterns: [&str; 12]) -> Result<Self, DataError> {
- Ok(Self([
- ListJoinerPattern::from_str(patterns[0], true, false)?.into(),
- ListJoinerPattern::from_str(patterns[1], false, false)?.into(),
- ListJoinerPattern::from_str(patterns[2], false, true)?.into(),
- ListJoinerPattern::from_str(patterns[3], true, true)?.into(),
- ListJoinerPattern::from_str(patterns[4], true, false)?.into(),
- ListJoinerPattern::from_str(patterns[5], false, false)?.into(),
- ListJoinerPattern::from_str(patterns[6], false, true)?.into(),
- ListJoinerPattern::from_str(patterns[7], true, true)?.into(),
- ListJoinerPattern::from_str(patterns[8], true, false)?.into(),
- ListJoinerPattern::from_str(patterns[9], false, false)?.into(),
- ListJoinerPattern::from_str(patterns[10], false, true)?.into(),
- ListJoinerPattern::from_str(patterns[11], true, true)?.into(),
- ]))
- }
-
- /// Adds a special case to all `pattern`s that will evaluate to
- /// `alternative_pattern` when `regex` matches the following element.
- /// The regex is interpreted case-insensitive and anchored to the beginning, but
- /// to improve efficiency does not search for full matches. If a full match is
- /// required, use `$`.
- pub fn make_conditional(
- &mut self,
- pattern: &str,
- regex: &StringMatcher<'static>,
- alternative_pattern: &str,
- ) -> Result<(), DataError> {
- let old = ListJoinerPattern::from_str(pattern, true, true)?;
- for i in 0..12 {
- if self.0[i].default == old {
- self.0[i].special_case = Some(SpecialCasePattern {
- condition: regex.clone(),
- pattern: ListJoinerPattern::from_str(
- alternative_pattern,
- i % 4 == 0 || i % 4 == 3, // allow_prefix = start or pair
- i % 4 == 2 || i % 4 == 3, // allow_suffix = end or pair
- )?,
- });
- }
- }
- Ok(())
- }
- }
-
- impl<'data> ListJoinerPattern<'data> {
- /// Construct the pattern from a CLDR pattern string
- pub fn from_str(
- pattern: &str,
- allow_prefix: bool,
- allow_suffix: bool,
- ) -> Result<Self, DataError> {
- match (pattern.find("{0}"), pattern.find("{1}")) {
- (Some(index_0), Some(index_1))
- if index_0 < index_1
- && (allow_prefix || index_0 == 0)
- && (allow_suffix || index_1 == pattern.len() - 3) =>
- {
- if (index_0 > 0 && !cfg!(test)) || index_1 - 3 >= 256 {
- return Err(DataError::custom(
- "Found valid pattern that cannot be stored in ListFormatterPatternsV1",
- )
- .with_debug_context(pattern));
- }
- Ok(ListJoinerPattern {
- string: Cow::Owned(alloc::format!(
- "{}{}{}",
- &pattern[0..index_0],
- &pattern[index_0 + 3..index_1],
- &pattern[index_1 + 3..]
- )),
- index_0: index_0 as u8,
- index_1: (index_1 - 3) as u8,
- })
- }
- _ => Err(DataError::custom("Invalid list pattern").with_debug_context(pattern)),
- }
- }
- }
-
- impl<'data> From<ListJoinerPattern<'data>> for ConditionalListJoinerPattern<'data> {
- fn from(default: ListJoinerPattern<'data>) -> Self {
- Self {
- default,
- special_case: None,
- }
- }
- }
-
- impl databake::Bake for ListJoinerPattern<'_> {
- fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
- env.insert("icu_list");
- let string = (&*self.string).bake(env);
- let index_1 = self.index_1.bake(env);
- // Safe because our own data is safe
- databake::quote! { unsafe {
- ::icu_list::provider::ListJoinerPattern::from_parts_unchecked(#string, #index_1)
- }}
- }
- }
-}
-
-#[cfg(all(test, feature = "datagen"))]
-pub(crate) mod test {
- use super::*;
-
- pub fn test_patterns() -> ListFormatterPatternsV1<'static> {
- let mut patterns = ListFormatterPatternsV1::try_new([
- // Wide: general
- "@{0}:{1}",
- "{0},{1}",
- "{0}.{1}!",
- "${0};{1}+",
- // Short: different pattern lengths
- "{0}1{1}",
- "{0}12{1}",
- "{0}12{1}34",
- "{0}123{1}456",
- // Narrow: conditionals
- "{0}: {1}",
- "{0}, {1}",
- "{0}. {1}",
- "{0}. {1}",
- ])
- .unwrap();
- patterns
- .make_conditional("{0}. {1}", &StringMatcher::new("A").unwrap(), "{0} :o {1}")
- .unwrap();
- patterns
- }
-
- #[test]
- fn rejects_bad_patterns() {
- assert!(ListJoinerPattern::from_str("{0} and", true, true).is_err());
- assert!(ListJoinerPattern::from_str("and {1}", true, true).is_err());
- assert!(ListJoinerPattern::from_str("{1} and {0}", true, true).is_err());
- assert!(ListJoinerPattern::from_str("{1{0}}", true, true).is_err());
- assert!(ListJoinerPattern::from_str("{0\u{202e}} and {1}", true, true).is_err());
- assert!(ListJoinerPattern::from_str("{{0}} {{1}}", true, true).is_ok());
-
- assert!(ListJoinerPattern::from_str("{0} and {1} ", true, true).is_ok());
- assert!(ListJoinerPattern::from_str("{0} and {1} ", true, false).is_err());
- assert!(ListJoinerPattern::from_str(" {0} and {1}", true, true).is_ok());
- assert!(ListJoinerPattern::from_str(" {0} and {1}", false, true).is_err());
- }
-
- #[test]
- fn produces_correct_parts() {
- assert_eq!(
- test_patterns().pair(ListLength::Wide).parts(""),
- ("$", ";", "+")
- );
- }
-
- #[test]
- fn produces_correct_parts_conditionally() {
- assert_eq!(
- test_patterns().end(ListLength::Narrow).parts("A"),
- ("", " :o ", "")
- );
- assert_eq!(
- test_patterns().end(ListLength::Narrow).parts("a"),
- ("", " :o ", "")
- );
- assert_eq!(
- test_patterns().end(ListLength::Narrow).parts("ab"),
- ("", " :o ", "")
- );
- assert_eq!(
- test_patterns().end(ListLength::Narrow).parts("B"),
- ("", ". ", "")
- );
- assert_eq!(
- test_patterns().end(ListLength::Narrow).parts("BA"),
- ("", ". ", "")
- );
- }
-
- #[test]
- fn size_hint_works() {
- let pattern = test_patterns();
-
- assert_eq!(
- pattern.size_hint(ListLength::Short, 0),
- LengthHint::exact(0)
- );
- assert_eq!(
- pattern.size_hint(ListLength::Short, 1),
- LengthHint::exact(0)
- );
-
- // pair pattern "{0}123{1}456"
- assert_eq!(
- pattern.size_hint(ListLength::Short, 2),
- LengthHint::exact(6)
- );
-
- // patterns "{0}1{1}", "{0}12{1}" (x197), and "{0}12{1}34"
- assert_eq!(
- pattern.size_hint(ListLength::Short, 200),
- LengthHint::exact(1 + 2 * 197 + 4)
- );
-
- // patterns "{0}: {1}", "{0}, {1}" (x197), and "{0} :o {1}" or "{0}. {1}"
- assert_eq!(
- pattern.size_hint(ListLength::Narrow, 200),
- LengthHint::exact(2 + 197 * 2) + LengthHint::between(2, 4)
- );
- }
-
- #[test]
- fn databake() {
- databake::test_bake!(
- ListJoinerPattern,
- const: unsafe { crate::provider::ListJoinerPattern::from_parts_unchecked(", ", 2u8) },
- icu_list
- );
- }
-}
diff --git a/vendor/icu_list/src/provider/mod.rs b/vendor/icu_list/src/provider/mod.rs
new file mode 100644
index 000000000..efab7c8bc
--- /dev/null
+++ b/vendor/icu_list/src/provider/mod.rs
@@ -0,0 +1,261 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// Provider structs must be stable
+#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
+
+//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
+//!
+//! <div class="stab unstable">
+//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
+//! to be stable, their Rust representation might not be. Use with caution.
+//! </div>
+//!
+//! Read more about data providers: [`icu_provider`]
+
+use crate::ListLength;
+use alloc::borrow::Cow;
+use icu_provider::DataMarker;
+use icu_provider::{yoke, zerofrom};
+
+mod serde_dfa;
+pub use serde_dfa::SerdeDFA;
+
+/// Symbols and metadata required for [`ListFormatter`](crate::ListFormatter).
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[icu_provider::data_struct(
+ AndListV1Marker = "list/and@1",
+ OrListV1Marker = "list/or@1",
+ UnitListV1Marker = "list/unit@1"
+)]
+#[derive(Clone, Debug)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(serde::Serialize, databake::Bake),
+ databake(path = icu_list::provider),
+)]
+pub struct ListFormatterPatternsV1<'data>(
+ #[cfg_attr(feature = "datagen", serde(with = "deduplicating_array"))]
+ /// The patterns in the order start, middle, end, pair, short_start, short_middle,
+ /// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair,
+ pub [ConditionalListJoinerPattern<'data>; 12],
+);
+
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for ListFormatterPatternsV1<'de> {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::de::Deserializer<'de>,
+ {
+ #[cfg(not(feature = "serde_human"))]
+ if deserializer.is_human_readable() {
+ use serde::de::Error;
+ return Err(D::Error::custom(
+ "Deserializing human-readable ListFormatter data requires the 'serde_human' feature",
+ ));
+ }
+
+ Ok(ListFormatterPatternsV1(deduplicating_array::deserialize(
+ deserializer,
+ )?))
+ }
+}
+
+pub(crate) struct ErasedListV1Marker;
+
+impl DataMarker for ErasedListV1Marker {
+ type Yokeable = ListFormatterPatternsV1<'static>;
+}
+
+impl<'data> ListFormatterPatternsV1<'data> {
+ pub(crate) fn start(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+ #![allow(clippy::indexing_slicing)] // style as usize < 3
+ &self.0[4 * (style as usize)]
+ }
+
+ pub(crate) fn middle(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+ #![allow(clippy::indexing_slicing)] // style as usize < 3
+ &self.0[4 * (style as usize) + 1]
+ }
+
+ pub(crate) fn end(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+ #![allow(clippy::indexing_slicing)] // style as usize < 3
+ &self.0[4 * (style as usize) + 2]
+ }
+
+ pub(crate) fn pair(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+ #![allow(clippy::indexing_slicing)] // style as usize < 3
+ &self.0[4 * (style as usize) + 3]
+ }
+}
+
+/// A pattern that can behave conditionally on the next element.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(PartialEq, serde::Serialize, databake::Bake),
+ databake(path = icu_list::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+pub struct ConditionalListJoinerPattern<'data> {
+ /// The default pattern
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub default: ListJoinerPattern<'data>,
+ /// And optional special case
+ #[cfg_attr(
+ feature = "serde",
+ serde(borrow, deserialize_with = "SpecialCasePattern::deserialize_option")
+ )]
+ pub special_case: Option<SpecialCasePattern<'data>>,
+}
+
+/// The special case of a [`ConditionalListJoinerPattern`]
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
+#[cfg_attr(
+ feature = "datagen",
+ derive(PartialEq, serde::Serialize, databake::Bake),
+ databake(path = icu_list::provider),
+)]
+pub struct SpecialCasePattern<'data> {
+ /// The condition on the following element
+ pub condition: SerdeDFA<'data>,
+ /// The pattern if the condition matches
+ pub pattern: ListJoinerPattern<'data>,
+}
+
+#[cfg(feature = "serde")]
+impl<'data> SpecialCasePattern<'data> {
+ // If the condition doesn't deserialize, the whole special case becomes `None`
+ fn deserialize_option<'de: 'data, D>(deserializer: D) -> Result<Option<Self>, D::Error>
+ where
+ D: serde::de::Deserializer<'de>,
+ {
+ use serde::Deserialize;
+
+ #[derive(Deserialize)]
+ struct SpecialCasePatternOptionalDfa<'data> {
+ #[cfg_attr(
+ feature = "serde",
+ serde(borrow, deserialize_with = "SerdeDFA::maybe_deserialize")
+ )]
+ pub condition: Option<SerdeDFA<'data>>,
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ pub pattern: ListJoinerPattern<'data>,
+ }
+
+ Ok(
+ match Option::<SpecialCasePatternOptionalDfa<'data>>::deserialize(deserializer)? {
+ Some(SpecialCasePatternOptionalDfa {
+ condition: Some(condition),
+ pattern,
+ }) => Some(SpecialCasePattern { condition, pattern }),
+ _ => None,
+ },
+ )
+ }
+}
+
+/// A pattern containing two numeric placeholders ("{0}, and {1}.")
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
+pub struct ListJoinerPattern<'data> {
+ /// The pattern string without the placeholders
+ pub(crate) string: Cow<'data, str>,
+ /// The index of the first placeholder. Always <= index_1.
+ // Always 0 for CLDR data, so we don't need to serialize it.
+ // In-memory we have free space for it as index_1 doesn't
+ // fill a word.
+ #[cfg_attr(feature = "datagen", serde(skip))]
+ pub(crate) index_0: u8,
+ /// The index of the second placeholder. Always < string.len().
+ pub(crate) index_1: u8,
+}
+
+#[cfg(feature = "serde")]
+impl<'de: 'data, 'data> serde::Deserialize<'de> for ListJoinerPattern<'data> {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: serde::Deserializer<'de>,
+ {
+ #[derive(serde::Deserialize)]
+ struct Dummy<'data> {
+ #[cfg_attr(feature = "serde", serde(borrow))]
+ string: Cow<'data, str>,
+ index_1: u8,
+ }
+ let Dummy { string, index_1 } = Dummy::deserialize(deserializer)?;
+
+ if index_1 as usize > string.len() {
+ use serde::de::Error;
+ Err(D::Error::custom("invalid index_1"))
+ } else {
+ Ok(ListJoinerPattern {
+ string,
+ index_0: 0,
+ index_1,
+ })
+ }
+ }
+}
+
+impl<'a> ListJoinerPattern<'a> {
+ /// Constructs a [`ListJoinerPattern`] from raw parts. Used by databake.
+ ///
+ /// # Safety
+ /// index_1 may be at most string.len()
+ pub const unsafe fn from_parts_unchecked(string: &'a str, index_1: u8) -> Self {
+ Self {
+ string: Cow::Borrowed(string),
+ index_0: 0,
+ index_1,
+ }
+ }
+}
+
+#[cfg(feature = "datagen")]
+impl databake::Bake for ListJoinerPattern<'_> {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ env.insert("icu_list");
+ let string = (&*self.string).bake(env);
+ let index_1 = self.index_1.bake(env);
+ // Safe because our own data is safe
+ databake::quote! { unsafe {
+ ::icu_list::provider::ListJoinerPattern::from_parts_unchecked(#string, #index_1)
+ }}
+ }
+}
+
+#[cfg(all(test, feature = "datagen"))]
+#[test]
+fn databake() {
+ databake::test_bake!(
+ ListJoinerPattern,
+ const: unsafe { crate::provider::ListJoinerPattern::from_parts_unchecked(", ", 2u8) },
+ icu_list
+ );
+}
diff --git a/vendor/icu_list/src/provider/serde_dfa.rs b/vendor/icu_list/src/provider/serde_dfa.rs
new file mode 100644
index 000000000..e2424e1e9
--- /dev/null
+++ b/vendor/icu_list/src/provider/serde_dfa.rs
@@ -0,0 +1,244 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use alloc::borrow::Cow;
+use icu_provider::{yoke, zerofrom};
+use regex_automata::dfa::sparse::DFA;
+
+/// A serde-compatible version of [regex_automata::dfa::sparse::DFA]. This does not implement
+/// [`serde::Deserialize`] directly, as binary deserialization is not supported in big-endian
+/// platforms. `Self::maybe_deserialize` can be used to deserialize to `Option<SerdeDFA>`.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
+pub struct SerdeDFA<'data> {
+ // Safety: These always represent a valid DFA (DFA::from_bytes(dfa_bytes).is_ok())
+ dfa_bytes: Cow<'data, [u8]>,
+ pattern: Option<Cow<'data, str>>,
+}
+
+#[cfg(feature = "datagen")]
+impl PartialEq for SerdeDFA<'_> {
+ fn eq(&self, other: &Self) -> bool {
+ self.dfa_bytes == other.dfa_bytes
+ }
+}
+
+#[cfg(feature = "datagen")]
+impl databake::Bake for SerdeDFA<'_> {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ env.insert("icu_list");
+ let le_bytes = self.deref().to_bytes_little_endian().as_slice().bake(env);
+ let be_bytes = self.deref().to_bytes_big_endian().as_slice().bake(env);
+ // Safe because of `to_bytes_little_endian`/`to_bytes_big_endian`'s invariant.
+ databake::quote! {
+ unsafe {
+ ::icu_list::provider::SerdeDFA::from_dfa_bytes_unchecked(
+ if cfg!(target_endian = "little") {
+ &#le_bytes
+ } else {
+ &#be_bytes
+ }
+ )
+ }
+ }
+ }
+}
+
+#[cfg(feature = "datagen")]
+impl serde::Serialize for SerdeDFA<'_> {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: serde::ser::Serializer,
+ {
+ if serializer.is_human_readable() {
+ self.pattern
+ .as_ref()
+ .map(|pattern| pattern.serialize(serializer))
+ .unwrap_or_else(|| {
+ use serde::ser::Error;
+ Err(S::Error::custom(
+ "cannot serialize a deserialized bincode SerdeDFA to JSON",
+ ))
+ })
+ } else {
+ self.deref().to_bytes_little_endian().serialize(serializer)
+ }
+ }
+}
+
+#[cfg(feature = "serde")]
+impl<'data> SerdeDFA<'data> {
+ /// Deserializes to `Option<Self>`. Will return `None` for non-human-readable serialization
+ /// formats on big-endian systems, as `regex_automata` serialization is endian-sensitive.
+ pub fn maybe_deserialize<'de: 'data, D>(deserializer: D) -> Result<Option<Self>, D::Error>
+ where
+ D: serde::de::Deserializer<'de>,
+ {
+ use icu_provider::serde::borrow_de_utils::CowBytesWrap;
+ use serde::Deserialize;
+
+ #[cfg(feature = "serde_human")]
+ if deserializer.is_human_readable() {
+ #[cfg(not(feature = "std"))]
+ use alloc::string::ToString;
+ use serde::de::Error;
+ return SerdeDFA::new(Cow::<str>::deserialize(deserializer)?)
+ .map(Some)
+ .map_err(|e| D::Error::custom(e.to_string()));
+ }
+
+ let dfa_bytes = <CowBytesWrap<'de>>::deserialize(deserializer)?.0;
+
+ if cfg!(target_endian = "big") {
+ return Ok(None);
+ }
+
+ // Verify safety invariant
+ DFA::from_bytes(&dfa_bytes).map_err(|e| {
+ use serde::de::Error;
+ D::Error::custom(alloc::format!("Invalid DFA bytes: {}", e))
+ })?;
+
+ Ok(Some(SerdeDFA {
+ dfa_bytes,
+ pattern: None,
+ }))
+ }
+}
+
+impl<'data> SerdeDFA<'data> {
+ /// Creates a `SerdeDFA` from raw bytes. Used internally by databake.
+ ///
+ /// # Safety
+ ///
+ /// `dfa_bytes` has to be a valid DFA (regex_automata::dfa::sparse::DFA::from_bytes(dfa_bytes).is_ok())
+ pub const unsafe fn from_dfa_bytes_unchecked(dfa_bytes: &'data [u8]) -> Self {
+ Self {
+ dfa_bytes: Cow::Borrowed(dfa_bytes),
+ pattern: None,
+ }
+ }
+
+ /// Creates a `SerdeDFA` from a regex.
+ #[cfg(any(feature = "datagen", feature = "serde_human",))]
+ pub fn new(pattern: Cow<'data, str>) -> Result<Self, icu_provider::DataError> {
+ use regex_automata::{
+ dfa::dense::{Builder, Config},
+ SyntaxConfig,
+ };
+
+ let mut builder = Builder::new();
+ let dfa = builder
+ .syntax(SyntaxConfig::new().case_insensitive(true))
+ .configure(Config::new().anchored(true).minimize(true))
+ .build(&pattern)
+ .map_err(|_| {
+ icu_provider::DataError::custom("Cannot build DFA").with_display_context(&pattern)
+ })?
+ .to_sparse()
+ .map_err(|_| {
+ icu_provider::DataError::custom("Cannot sparsify DFA")
+ .with_display_context(&pattern)
+ })?;
+
+ Ok(Self {
+ dfa_bytes: dfa.to_bytes_native_endian().into(),
+ pattern: Some(pattern),
+ })
+ }
+
+ /// Returns the represented [`DFA`]
+ #[allow(clippy::unwrap_used)] // by invariant
+ pub fn deref(&'data self) -> DFA<&'data [u8]> {
+ // Safe due to struct invariant.
+ unsafe { DFA::from_bytes_unchecked(&self.dfa_bytes).unwrap().0 }
+ }
+}
+
+#[cfg(all(test, feature = "datagen"))]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_serde_dfa() {
+ use regex_automata::dfa::Automaton;
+
+ let matcher = SerdeDFA::new(Cow::Borrowed("abc")).unwrap();
+
+ assert!(matcher.deref().find_earliest_fwd(b"ab").unwrap().is_none());
+ assert!(matcher.deref().find_earliest_fwd(b"abc").unwrap().is_some());
+ assert!(matcher
+ .deref()
+ .find_earliest_fwd(b"abcde")
+ .unwrap()
+ .is_some());
+ assert!(matcher
+ .deref()
+ .find_earliest_fwd(b" abcde")
+ .unwrap()
+ .is_none());
+ }
+
+ #[derive(serde::Deserialize)]
+ struct OptionSerdeDFA<'data>(
+ #[serde(borrow, deserialize_with = "SerdeDFA::maybe_deserialize")] Option<SerdeDFA<'data>>,
+ );
+
+ #[test]
+ #[cfg(target_endian = "little")]
+ fn test_postcard_serialization() {
+ let matcher = SerdeDFA::new(Cow::Borrowed("abc*")).unwrap();
+
+ let mut bytes = postcard::to_stdvec(&matcher).unwrap();
+ assert_eq!(
+ postcard::from_bytes::<OptionSerdeDFA>(&bytes).unwrap().0,
+ Some(matcher)
+ );
+
+ // A corrupted byte leads to an error
+ bytes[17] ^= 255;
+ assert!(postcard::from_bytes::<OptionSerdeDFA>(&bytes).is_err());
+ bytes[17] ^= 255;
+
+ // An extra byte leads to an error
+ bytes.insert(123, 40);
+ assert!(postcard::from_bytes::<OptionSerdeDFA>(&bytes).is_err());
+ bytes.remove(123);
+
+ // Missing bytes lead to an error
+ assert!(postcard::from_bytes::<OptionSerdeDFA>(&bytes[0..bytes.len() - 5]).is_err());
+ }
+
+ #[test]
+ #[cfg(feature = "serde_human")]
+ fn test_json_serialization() {
+ let matcher = SerdeDFA::new(Cow::Borrowed("abc*")).unwrap();
+
+ let json = serde_json::to_string(&matcher).unwrap();
+ assert_eq!(
+ serde_json::from_str::<OptionSerdeDFA>(&json).unwrap().0,
+ Some(matcher)
+ );
+ assert!(serde_json::from_str::<OptionSerdeDFA>(".*[").is_err());
+ }
+
+ #[test]
+ #[ignore] // https://github.com/rust-lang/rust/issues/98906
+ fn databake() {
+ databake::test_bake!(
+ SerdeDFA,
+ const: unsafe { crate::provider::SerdeDFA::from_dfa_bytes_unchecked(if cfg!(target_endian = "little") {
+ &[1] // TODO: set this when activating the test
+ } else {
+ &[2] // TODO: set this when activating the test
+ })},
+ icu_list
+ );
+ }
+}