Merging upstream version 1.69.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:19:50 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:19:50 +0000
commit: 2e00214b3efbdfeefaa0fe9e8b8fd519de7adc35 (patch)
tree: d325add32978dbdc1db975a438b3a77d571b1ab8 /vendor/icu_list/src
parent: Releasing progress-linux version 1.68.2+dfsg1-1~progress7.99u1. (diff)
download: rustc-2e00214b3efbdfeefaa0fe9e8b8fd519de7adc35.tar.xz
rustc-2e00214b3efbdfeefaa0fe9e8b8fd519de7adc35.zip
8 files changed, 909 insertions, 682 deletions
diff --git a/vendor/icu_list/src/lazy_automaton.rs b/vendor/icu_list/src/lazy_automaton.rs
new file mode 100644
index 000000000..3431b3c9d
--- /dev/null
+++ b/vendor/icu_list/src/lazy_automaton.rs
@@ -0,0 +1,79 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use regex_automata::dfa::sparse::DFA;
+use regex_automata::dfa::Automaton;
+use regex_automata::util::id::StateID;
+use writeable::Writeable;
+
+pub trait LazyAutomaton: Automaton {
+    // Like Automaton::find_earliest_fwd(..).is_some(), but doesn't require a materialized string.
+    fn matches_earliest_fwd_lazy<S: Writeable + ?Sized>(&self, haystack: &S) -> bool;
+}
+
+impl<T: AsRef<[u8]>> LazyAutomaton for DFA<T> {
+    fn matches_earliest_fwd_lazy<S: Writeable + ?Sized>(&self, haystack: &S) -> bool {
+        struct DFAStepper<'a> {
+            dfa: &'a DFA<&'a [u8]>,
+            state: StateID,
+        }
+
+        impl core::fmt::Write for DFAStepper<'_> {
+            fn write_str(&mut self, s: &str) -> core::fmt::Result {
+                for &byte in s.as_bytes() {
+                    self.state = self.dfa.next_state(self.state, byte);
+                    if self.dfa.is_match_state(self.state) || self.dfa.is_dead_state(self.state) {
+                        // Matched or hit a dead state (no match possible): Err just stops the write early
+                        return Err(core::fmt::Error);
+                    }
+                }
+                Ok(())
+            }
+        }
+
+        let mut stepper = DFAStepper {
+            // If start == 0 the start state does not depend on the actual string, so
+            // we can just pass an empty slice.
+            state: self.start_state_forward(None, &[], 0, 0),
+            dfa: &self.as_ref(),
+        };
+
+        if haystack.write_to(&mut stepper).is_ok() {
+            stepper.state = self.next_eoi_state(stepper.state);
+        }
+
+        self.is_match_state(stepper.state)
+    }
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+    use crate::provider::SerdeDFA;
+    use alloc::borrow::Cow;
+
+    let matcher = SerdeDFA::new(Cow::Borrowed("11(000)*$")).unwrap();
+
+    for writeable in [1i32, 11, 110, 11000, 211000] {
+        assert_eq!(
+            matcher
+                .deref()
+                .find_earliest_fwd(writeable.write_to_string().as_bytes())
+                .unwrap()
+                .is_some(),
+            matcher.deref().matches_earliest_fwd_lazy(&writeable)
+        );
+    }
+
+    struct ExitEarlyTest;
+
+    impl writeable::Writeable for ExitEarlyTest {
+        fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
+            sink.write_str("12")?;
+            unreachable!()
+        }
+    }
+
+    assert!(!matcher.deref().matches_earliest_fwd_lazy(&ExitEarlyTest));
+}
diff --git a/vendor/icu_list/src/lib.rs b/vendor/icu_list/src/lib.rs
index 18f2156a6..61aec0fa3 100644
--- a/vendor/icu_list/src/lib.rs
+++ b/vendor/icu_list/src/lib.rs
@@ -93,8 +93,9 @@
 extern crate alloc;
 
 mod error;
+mod lazy_automaton;
 mod list_formatter;
-mod string_matcher;
+mod patterns;
 
 pub mod provider;
 
diff --git a/vendor/icu_list/src/list_formatter.rs b/vendor/icu_list/src/list_formatter.rs
index 36f5fbb7b..93f035eab 100644
--- a/vendor/icu_list/src/list_formatter.rs
+++ b/vendor/icu_list/src/list_formatter.rs
@@ -72,8 +72,39 @@ impl ListFormatter {
     );
 
     /// Returns a [`Writeable`] composed of the input [`Writeable`]s and the language-dependent
-    /// formatting. The first layer of parts contains [`parts::ELEMENT`] for input
-    /// elements, and [`parts::LITERAL`] for list literals.
+    /// formatting.
+    ///
+    /// The [`Writeable`] is annotated with [`parts::ELEMENT`] for input elements,
+    /// and [`parts::LITERAL`] for list literals.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use icu::list::*;
+    /// # use icu::locid::locale;
+    /// # use writeable::*;
+    /// let formatteur = ListFormatter::try_new_and_with_length_unstable(
+    ///     &icu_testdata::unstable(),
+    ///     &locale!("fr").into(),
+    ///     ListLength::Wide,
+    /// )
+    /// .unwrap();
+    /// let pays = ["Italie", "France", "Espagne", "Allemagne"];
+    ///
+    /// assert_writeable_parts_eq!(
+    ///     formatteur.format(pays.iter()),
+    ///     "Italie, France, Espagne et Allemagne",
+    ///     [
+    ///         (0, 6, parts::ELEMENT),
+    ///         (6, 8, parts::LITERAL),
+    ///         (8, 14, parts::ELEMENT),
+    ///         (14, 16, parts::LITERAL),
+    ///         (16, 23, parts::ELEMENT),
+    ///         (23, 27, parts::LITERAL),
+    ///         (27, 36, parts::ELEMENT),
+    ///     ]
+    /// );
+    /// ```
     pub fn format<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a>(
         &'a self,
         values: I,
@@ -99,6 +130,9 @@ pub mod parts {
     use writeable::Part;
 
     /// The [`Part`] used by [`FormattedList`](super::FormattedList) to mark the part of the string that is an element.
+    ///
+    /// * `category`: `"list"`
+    /// * `value`: `"element"`
     pub const ELEMENT: Part = Part {
         category: "list",
         value: "element",
@@ -106,6 +140,9 @@ pub mod parts {
 
     /// The [`Part`] used by [`FormattedList`](super::FormattedList) to mark the part of the string that is a list literal,
     /// such as ", " or " and ".
+    ///
+    /// * `category`: `"list"`
+    /// * `value`: `"literal"`
     pub const LITERAL: Part = Part {
         category: "list",
         value: "literal",
@@ -234,7 +271,7 @@ mod tests {
 
     fn formatter(length: ListLength) -> ListFormatter {
         ListFormatter {
-            data: DataPayload::from_owned(crate::provider::test::test_patterns()),
+            data: DataPayload::from_owned(crate::patterns::test::test_patterns()),
             length,
         }
     }
diff --git a/vendor/icu_list/src/patterns.rs b/vendor/icu_list/src/patterns.rs
new file mode 100644
index 000000000..8cfcb98c1
--- /dev/null
+++ b/vendor/icu_list/src/patterns.rs
@@ -0,0 +1,283 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::lazy_automaton::LazyAutomaton;
+use crate::provider::*;
+use crate::ListLength;
+#[cfg(feature = "datagen")]
+use alloc::borrow::Cow;
+#[cfg(feature = "datagen")]
+use icu_provider::DataError;
+use writeable::{LengthHint, Writeable};
+
+impl<'data> ListFormatterPatternsV1<'data> {
+    /// Creates a new [`ListFormatterPatternsV1`] from the given patterns. Fails if any pattern is invalid.
+    ///
+    /// See [`ListJoinerPattern::from_str`]. `allow_prefix` will be true for `start` and `pair` patterns,
+    /// `allow_suffix` for `end` and `pair` patterns.
+    #[cfg(feature = "datagen")]
+    pub fn try_new(
+        [start, middle, end, pair, short_start, short_middle, short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair]: [&str; 12],
+    ) -> Result<Self, DataError> {
+        Ok(Self([
+            ListJoinerPattern::from_str(start, true, false)?.into(),
+            ListJoinerPattern::from_str(middle, false, false)?.into(),
+            ListJoinerPattern::from_str(end, false, true)?.into(),
+            ListJoinerPattern::from_str(pair, true, true)?.into(),
+            ListJoinerPattern::from_str(short_start, true, false)?.into(),
+            ListJoinerPattern::from_str(short_middle, false, false)?.into(),
+            ListJoinerPattern::from_str(short_end, false, true)?.into(),
+            ListJoinerPattern::from_str(short_pair, true, true)?.into(),
+            ListJoinerPattern::from_str(narrow_start, true, false)?.into(),
+            ListJoinerPattern::from_str(narrow_middle, false, false)?.into(),
+            ListJoinerPattern::from_str(narrow_end, false, true)?.into(),
+            ListJoinerPattern::from_str(narrow_pair, true, true)?.into(),
+        ]))
+    }
+
+    /// Adds a special case to all `pattern`s that will evaluate to
+    /// `alternative_pattern` when `regex` matches the following element.
+    /// The regex is interpreted case-insensitive and anchored to the beginning, but
+    /// to improve efficiency does not search for full matches. If a full match is
+    /// required, use `$`.
+    #[cfg(feature = "datagen")]
+    pub fn make_conditional(
+        &mut self,
+        pattern: &str,
+        regex: &SerdeDFA<'static>,
+        alternative_pattern: &str,
+    ) -> Result<(), DataError> {
+        let old = ListJoinerPattern::from_str(pattern, true, true)?;
+        for i in 0..12 {
+            #[allow(clippy::indexing_slicing)] // self.0 is &[_; 12]
+            if self.0[i].default == old {
+                self.0[i].special_case = Some(SpecialCasePattern {
+                    condition: regex.clone(),
+                    pattern: ListJoinerPattern::from_str(
+                        alternative_pattern,
+                        i % 4 == 0 || i % 4 == 3, // allow_prefix = start or pair
+                        i % 4 == 2 || i % 4 == 3, // allow_suffix = end or pair
+                    )?,
+                });
+            }
+        }
+        Ok(())
+    }
+
+    /// The range of the number of bytes required by the list literals to join a
+    /// list of length `len`. If none of the patterns are conditional, this is exact.
+    pub(crate) fn size_hint(&self, style: ListLength, len: usize) -> LengthHint {
+        match len {
+            0 | 1 => LengthHint::exact(0),
+            2 => self.pair(style).size_hint(),
+            n => {
+                self.start(style).size_hint()
+                    + self.middle(style).size_hint() * (n - 3)
+                    + self.end(style).size_hint()
+            }
+        }
+    }
+}
+
+type PatternParts<'a> = (&'a str, &'a str, &'a str);
+
+impl<'a> ConditionalListJoinerPattern<'a> {
+    pub(crate) fn parts<'b, W: Writeable + ?Sized>(
+        &'a self,
+        following_value: &'b W,
+    ) -> PatternParts<'a> {
+        match &self.special_case {
+            Some(SpecialCasePattern { condition, pattern })
+                if condition.deref().matches_earliest_fwd_lazy(following_value) =>
+            {
+                pattern.borrow_tuple()
+            }
+            _ => self.default.borrow_tuple(),
+        }
+    }
+
+    /// The expected length of this pattern
+    fn size_hint(&'a self) -> LengthHint {
+        let mut hint = self.default.size_hint();
+        if let Some(special_case) = &self.special_case {
+            hint |= special_case.pattern.size_hint()
+        }
+        hint
+    }
+}
+
+impl<'data> ListJoinerPattern<'data> {
+    /// Construct the pattern from a CLDR pattern string
+    #[cfg(feature = "datagen")]
+    pub fn from_str(
+        pattern: &str,
+        allow_prefix: bool,
+        allow_suffix: bool,
+    ) -> Result<Self, DataError> {
+        match (pattern.find("{0}"), pattern.find("{1}")) {
+            (Some(index_0), Some(index_1))
+                if index_0 < index_1
+                    && (allow_prefix || index_0 == 0)
+                    && (allow_suffix || index_1 == pattern.len() - 3) =>
+            {
+                if (index_0 > 0 && !cfg!(test)) || index_1 - 3 >= 256 {
+                    return Err(DataError::custom(
+                        "Found valid pattern that cannot be stored in ListFormatterPatternsV1",
+                    )
+                    .with_debug_context(pattern));
+                }
+                #[allow(clippy::indexing_slicing)] // indices come from `find`, so every slice is in bounds
+                Ok(ListJoinerPattern {
+                    string: Cow::Owned(alloc::format!(
+                        "{}{}{}",
+                        &pattern[0..index_0],
+                        &pattern[index_0 + 3..index_1],
+                        &pattern[index_1 + 3..]
+                    )),
+                    index_0: index_0 as u8,
+                    index_1: (index_1 - 3) as u8,
+                })
+            }
+            _ => Err(DataError::custom("Invalid list pattern").with_debug_context(pattern)),
+        }
+    }
+
+    fn borrow_tuple(&'data self) -> PatternParts<'data> {
+        #![allow(clippy::indexing_slicing)] // by invariant
+        let index_0 = self.index_0 as usize;
+        let index_1 = self.index_1 as usize;
+        (
+            &self.string[0..index_0],
+            &self.string[index_0..index_1],
+            &self.string[index_1..],
+        )
+    }
+
+    fn size_hint(&self) -> LengthHint {
+        LengthHint::exact(self.string.len())
+    }
+}
+
+#[cfg(feature = "datagen")]
+impl<'data> From<ListJoinerPattern<'data>> for ConditionalListJoinerPattern<'data> {
+    fn from(default: ListJoinerPattern<'data>) -> Self {
+        Self {
+            default,
+            special_case: None,
+        }
+    }
+}
+
+#[cfg(all(test, feature = "datagen"))]
+pub mod test {
+    use super::*;
+
+    pub fn test_patterns() -> ListFormatterPatternsV1<'static> {
+        let mut patterns = ListFormatterPatternsV1::try_new([
+            // Wide: general
+            "@{0}:{1}",
+            "{0},{1}",
+            "{0}.{1}!",
+            "${0};{1}+",
+            // Short: different pattern lengths
+            "{0}1{1}",
+            "{0}12{1}",
+            "{0}12{1}34",
+            "{0}123{1}456",
+            // Narrow: conditionals
+            "{0}: {1}",
+            "{0}, {1}",
+            "{0}. {1}",
+            "{0}. {1}",
+        ])
+        .unwrap();
+        patterns
+            .make_conditional(
+                "{0}. {1}",
+                &SerdeDFA::new(Cow::Borrowed("A")).unwrap(),
+                "{0} :o {1}",
+            )
+            .unwrap();
+        patterns
+    }
+
+    #[test]
+    fn rejects_bad_patterns() {
+        assert!(ListJoinerPattern::from_str("{0} and", true, true).is_err());
+        assert!(ListJoinerPattern::from_str("and {1}", true, true).is_err());
+        assert!(ListJoinerPattern::from_str("{1} and {0}", true, true).is_err());
+        assert!(ListJoinerPattern::from_str("{1{0}}", true, true).is_err());
+        assert!(ListJoinerPattern::from_str("{0\u{202e}} and {1}", true, true).is_err());
+        assert!(ListJoinerPattern::from_str("{{0}} {{1}}", true, true).is_ok());
+
+        assert!(ListJoinerPattern::from_str("{0} and {1} ", true, true).is_ok());
+        assert!(ListJoinerPattern::from_str("{0} and {1} ", true, false).is_err());
+        assert!(ListJoinerPattern::from_str(" {0} and {1}", true, true).is_ok());
+        assert!(ListJoinerPattern::from_str(" {0} and {1}", false, true).is_err());
+    }
+
+    #[test]
+    fn produces_correct_parts() {
+        assert_eq!(
+            test_patterns().pair(ListLength::Wide).parts(""),
+            ("$", ";", "+")
+        );
+    }
+
+    #[test]
+    fn produces_correct_parts_conditionally() {
+        assert_eq!(
+            test_patterns().end(ListLength::Narrow).parts("A"),
+            ("", " :o ", "")
+        );
+        assert_eq!(
+            test_patterns().end(ListLength::Narrow).parts("a"),
+            ("", " :o ", "")
+        );
+        assert_eq!(
+            test_patterns().end(ListLength::Narrow).parts("ab"),
+            ("", " :o ", "")
+        );
+        assert_eq!(
+            test_patterns().end(ListLength::Narrow).parts("B"),
+            ("", ". ", "")
+        );
+        assert_eq!(
+            test_patterns().end(ListLength::Narrow).parts("BA"),
+            ("", ". ", "")
+        );
+    }
+
+    #[test]
+    fn size_hint_works() {
+        let pattern = test_patterns();
+
+        assert_eq!(
+            pattern.size_hint(ListLength::Short, 0),
+            LengthHint::exact(0)
+        );
+        assert_eq!(
+            pattern.size_hint(ListLength::Short, 1),
+            LengthHint::exact(0)
+        );
+
+        // pair pattern "{0}123{1}456"
+        assert_eq!(
+            pattern.size_hint(ListLength::Short, 2),
+            LengthHint::exact(6)
+        );
+
+        // patterns "{0}1{1}", "{0}12{1}" (x197), and "{0}12{1}34"
+        assert_eq!(
+            pattern.size_hint(ListLength::Short, 200),
+            LengthHint::exact(1 + 2 * 197 + 4)
+        );
+
+        // patterns "{0}: {1}", "{0}, {1}" (x197), and "{0} :o {1}" or "{0}. {1}"
+        assert_eq!(
+            pattern.size_hint(ListLength::Narrow, 200),
+            LengthHint::exact(2 + 197 * 2) + LengthHint::between(2, 4)
+        );
+    }
+}
diff --git a/vendor/icu_list/src/provider.rs b/vendor/icu_list/src/provider.rs
deleted file mode 100644
index 27f3e4fec..000000000
--- a/vendor/icu_list/src/provider.rs
+++ /dev/null
@@ -1,465 +0,0 @@
-// This file is part of ICU4X. For terms of use, please see the file
-// called LICENSE at the top level of the ICU4X source tree
-// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
-
-// Provider structs must be stable
-#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
-
-//! Data provider struct definitions for this ICU4X component.
-//!
-//! Read more about data providers: [`icu_provider`]
-
-use crate::ListLength;
-use alloc::borrow::Cow;
-use icu_provider::DataMarker;
-use icu_provider::{yoke, zerofrom};
-use writeable::{LengthHint, Writeable};
-
-pub use crate::string_matcher::StringMatcher;
-
-/// Symbols and metadata required for [`ListFormatter`](crate::ListFormatter).
-#[icu_provider::data_struct(
-    AndListV1Marker = "list/and@1",
-    OrListV1Marker = "list/or@1",
-    UnitListV1Marker = "list/unit@1"
-)]
-#[derive(Clone, Debug)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_list::provider),
-)]
-pub struct ListFormatterPatternsV1<'data>(
-    #[cfg_attr(feature = "datagen", serde(with = "deduplicating_array"))]
-    /// The patterns in the order start, middle, end, pair, short_start, short_middle,
-    /// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair,
-    pub [ConditionalListJoinerPattern<'data>; 12],
-);
-
-#[cfg(feature = "serde")]
-impl<'de> serde::Deserialize<'de> for ListFormatterPatternsV1<'de> {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::de::Deserializer<'de>,
-    {
-        #[cfg(not(feature = "serde_human"))]
-        if deserializer.is_human_readable() {
-            use serde::de::Error;
-            return Err(D::Error::custom(
-                    "Deserializing human-readable ListFormatter data requires the 'serde_human' feature",
-                ));
-        }
-
-        Ok(ListFormatterPatternsV1(deduplicating_array::deserialize(
-            deserializer,
-        )?))
-    }
-}
-
-pub(crate) struct ErasedListV1Marker;
-
-impl DataMarker for ErasedListV1Marker {
-    type Yokeable = ListFormatterPatternsV1<'static>;
-}
-
-impl<'data> ListFormatterPatternsV1<'data> {
-    pub(crate) fn start(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
-        #![allow(clippy::indexing_slicing)] // style as usize < 3
-        &self.0[4 * (style as usize)]
-    }
-
-    pub(crate) fn middle(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
-        #![allow(clippy::indexing_slicing)] // style as usize < 3
-        &self.0[4 * (style as usize) + 1]
-    }
-
-    pub(crate) fn end(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
-        #![allow(clippy::indexing_slicing)] // style as usize < 3
-        &self.0[4 * (style as usize) + 2]
-    }
-
-    pub(crate) fn pair(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
-        #![allow(clippy::indexing_slicing)] // style as usize < 3
-        &self.0[4 * (style as usize) + 3]
-    }
-
-    /// The range of the number of bytes required by the list literals to join a
-    /// list of length `len`. If none of the patterns are conditional, this is exact.
-    pub(crate) fn size_hint(&self, style: ListLength, len: usize) -> LengthHint {
-        match len {
-            0 | 1 => LengthHint::exact(0),
-            2 => self.pair(style).size_hint(),
-            n => {
-                self.start(style).size_hint()
-                    + self.middle(style).size_hint() * (n - 3)
-                    + self.end(style).size_hint()
-            }
-        }
-    }
-}
-
-/// A pattern that can behave conditionally on the next element.
-#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_list::provider),
-)]
-#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
-pub struct ConditionalListJoinerPattern<'data> {
-    /// The default pattern
-    #[cfg_attr(feature = "serde", serde(borrow))]
-    pub default: ListJoinerPattern<'data>,
-    /// And optional special case
-    #[cfg_attr(feature = "serde", serde(borrow))]
-    pub special_case: Option<SpecialCasePattern<'data>>,
-}
-
-/// The special case of a [`ConditionalListJoinerPattern`]
-#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[cfg_attr(
-    feature = "datagen",
-    derive(serde::Serialize, databake::Bake),
-    databake(path = icu_list::provider),
-)]
-#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
-pub struct SpecialCasePattern<'data> {
-    /// The condition on the following element
-    #[cfg_attr(feature = "serde", serde(borrow))]
-    pub condition: StringMatcher<'data>,
-    /// The pattern if the condition matches
-    #[cfg_attr(feature = "serde", serde(borrow))]
-    pub pattern: ListJoinerPattern<'data>,
-}
-
-/// A pattern containing two numeric placeholders ("{0}, and {1}.")
-#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
-pub struct ListJoinerPattern<'data> {
-    /// The pattern string without the placeholders
-    string: Cow<'data, str>,
-    /// The index of the first placeholder. Always <= index_1.
-    // Always 0 for CLDR data, so we don't need to serialize it.
-    // In-memory we have free space for it as index_1 doesn't
-    // fill a word.
-    #[cfg_attr(feature = "datagen", serde(skip))]
-    index_0: u8,
-    /// The index of the second placeholder. Always < string.len().
-    index_1: u8,
-}
-
-#[cfg(feature = "serde")]
-impl<'de: 'data, 'data> serde::Deserialize<'de> for ListJoinerPattern<'data> {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        #[derive(serde::Deserialize)]
-        struct Dummy<'data> {
-            #[cfg_attr(feature = "serde", serde(borrow))]
-            string: Cow<'data, str>,
-            index_1: u8,
-        }
-        let Dummy { string, index_1 } = Dummy::deserialize(deserializer)?;
-
-        if index_1 as usize > string.len() {
-            use serde::de::Error;
-            Err(D::Error::custom("invalid index_1"))
-        } else {
-            Ok(ListJoinerPattern {
-                string,
-                index_0: 0,
-                index_1,
-            })
-        }
-    }
-}
-
-impl<'a> ListJoinerPattern<'a> {
-    /// Constructs a [`ListJoinerPattern`] from raw parts. Used by databake.
-    ///
-    /// # Safety
-    /// index_1 may be at most string.len()
-    pub const unsafe fn from_parts_unchecked(string: &'a str, index_1: u8) -> Self {
-        Self {
-            string: Cow::Borrowed(string),
-            index_0: 0,
-            index_1,
-        }
-    }
-}
-
-pub(crate) type PatternParts<'a> = (&'a str, &'a str, &'a str);
-
-impl<'a> ConditionalListJoinerPattern<'a> {
-    pub(crate) fn parts<'b, W: Writeable + ?Sized>(
-        &'a self,
-        following_value: &'b W,
-    ) -> PatternParts<'a> {
-        match &self.special_case {
-            Some(SpecialCasePattern { condition, pattern })
-                // TODO: Implement lookahead instead of materializing here.
-                if condition.test(&*following_value.write_to_string()) =>
-            {
-                pattern.borrow_tuple()
-            }
-            _ => self.default.borrow_tuple(),
-        }
-    }
-
-    /// The expected length of this pattern
-    pub fn size_hint(&'a self) -> LengthHint {
-        let mut hint = self.default.size_hint();
-        if let Some(special_case) = &self.special_case {
-            hint |= special_case.pattern.size_hint()
-        }
-        hint
-    }
-}
-
-impl<'data> ListJoinerPattern<'data> {
-    fn borrow_tuple(&'data self) -> PatternParts<'data> {
-        #![allow(clippy::indexing_slicing)] // by invariant
-        let index_0 = self.index_0 as usize;
-        let index_1 = self.index_1 as usize;
-        (
-            &self.string[0..index_0],
-            &self.string[index_0..index_1],
-            &self.string[index_1..],
-        )
-    }
-
-    fn size_hint(&self) -> LengthHint {
-        LengthHint::exact(self.string.len())
-    }
-}
-
-#[cfg(feature = "datagen")]
-mod datagen {
-    #![allow(clippy::indexing_slicing)] // datagen
-
-    use super::*;
-    use icu_provider::DataError;
-
-    impl<'data> ListFormatterPatternsV1<'data> {
-        /// The patterns in the order start, middle, end, pair, short_start, short_middle,
-        /// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair,
-        pub fn try_new(patterns: [&str; 12]) -> Result<Self, DataError> {
-            Ok(Self([
-                ListJoinerPattern::from_str(patterns[0], true, false)?.into(),
-                ListJoinerPattern::from_str(patterns[1], false, false)?.into(),
-                ListJoinerPattern::from_str(patterns[2], false, true)?.into(),
-                ListJoinerPattern::from_str(patterns[3], true, true)?.into(),
-                ListJoinerPattern::from_str(patterns[4], true, false)?.into(),
-                ListJoinerPattern::from_str(patterns[5], false, false)?.into(),
-                ListJoinerPattern::from_str(patterns[6], false, true)?.into(),
-                ListJoinerPattern::from_str(patterns[7], true, true)?.into(),
-                ListJoinerPattern::from_str(patterns[8], true, false)?.into(),
-                ListJoinerPattern::from_str(patterns[9], false, false)?.into(),
-                ListJoinerPattern::from_str(patterns[10], false, true)?.into(),
-                ListJoinerPattern::from_str(patterns[11], true, true)?.into(),
-            ]))
-        }
-
-        /// Adds a special case to all `pattern`s that will evaluate to
-        /// `alternative_pattern` when `regex` matches the following element.
-        /// The regex is interpreted case-insensitive and anchored to the beginning, but
-        /// to improve efficiency does not search for full matches. If a full match is
-        /// required, use `$`.
-        pub fn make_conditional(
-            &mut self,
-            pattern: &str,
-            regex: &StringMatcher<'static>,
-            alternative_pattern: &str,
-        ) -> Result<(), DataError> {
-            let old = ListJoinerPattern::from_str(pattern, true, true)?;
-            for i in 0..12 {
-                if self.0[i].default == old {
-                    self.0[i].special_case = Some(SpecialCasePattern {
-                        condition: regex.clone(),
-                        pattern: ListJoinerPattern::from_str(
-                            alternative_pattern,
-                            i % 4 == 0 || i % 4 == 3, // allow_prefix = start or pair
-                            i % 4 == 2 || i % 4 == 3, // allow_suffix = end or pair
-                        )?,
-                    });
-                }
-            }
-            Ok(())
-        }
-    }
-
-    impl<'data> ListJoinerPattern<'data> {
-        /// Construct the pattern from a CLDR pattern string
-        pub fn from_str(
-            pattern: &str,
-            allow_prefix: bool,
-            allow_suffix: bool,
-        ) -> Result<Self, DataError> {
-            match (pattern.find("{0}"), pattern.find("{1}")) {
-                (Some(index_0), Some(index_1))
-                    if index_0 < index_1
-                        && (allow_prefix || index_0 == 0)
-                        && (allow_suffix || index_1 == pattern.len() - 3) =>
-                {
-                    if (index_0 > 0 && !cfg!(test)) || index_1 - 3 >= 256 {
-                        return Err(DataError::custom(
-                            "Found valid pattern that cannot be stored in ListFormatterPatternsV1",
-                        )
-                        .with_debug_context(pattern));
-                    }
-                    Ok(ListJoinerPattern {
-                        string: Cow::Owned(alloc::format!(
-                            "{}{}{}",
-                            &pattern[0..index_0],
-                            &pattern[index_0 + 3..index_1],
-                            &pattern[index_1 + 3..]
-                        )),
-                        index_0: index_0 as u8,
-                        index_1: (index_1 - 3) as u8,
-                    })
-                }
-                _ => Err(DataError::custom("Invalid list pattern").with_debug_context(pattern)),
-            }
-        }
-    }
-
-    impl<'data> From<ListJoinerPattern<'data>> for ConditionalListJoinerPattern<'data> {
-        fn from(default: ListJoinerPattern<'data>) -> Self {
-            Self {
-                default,
-                special_case: None,
-            }
-        }
-    }
-
-    impl databake::Bake for ListJoinerPattern<'_> {
-        fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
-            env.insert("icu_list");
-            let string = (&*self.string).bake(env);
-            let index_1 = self.index_1.bake(env);
-            // Safe because our own data is safe
-            databake::quote! { unsafe {
-                ::icu_list::provider::ListJoinerPattern::from_parts_unchecked(#string, #index_1)
-            }}
-        }
-    }
-}
-
-#[cfg(all(test, feature = "datagen"))]
-pub(crate) mod test {
-    use super::*;
-
-    pub fn test_patterns() -> ListFormatterPatternsV1<'static> {
-        let mut patterns = ListFormatterPatternsV1::try_new([
-            // Wide: general
-            "@{0}:{1}",
-            "{0},{1}",
-            "{0}.{1}!",
-            "${0};{1}+",
-            // Short: different pattern lengths
-            "{0}1{1}",
-            "{0}12{1}",
-            "{0}12{1}34",
-            "{0}123{1}456",
-            // Narrow: conditionals
-            "{0}: {1}",
-            "{0}, {1}",
-            "{0}. {1}",
-            "{0}. {1}",
-        ])
-        .unwrap();
-        patterns
-            .make_conditional("{0}. {1}", &StringMatcher::new("A").unwrap(), "{0} :o {1}")
-            .unwrap();
-        patterns
-    }
-
-    #[test]
-    fn rejects_bad_patterns() {
-        assert!(ListJoinerPattern::from_str("{0} and", true, true).is_err());
-        assert!(ListJoinerPattern::from_str("and {1}", true, true).is_err());
-        assert!(ListJoinerPattern::from_str("{1} and {0}", true, true).is_err());
-        assert!(ListJoinerPattern::from_str("{1{0}}", true, true).is_err());
-        assert!(ListJoinerPattern::from_str("{0\u{202e}} and {1}", true, true).is_err());
-        assert!(ListJoinerPattern::from_str("{{0}} {{1}}", true, true).is_ok());
-
-        assert!(ListJoinerPattern::from_str("{0} and {1} ", true, true).is_ok());
-        assert!(ListJoinerPattern::from_str("{0} and {1} ", true, false).is_err());
-        assert!(ListJoinerPattern::from_str(" {0} and {1}", true, true).is_ok());
-        assert!(ListJoinerPattern::from_str(" {0} and {1}", false, true).is_err());
-    }
-
-    #[test]
-    fn produces_correct_parts() {
-        assert_eq!(
-            test_patterns().pair(ListLength::Wide).parts(""),
-            ("$", ";", "+")
-        );
-    }
-
-    #[test]
-    fn produces_correct_parts_conditionally() {
-        assert_eq!(
-            test_patterns().end(ListLength::Narrow).parts("A"),
-            ("", " :o ", "")
-        );
-        assert_eq!(
-            test_patterns().end(ListLength::Narrow).parts("a"),
-            ("", " :o ", "")
-        );
-        assert_eq!(
-            test_patterns().end(ListLength::Narrow).parts("ab"),
-            ("", " :o ", "")
-        );
-        assert_eq!(
-            test_patterns().end(ListLength::Narrow).parts("B"),
-            ("", ". ", "")
-        );
-        assert_eq!(
-            test_patterns().end(ListLength::Narrow).parts("BA"),
-            ("", ". ", "")
-        );
-    }
-
-    #[test]
-    fn size_hint_works() {
-        let pattern = test_patterns();
-
-        assert_eq!(
-            pattern.size_hint(ListLength::Short, 0),
-            LengthHint::exact(0)
-        );
-        assert_eq!(
-            pattern.size_hint(ListLength::Short, 1),
-            LengthHint::exact(0)
-        );
-
-        // pair pattern "{0}123{1}456"
-        assert_eq!(
-            pattern.size_hint(ListLength::Short, 2),
-            LengthHint::exact(6)
-        );
-
-        // patterns "{0}1{1}", "{0}12{1}" (x197), and "{0}12{1}34"
-        assert_eq!(
-            pattern.size_hint(ListLength::Short, 200),
-            LengthHint::exact(1 + 2 * 197 + 4)
-        );
-
-        // patterns "{0}: {1}", "{0}, {1}" (x197), and "{0} :o {1}" or "{0}. {1}"
-        assert_eq!(
-            pattern.size_hint(ListLength::Narrow, 200),
-            LengthHint::exact(2 + 197 * 2) + LengthHint::between(2, 4)
-        );
-    }
-
-    #[test]
-    fn databake() {
-        databake::test_bake!(
-            ListJoinerPattern,
-            const: unsafe { crate::provider::ListJoinerPattern::from_parts_unchecked(", ", 2u8) },
-            icu_list
-        );
-    }
-}
diff --git a/vendor/icu_list/src/provider/mod.rs b/vendor/icu_list/src/provider/mod.rs
new file mode 100644
index 000000000..efab7c8bc
--- /dev/null
+++ b/vendor/icu_list/src/provider/mod.rs
@@ -0,0 +1,261 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// Provider structs must be stable
+#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
+
+//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
+//!
+//! <div class="stab unstable">
+//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
+//! to be stable, their Rust representation might not be. Use with caution.
+//! </div>
+//!
+//! Read more about data providers: [`icu_provider`]
+
+use crate::ListLength;
+use alloc::borrow::Cow;
+use icu_provider::DataMarker;
+use icu_provider::{yoke, zerofrom};
+
+mod serde_dfa;
+pub use serde_dfa::SerdeDFA;
+
+/// Symbols and metadata required for [`ListFormatter`](crate::ListFormatter).
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[icu_provider::data_struct(
+    AndListV1Marker = "list/and@1",
+    OrListV1Marker = "list/or@1",
+    UnitListV1Marker = "list/unit@1"
+)]
+#[derive(Clone, Debug)]
+#[cfg_attr(
+    feature = "datagen",
+    derive(serde::Serialize, databake::Bake),
+    databake(path = icu_list::provider),
+)]
+pub struct ListFormatterPatternsV1<'data>(
+    #[cfg_attr(feature = "datagen", serde(with = "deduplicating_array"))]
+    /// The patterns in the order start, middle, end, pair, short_start, short_middle,
+    /// short_end, short_pair, narrow_start, narrow_middle, narrow_end, narrow_pair.
+    pub [ConditionalListJoinerPattern<'data>; 12],
+);
+
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for ListFormatterPatternsV1<'de> {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::de::Deserializer<'de>,
+    {
+        #[cfg(not(feature = "serde_human"))]
+        if deserializer.is_human_readable() {
+            use serde::de::Error;
+            return Err(D::Error::custom(
+                    "Deserializing human-readable ListFormatter data requires the 'serde_human' feature",
+                ));
+        }
+
+        Ok(ListFormatterPatternsV1(deduplicating_array::deserialize(
+            deserializer,
+        )?))
+    }
+}
+
+pub(crate) struct ErasedListV1Marker;
+
+impl DataMarker for ErasedListV1Marker {
+    type Yokeable = ListFormatterPatternsV1<'static>;
+}
+
+impl<'data> ListFormatterPatternsV1<'data> {
+    pub(crate) fn start(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+        #![allow(clippy::indexing_slicing)] // style as usize < 3
+        &self.0[4 * (style as usize)]
+    }
+
+    pub(crate) fn middle(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+        #![allow(clippy::indexing_slicing)] // style as usize < 3
+        &self.0[4 * (style as usize) + 1]
+    }
+
+    pub(crate) fn end(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+        #![allow(clippy::indexing_slicing)] // style as usize < 3
+        &self.0[4 * (style as usize) + 2]
+    }
+
+    pub(crate) fn pair(&self, style: ListLength) -> &ConditionalListJoinerPattern<'data> {
+        #![allow(clippy::indexing_slicing)] // style as usize < 3
+        &self.0[4 * (style as usize) + 3]
+    }
+}
+
+/// A pattern that can behave conditionally on the next element.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
+#[cfg_attr(
+    feature = "datagen",
+    derive(PartialEq, serde::Serialize, databake::Bake),
+    databake(path = icu_list::provider),
+)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
+pub struct ConditionalListJoinerPattern<'data> {
+    /// The default pattern
+    #[cfg_attr(feature = "serde", serde(borrow))]
+    pub default: ListJoinerPattern<'data>,
+    /// An optional special case
+    #[cfg_attr(
+        feature = "serde",
+        serde(borrow, deserialize_with = "SpecialCasePattern::deserialize_option")
+    )]
+    pub special_case: Option<SpecialCasePattern<'data>>,
+}
+
+/// The special case of a [`ConditionalListJoinerPattern`]
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
+#[cfg_attr(
+    feature = "datagen",
+    derive(PartialEq, serde::Serialize, databake::Bake),
+    databake(path = icu_list::provider),
+)]
+pub struct SpecialCasePattern<'data> {
+    /// The condition on the following element
+    pub condition: SerdeDFA<'data>,
+    /// The pattern if the condition matches
+    pub pattern: ListJoinerPattern<'data>,
+}
+
+#[cfg(feature = "serde")]
+impl<'data> SpecialCasePattern<'data> {
+    // If the condition doesn't deserialize, the whole special case becomes `None`
+    fn deserialize_option<'de: 'data, D>(deserializer: D) -> Result<Option<Self>, D::Error>
+    where
+        D: serde::de::Deserializer<'de>,
+    {
+        use serde::Deserialize;
+
+        #[derive(Deserialize)]
+        struct SpecialCasePatternOptionalDfa<'data> {
+            #[cfg_attr(
+                feature = "serde",
+                serde(borrow, deserialize_with = "SerdeDFA::maybe_deserialize")
+            )]
+            pub condition: Option<SerdeDFA<'data>>,
+            #[cfg_attr(feature = "serde", serde(borrow))]
+            pub pattern: ListJoinerPattern<'data>,
+        }
+
+        Ok(
+            match Option::<SpecialCasePatternOptionalDfa<'data>>::deserialize(deserializer)? {
+                Some(SpecialCasePatternOptionalDfa {
+                    condition: Some(condition),
+                    pattern,
+                }) => Some(SpecialCasePattern { condition, pattern }),
+                _ => None,
+            },
+        )
+    }
+}
+
+/// A pattern containing two numeric placeholders ("{0}, and {1}.")
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
+#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
+pub struct ListJoinerPattern<'data> {
+    /// The pattern string without the placeholders
+    pub(crate) string: Cow<'data, str>,
+    /// The index of the first placeholder. Always <= index_1.
+    // Always 0 for CLDR data, so we don't need to serialize it.
+    // In-memory we have free space for it as index_1 doesn't
+    // fill a word.
+    #[cfg_attr(feature = "datagen", serde(skip))]
+    pub(crate) index_0: u8,
+    /// The index of the second placeholder. Always <= string.len().
+    pub(crate) index_1: u8,
+}
+
+#[cfg(feature = "serde")]
+impl<'de: 'data, 'data> serde::Deserialize<'de> for ListJoinerPattern<'data> {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        #[derive(serde::Deserialize)]
+        struct Dummy<'data> {
+            #[cfg_attr(feature = "serde", serde(borrow))]
+            string: Cow<'data, str>,
+            index_1: u8,
+        }
+        let Dummy { string, index_1 } = Dummy::deserialize(deserializer)?;
+
+        if index_1 as usize > string.len() {
+            use serde::de::Error;
+            Err(D::Error::custom("invalid index_1"))
+        } else {
+            Ok(ListJoinerPattern {
+                string,
+                index_0: 0,
+                index_1,
+            })
+        }
+    }
+}
+
+impl<'a> ListJoinerPattern<'a> {
+    /// Constructs a [`ListJoinerPattern`] from raw parts. Used by databake.
+    ///
+    /// # Safety
+    /// index_1 may be at most string.len()
+    pub const unsafe fn from_parts_unchecked(string: &'a str, index_1: u8) -> Self {
+        Self {
+            string: Cow::Borrowed(string),
+            index_0: 0,
+            index_1,
+        }
+    }
+}
+
+#[cfg(feature = "datagen")]
+impl databake::Bake for ListJoinerPattern<'_> {
+    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+        env.insert("icu_list");
+        let string = (&*self.string).bake(env);
+        let index_1 = self.index_1.bake(env);
+        // Safe because our own data is safe
+        databake::quote! { unsafe {
+            ::icu_list::provider::ListJoinerPattern::from_parts_unchecked(#string, #index_1)
+        }}
+    }
+}
+
+#[cfg(all(test, feature = "datagen"))]
+#[test]
+fn databake() {
+    databake::test_bake!(
+        ListJoinerPattern,
+        const: unsafe { crate::provider::ListJoinerPattern::from_parts_unchecked(", ", 2u8) },
+        icu_list
+    );
+}
diff --git a/vendor/icu_list/src/provider/serde_dfa.rs b/vendor/icu_list/src/provider/serde_dfa.rs
new file mode 100644
index 000000000..e2424e1e9
--- /dev/null
+++ b/vendor/icu_list/src/provider/serde_dfa.rs
@@ -0,0 +1,244 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use alloc::borrow::Cow;
+use icu_provider::{yoke, zerofrom};
+use regex_automata::dfa::sparse::DFA;
+
+/// A serde-compatible version of [regex_automata::dfa::sparse::DFA]. This does not implement
+/// [`serde::Deserialize`] directly, as binary deserialization is not supported on big-endian
+/// platforms. `Self::maybe_deserialize` can be used to deserialize to `Option<SerdeDFA>`.
+///
+/// <div class="stab unstable">
+/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
+/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
+/// to be stable, their Rust representation might not be. Use with caution.
+/// </div>
+#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
+pub struct SerdeDFA<'data> {
+    // Safety: These always represent a valid DFA (DFA::from_bytes(dfa_bytes).is_ok())
+    dfa_bytes: Cow<'data, [u8]>,
+    pattern: Option<Cow<'data, str>>,
+}
+
+#[cfg(feature = "datagen")]
+impl PartialEq for SerdeDFA<'_> {
+    fn eq(&self, other: &Self) -> bool {
+        self.dfa_bytes == other.dfa_bytes
+    }
+}
+
+#[cfg(feature = "datagen")]
+impl databake::Bake for SerdeDFA<'_> {
+    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+        env.insert("icu_list");
+        let le_bytes = self.deref().to_bytes_little_endian().as_slice().bake(env);
+        let be_bytes = self.deref().to_bytes_big_endian().as_slice().bake(env);
+        // Safe because of `to_bytes_little_endian`/`to_bytes_big_endian`'s invariant.
+        databake::quote! {
+            unsafe {
+                ::icu_list::provider::SerdeDFA::from_dfa_bytes_unchecked(
+                    if cfg!(target_endian = "little") {
+                        &#le_bytes
+                    } else {
+                        &#be_bytes
+                    }
+                )
+            }
+        }
+    }
+}
+
+#[cfg(feature = "datagen")]
+impl serde::Serialize for SerdeDFA<'_> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::ser::Serializer,
+    {
+        if serializer.is_human_readable() {
+            self.pattern
+                .as_ref()
+                .map(|pattern| pattern.serialize(serializer))
+                .unwrap_or_else(|| {
+                    use serde::ser::Error;
+                    Err(S::Error::custom(
+                        "cannot serialize a deserialized bincode SerdeDFA to JSON",
+                    ))
+                })
+        } else {
+            self.deref().to_bytes_little_endian().serialize(serializer)
+        }
+    }
+}
+
+#[cfg(feature = "serde")]
+impl<'data> SerdeDFA<'data> {
+    /// Deserializes to `Option<Self>`. Will return `None` for non-human-readable serialization
+    /// formats on big-endian systems, as `regex_automata` serialization is endian-sensitive.
+    pub fn maybe_deserialize<'de: 'data, D>(deserializer: D) -> Result<Option<Self>, D::Error>
+    where
+        D: serde::de::Deserializer<'de>,
+    {
+        use icu_provider::serde::borrow_de_utils::CowBytesWrap;
+        use serde::Deserialize;
+
+        #[cfg(feature = "serde_human")]
+        if deserializer.is_human_readable() {
+            #[cfg(not(feature = "std"))]
+            use alloc::string::ToString;
+            use serde::de::Error;
+            return SerdeDFA::new(Cow::<str>::deserialize(deserializer)?)
+                .map(Some)
+                .map_err(|e| D::Error::custom(e.to_string()));
+        }
+
+        let dfa_bytes = <CowBytesWrap<'de>>::deserialize(deserializer)?.0;
+
+        if cfg!(target_endian = "big") {
+            return Ok(None);
+        }
+
+        // Verify safety invariant
+        DFA::from_bytes(&dfa_bytes).map_err(|e| {
+            use serde::de::Error;
+            D::Error::custom(alloc::format!("Invalid DFA bytes: {}", e))
+        })?;
+
+        Ok(Some(SerdeDFA {
+            dfa_bytes,
+            pattern: None,
+        }))
+    }
+}
+
+impl<'data> SerdeDFA<'data> {
+    /// Creates a `SerdeDFA` from raw bytes. Used internally by databake.
+    ///
+    /// # Safety
+    ///
+    /// `dfa_bytes` has to be a valid DFA (regex_automata::dfa::sparse::DFA::from_bytes(dfa_bytes).is_ok())
+    pub const unsafe fn from_dfa_bytes_unchecked(dfa_bytes: &'data [u8]) -> Self {
+        Self {
+            dfa_bytes: Cow::Borrowed(dfa_bytes),
+            pattern: None,
+        }
+    }
+
+    /// Creates a `SerdeDFA` from a regex.
+    #[cfg(any(feature = "datagen", feature = "serde_human",))]
+    pub fn new(pattern: Cow<'data, str>) -> Result<Self, icu_provider::DataError> {
+        use regex_automata::{
+            dfa::dense::{Builder, Config},
+            SyntaxConfig,
+        };
+
+        let mut builder = Builder::new();
+        let dfa = builder
+            .syntax(SyntaxConfig::new().case_insensitive(true))
+            .configure(Config::new().anchored(true).minimize(true))
+            .build(&pattern)
+            .map_err(|_| {
+                icu_provider::DataError::custom("Cannot build DFA").with_display_context(&pattern)
+            })?
+            .to_sparse()
+            .map_err(|_| {
+                icu_provider::DataError::custom("Cannot sparsify DFA")
+                    .with_display_context(&pattern)
+            })?;
+
+        Ok(Self {
+            dfa_bytes: dfa.to_bytes_native_endian().into(),
+            pattern: Some(pattern),
+        })
+    }
+
+    /// Returns the represented [`DFA`]
+    #[allow(clippy::unwrap_used)] // by invariant
+    pub fn deref(&'data self) -> DFA<&'data [u8]> {
+        // Safe due to struct invariant.
+        unsafe { DFA::from_bytes_unchecked(&self.dfa_bytes).unwrap().0 }
+    }
+}
+
+#[cfg(all(test, feature = "datagen"))]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_serde_dfa() {
+        use regex_automata::dfa::Automaton;
+
+        let matcher = SerdeDFA::new(Cow::Borrowed("abc")).unwrap();
+
+        assert!(matcher.deref().find_earliest_fwd(b"ab").unwrap().is_none());
+        assert!(matcher.deref().find_earliest_fwd(b"abc").unwrap().is_some());
+        assert!(matcher
+            .deref()
+            .find_earliest_fwd(b"abcde")
+            .unwrap()
+            .is_some());
+        assert!(matcher
+            .deref()
+            .find_earliest_fwd(b" abcde")
+            .unwrap()
+            .is_none());
+    }
+
+    #[derive(serde::Deserialize)]
+    struct OptionSerdeDFA<'data>(
+        #[serde(borrow, deserialize_with = "SerdeDFA::maybe_deserialize")] Option<SerdeDFA<'data>>,
+    );
+
+    #[test]
+    #[cfg(target_endian = "little")]
+    fn test_postcard_serialization() {
+        let matcher = SerdeDFA::new(Cow::Borrowed("abc*")).unwrap();
+
+        let mut bytes = postcard::to_stdvec(&matcher).unwrap();
+        assert_eq!(
+            postcard::from_bytes::<OptionSerdeDFA>(&bytes).unwrap().0,
+            Some(matcher)
+        );
+
+        // A corrupted byte leads to an error
+        bytes[17] ^= 255;
+        assert!(postcard::from_bytes::<OptionSerdeDFA>(&bytes).is_err());
+        bytes[17] ^= 255;
+
+        // An extra byte leads to an error
+        bytes.insert(123, 40);
+        assert!(postcard::from_bytes::<OptionSerdeDFA>(&bytes).is_err());
+        bytes.remove(123);
+
+        // Missing bytes lead to an error
+        assert!(postcard::from_bytes::<OptionSerdeDFA>(&bytes[0..bytes.len() - 5]).is_err());
+    }
+
+    #[test]
+    #[cfg(feature = "serde_human")]
+    fn test_json_serialization() {
+        let matcher = SerdeDFA::new(Cow::Borrowed("abc*")).unwrap();
+
+        let json = serde_json::to_string(&matcher).unwrap();
+        assert_eq!(
+            serde_json::from_str::<OptionSerdeDFA>(&json).unwrap().0,
+            Some(matcher)
+        );
+        assert!(serde_json::from_str::<OptionSerdeDFA>(".*[").is_err());
+    }
+
+    #[test]
+    #[ignore] // https://github.com/rust-lang/rust/issues/98906
+    fn databake() {
+        databake::test_bake!(
+            SerdeDFA,
+            const: unsafe { crate::provider::SerdeDFA::from_dfa_bytes_unchecked(if cfg!(target_endian = "little") {
+                &[1] // TODO: set this when activating the test
+            } else {
+                &[2] // TODO: set this when activating the test
+            })},
+            icu_list
+        );
+    }
+}
diff --git a/vendor/icu_list/src/string_matcher.rs b/vendor/icu_list/src/string_matcher.rs
deleted file mode 100644
index ba4833605..000000000
--- a/vendor/icu_list/src/string_matcher.rs
+++ /dev/null
@@ -1,213 +0,0 @@
-// This file is part of ICU4X. For terms of use, please see the file
-// called LICENSE at the top level of the ICU4X source tree
-// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
-
-use alloc::borrow::Cow;
-#[cfg(any(feature = "serde_human", feature = "datagen"))]
-use alloc::string::ToString;
-use icu_provider::{yoke, zerofrom};
-use regex_automata::dfa::sparse::DFA;
-use regex_automata::dfa::Automaton;
-
-/// A precompiled regex
-#[derive(Clone, Debug, yoke::Yokeable, zerofrom::ZeroFrom)]
-#[allow(clippy::exhaustive_structs)] // not a public API
-pub struct StringMatcher<'data> {
-    // Safety: These always represent a valid DFA (DFA::from_bytes(dfa_bytes).is_ok())
-    dfa_bytes: Cow<'data, [u8]>,
-    pattern: Option<Cow<'data, str>>,
-}
-
-impl PartialEq for StringMatcher<'_> {
-    fn eq(&self, other: &Self) -> bool {
-        self.dfa_bytes == other.dfa_bytes
-    }
-}
-
-#[cfg(feature = "datagen")]
-impl databake::Bake for StringMatcher<'_> {
-    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
-        env.insert("icu_list");
-        let bytes = (&&*self.dfa_bytes).bake(env);
-        // Safe because our own data is safe
-        databake::quote! {
-            unsafe { ::icu_list::provider::StringMatcher::from_dfa_bytes_unchecked(#bytes) }
-        }
-    }
-}
-
-#[cfg(feature = "datagen")]
-impl serde::Serialize for StringMatcher<'_> {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::ser::Serializer,
-    {
-        if serializer.is_human_readable() {
-            self.pattern
-                .as_ref()
-                .map(|pattern| pattern.serialize(serializer))
-                .unwrap_or_else(|| {
-                    use serde::ser::Error;
-                    Err(S::Error::custom(
-                        "cannot serialize a deserialized bincode StringMatcher to JSON",
-                    ))
-                })
-        } else {
-            self.dfa_bytes.serialize(serializer)
-        }
-    }
-}
-
-#[cfg(feature = "serde")]
-impl<'de: 'data, 'data> serde::Deserialize<'de> for StringMatcher<'data> {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::de::Deserializer<'de>,
-    {
-        use icu_provider::serde::borrow_de_utils::CowBytesWrap;
-
-        #[cfg(feature = "serde_human")]
-        if deserializer.is_human_readable() {
-            use serde::de::Error;
-            return StringMatcher::new(<&str>::deserialize(deserializer)?)
-                .map_err(|e| D::Error::custom(e.to_string()));
-        }
-
-        if cfg!(target_endian = "big") {
-            // TODO: Convert LE to BE. For now we just behave like the
-            // accept-nothing DFA on BE systems.
-            return Ok(StringMatcher {
-                dfa_bytes: Cow::Borrowed(&[]),
-                pattern: None,
-            });
-        }
-
-        let dfa_bytes = <CowBytesWrap<'de>>::deserialize(deserializer)?.0;
-
-        // Verify safety invariant
-        DFA::from_bytes(&dfa_bytes).map_err(|e| {
-            use serde::de::Error;
-            D::Error::custom(alloc::format!("Invalid DFA bytes: {}", e))
-        })?;
-
-        Ok(StringMatcher {
-            dfa_bytes,
-            pattern: None,
-        })
-    }
-}
-
-impl<'data> StringMatcher<'data> {
-    /// Creates a `StringMatcher` from a serialized DFA. Used internally by databake.
-    ///
-    /// # Safety
-    ///
-    /// `dfa_bytes` has to be a valid DFA (regex_automata::dfa::sparse::DFA::from_bytes(dfa_bytes).is_ok())
-    pub const unsafe fn from_dfa_bytes_unchecked(dfa_bytes: &'data [u8]) -> Self {
-        Self {
-            dfa_bytes: Cow::Borrowed(dfa_bytes),
-            pattern: None,
-        }
-    }
-
-    /// Creates a `StringMatcher` from regex.
-    #[cfg(any(feature = "datagen", feature = "serde_human",))]
-    pub fn new(pattern: &str) -> Result<Self, icu_provider::DataError> {
-        use regex_automata::{
-            dfa::dense::{Builder, Config},
-            SyntaxConfig,
-        };
-
-        let mut builder = Builder::new();
-        let dfa = builder
-            .syntax(SyntaxConfig::new().case_insensitive(true))
-            .configure(Config::new().anchored(true).minimize(true))
-            .build(pattern)
-            .map_err(|_| {
-                icu_provider::DataError::custom("Cannot build DFA").with_display_context(&pattern)
-            })?
-            .to_sparse()
-            .map_err(|_| {
-                icu_provider::DataError::custom("Cannot sparsify DFA")
-                    .with_display_context(&pattern)
-            })?;
-
-        Ok(Self {
-            dfa_bytes: dfa.to_bytes_little_endian().into(),
-            pattern: Some(pattern.to_string().into()),
-        })
-    }
-
-    #[allow(clippy::unwrap_used)] // by invariant
-    pub(crate) fn test(&self, string: &str) -> bool {
-        cfg!(target_endian = "little")
-            && matches!(
-                // Safe due to struct invariant.
-                unsafe { DFA::from_bytes_unchecked(&self.dfa_bytes).unwrap().0 }
-                    .find_earliest_fwd(string.as_bytes()),
-                Ok(Some(_))
-            )
-    }
-}
-
-#[cfg(all(test, feature = "datagen"))]
-mod test {
-    use super::*;
-
-    #[test]
-    fn test_string_matcher() {
-        let matcher = StringMatcher::new("abc.*").unwrap();
-        assert!(!matcher.test("ab"));
-        assert!(matcher.test("abc"));
-        assert!(matcher.test("abcde"));
-    }
-
-    #[test]
-    fn test_postcard_serialization() {
-        let matcher = StringMatcher::new("abc*").unwrap();
-
-        let mut bytes = postcard::to_stdvec(&matcher).unwrap();
-        assert_eq!(
-            postcard::from_bytes::<StringMatcher>(&bytes).unwrap(),
-            matcher
-        );
-
-        // A corrupted byte leads to an error
-        bytes[17] ^= 255;
-        assert!(postcard::from_bytes::<StringMatcher>(&bytes).is_err());
-        bytes[17] ^= 255;
-
-        // An extra byte leads to an error
-        bytes.insert(123, 40);
-        assert!(postcard::from_bytes::<StringMatcher>(&bytes).is_err());
-        bytes.remove(123);
-
-        // Missing bytes lead to an error
-        assert!(postcard::from_bytes::<StringMatcher>(&bytes[0..bytes.len() - 5]).is_err());
-    }
-
-    #[test]
-    #[cfg(feature = "serde_human")]
-    fn test_json_serialization() {
-        let matcher = StringMatcher::new("abc*").unwrap();
-
-        let json = serde_json::to_string(&matcher).unwrap();
-        assert_eq!(
-            serde_json::from_str::<StringMatcher>(&json).unwrap(),
-            matcher
-        );
-        assert!(serde_json::from_str::<StringMatcher>(".*[").is_err());
-    }
-
-    #[test]
-    #[ignore] // https://github.com/rust-lang/rust/issues/98906
-    fn databake() {
-        databake::test_bake!(
-            StringMatcher,
-            const: unsafe {
-                crate::provider::StringMatcher::from_dfa_bytes_unchecked(&[49u8, 50u8, 51u8, ])
-            },
-            icu_list
-        );
-    }
-}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:19:50 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:19:50 +0000
commit	2e00214b3efbdfeefaa0fe9e8b8fd519de7adc35 (patch)
tree	d325add32978dbdc1db975a438b3a77d571b1ab8 /vendor/icu_list/src
parent	Releasing progress-linux version 1.68.2+dfsg1-1~progress7.99u1. (diff)
download	rustc-2e00214b3efbdfeefaa0fe9e8b8fd519de7adc35.tar.xz rustc-2e00214b3efbdfeefaa0fe9e8b8fd519de7adc35.zip