1 files changed, 696 insertions, 0 deletions
diff --git a/third_party/rust/regex-automata/src/util/prefilter/mod.rs b/third_party/rust/regex-automata/src/util/prefilter/mod.rs
new file mode 100644
index 0000000000..51fc922337
--- /dev/null
+++ b/third_party/rust/regex-automata/src/util/prefilter/mod.rs
@@ -0,0 +1,696 @@
+/*!
+Defines a prefilter for accelerating regex searches.
+
+A prefilter can be created by building a [`Prefilter`] value.
+
+A prefilter represents one of the most important optimizations available for
+accelerating regex searches. The idea of a prefilter is to very quickly find
+candidate locations in a haystack where a regex _could_ match. Once a candidate
+is found, it is then intended for the regex engine to run at that position to
+determine whether the candidate is a match or a false positive.
+
+In the aforementioned description of the prefilter optimization also lay its
+demise. Namely, if a prefilter has a high false positive rate and it produces
+lots of candidates, then a prefilter can overall make a regex search slower.
+It can run more slowly because more time is spent ping-ponging between the
+prefilter search and the regex engine attempting to confirm each candidate as
+a match. This ping-ponging has overhead that adds up, and is exacerbated by
+a high false positive rate.
+
+Nevertheless, the optimization is still generally worth performing in most
+cases. Particularly given just how much throughput can be improved. (It is not
+uncommon for prefilter optimizations to improve throughput by one or two orders
+of magnitude.)
+
+Typically a prefilter is used to find occurrences of literal prefixes from a
+regex pattern, but this isn't required. A prefilter can be used to look for
+suffixes or even inner literals.
+
+Note that as of now, prefilters throw away information about which pattern
+each literal comes from. In other words, when a prefilter finds a match,
+there's no way to know which pattern (or patterns) it came from. Therefore,
+in order to confirm a match, you'll have to check all of the patterns by
+running the full regex engine.
+*/
+
+mod aho_corasick;
+mod byteset;
+mod memchr;
+mod memmem;
+mod teddy;
+
+use core::{
+    borrow::Borrow,
+    fmt::Debug,
+    panic::{RefUnwindSafe, UnwindSafe},
+};
+
+#[cfg(feature = "alloc")]
+use alloc::sync::Arc;
+
+#[cfg(feature = "syntax")]
+use regex_syntax::hir::{literal, Hir};
+
+use crate::util::search::{MatchKind, Span};
+
+pub(crate) use crate::util::prefilter::{
+    aho_corasick::AhoCorasick,
+    byteset::ByteSet,
+    memchr::{Memchr, Memchr2, Memchr3},
+    memmem::Memmem,
+    teddy::Teddy,
+};
+
+/// A prefilter for accelerating regex searches.
+///
+/// If you already have your literals that you want to search with,
+/// then the vanilla [`Prefilter::new`] constructor is for you. But
+/// if you have an [`Hir`] value from the `regex-syntax` crate, then
+/// [`Prefilter::from_hir_prefix`] might be more convenient. Namely, it uses
+/// the [`regex-syntax::hir::literal`](regex_syntax::hir::literal) module to
+/// extract literal prefixes for you, optimize them and then select and build a
+/// prefilter matcher.
+///
+/// A prefilter must have **zero false negatives**. However, by its very
+/// nature, it may produce false positives. That is, a prefilter will never
+/// skip over a position in the haystack that corresponds to a match of the
+/// original regex pattern, but it *may* produce a match for a position
+/// in the haystack that does *not* correspond to a match of the original
+/// regex pattern. If you use either the [`Prefilter::from_hir_prefix`] or
+/// [`Prefilter::from_hirs_prefix`] constructors, then this guarantee is
+/// upheld for you automatically. This guarantee is not preserved if you use
+/// [`Prefilter::new`] though, since it is up to the caller to provide correct
+/// literal strings with respect to the original regex pattern.
+///
+/// # Cloning
+///
+/// It is an API guarantee that cloning a prefilter is cheap. That is, cloning
+/// it will not duplicate whatever heap memory is used to represent the
+/// underlying matcher.
+///
+/// # Example
+///
+/// This example shows how to attach a `Prefilter` to the
+/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) in order to accelerate
+/// searches.
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     util::prefilter::Prefilter,
+///     Match, MatchKind,
+/// };
+///
+/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Bruce "])
+///     .expect("a prefilter");
+/// let re = PikeVM::builder()
+///     .configure(PikeVM::config().prefilter(Some(pre)))
+///     .build(r"Bruce \w+")?;
+/// let mut cache = re.create_cache();
+/// assert_eq!(
+///     Some(Match::must(0, 6..23)),
+///     re.find(&mut cache, "Hello Bruce Springsteen!"),
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// But note that if you get your prefilter incorrect, it could lead to an
+/// incorrect result!
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::pikevm::PikeVM,
+///     util::prefilter::Prefilter,
+///     Match, MatchKind,
+/// };
+///
+/// // This prefilter is wrong!
+/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Patti "])
+///     .expect("a prefilter");
+/// let re = PikeVM::builder()
+///     .configure(PikeVM::config().prefilter(Some(pre)))
+///     .build(r"Bruce \w+")?;
+/// let mut cache = re.create_cache();
+/// // We find no match even though the regex does match.
+/// assert_eq!(
+///     None,
+///     re.find(&mut cache, "Hello Bruce Springsteen!"),
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Prefilter {
+    #[cfg(not(feature = "alloc"))]
+    _unused: (),
+    #[cfg(feature = "alloc")]
+    pre: Arc<dyn PrefilterI>,
+    #[cfg(feature = "alloc")]
+    is_fast: bool,
+}
+
+impl Prefilter {
+    /// Create a new prefilter from a sequence of needles and a corresponding
+    /// match semantics.
+    ///
+    /// This may return `None` for a variety of reasons, for example, if
+    /// a suitable prefilter could not be constructed. That might occur
+    /// if they are unavailable (e.g., the `perf-literal-substring` and
+    /// `perf-literal-multisubstring` features aren't enabled), or it might
+    /// occur because of heuristics or other artifacts of how the prefilter
+    /// works.
+    ///
+    /// Note that if you have an [`Hir`] expression, it may be more convenient
+    /// to use [`Prefilter::from_hir_prefix`]. It will automatically handle the
+    /// task of extracting prefix literals for you.
+    ///
+    /// # Example
+    ///
+    /// This example shows how match semantics can impact the matching
+    /// algorithm used by the prefilter. For this reason, it is important to
+    /// ensure that the match semantics given here are consistent with the
+    /// match semantics intended for the regular expression that the literals
+    /// were extracted from.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hay = "Hello samwise";
+    ///
+    /// // With leftmost-first, we find 'samwise' here because it comes
+    /// // before 'sam' in the sequence we give it..
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["samwise", "sam"])
+    ///     .expect("a prefilter");
+    /// assert_eq!(
+    ///     Some(Span::from(6..13)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    /// // Still with leftmost-first but with the literals reverse, now 'sam'
+    /// // will match instead!
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["sam", "samwise"])
+    ///     .expect("a prefilter");
+    /// assert_eq!(
+    ///     Some(Span::from(6..9)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new<B: AsRef<[u8]>>(
+        kind: MatchKind,
+        needles: &[B],
+    ) -> Option<Prefilter> {
+        Choice::new(kind, needles).and_then(Prefilter::from_choice)
+    }
+
+    /// This turns a prefilter selection into a `Prefilter`. That is, in turns
+    /// the enum given into a trait object.
+    fn from_choice(choice: Choice) -> Option<Prefilter> {
+        #[cfg(not(feature = "alloc"))]
+        {
+            None
+        }
+        #[cfg(feature = "alloc")]
+        {
+            let pre: Arc<dyn PrefilterI> = match choice {
+                Choice::Memchr(p) => Arc::new(p),
+                Choice::Memchr2(p) => Arc::new(p),
+                Choice::Memchr3(p) => Arc::new(p),
+                Choice::Memmem(p) => Arc::new(p),
+                Choice::Teddy(p) => Arc::new(p),
+                Choice::ByteSet(p) => Arc::new(p),
+                Choice::AhoCorasick(p) => Arc::new(p),
+            };
+            let is_fast = pre.is_fast();
+            Some(Prefilter { pre, is_fast })
+        }
+    }
+
+    /// This attempts to extract prefixes from the given `Hir` expression for
+    /// the given match semantics, and if possible, builds a prefilter for
+    /// them.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from an [`Hir`]
+    /// expression, and use to find an occurrence of a prefix from the regex
+    /// pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hir = syntax::parse(r"(Bruce|Patti) \w+")?;
+    /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Patti Scialfa!";
+    /// assert_eq!(
+    ///     Some(Span::from(6..12)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn from_hir_prefix(kind: MatchKind, hir: &Hir) -> Option<Prefilter> {
+        Prefilter::from_hirs_prefix(kind, &[hir])
+    }
+
+    /// This attempts to extract prefixes from the given `Hir` expressions for
+    /// the given match semantics, and if possible, builds a prefilter for
+    /// them.
+    ///
+    /// Note that as of now, prefilters throw away information about which
+    /// pattern each literal comes from. In other words, when a prefilter finds
+    /// a match, there's no way to know which pattern (or patterns) it came
+    /// from. Therefore, in order to confirm a match, you'll have to check all
+    /// of the patterns by running the full regex engine.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from multiple
+    /// `Hir` expressions expression, and use it to find an occurrence of a
+    /// prefix from the regex patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hirs = syntax::parse_many(&[
+    ///     r"(Bruce|Patti) \w+",
+    ///     r"Mrs?\. Doubtfire",
+    /// ])?;
+    /// let pre = Prefilter::from_hirs_prefix(MatchKind::LeftmostFirst, &hirs)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Mrs. Doubtfire";
+    /// assert_eq!(
+    ///     Some(Span::from(6..20)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn from_hirs_prefix<H: Borrow<Hir>>(
+        kind: MatchKind,
+        hirs: &[H],
+    ) -> Option<Prefilter> {
+        prefixes(kind, hirs)
+            .literals()
+            .and_then(|lits| Prefilter::new(kind, lits))
+    }
+
+    /// Run this prefilter on `haystack[span.start..end]` and return a matching
+    /// span if one exists.
+    ///
+    /// The span returned is guaranteed to have a start position greater than
+    /// or equal to the one given, and an end position less than or equal to
+    /// the one given.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from an [`Hir`]
+    /// expression, and use it to find an occurrence of a prefix from the regex
+    /// pattern.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hir = syntax::parse(r"Bruce \w+")?;
+    /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Bruce Springsteen!";
+    /// assert_eq!(
+    ///     Some(Span::from(6..12)),
+    ///     pre.find(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.pre.find(haystack, span)
+        }
+    }
+
+    /// Returns the span of a prefix of `haystack[span.start..span.end]` if
+    /// the prefilter matches.
+    ///
+    /// The span returned is guaranteed to have a start position equivalent to
+    /// the one given, and an end position less than or equal to the one given.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to build a prefilter directly from an [`Hir`]
+    /// expression, and use it to find an occurrence of a prefix from the regex
+    /// pattern that begins at the start of a haystack only.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     util::{prefilter::Prefilter, syntax},
+    ///     MatchKind, Span,
+    /// };
+    ///
+    /// let hir = syntax::parse(r"Bruce \w+")?;
+    /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir)
+    ///     .expect("a prefilter");
+    /// let hay = "Hello Bruce Springsteen!";
+    /// // Nothing is found here because 'Bruce' does
+    /// // not occur at the beginning of our search.
+    /// assert_eq!(
+    ///     None,
+    ///     pre.prefix(hay.as_bytes(), Span::from(0..hay.len())),
+    /// );
+    /// // But if we change where we start the search
+    /// // to begin where 'Bruce ' begins, then a
+    /// // match will be found.
+    /// assert_eq!(
+    ///     Some(Span::from(6..12)),
+    ///     pre.prefix(hay.as_bytes(), Span::from(6..hay.len())),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.pre.prefix(haystack, span)
+        }
+    }
+
+    /// Returns the heap memory, in bytes, used by the underlying prefilter.
+    #[inline]
+    pub fn memory_usage(&self) -> usize {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.pre.memory_usage()
+        }
+    }
+
+    /// Implementations might return true here if they believe themselves to
+    /// be "fast." The concept of "fast" is deliberately left vague, but in
+    /// practice this usually corresponds to whether it's believed that SIMD
+    /// will be used.
+    ///
+    /// Why do we care about this? Well, some prefilter tricks tend to come
+    /// with their own bits of overhead, and so might only make sense if we
+    /// know that a scan will be *much* faster than the regex engine itself.
+    /// Otherwise, the trick may not be worth doing. Whether something is
+    /// "much" faster than the regex engine generally boils down to whether
+    /// SIMD is used. (But not always. Even a SIMD matcher with a high false
+    /// positive rate can become quite slow.)
+    ///
+    /// Even if this returns true, it is still possible for the prefilter to
+    /// be "slow." Remember, prefilters are just heuristics. We can't really
+    /// *know* a prefilter will be fast without actually trying the prefilter.
+    /// (Which of course we cannot afford to do.)
+    #[inline]
+    pub(crate) fn is_fast(&self) -> bool {
+        #[cfg(not(feature = "alloc"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "alloc")]
+        {
+            self.is_fast
+        }
+    }
+}
+
+/// A trait for abstracting over prefilters. Basically, a prefilter is
+/// something that do an unanchored *and* an anchored search in a haystack
+/// within a given span.
+///
+/// This exists pretty much only so that we can use prefilters as a trait
+/// object (which is what `Prefilter` is). If we ever move off of trait objects
+/// and to an enum, then it's likely this trait could be removed.
+pub(crate) trait PrefilterI:
+    Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
+{
+    /// Run this prefilter on `haystack[span.start..end]` and return a matching
+    /// span if one exists.
+    ///
+    /// The span returned is guaranteed to have a start position greater than
+    /// or equal to the one given, and an end position less than or equal to
+    /// the one given.
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span>;
+
+    /// Returns the span of a prefix of `haystack[span.start..span.end]` if
+    /// the prefilter matches.
+    ///
+    /// The span returned is guaranteed to have a start position equivalent to
+    /// the one given, and an end position less than or equal to the one given.
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span>;
+
+    /// Returns the heap memory, in bytes, used by the underlying prefilter.
+    fn memory_usage(&self) -> usize;
+
+    /// Implementations might return true here if they believe themselves to
+    /// be "fast." See [`Prefilter::is_fast`] for more details.
+    fn is_fast(&self) -> bool;
+}
+
+#[cfg(feature = "alloc")]
+impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        (&**self).find(haystack, span)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        (&**self).prefix(haystack, span)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn memory_usage(&self) -> usize {
+        (&**self).memory_usage()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_fast(&self) -> bool {
+        (&**self).is_fast()
+    }
+}
+
+/// A type that encapsulates the selection of a prefilter algorithm from a
+/// sequence of needles.
+///
+/// The existence of this type is a little tricky, because we don't (currently)
+/// use it for performing a search. Instead, we really only consume it by
+/// converting the underlying prefilter into a trait object, whether that be
+/// `dyn PrefilterI` or `dyn Strategy` (for the meta regex engine). In order
+/// to avoid re-copying the prefilter selection logic, we isolate it here, and
+/// then force anything downstream that wants to convert it to a trait object
+/// to do trivial case analysis on it.
+///
+/// One wonders whether we *should* use an enum instead of a trait object.
+/// At time of writing, I chose trait objects based on instinct because 1) I
+/// knew I wasn't going to inline anything and 2) there would potentially be
+/// many different choices. However, as of time of writing, I haven't actually
+/// compared the trait object approach to the enum approach. That probably
+/// should be litigated, but I ran out of steam.
+///
+/// Note that if the `alloc` feature is disabled, then values of this type
+/// are (and should) never be constructed. Also, in practice, for any of the
+/// prefilters to be selected, you'll need at least one of the `perf-literal-*`
+/// features enabled.
+#[derive(Clone, Debug)]
+pub(crate) enum Choice {
+    Memchr(Memchr),
+    Memchr2(Memchr2),
+    Memchr3(Memchr3),
+    Memmem(Memmem),
+    Teddy(Teddy),
+    ByteSet(ByteSet),
+    AhoCorasick(AhoCorasick),
+}
+
+impl Choice {
+    /// Select what is believed to be the best prefilter algorithm for the
+    /// match semantics and sequence of needles given.
+    ///
+    /// This selection algorithm uses the needles as given without any
+    /// modification. For example, if `[bar]` is given, then this doesn't
+    /// try to select `memchr` for `b`. Instead, it would select `memmem`
+    /// for `bar`. If callers would want `memchr` selected for `[bar]`, then
+    /// callers should massages the literals themselves. That is, callers are
+    /// responsible for heuristics surrounding which sequence of literals is
+    /// best.
+    ///
+    /// What this selection algorithm does is attempt to use the fastest
+    /// prefilter that works for the literals given. So if `[a, b]`, is given,
+    /// then `memchr2` is selected.
+    ///
+    /// Of course, which prefilter is selected is also subject to what
+    /// is available. For example, if `alloc` isn't enabled, then
+    /// that limits which prefilters can be selected. Similarly, if
+    /// `perf-literal-substring` isn't enabled, then nothing from the `memchr`
+    /// crate can be returned.
+    pub(crate) fn new<B: AsRef<[u8]>>(
+        kind: MatchKind,
+        needles: &[B],
+    ) -> Option<Choice> {
+        // An empty set means the regex matches nothing, so no sense in
+        // building a prefilter.
+        if needles.len() == 0 {
+            debug!("prefilter building failed: found empty set of literals");
+            return None;
+        }
+        // If the regex can match the empty string, then the prefilter
+        // will by definition match at every position. This is obviously
+        // completely ineffective.
+        if needles.iter().any(|n| n.as_ref().is_empty()) {
+            debug!("prefilter building failed: literals match empty string");
+            return None;
+        }
+        // BREADCRUMBS: Perhaps the literal optimizer should special case
+        // sequences of length two or three if the leading bytes of each are
+        // "rare"? Or perhaps, if there are two or three total possible leading
+        // bytes, regardless of the number of literals, and all are rare...
+        // Then well, perhaps we should use memchr2 or memchr3 in those cases?
+        if let Some(pre) = Memchr::new(kind, needles) {
+            debug!("prefilter built: memchr");
+            return Some(Choice::Memchr(pre));
+        }
+        if let Some(pre) = Memchr2::new(kind, needles) {
+            debug!("prefilter built: memchr2");
+            return Some(Choice::Memchr2(pre));
+        }
+        if let Some(pre) = Memchr3::new(kind, needles) {
+            debug!("prefilter built: memchr3");
+            return Some(Choice::Memchr3(pre));
+        }
+        if let Some(pre) = Memmem::new(kind, needles) {
+            debug!("prefilter built: memmem");
+            return Some(Choice::Memmem(pre));
+        }
+        if let Some(pre) = Teddy::new(kind, needles) {
+            debug!("prefilter built: teddy");
+            return Some(Choice::Teddy(pre));
+        }
+        if let Some(pre) = ByteSet::new(kind, needles) {
+            debug!("prefilter built: byteset");
+            return Some(Choice::ByteSet(pre));
+        }
+        if let Some(pre) = AhoCorasick::new(kind, needles) {
+            debug!("prefilter built: aho-corasick");
+            return Some(Choice::AhoCorasick(pre));
+        }
+        debug!("prefilter building failed: no strategy could be found");
+        None
+    }
+}
+
+/// Extracts all of the prefix literals from the given HIR expressions into a
+/// single `Seq`. The literals in the sequence are ordered with respect to the
+/// order of the given HIR expressions and consistent with the match semantics
+/// given.
+///
+/// The sequence returned is "optimized." That is, they may be shrunk or even
+/// truncated according to heuristics with the intent of making them more
+/// useful as a prefilter. (Which translates to both using faster algorithms
+/// and minimizing the false positive rate.)
+///
+/// Note that this erases any connection between the literals and which pattern
+/// (or patterns) they came from.
+///
+/// The match kind given must correspond to the match semantics of the regex
+/// that is represented by the HIRs given. The match semantics may change the
+/// literal sequence returned.
+#[cfg(feature = "syntax")]
+pub(crate) fn prefixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
+where
+    H: core::borrow::Borrow<Hir>,
+{
+    let mut extractor = literal::Extractor::new();
+    extractor.kind(literal::ExtractKind::Prefix);
+
+    let mut prefixes = literal::Seq::empty();
+    for hir in hirs {
+        prefixes.union(&mut extractor.extract(hir.borrow()));
+    }
+    debug!(
+        "prefixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
+        prefixes.len(),
+        prefixes.is_exact(),
+        prefixes
+    );
+    match kind {
+        MatchKind::All => {
+            prefixes.sort();
+            prefixes.dedup();
+        }
+        MatchKind::LeftmostFirst => {
+            prefixes.optimize_for_prefix_by_preference();
+        }
+    }
+    debug!(
+        "prefixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
+        prefixes.len(),
+        prefixes.is_exact(),
+        prefixes
+    );
+    prefixes
+}
+
+/// Like `prefixes`, but for all suffixes of all matches for the given HIRs.
+#[cfg(feature = "syntax")]
+pub(crate) fn suffixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq
+where
+    H: core::borrow::Borrow<Hir>,
+{
+    let mut extractor = literal::Extractor::new();
+    extractor.kind(literal::ExtractKind::Suffix);
+
+    let mut suffixes = literal::Seq::empty();
+    for hir in hirs {
+        suffixes.union(&mut extractor.extract(hir.borrow()));
+    }
+    debug!(
+        "suffixes (len={:?}, exact={:?}) extracted before optimization: {:?}",
+        suffixes.len(),
+        suffixes.is_exact(),
+        suffixes
+    );
+    match kind {
+        MatchKind::All => {
+            suffixes.sort();
+            suffixes.dedup();
+        }
+        MatchKind::LeftmostFirst => {
+            suffixes.optimize_for_suffix_by_preference();
+        }
+    }
+    debug!(
+        "suffixes (len={:?}, exact={:?}) extracted after optimization: {:?}",
+        suffixes.len(),
+        suffixes.is_exact(),
+        suffixes
+    );
+    suffixes
+}