Merging upstream version 1.74.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
commit: c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree: 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/regex-automata/src/dfa/mod.rs
parent: Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download: rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
1 files changed, 66 insertions, 69 deletions
diff --git a/vendor/regex-automata/src/dfa/mod.rs b/vendor/regex-automata/src/dfa/mod.rs
index 6f9fe605e..4bb870435 100644
--- a/vendor/regex-automata/src/dfa/mod.rs
+++ b/vendor/regex-automata/src/dfa/mod.rs
@@ -1,5 +1,5 @@
 /*!
-A module for building and searching with determinstic finite automata (DFAs).
+A module for building and searching with deterministic finite automata (DFAs).
 
 Like other modules in this crate, DFAs support a rich regex syntax with Unicode
 features. DFAs also have extensive options for configuring the best space vs
@@ -26,20 +26,25 @@ DFAs implement. (A `regex::Regex` is generic over this trait.)
 [`dense::DFA::to_bytes_little_endian`]) and cheap deserialization (e.g.,
 [`dense::DFA::from_bytes`]).
 
+There is also a [`onepass`] module that provides a [one-pass
+DFA](onepass::DFA). The unique advantage of this DFA is that, for the class
+of regexes it can be built with, it supports reporting the spans of matching
+capturing groups. It is the only DFA in this crate capable of such a thing.
+
 # Example: basic regex searching
 
 This example shows how to compile a regex using the default configuration
 and then use it to find matches in a byte string:
 
 ```
-use regex_automata::{MultiMatch, dfa::regex::Regex};
+use regex_automata::{Match, dfa::regex::Regex};
 
 let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
 let text = b"2018-12-24 2016-10-08";
-let matches: Vec<MultiMatch> = re.find_leftmost_iter(text).collect();
+let matches: Vec<Match> = re.find_iter(text).collect();
 assert_eq!(matches, vec![
-    MultiMatch::must(0, 0, 10),
-    MultiMatch::must(0, 11, 21),
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
 ]);
 # Ok::<(), Box<dyn std::error::Error>>(())
 ```
@@ -51,36 +56,15 @@ simultaneously. You can use this support with standard leftmost-first style
 searching to find non-overlapping matches:
 
 ```
-use regex_automata::{MultiMatch, dfa::regex::Regex};
+# if cfg!(miri) { return Ok(()); } // miri takes too long
+use regex_automata::{Match, dfa::regex::Regex};
 
 let re = Regex::new_many(&[r"\w+", r"\S+"])?;
 let text = b"@foo bar";
-let matches: Vec<MultiMatch> = re.find_leftmost_iter(text).collect();
+let matches: Vec<Match> = re.find_iter(text).collect();
 assert_eq!(matches, vec![
-    MultiMatch::must(1, 0, 4),
-    MultiMatch::must(0, 5, 8),
-]);
-# Ok::<(), Box<dyn std::error::Error>>(())
-```
-
-Or use overlapping style searches to find all possible occurrences:
-
-```
-use regex_automata::{MatchKind, MultiMatch, dfa::{dense, regex::Regex}};
-
-// N.B. For overlapping searches, we need the underlying DFA to report all
-// possible matches.
-let re = Regex::builder()
-    .dense(dense::Config::new().match_kind(MatchKind::All))
-    .build_many(&[r"\w{3}", r"\S{3}"])?;
-let text = b"@foo bar";
-let matches: Vec<MultiMatch> = re.find_overlapping_iter(text).collect();
-assert_eq!(matches, vec![
-    MultiMatch::must(1, 0, 3),
-    MultiMatch::must(0, 1, 4),
-    MultiMatch::must(1, 1, 4),
-    MultiMatch::must(0, 5, 8),
-    MultiMatch::must(1, 5, 8),
+    Match::must(1, 0..4),
+    Match::must(0, 5..8),
 ]);
 # Ok::<(), Box<dyn std::error::Error>>(())
 ```
@@ -96,14 +80,14 @@ Using sparse DFAs is as easy as using `Regex::new_sparse` instead of
 `Regex::new`:
 
 ```
-use regex_automata::{MultiMatch, dfa::regex::Regex};
+use regex_automata::{Match, dfa::regex::Regex};
 
 let re = Regex::new_sparse(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
 let text = b"2018-12-24 2016-10-08";
-let matches: Vec<MultiMatch> = re.find_leftmost_iter(text).collect();
+let matches: Vec<Match> = re.find_iter(text).collect();
 assert_eq!(matches, vec![
-    MultiMatch::must(0, 0, 10),
-    MultiMatch::must(0, 11, 21),
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
 ]);
 # Ok::<(), Box<dyn std::error::Error>>(())
 ```
@@ -112,7 +96,7 @@ If you already have dense DFAs for some reason, they can be converted to sparse
 DFAs and used to build a new `Regex`. For example:
 
 ```
-use regex_automata::{MultiMatch, dfa::regex::Regex};
+use regex_automata::{Match, dfa::regex::Regex};
 
 let dense_re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
 let sparse_re = Regex::builder().build_from_dfas(
@@ -120,10 +104,10 @@ let sparse_re = Regex::builder().build_from_dfas(
     dense_re.reverse().to_sparse()?,
 );
 let text = b"2018-12-24 2016-10-08";
-let matches: Vec<MultiMatch> = sparse_re.find_leftmost_iter(text).collect();
+let matches: Vec<Match> = sparse_re.find_iter(text).collect();
 assert_eq!(matches, vec![
-    MultiMatch::must(0, 0, 10),
-    MultiMatch::must(0, 11, 21),
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
 ]);
 # Ok::<(), Box<dyn std::error::Error>>(())
 ```
@@ -136,7 +120,7 @@ bit contrived, this same technique can be used in your program to
 deserialize a DFA at start up time or by memory mapping a file.
 
 ```
-use regex_automata::{MultiMatch, dfa::{dense, regex::Regex}};
+use regex_automata::{Match, dfa::{dense, regex::Regex}};
 
 let re1 = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
 // serialize both the forward and reverse DFAs, see note below
@@ -150,10 +134,10 @@ let re2 = Regex::builder().build_from_dfas(fwd, rev);
 
 // we can use it like normal
 let text = b"2018-12-24 2016-10-08";
-let matches: Vec<MultiMatch> = re2.find_leftmost_iter(text).collect();
+let matches: Vec<Match> = re2.find_iter(text).collect();
 assert_eq!(matches, vec![
-    MultiMatch::must(0, 0, 10),
-    MultiMatch::must(0, 11, 21),
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
 ]);
 # Ok::<(), Box<dyn std::error::Error>>(())
 ```
@@ -183,7 +167,7 @@ valid DFA.
 The same process can be achieved with sparse DFAs as well:
 
 ```
-use regex_automata::{MultiMatch, dfa::{sparse, regex::Regex}};
+use regex_automata::{Match, dfa::{sparse, regex::Regex}};
 
 let re1 = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
 // serialize both
@@ -197,17 +181,17 @@ let re2 = Regex::builder().build_from_dfas(fwd, rev);
 
 // we can use it like normal
 let text = b"2018-12-24 2016-10-08";
-let matches: Vec<MultiMatch> = re2.find_leftmost_iter(text).collect();
+let matches: Vec<Match> = re2.find_iter(text).collect();
 assert_eq!(matches, vec![
-    MultiMatch::must(0, 0, 10),
-    MultiMatch::must(0, 11, 21),
+    Match::must(0, 0..10),
+    Match::must(0, 11..21),
 ]);
 # Ok::<(), Box<dyn std::error::Error>>(())
 ```
 
 Note that unlike dense DFAs, sparse DFAs have no alignment requirements.
 Conversely, dense DFAs must be aligned to the same alignment as a
-[`StateID`](crate::util::id::StateID).
+[`StateID`](crate::util::primitives::StateID).
 
 # Support for `no_std` and `alloc`-only
 
@@ -232,8 +216,8 @@ you would any regex.
 Deserialization can happen anywhere. For example, with bytes embedded into a
 binary or with a file memory mapped at runtime.
 
-TODO: Include link to `regex-cli` here pointing out how to generate Rust code
-for deserializing DFAs.
+The `regex-cli` command (found in the same repository as this crate) can be
+used to serialize DFAs to files and generate Rust code to read them.
 
 # Syntax
 
@@ -283,7 +267,7 @@ the regexes in this module are almost universally slow to compile, especially
 when they contain large Unicode character classes. For example, on my system,
 compiling `\w{50}` takes about 1 second and almost 15MB of memory! (Compiling
 a sparse regex takes about the same time but only uses about 1.2MB of
-memory.) Conversly, compiling the same regex without Unicode support, e.g.,
+memory.) Conversely, compiling the same regex without Unicode support, e.g.,
 `(?-u)\w{50}`, takes under 1 millisecond and about 15KB of memory. For this
 reason, you should only use Unicode character classes if you absolutely need
 them! (They are enabled by default though.)
@@ -299,10 +283,10 @@ optimizations means that searches may run much slower than what you're
 accustomed to, although, it does provide more predictable and consistent
 performance.
 * There is no `&str` API like in the regex crate. In this module, all APIs
-operate on `&[u8]`. By default, match indices are guaranteed to fall on UTF-8
-boundaries, unless any of [`SyntaxConfig::utf8`](crate::SyntaxConfig::utf8),
-[`nfa::thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) or
-[`regex::Config::utf8`] are disabled.
+operate on `&[u8]`. By default, match indices are
+guaranteed to fall on UTF-8 boundaries, unless either of
+[`syntax::Config::utf8`](crate::util::syntax::Config::utf8) or
+[`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) is disabled.
 
 With some of the downsides out of the way, here are some positive differences:
 
@@ -334,9 +318,11 @@ via [`dense::Config::minimize`], but it can increase compilation times
 dramatically.
 */
 
-pub use crate::dfa::automaton::{Automaton, OverlappingState};
-#[cfg(feature = "alloc")]
-pub use crate::dfa::error::Error;
+#[cfg(feature = "dfa-search")]
+pub use crate::dfa::{
+    automaton::{Automaton, OverlappingState},
+    start::StartKind,
+};
 
 /// This is an alias for a state ID of zero. It has special significance
 /// because it always corresponds to the first state in a DFA, and the first
@@ -344,20 +330,31 @@ pub use crate::dfa::error::Error;
 /// of its transitions set to itself. Moreover, the dead state is used as a
 /// sentinel for various things. e.g., In search, reaching a dead state means
 /// that the search must stop.
-const DEAD: crate::util::id::StateID = crate::util::id::StateID::ZERO;
+const DEAD: crate::util::primitives::StateID =
+    crate::util::primitives::StateID::ZERO;
 
-mod accel;
-mod automaton;
+#[cfg(feature = "dfa-search")]
 pub mod dense;
-#[cfg(feature = "alloc")]
+#[cfg(feature = "dfa-onepass")]
+pub mod onepass;
+#[cfg(feature = "dfa-search")]
+pub mod regex;
+#[cfg(feature = "dfa-search")]
+pub mod sparse;
+
+#[cfg(feature = "dfa-search")]
+pub(crate) mod accel;
+#[cfg(feature = "dfa-search")]
+mod automaton;
+#[cfg(feature = "dfa-build")]
 mod determinize;
-#[cfg(feature = "alloc")]
-pub(crate) mod error;
-#[cfg(feature = "alloc")]
+#[cfg(feature = "dfa-build")]
 mod minimize;
-pub mod regex;
+#[cfg(any(feature = "dfa-build", feature = "dfa-onepass"))]
+mod remapper;
+#[cfg(feature = "dfa-search")]
 mod search;
-pub mod sparse;
+#[cfg(feature = "dfa-search")]
 mod special;
-#[cfg(feature = "transducer")]
-mod transducer;
+#[cfg(feature = "dfa-search")]
+mod start;
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 18:31:44 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 18:31:44 +0000
commit	c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree	2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/regex-automata/src/dfa/mod.rs
parent	Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download	rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip