Merging upstream version 1.74.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:36 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:36 +0000
commit: e02c5b5930c2c9ba3e5423fe12e2ef0155017297 (patch)
tree: fd60ebbbb5299e16e5fca8c773ddb74f764760db /vendor/regex-automata-0.2.0/src/util/syntax.rs
parent: Adding debian version 1.73.0+dfsg1-1. (diff)
download: rustc-e02c5b5930c2c9ba3e5423fe12e2ef0155017297.tar.xz
rustc-e02c5b5930c2c9ba3e5423fe12e2ef0155017297.zip
1 files changed, 272 insertions, 0 deletions
diff --git a/vendor/regex-automata-0.2.0/src/util/syntax.rs b/vendor/regex-automata-0.2.0/src/util/syntax.rs
new file mode 100644
index 000000000..88beeee75
--- /dev/null
+++ b/vendor/regex-automata-0.2.0/src/util/syntax.rs
@@ -0,0 +1,272 @@
+use regex_syntax::ParserBuilder;
+
+/// A common set of configuration options that apply to the syntax of a regex.
+///
+/// This represents a group of configuration options that specifically apply
+/// to how the concrete syntax of a regular expression is interpreted. In
+/// particular, they are generally forwarded to the
+/// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html)
+/// in the
+/// [`regex-syntax`](https://docs.rs/regex-syntax)
+/// crate when building a regex from its concrete syntax directly.
+///
+/// These options are defined as a group since they apply to every regex engine
+/// in this crate. Instead of re-defining them on every engine's builder, they
+/// are instead provided here as one cohesive unit.
+#[derive(Clone, Copy, Debug)]
+pub struct SyntaxConfig {
+    case_insensitive: bool,
+    multi_line: bool,
+    dot_matches_new_line: bool,
+    swap_greed: bool,
+    ignore_whitespace: bool,
+    unicode: bool,
+    utf8: bool,
+    nest_limit: u32,
+    octal: bool,
+}
+
+impl SyntaxConfig {
+    /// Return a new default syntax configuration.
+    pub fn new() -> SyntaxConfig {
+        // These defaults match the ones used in regex-syntax.
+        SyntaxConfig {
+            case_insensitive: false,
+            multi_line: false,
+            dot_matches_new_line: false,
+            swap_greed: false,
+            ignore_whitespace: false,
+            unicode: true,
+            utf8: true,
+            nest_limit: 250,
+            octal: false,
+        }
+    }
+
+    /// Enable or disable the case insensitive flag by default.
+    ///
+    /// When Unicode mode is enabled, case insensitivity is Unicode-aware.
+    /// Specifically, it will apply the "simple" case folding rules as
+    /// specified by Unicode.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `i` flag.
+    pub fn case_insensitive(mut self, yes: bool) -> SyntaxConfig {
+        self.case_insensitive = yes;
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag by default.
+    ///
+    /// When this is enabled, the `^` and `$` look-around assertions will
+    /// match immediately after and immediately before a new line character,
+    /// respectively. Note that the `\A` and `\z` look-around assertions are
+    /// unaffected by this setting and always correspond to matching at the
+    /// beginning and end of the input.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `m` flag.
+    pub fn multi_line(mut self, yes: bool) -> SyntaxConfig {
+        self.multi_line = yes;
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag by default.
+    ///
+    /// When this is enabled, `.` will match any character. When it's disabled,
+    /// then `.` will match any character except for a new line character.
+    ///
+    /// Note that `.` is impacted by whether the "unicode" setting is enabled
+    /// or not. When Unicode is enabled (the defualt), `.` will match any UTF-8
+    /// encoding of any Unicode scalar value (sans a new line, depending on
+    /// whether this "dot matches new line" option is enabled). When Unicode
+    /// mode is disabled, `.` will match any byte instead. Because of this,
+    /// when Unicode mode is disabled, `.` can only be used when the "allow
+    /// invalid UTF-8" option is enabled, since `.` could otherwise match
+    /// invalid UTF-8.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `s` flag.
+    pub fn dot_matches_new_line(mut self, yes: bool) -> SyntaxConfig {
+        self.dot_matches_new_line = yes;
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// When this is enabled, `.*` (for example) will become ungreedy and `.*?`
+    /// will become greedy.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(mut self, yes: bool) -> SyntaxConfig {
+        self.swap_greed = yes;
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insigificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(mut self, yes: bool) -> SyntaxConfig {
+        self.ignore_whitespace = yes;
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
+    ///
+    /// **WARNING**: Unicode mode can greatly increase the size of the compiled
+    /// DFA, which can noticeably impact both memory usage and compilation
+    /// time. This is especially noticeable if your regex contains character
+    /// classes like `\w` that are impacted by whether Unicode is enabled or
+    /// not. If Unicode is not necessary, you are encouraged to disable it.
+    pub fn unicode(mut self, yes: bool) -> SyntaxConfig {
+        self.unicode = yes;
+        self
+    }
+
+    /// When disabled, the builder will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// For example, when [`SyntaxConfig::unicode`] is disabled, then
+    /// expressions like `[^a]` may match invalid UTF-8 since they can match
+    /// any single byte that is not `a`. By default, these sub-expressions
+    /// are disallowed to avoid returning offsets that split a UTF-8
+    /// encoded codepoint. However, in cases where matching at arbitrary
+    /// locations is desired, this option can be disabled to permit all such
+    /// sub-expressions.
+    ///
+    /// When enabled (the default), the builder is guaranteed to produce a
+    /// regex that will only ever match valid UTF-8 (otherwise, the builder
+    /// will return an error).
+    pub fn utf8(mut self, yes: bool) -> SyntaxConfig {
+        self.utf8 = yes;
+        self
+    }
+
+    /// Set the nesting limit used for the regular expression parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow when building a finite automaton from a regular expression's
+    /// abstract syntax tree. In particular, construction currently uses
+    /// recursion. In the future, the implementation may stop using recursion
+    /// and this option will no longer be necessary.
+    ///
+    /// This limit is not checked until the entire AST is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since the parser will
+    /// limit itself to heap space proportional to the lenth of the pattern
+    /// string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation AST item, which results
+    /// in a nest depth of `1`. In general, a nest limit is not something that
+    /// manifests in an obvious way in the concrete syntax, therefore, it
+    /// should not be used in a granular way.
+    pub fn nest_limit(mut self, limit: u32) -> SyntaxConfig {
+        self.nest_limit = limit;
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\1` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(mut self, yes: bool) -> SyntaxConfig {
+        self.octal = yes;
+        self
+    }
+
+    /// Returns whether "unicode" mode is enabled.
+    pub fn get_unicode(&self) -> bool {
+        self.unicode
+    }
+
+    /// Returns whether "case insensitive" mode is enabled.
+    pub fn get_case_insensitive(&self) -> bool {
+        self.case_insensitive
+    }
+
+    /// Returns whether "multi line" mode is enabled.
+    pub fn get_multi_line(&self) -> bool {
+        self.multi_line
+    }
+
+    /// Returns whether "dot matches new line" mode is enabled.
+    pub fn get_dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line
+    }
+
+    /// Returns whether "swap greed" mode is enabled.
+    pub fn get_swap_greed(&self) -> bool {
+        self.swap_greed
+    }
+
+    /// Returns whether "ignore whitespace" mode is enabled.
+    pub fn get_ignore_whitespace(&self) -> bool {
+        self.ignore_whitespace
+    }
+
+    /// Returns whether UTF-8 mode is enabled.
+    pub fn get_utf8(&self) -> bool {
+        self.utf8
+    }
+
+    /// Returns the "nest limit" setting.
+    pub fn get_nest_limit(&self) -> u32 {
+        self.nest_limit
+    }
+
+    /// Returns whether "octal" mode is enabled.
+    pub fn get_octal(&self) -> bool {
+        self.octal
+    }
+
+    /// Applies this configuration to the given parser.
+    pub(crate) fn apply(&self, builder: &mut ParserBuilder) {
+        builder
+            .unicode(self.unicode)
+            .case_insensitive(self.case_insensitive)
+            .multi_line(self.multi_line)
+            .dot_matches_new_line(self.dot_matches_new_line)
+            .swap_greed(self.swap_greed)
+            .ignore_whitespace(self.ignore_whitespace)
+            .allow_invalid_utf8(!self.utf8)
+            .nest_limit(self.nest_limit)
+            .octal(self.octal);
+    }
+}
+
+impl Default for SyntaxConfig {
+    fn default() -> SyntaxConfig {
+        SyntaxConfig::new()
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 18:31:36 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-30 18:31:36 +0000
commit	e02c5b5930c2c9ba3e5423fe12e2ef0155017297 (patch)
tree	fd60ebbbb5299e16e5fca8c773ddb74f764760db /vendor/regex-automata-0.2.0/src/util/syntax.rs
parent	Adding debian version 1.73.0+dfsg1-1. (diff)
download	rustc-e02c5b5930c2c9ba3e5423fe12e2ef0155017297.tar.xz rustc-e02c5b5930c2c9ba3e5423fe12e2ef0155017297.zip