summary refs log tree commit diff stats
path: root/vendor/regex/src/re_builder.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex/src/re_builder.rs')
-rw-r--r-- vendor/regex/src/re_builder.rs 421
1 files changed, 0 insertions, 421 deletions
diff --git a/vendor/regex/src/re_builder.rs b/vendor/regex/src/re_builder.rs
deleted file mode 100644
index ee6383690..000000000
--- a/vendor/regex/src/re_builder.rs
+++ /dev/null
@@ -1,421 +0,0 @@
-/// The set of user configurable options for compiling zero or more regexes.
-#[derive(Clone, Debug)]
-#[allow(missing_docs)]
-pub struct RegexOptions {
- pub pats: Vec<String>,
- pub size_limit: usize,
- pub dfa_size_limit: usize,
- pub nest_limit: u32,
- pub case_insensitive: bool,
- pub multi_line: bool,
- pub dot_matches_new_line: bool,
- pub swap_greed: bool,
- pub ignore_whitespace: bool,
- pub unicode: bool,
- pub octal: bool,
-}
-
-impl Default for RegexOptions {
- fn default() -> Self {
- RegexOptions {
- pats: vec![],
- size_limit: 10 * (1 << 20),
- dfa_size_limit: 2 * (1 << 20),
- nest_limit: 250,
- case_insensitive: false,
- multi_line: false,
- dot_matches_new_line: false,
- swap_greed: false,
- ignore_whitespace: false,
- unicode: true,
- octal: false,
- }
- }
-}
-
-macro_rules! define_builder {
- ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
- pub mod $name {
- use super::RegexOptions;
- use crate::error::Error;
- use crate::exec::ExecBuilder;
-
- use crate::$regex_mod::Regex;
-
- /// A configurable builder for a regular expression.
- ///
- /// A builder can be used to configure how the regex is built, for example, by
- /// setting the default flags (which can be overridden in the expression
- /// itself) or setting various limits.
- #[derive(Debug)]
- pub struct RegexBuilder(RegexOptions);
-
- impl RegexBuilder {
- /// Create a new regular expression builder with the given pattern.
- ///
- /// If the pattern is invalid, then an error will be returned when
- /// `build` is called.
- pub fn new(pattern: &str) -> RegexBuilder {
- let mut builder = RegexBuilder(RegexOptions::default());
- builder.0.pats.push(pattern.to_owned());
- builder
- }
-
- /// Consume the builder and compile the regular expression.
- ///
- /// Note that calling `as_str` on the resulting `Regex` will produce the
- /// pattern given to `new` verbatim. Notably, it will not incorporate any
- /// of the flags set on this builder.
- pub fn build(&self) -> Result<Regex, Error> {
- ExecBuilder::new_options(self.0.clone())
- .only_utf8($only_utf8)
- .build()
- .map(Regex::from)
- }
-
- /// Set the value for the case insensitive (`i`) flag.
- ///
- /// When enabled, letters in the pattern will match both upper case and
- /// lower case variants.
- pub fn case_insensitive(
- &mut self,
- yes: bool,
- ) -> &mut RegexBuilder {
- self.0.case_insensitive = yes;
- self
- }
-
- /// Set the value for the multi-line matching (`m`) flag.
- ///
- /// When enabled, `^` matches the beginning of lines and `$` matches the
- /// end of lines.
- ///
- /// By default, they match beginning/end of the input.
- pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
- self.0.multi_line = yes;
- self
- }
-
- /// Set the value for the any character (`s`) flag, where in `.` matches
- /// anything when `s` is set and matches anything except for new line when
- /// it is not set (the default).
- ///
- /// N.B. "matches anything" means "any byte" when Unicode is disabled and
- /// means "any valid UTF-8 encoding of any Unicode scalar value" when
- /// Unicode is enabled.
- pub fn dot_matches_new_line(
- &mut self,
- yes: bool,
- ) -> &mut RegexBuilder {
- self.0.dot_matches_new_line = yes;
- self
- }
-
- /// Set the value for the greedy swap (`U`) flag.
- ///
- /// When enabled, a pattern like `a*` is lazy (tries to find shortest
- /// match) and `a*?` is greedy (tries to find longest match).
- ///
- /// By default, `a*` is greedy and `a*?` is lazy.
- pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
- self.0.swap_greed = yes;
- self
- }
-
- /// Set the value for the ignore whitespace (`x`) flag.
- ///
- /// When enabled, whitespace such as new lines and spaces will be ignored
- /// between expressions of the pattern, and `#` can be used to start a
- /// comment until the next new line.
- pub fn ignore_whitespace(
- &mut self,
- yes: bool,
- ) -> &mut RegexBuilder {
- self.0.ignore_whitespace = yes;
- self
- }
-
- /// Set the value for the Unicode (`u`) flag.
- ///
- /// Enabled by default. When disabled, character classes such as `\w` only
- /// match ASCII word characters instead of all Unicode word characters.
- pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
- self.0.unicode = yes;
- self
- }
-
- /// Whether to support octal syntax or not.
- ///
- /// Octal syntax is a little-known way of uttering Unicode codepoints in
- /// a regular expression. For example, `a`, `\x61`, `\u0061` and
- /// `\141` are all equivalent regular expressions, where the last example
- /// shows octal syntax.
- ///
- /// While supporting octal syntax isn't in and of itself a problem, it does
- /// make good error messages harder. That is, in PCRE based regex engines,
- /// syntax like `\0` invokes a backreference, which is explicitly
- /// unsupported in Rust's regex engine. However, many users expect it to
- /// be supported. Therefore, when octal support is disabled, the error
- /// message will explicitly mention that backreferences aren't supported.
- ///
- /// Octal syntax is disabled by default.
- pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
- self.0.octal = yes;
- self
- }
-
- /// Set the approximate size limit of the compiled regular expression.
- ///
- /// This roughly corresponds to the number of bytes occupied by a single
- /// compiled program. If the program exceeds this number, then a
- /// compilation error is returned.
- pub fn size_limit(
- &mut self,
- limit: usize,
- ) -> &mut RegexBuilder {
- self.0.size_limit = limit;
- self
- }
-
- /// Set the approximate size of the cache used by the DFA.
- ///
- /// This roughly corresponds to the number of bytes that the DFA will
- /// use while searching.
- ///
- /// Note that this is a *per thread* limit. There is no way to set a global
- /// limit. In particular, if a regex is used from multiple threads
- /// simultaneously, then each thread may use up to the number of bytes
- /// specified here.
- pub fn dfa_size_limit(
- &mut self,
- limit: usize,
- ) -> &mut RegexBuilder {
- self.0.dfa_size_limit = limit;
- self
- }
-
- /// Set the nesting limit for this parser.
- ///
- /// The nesting limit controls how deep the abstract syntax tree is allowed
- /// to be. If the AST exceeds the given limit (e.g., with too many nested
- /// groups), then an error is returned by the parser.
- ///
- /// The purpose of this limit is to act as a heuristic to prevent stack
- /// overflow for consumers that do structural induction on an `Ast` using
- /// explicit recursion. While this crate never does this (instead using
- /// constant stack space and moving the call stack to the heap), other
- /// crates may.
- ///
- /// This limit is not checked until the entire Ast is parsed. Therefore,
- /// if callers want to put a limit on the amount of heap space used, then
- /// they should impose a limit on the length, in bytes, of the concrete
- /// pattern string. In particular, this is viable since this parser
- /// implementation will limit itself to heap space proportional to the
- /// length of the pattern string.
- ///
- /// Note that a nest limit of `0` will return a nest limit error for most
- /// patterns but not all. For example, a nest limit of `0` permits `a` but
- /// not `ab`, since `ab` requires a concatenation, which results in a nest
- /// depth of `1`. In general, a nest limit is not something that manifests
- /// in an obvious way in the concrete syntax, therefore, it should not be
- /// used in a granular way.
- pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
- self.0.nest_limit = limit;
- self
- }
- }
- }
- };
-}
-
-define_builder!(bytes, re_bytes, false);
-define_builder!(unicode, re_unicode, true);
-
-macro_rules! define_set_builder {
- ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
- pub mod $name {
- use super::RegexOptions;
- use crate::error::Error;
- use crate::exec::ExecBuilder;
-
- use crate::re_set::$regex_mod::RegexSet;
-
- /// A configurable builder for a set of regular expressions.
- ///
- /// A builder can be used to configure how the regexes are built, for example,
- /// by setting the default flags (which can be overridden in the expression
- /// itself) or setting various limits.
- #[derive(Debug)]
- pub struct RegexSetBuilder(RegexOptions);
-
- impl RegexSetBuilder {
- /// Create a new regular expression builder with the given pattern.
- ///
- /// If the pattern is invalid, then an error will be returned when
- /// `build` is called.
- pub fn new<I, S>(patterns: I) -> RegexSetBuilder
- where
- S: AsRef<str>,
- I: IntoIterator<Item = S>,
- {
- let mut builder = RegexSetBuilder(RegexOptions::default());
- for pat in patterns {
- builder.0.pats.push(pat.as_ref().to_owned());
- }
- builder
- }
-
- /// Consume the builder and compile the regular expressions into a set.
- pub fn build(&self) -> Result<RegexSet, Error> {
- ExecBuilder::new_options(self.0.clone())
- .only_utf8($only_utf8)
- .build()
- .map(RegexSet::from)
- }
-
- /// Set the value for the case insensitive (`i`) flag.
- pub fn case_insensitive(
- &mut self,
- yes: bool,
- ) -> &mut RegexSetBuilder {
- self.0.case_insensitive = yes;
- self
- }
-
- /// Set the value for the multi-line matching (`m`) flag.
- pub fn multi_line(
- &mut self,
- yes: bool,
- ) -> &mut RegexSetBuilder {
- self.0.multi_line = yes;
- self
- }
-
- /// Set the value for the any character (`s`) flag, where in `.` matches
- /// anything when `s` is set and matches anything except for new line when
- /// it is not set (the default).
- ///
- /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet`
- /// expressions and means "any Unicode scalar value" for `regex::RegexSet`
- /// expressions.
- pub fn dot_matches_new_line(
- &mut self,
- yes: bool,
- ) -> &mut RegexSetBuilder {
- self.0.dot_matches_new_line = yes;
- self
- }
-
- /// Set the value for the greedy swap (`U`) flag.
- pub fn swap_greed(
- &mut self,
- yes: bool,
- ) -> &mut RegexSetBuilder {
- self.0.swap_greed = yes;
- self
- }
-
- /// Set the value for the ignore whitespace (`x`) flag.
- pub fn ignore_whitespace(
- &mut self,
- yes: bool,
- ) -> &mut RegexSetBuilder {
- self.0.ignore_whitespace = yes;
- self
- }
-
- /// Set the value for the Unicode (`u`) flag.
- pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
- self.0.unicode = yes;
- self
- }
-
- /// Whether to support octal syntax or not.
- ///
- /// Octal syntax is a little-known way of uttering Unicode codepoints in
- /// a regular expression. For example, `a`, `\x61`, `\u0061` and
- /// `\141` are all equivalent regular expressions, where the last example
- /// shows octal syntax.
- ///
- /// While supporting octal syntax isn't in and of itself a problem, it does
- /// make good error messages harder. That is, in PCRE based regex engines,
- /// syntax like `\0` invokes a backreference, which is explicitly
- /// unsupported in Rust's regex engine. However, many users expect it to
- /// be supported. Therefore, when octal support is disabled, the error
- /// message will explicitly mention that backreferences aren't supported.
- ///
- /// Octal syntax is disabled by default.
- pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
- self.0.octal = yes;
- self
- }
-
- /// Set the approximate size limit of the compiled regular expression.
- ///
- /// This roughly corresponds to the number of bytes occupied by a single
- /// compiled program. If the program exceeds this number, then a
- /// compilation error is returned.
- pub fn size_limit(
- &mut self,
- limit: usize,
- ) -> &mut RegexSetBuilder {
- self.0.size_limit = limit;
- self
- }
-
- /// Set the approximate size of the cache used by the DFA.
- ///
- /// This roughly corresponds to the number of bytes that the DFA will
- /// use while searching.
- ///
- /// Note that this is a *per thread* limit. There is no way to set a global
- /// limit. In particular, if a regex is used from multiple threads
- /// simultaneously, then each thread may use up to the number of bytes
- /// specified here.
- pub fn dfa_size_limit(
- &mut self,
- limit: usize,
- ) -> &mut RegexSetBuilder {
- self.0.dfa_size_limit = limit;
- self
- }
-
- /// Set the nesting limit for this parser.
- ///
- /// The nesting limit controls how deep the abstract syntax tree is allowed
- /// to be. If the AST exceeds the given limit (e.g., with too many nested
- /// groups), then an error is returned by the parser.
- ///
- /// The purpose of this limit is to act as a heuristic to prevent stack
- /// overflow for consumers that do structural induction on an `Ast` using
- /// explicit recursion. While this crate never does this (instead using
- /// constant stack space and moving the call stack to the heap), other
- /// crates may.
- ///
- /// This limit is not checked until the entire Ast is parsed. Therefore,
- /// if callers want to put a limit on the amount of heap space used, then
- /// they should impose a limit on the length, in bytes, of the concrete
- /// pattern string. In particular, this is viable since this parser
- /// implementation will limit itself to heap space proportional to the
- /// length of the pattern string.
- ///
- /// Note that a nest limit of `0` will return a nest limit error for most
- /// patterns but not all. For example, a nest limit of `0` permits `a` but
- /// not `ab`, since `ab` requires a concatenation, which results in a nest
- /// depth of `1`. In general, a nest limit is not something that manifests
- /// in an obvious way in the concrete syntax, therefore, it should not be
- /// used in a granular way.
- pub fn nest_limit(
- &mut self,
- limit: u32,
- ) -> &mut RegexSetBuilder {
- self.0.nest_limit = limit;
- self
- }
- }
- }
- };
-}
-
-define_set_builder!(set_bytes, bytes, false);
-define_set_builder!(set_unicode, unicode, true);