diff options
Diffstat (limited to 'vendor/regex/src/re_builder.rs')
-rw-r--r-- | vendor/regex/src/re_builder.rs | 421 |
1 files changed, 0 insertions, 421 deletions
diff --git a/vendor/regex/src/re_builder.rs b/vendor/regex/src/re_builder.rs deleted file mode 100644 index ee6383690..000000000 --- a/vendor/regex/src/re_builder.rs +++ /dev/null @@ -1,421 +0,0 @@ -/// The set of user configurable options for compiling zero or more regexes. -#[derive(Clone, Debug)] -#[allow(missing_docs)] -pub struct RegexOptions { - pub pats: Vec<String>, - pub size_limit: usize, - pub dfa_size_limit: usize, - pub nest_limit: u32, - pub case_insensitive: bool, - pub multi_line: bool, - pub dot_matches_new_line: bool, - pub swap_greed: bool, - pub ignore_whitespace: bool, - pub unicode: bool, - pub octal: bool, -} - -impl Default for RegexOptions { - fn default() -> Self { - RegexOptions { - pats: vec![], - size_limit: 10 * (1 << 20), - dfa_size_limit: 2 * (1 << 20), - nest_limit: 250, - case_insensitive: false, - multi_line: false, - dot_matches_new_line: false, - swap_greed: false, - ignore_whitespace: false, - unicode: true, - octal: false, - } - } -} - -macro_rules! define_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::$regex_mod::Regex; - - /// A configurable builder for a regular expression. - /// - /// A builder can be used to configure how the regex is built, for example, by - /// setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexBuilder(RegexOptions); - - impl RegexBuilder { - /// Create a new regular expression builder with the given pattern. - /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. - pub fn new(pattern: &str) -> RegexBuilder { - let mut builder = RegexBuilder(RegexOptions::default()); - builder.0.pats.push(pattern.to_owned()); - builder - } - - /// Consume the builder and compile the regular expression. - /// - /// Note that calling `as_str` on the resulting `Regex` will produce the - /// pattern given to `new` verbatim. Notably, it will not incorporate any - /// of the flags set on this builder. - pub fn build(&self) -> Result<Regex, Error> { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(Regex::from) - } - - /// Set the value for the case insensitive (`i`) flag. - /// - /// When enabled, letters in the pattern will match both upper case and - /// lower case variants. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - /// - /// When enabled, `^` matches the beginning of lines and `$` matches the - /// end of lines. - /// - /// By default, they match beginning/end of the input. - pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" when Unicode is disabled and - /// means "any valid UTF-8 encoding of any Unicode scalar value" when - /// Unicode is enabled. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - /// - /// When enabled, a pattern like `a*` is lazy (tries to find shortest - /// match) and `a*?` is greedy (tries to find longest match). - /// - /// By default, `a*` is greedy and `a*?` is lazy. - pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - /// - /// When enabled, whitespace such as new lines and spaces will be ignored - /// between expressions of the pattern, and `#` can be used to start a - /// comment until the next new line. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - /// - /// Enabled by default. When disabled, character classes such as `\w` only - /// match ASCII word characters instead of all Unicode word characters. - pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_builder!(bytes, re_bytes, false); -define_builder!(unicode, re_unicode, true); - -macro_rules! define_set_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::re_set::$regex_mod::RegexSet; - - /// A configurable builder for a set of regular expressions. - /// - /// A builder can be used to configure how the regexes are built, for example, - /// by setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexSetBuilder(RegexOptions); - - impl RegexSetBuilder { - /// Create a new regular expression builder with the given pattern. - /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. - pub fn new<I, S>(patterns: I) -> RegexSetBuilder - where - S: AsRef<str>, - I: IntoIterator<Item = S>, - { - let mut builder = RegexSetBuilder(RegexOptions::default()); - for pat in patterns { - builder.0.pats.push(pat.as_ref().to_owned()); - } - builder - } - - /// Consume the builder and compile the regular expressions into a set. - pub fn build(&self) -> Result<RegexSet, Error> { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(RegexSet::from) - } - - /// Set the value for the case insensitive (`i`) flag. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - pub fn multi_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet` - /// expressions and means "any Unicode scalar value" for `regex::RegexSet` - /// expressions. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - pub fn swap_greed( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit( - &mut self, - limit: u32, - ) -> &mut RegexSetBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_set_builder!(set_bytes, bytes, false); -define_set_builder!(set_unicode, unicode, true); |