summaryrefslogtreecommitdiffstats
path: root/vendor/regex-syntax/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/regex-syntax/src/parser.rs')
-rw-r--r--vendor/regex-syntax/src/parser.rs83
1 files changed, 56 insertions, 27 deletions
diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs
index ded95b280..2e7a2bb80 100644
--- a/vendor/regex-syntax/src/parser.rs
+++ b/vendor/regex-syntax/src/parser.rs
@@ -1,16 +1,26 @@
-use crate::ast;
-use crate::hir;
+use crate::{ast, hir, Error};
-use crate::Result;
+/// A convenience routine for parsing a regex using default options.
+///
+/// This is equivalent to `Parser::new().parse(pattern)`.
+///
+/// If you need to set non-default options, then use a [`ParserBuilder`].
+///
+/// This routine returns an [`Hir`](hir::Hir) value. Namely, it automatically
+/// parses the pattern as an [`Ast`](ast::Ast) and then invokes the translator
+/// to convert the `Ast` into an `Hir`. If you need access to the `Ast`, then
+/// you should use a [`ast::parse::Parser`].
+pub fn parse(pattern: &str) -> Result<hir::Hir, Error> {
+ Parser::new().parse(pattern)
+}
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
///
-/// This type combines the builder options for both the
-/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
-/// and the
-/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
+/// This type combines the builder options for both the [AST
+/// `ParserBuilder`](ast::parse::ParserBuilder) and the [HIR
+/// `TranslatorBuilder`](hir::translate::TranslatorBuilder).
#[derive(Clone, Debug, Default)]
pub struct ParserBuilder {
ast: ast::parse::ParserBuilder,
@@ -78,19 +88,23 @@ impl ParserBuilder {
self
}
- /// When enabled, the parser will permit the construction of a regular
+ /// When disabled, translation will permit the construction of a regular
/// expression that may match invalid UTF-8.
///
- /// When disabled (the default), the parser is guaranteed to produce
- /// an expression that will only ever match valid UTF-8 (otherwise, the
- /// parser will return an error).
- ///
- /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
- /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
- /// the parser to return an error. Namely, a negated ASCII word boundary
- /// can result in matching positions that aren't valid UTF-8 boundaries.
- pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.allow_invalid_utf8(yes);
+ /// When enabled (the default), the translator is guaranteed to produce an
+ /// expression that, for non-empty matches, will only ever produce spans
+ /// that are entirely valid UTF-8 (otherwise, the translator will return an
+ /// error).
+ ///
+ /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
+ /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
+ /// syntax) will be allowed even though they can produce matches that split
+ /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
+ /// matches, and it is expected that the regex engine itself must handle
+ /// these cases if necessary (perhaps by suppressing any zero-width matches
+ /// that split a codepoint).
+ pub fn utf8(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.utf8(yes);
self
}
@@ -134,6 +148,23 @@ impl ParserBuilder {
self
}
+ /// Enable or disable the CRLF mode flag by default.
+ ///
+ /// By default this is disabled. It may alternatively be selectively
+ /// enabled in the regular expression itself via the `R` flag.
+ ///
+ /// When CRLF mode is enabled, the following happens:
+ ///
+ /// * Unless `dot_matches_new_line` is enabled, `.` will match any character
+ /// except for `\r` and `\n`.
+ /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`,
+ /// `\r` and `\n` as line terminators. And in particular, neither will
+ /// match between a `\r` and a `\n`.
+ pub fn crlf(&mut self, yes: bool) -> &mut ParserBuilder {
+ self.hir.crlf(yes);
+ self
+ }
+
/// Enable or disable the "swap greed" flag by default.
///
/// By default this is disabled. It may alternatively be selectively
@@ -148,9 +179,9 @@ impl ParserBuilder {
/// By default this is **enabled**. It may alternatively be selectively
/// disabled in the regular expression itself via the `u` flag.
///
- /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
- /// default), a regular expression will fail to parse if Unicode mode is
- /// disabled and a sub-expression could possibly match invalid UTF-8.
+ /// Note that unless `utf8` is disabled (it's enabled by default), a
+ /// regular expression will fail to parse if Unicode mode is disabled and a
+ /// sub-expression could possibly match invalid UTF-8.
pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
self.hir.unicode(yes);
self
@@ -167,10 +198,9 @@ impl ParserBuilder {
/// convenience for never having to deal with it at all.
///
/// If callers have more fine grained use cases that need an AST, then please
-/// see the [`ast::parse`](ast/parse/index.html) module.
+/// see the [`ast::parse`] module.
///
-/// A `Parser` can be configured in more detail via a
-/// [`ParserBuilder`](struct.ParserBuilder.html).
+/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
#[derive(Clone, Debug)]
pub struct Parser {
ast: ast::parse::Parser,
@@ -184,15 +214,14 @@ impl Parser {
/// a high level intermediate representation of the given regular
/// expression.
///
- /// To set configuration options on the parser, use
- /// [`ParserBuilder`](struct.ParserBuilder.html).
+ /// To set configuration options on the parser, use [`ParserBuilder`].
pub fn new() -> Parser {
ParserBuilder::new().build()
}
/// Parse the regular expression into a high level intermediate
/// representation.
- pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+ pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir, Error> {
let ast = self.ast.parse(pattern)?;
let hir = self.hir.translate(pattern, &ast)?;
Ok(hir)