path: root/vendor/regex-syntax
author:     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-06-19 09:26:03 +0000
committer:  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-06-19 09:26:03 +0000
commit:     9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree:       21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /vendor/regex-syntax
parent:     Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
download:   rustc-9918693037dce8aa4bb6f08741b6812923486c18.tar.xz
            rustc-9918693037dce8aa4bb6f08741b6812923486c18.zip

Merging upstream version 1.76.0+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-syntax')
-rw-r--r--  vendor/regex-syntax/.cargo-checksum.json |   2
-rw-r--r--  vendor/regex-syntax/Cargo.toml           |  18
-rw-r--r--  vendor/regex-syntax/README.md            |   1
-rw-r--r--  vendor/regex-syntax/src/ast/mod.rs       | 440
-rw-r--r--  vendor/regex-syntax/src/ast/parse.rs     | 576
-rw-r--r--  vendor/regex-syntax/src/ast/print.rs     |  20
-rw-r--r--  vendor/regex-syntax/src/ast/visitor.rs   |  17
-rw-r--r--  vendor/regex-syntax/src/hir/literal.rs   | 167
-rw-r--r--  vendor/regex-syntax/src/hir/mod.rs       | 196
-rw-r--r--  vendor/regex-syntax/src/hir/print.rs     |  59
-rw-r--r--  vendor/regex-syntax/src/hir/translate.rs | 297
-rw-r--r--  vendor/regex-syntax/src/hir/visitor.rs   |  15
-rw-r--r--  vendor/regex-syntax/src/lib.rs           |  10
-rw-r--r--  vendor/regex-syntax/src/parser.rs        |  25
-rwxr-xr-x  vendor/regex-syntax/test                 |   4
15 files changed, 1390 insertions(+), 457 deletions(-)
diff --git a/vendor/regex-syntax/.cargo-checksum.json b/vendor/regex-syntax/.cargo-checksum.json
index 94024d262..8152441ca 100644
--- a/vendor/regex-syntax/.cargo-checksum.json
+++ b/vendor/regex-syntax/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"Cargo.toml":"572d5198c4a39a5a55d9cbbb79de0e93f0b734f9a8cb3b7a80c0046897a79c85","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"1f5f6c3e0f7e452236eb13a0a8627dceb35e7bd9e18798916b74b724ba8161fe","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"f18ba2a3a082fa342512feed20da3bf5fb8b6d92d2e246809732860b446f75c9","src/ast/parse.rs":"49478a4ae5b557dc46aa7071c91c7a6905a0ce62910f8c8fefce464e5779e934","src/ast/print.rs":"62d319cd0b7e6f437dc8dcaf798046a44afa03e9aeb6a384d5cffa448383af53","src/ast/visitor.rs":"a58170758852b2a59c9232f3a027a91f0603b26dd0d9acbde73ac1f575ca600b","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"2358e74b4d4aabfa62f79df855fd5d183779b86c4e14aae4ee42d8695bb3d010","src/hir/literal.rs":"a57f77b49998f4e4be9f9e4512f9934bc61640786dd7ac07c708825ba2b83429","src/hir/mod.rs":"48f885da2485afd638823dbdbe591f71051db398e86055820557d627d8b23309","src/hir/print.rs":"1f1fb454af939a53ea2799f55b67c2a2615c47c24dbf9f48a7c2a2b402d36e1f","src/hir/translate.rs":"1fbba4c456891ead0298ab6457ac5e9649431e52e75acc85a293d2a17886ac84","src/hir/visitor.rs":"e98aab188f92a92baee7b490d8558383373398697ae97335ae2635b6a5aa45ca","src/lib.rs":"4dbb230403b281f35dc1f9494e6e33e18bad2c2c648da78fd2b4662b85f88748","src/parser.rs":"cac139ed552a63ac4f81d60610cf0c2084421e34729475707681ef9392e1e9ae","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"9829458ef321b3bc22c21eae4b22805b33f8b5e67022928ffd9a9e0287bc7c31","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"2a2599a4e406fbbd0efd16aa6ce385c3f97b87c34820d6686a9f9113a5231c67","src/unicode_tables/case_folding_simple.rs":"9583803d4a10486da372b76979dbd26349b40766229467238eff972c1d78e47b","src/unicode_tables/general_category.rs":"36a93ba1cdeed96a00ff29a5ab5afd2c578a89541bf4dd8b18478146cebda0aa","src/unicode_tables/grapheme_cluster_break.rs":"39c388e9805a8391d3d3e69d74d831ce4fb99aa7e13e52c64dd2bd16d4765301","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"a98ea4afe71c2947023ae12bd25c46bf4c7de48eeb40979eca5c96ba62cee02e","src/unicode_tables/perl_space.rs":"ea2b3b84b4a48334082dadc6c37d9fcc9c9ded84b40e8f5c9c9314898638967e","src/unicode_tables/perl_word.rs":"6f1156bd6af32151ecffea4abe07a38fa04b1fc1b227ec1a8dac5d5f08d9d74b","src/unicode_tables/property_bool.rs":"0bd64f6e3228eaecf47824e238bdf1f8a9eef113ace6e790a57f045a8106701c","src/unicode_tables/property_names.rs":"5ca25437927eb70c62adf7d038e99a601cfb8a718677fd6de832589664d3c481","src/unicode_tables/property_values.rs":"5b4cc02392d382cf7af60455fc87b9980e97409b62a4b8d6c5843190d2e2d21d","src/unicode_tables/script.rs":"ea1d771b6d0a4b12d143f9bad2ea9342a0887878cbbe3c11262b6eabedaf2dd4","src/unicode_tables/script_extension.rs":"beeb8349703d903ff861beb8401bfd2599e457dc25df872e69d6ad1615f8b5e9","src/unicode_tables/sentence_break.rs":"2befe2a27cc4e8aecb624e310ef9f371462470dd3b2f572cec1f5873a5e30aa9","src/unicode_tables/word_break.rs":"94679177731b515f0c360eff394286a1f99b59527bdbc826cbf51d32f9666187","src/utf8.rs":"e9a13623a
94295b81969c5483de17219ff74bb20768be13c527010351245acbd","test":"01d6f6e9a689fb794173288a52f40f53b4f782176d0fcd648c7c6d3a2df05c63"},"package":"436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"} \ No newline at end of file
+{"files":{"Cargo.toml":"33c96af38ed9f42d1ccbf85ecfeea1d46202943d01c595b8ee4dddef760e6bd5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"b2484aa7e66fb92d1378e9a7ce7605af18f77cb12c179866eaf92ba28cfec1d9","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"700c2f779fccb529db7b444819d53c38f916b065d3d05a74282f929af581e8b1","src/ast/parse.rs":"fcd45146eaf747d15a2a519d34754638d451ab83e88b5962841cf7a0dd32e988","src/ast/print.rs":"99cb69ece252ef31e0be177fb3364797eb30b785f936532b8dcd8106e7be0738","src/ast/visitor.rs":"f0fdf758801fe70e6b299b73ab63196e814af95ef6eccad7ef4f72075743fcf6","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"2358e74b4d4aabfa62f79df855fd5d183779b86c4e14aae4ee42d8695bb3d010","src/hir/literal.rs":"6a8108b8919fbfd9ab93072846124c51d2998489810fcd6e7a89fdccc45833e0","src/hir/mod.rs":"eca183b8e173f486c1a11a5fa10895c96067162c8ec936871f937ca7fca5f710","src/hir/print.rs":"ad51c515c933bfd67d307ba3d7e6ac59c9c5903b4f393a9f9a4785c92b88348d","src/hir/translate.rs":"5fbff527c53f217ba2bac9b0948d7de74164625d08674b91a479ced271159ebd","src/hir/visitor.rs":"71ca9c93aa48a5ed445399659fa6455093a1bbd9ef44b66bc7095c1b08b2ec1f","src/lib.rs":"5ae457d402e49443bdb23b71353693dd3b0d263b57a6eeb9eb5b5dae5c901bdd","src/parser.rs":"6b2f4f27e3331a01a25b87c89368dd2e54396bd425dac57941f9c1ebfd238ac8","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"9829458ef321b3bc22c21eae4b22805b33f8b5e67022928ffd9a9e0287bc7c31","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"2a2599a4e406fbbd0efd16aa6ce385c3f97b87c34820d6686a9f9113a5231c67","src/unicode_tables/case_folding_simple.rs":"9583803d4a10486da372b76979dbd26349b40766229467238eff972c1d78e47b","src/unicode_tables/general_category.rs":"36a93ba1cdeed96a00ff29a5ab5afd2c578a89541bf4dd8b18478146cebda0aa","src/unicode_tables/grapheme_cluster_break.rs":"39c388e9805a8391d3d3e69d74d831ce4fb99aa7e13e52c64dd2bd16d4765301","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"a98ea4afe71c2947023ae12bd25c46bf4c7de48eeb40979eca5c96ba62cee02e","src/unicode_tables/perl_space.rs":"ea2b3b84b4a48334082dadc6c37d9fcc9c9ded84b40e8f5c9c9314898638967e","src/unicode_tables/perl_word.rs":"6f1156bd6af32151ecffea4abe07a38fa04b1fc1b227ec1a8dac5d5f08d9d74b","src/unicode_tables/property_bool.rs":"0bd64f6e3228eaecf47824e238bdf1f8a9eef113ace6e790a57f045a8106701c","src/unicode_tables/property_names.rs":"5ca25437927eb70c62adf7d038e99a601cfb8a718677fd6de832589664d3c481","src/unicode_tables/property_values.rs":"5b4cc02392d382cf7af60455fc87b9980e97409b62a4b8d6c5843190d2e2d21d","src/unicode_tables/script.rs":"ea1d771b6d0a4b12d143f9bad2ea9342a0887878cbbe3c11262b6eabedaf2dd4","src/unicode_tables/script_extension.rs":"beeb8349703d903ff861beb8401bfd2599e457dc25df872e69d6ad1615f8b5e9","src/unicode_tables/sentence_break.rs":"2befe2a27cc4e8aecb624e310ef9f371462470dd3b2f572cec1f5873a5e30aa9","src/unicode_tables/word_break.rs":"94679177731b515f0c360eff394286a1f99b59527bdbc826cbf51d32f9666187","src/utf8.rs":"e9a13623a
94295b81969c5483de17219ff74bb20768be13c527010351245acbd","test":"c7de5fbc0010d9b5b758cd49956375a64b88601c068167fd366808950257f108"},"package":"c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"} \ No newline at end of file
diff --git a/vendor/regex-syntax/Cargo.toml b/vendor/regex-syntax/Cargo.toml
index cd4b92cb2..3602ab33c 100644
--- a/vendor/regex-syntax/Cargo.toml
+++ b/vendor/regex-syntax/Cargo.toml
@@ -11,16 +11,18 @@
[package]
edition = "2021"
-rust-version = "1.60.0"
+rust-version = "1.65"
name = "regex-syntax"
-version = "0.7.2"
-authors = ["The Rust Project Developers"]
+version = "0.8.2"
+authors = [
+ "The Rust Project Developers",
+ "Andrew Gallant <jamslam@gmail.com>",
+]
description = "A regular expression parser."
-homepage = "https://github.com/rust-lang/regex"
documentation = "https://docs.rs/regex-syntax"
readme = "README.md"
license = "MIT OR Apache-2.0"
-repository = "https://github.com/rust-lang/regex"
+repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax"
[package.metadata.docs.rs]
all-features = true
@@ -29,7 +31,13 @@ rustdoc-args = [
"docsrs",
]
+[dependencies.arbitrary]
+version = "1.3.0"
+features = ["derive"]
+optional = true
+
[features]
+arbitrary = ["dep:arbitrary"]
default = [
"std",
"unicode",
diff --git a/vendor/regex-syntax/README.md b/vendor/regex-syntax/README.md
index ff4fe094c..529513b0c 100644
--- a/vendor/regex-syntax/README.md
+++ b/vendor/regex-syntax/README.md
@@ -4,7 +4,6 @@ This crate provides a robust regular expression parser.
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
[![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax)
-[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
### Documentation
diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs
index a95b1c873..6a77ee134 100644
--- a/vendor/regex-syntax/src/ast/mod.rs
+++ b/vendor/regex-syntax/src/ast/mod.rs
@@ -20,6 +20,7 @@ mod visitor;
/// valid Unicode property name. That particular error is reported when
/// translating an AST to the high-level intermediate representation (`HIR`).
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Error {
/// The kind of error.
kind: ErrorKind,
@@ -70,6 +71,7 @@ impl Error {
/// new variant is not considered a breaking change.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ErrorKind {
/// The capturing group limit was exceeded.
///
@@ -160,6 +162,18 @@ pub enum ErrorKind {
/// `(?i)*`. It is, however, possible to create a repetition operating on
/// an empty sub-expression. For example, `()*` is still considered valid.
RepetitionMissing,
+ /// The special word boundary syntax, `\b{something}`, was used, but
+ /// either EOF without `}` was seen, or an invalid character in the
+ /// braces was seen.
+ SpecialWordBoundaryUnclosed,
+ /// The special word boundary syntax, `\b{something}`, was used, but
+ /// `something` was not recognized as a valid word boundary kind.
+ SpecialWordBoundaryUnrecognized,
+ /// The syntax `\b{` was observed, but afterwards the end of the pattern
+ /// was observed without being able to tell whether it was meant to be a
+ /// bounded repetition on the `\b` or the beginning of a special word
+ /// boundary assertion.
+ SpecialWordOrRepetitionUnexpectedEof,
/// The Unicode class is not valid. This typically occurs when a `\p` is
/// followed by something other than a `{`.
UnicodeClassInvalid,
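
Note: the three new ErrorKind variants above cover the failure modes of the `\b{...}` syntax added later in this patch. A rough sketch of how one of them surfaces through the AST parser, assuming the usual `ast::parse::Parser` entry point; the pattern `\b{qux}` is just an illustrative bad input, not taken from the patch's tests:

use regex_syntax::ast::{parse::Parser, ErrorKind};

fn main() {
    // `qux` is not one of start/end/start-half/end-half, so the parser
    // should reject it with the new unrecognized-word-boundary error.
    let err = Parser::new().parse(r"\b{qux}").unwrap_err();
    assert_eq!(*err.kind(), ErrorKind::SpecialWordBoundaryUnrecognized);
}
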
@@ -258,6 +272,29 @@ impl core::fmt::Display for ErrorKind {
RepetitionMissing => {
write!(f, "repetition operator missing expression")
}
+ SpecialWordBoundaryUnclosed => {
+ write!(
+ f,
+ "special word boundary assertion is either \
+ unclosed or contains an invalid character",
+ )
+ }
+ SpecialWordBoundaryUnrecognized => {
+ write!(
+ f,
+ "unrecognized special word boundary assertion, \
+ valid choices are: start, end, start-half \
+ or end-half",
+ )
+ }
+ SpecialWordOrRepetitionUnexpectedEof => {
+ write!(
+ f,
+ "found either the beginning of a special word \
+ boundary or a bounded repetition on a \\b with \
+ an opening brace, but no closing brace",
+ )
+ }
UnicodeClassInvalid => {
write!(f, "invalid Unicode character class")
}
@@ -278,6 +315,7 @@ impl core::fmt::Display for ErrorKind {
/// All span positions are absolute byte offsets that can be used on the
/// original regular expression that was parsed.
#[derive(Clone, Copy, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Span {
/// The start byte offset.
pub start: Position,
@@ -308,6 +346,7 @@ impl PartialOrd for Span {
/// A position encodes one half of a span, and includes the byte offset, line
/// number and column number.
#[derive(Clone, Copy, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Position {
/// The absolute offset of this position, starting at `0` from the
/// beginning of the regular expression pattern string.
@@ -396,6 +435,7 @@ impl Position {
/// comment contains a span of precisely where it occurred in the original
/// regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct WithComments {
/// The actual ast.
pub ast: Ast,
@@ -408,6 +448,7 @@ pub struct WithComments {
/// A regular expression can only contain comments when the `x` flag is
/// enabled.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Comment {
/// The span of this comment, including the beginning `#` and ending `\n`.
pub span: Span,
@@ -424,31 +465,97 @@ pub struct Comment {
/// This type defines its own destructor that uses constant stack space and
/// heap space proportional to the size of the `Ast`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Ast {
/// An empty regex that matches everything.
- Empty(Span),
+ Empty(Box<Span>),
/// A set of flags, e.g., `(?is)`.
- Flags(SetFlags),
+ Flags(Box<SetFlags>),
/// A single character literal, which includes escape sequences.
- Literal(Literal),
+ Literal(Box<Literal>),
/// The "any character" class.
- Dot(Span),
+ Dot(Box<Span>),
/// A single zero-width assertion.
- Assertion(Assertion),
- /// A single character class. This includes all forms of character classes
- /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
- Class(Class),
+ Assertion(Box<Assertion>),
+ /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
+ ClassUnicode(Box<ClassUnicode>),
+ /// A single perl character class, e.g., `\d` or `\W`.
+ ClassPerl(Box<ClassPerl>),
+ /// A single bracketed character class set, which may contain zero or more
+ /// character ranges and/or zero or more nested classes. e.g.,
+ /// `[a-zA-Z\pL]`.
+ ClassBracketed(Box<ClassBracketed>),
/// A repetition operator applied to an arbitrary regular expression.
- Repetition(Repetition),
+ Repetition(Box<Repetition>),
/// A grouped regular expression.
- Group(Group),
+ Group(Box<Group>),
/// An alternation of regular expressions.
- Alternation(Alternation),
+ Alternation(Box<Alternation>),
/// A concatenation of regular expressions.
- Concat(Concat),
+ Concat(Box<Concat>),
}
impl Ast {
+ /// Create an "empty" AST item.
+ pub fn empty(span: Span) -> Ast {
+ Ast::Empty(Box::new(span))
+ }
+
+ /// Create a "flags" AST item.
+ pub fn flags(e: SetFlags) -> Ast {
+ Ast::Flags(Box::new(e))
+ }
+
+ /// Create a "literal" AST item.
+ pub fn literal(e: Literal) -> Ast {
+ Ast::Literal(Box::new(e))
+ }
+
+ /// Create a "dot" AST item.
+ pub fn dot(span: Span) -> Ast {
+ Ast::Dot(Box::new(span))
+ }
+
+ /// Create a "assertion" AST item.
+ pub fn assertion(e: Assertion) -> Ast {
+ Ast::Assertion(Box::new(e))
+ }
+
+ /// Create a "Unicode class" AST item.
+ pub fn class_unicode(e: ClassUnicode) -> Ast {
+ Ast::ClassUnicode(Box::new(e))
+ }
+
+ /// Create a "Perl class" AST item.
+ pub fn class_perl(e: ClassPerl) -> Ast {
+ Ast::ClassPerl(Box::new(e))
+ }
+
+ /// Create a "bracketed class" AST item.
+ pub fn class_bracketed(e: ClassBracketed) -> Ast {
+ Ast::ClassBracketed(Box::new(e))
+ }
+
+ /// Create a "repetition" AST item.
+ pub fn repetition(e: Repetition) -> Ast {
+ Ast::Repetition(Box::new(e))
+ }
+
+ /// Create a "group" AST item.
+ pub fn group(e: Group) -> Ast {
+ Ast::Group(Box::new(e))
+ }
+
+ /// Create a "alternation" AST item.
+ pub fn alternation(e: Alternation) -> Ast {
+ Ast::Alternation(Box::new(e))
+ }
+
+ /// Create a "concat" AST item.
+ pub fn concat(e: Concat) -> Ast {
+ Ast::Concat(Box::new(e))
+ }
+
/// Return the span of this abstract syntax tree.
pub fn span(&self) -> &Span {
match *self {
@@ -457,7 +564,9 @@ impl Ast {
Ast::Literal(ref x) => &x.span,
Ast::Dot(ref span) => span,
Ast::Assertion(ref x) => &x.span,
- Ast::Class(ref x) => x.span(),
+ Ast::ClassUnicode(ref x) => &x.span,
+ Ast::ClassPerl(ref x) => &x.span,
+ Ast::ClassBracketed(ref x) => &x.span,
Ast::Repetition(ref x) => &x.span,
Ast::Group(ref x) => &x.span,
Ast::Alternation(ref x) => &x.span,
@@ -481,8 +590,10 @@ impl Ast {
| Ast::Flags(_)
| Ast::Literal(_)
| Ast::Dot(_)
- | Ast::Assertion(_) => false,
- Ast::Class(_)
+ | Ast::Assertion(_)
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_) => false,
+ Ast::ClassBracketed(_)
| Ast::Repetition(_)
| Ast::Group(_)
| Ast::Alternation(_)
@@ -508,6 +619,7 @@ impl core::fmt::Display for Ast {
/// An alternation of regular expressions.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Alternation {
/// The span of this alternation.
pub span: Span,
@@ -518,20 +630,21 @@ pub struct Alternation {
impl Alternation {
/// Return this alternation as an AST.
///
- /// If this alternation contains zero ASTs, then Ast::Empty is
- /// returned. If this alternation contains exactly 1 AST, then the
- /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
+ /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
+ /// If this alternation contains exactly 1 AST, then the corresponding AST
+ /// is returned. Otherwise, `Ast::alternation` is returned.
pub fn into_ast(mut self) -> Ast {
match self.asts.len() {
- 0 => Ast::Empty(self.span),
+ 0 => Ast::empty(self.span),
1 => self.asts.pop().unwrap(),
- _ => Ast::Alternation(self),
+ _ => Ast::alternation(self),
}
}
}
/// A concatenation of regular expressions.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Concat {
/// The span of this concatenation.
pub span: Span,
@@ -542,14 +655,14 @@ pub struct Concat {
impl Concat {
/// Return this concatenation as an AST.
///
- /// If this concatenation contains zero ASTs, then Ast::Empty is
- /// returned. If this concatenation contains exactly 1 AST, then the
- /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
+ /// If this concatenation contains zero ASTs, then `Ast::empty` is returned.
+ /// If this concatenation contains exactly 1 AST, then the corresponding AST
+ /// is returned. Otherwise, `Ast::concat` is returned.
pub fn into_ast(mut self) -> Ast {
match self.asts.len() {
- 0 => Ast::Empty(self.span),
+ 0 => Ast::empty(self.span),
1 => self.asts.pop().unwrap(),
- _ => Ast::Concat(self),
+ _ => Ast::concat(self),
}
}
}
@@ -560,6 +673,7 @@ impl Concat {
/// represented in their literal form, e.g., `a` or in their escaped form,
/// e.g., `\x61`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Literal {
/// The span of this literal.
pub span: Span,
@@ -584,6 +698,7 @@ impl Literal {
/// The kind of a single literal expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum LiteralKind {
/// The literal is written verbatim, e.g., `a` or `☃`.
Verbatim,
@@ -613,6 +728,7 @@ pub enum LiteralKind {
/// A special literal is a special escape sequence recognized by the regex
/// parser, e.g., `\f` or `\n`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum SpecialLiteralKind {
/// Bell, spelled `\a` (`\x07`).
Bell,
@@ -637,6 +753,7 @@ pub enum SpecialLiteralKind {
/// differ when used without brackets in the number of hex digits that must
/// follow.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum HexLiteralKind {
/// A `\x` prefix. When used without brackets, this form is limited to
/// two digits.
@@ -662,32 +779,9 @@ impl HexLiteralKind {
}
}
-/// A single character class expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum Class {
- /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
- Unicode(ClassUnicode),
- /// A perl character class, e.g., `\d` or `\W`.
- Perl(ClassPerl),
- /// A bracketed character class set, which may contain zero or more
- /// character ranges and/or zero or more nested classes. e.g.,
- /// `[a-zA-Z\pL]`.
- Bracketed(ClassBracketed),
-}
-
-impl Class {
- /// Return the span of this character class.
- pub fn span(&self) -> &Span {
- match *self {
- Class::Perl(ref x) => &x.span,
- Class::Unicode(ref x) => &x.span,
- Class::Bracketed(ref x) => &x.span,
- }
- }
-}
-
/// A Perl character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassPerl {
/// The span of this class.
pub span: Span,
@@ -700,6 +794,7 @@ pub struct ClassPerl {
/// The available Perl character classes.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassPerlKind {
/// Decimal numbers.
Digit,
@@ -711,6 +806,7 @@ pub enum ClassPerlKind {
/// An ASCII character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassAscii {
/// The span of this class.
pub span: Span,
@@ -723,6 +819,7 @@ pub struct ClassAscii {
/// The available ASCII character classes.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassAsciiKind {
/// `[0-9A-Za-z]`
Alnum,
@@ -786,6 +883,7 @@ impl ClassAsciiKind {
/// A Unicode character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassUnicode {
/// The span of this class.
pub span: Span,
@@ -838,8 +936,156 @@ pub enum ClassUnicodeKind {
},
}
+#[cfg(feature = "arbitrary")]
+impl arbitrary::Arbitrary<'_> for ClassUnicodeKind {
+ fn arbitrary(
+ u: &mut arbitrary::Unstructured,
+ ) -> arbitrary::Result<ClassUnicodeKind> {
+ #[cfg(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ ))]
+ {
+ use alloc::string::ToString;
+
+ use super::unicode_tables::{
+ property_names::PROPERTY_NAMES,
+ property_values::PROPERTY_VALUES,
+ };
+
+ match u.choose_index(3)? {
+ 0 => {
+ let all = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| e.1.iter())
+ .filter(|(name, _)| name.len() == 1)
+ .count();
+ let idx = u.choose_index(all)?;
+ let value = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| e.1.iter())
+ .take(idx + 1)
+ .last()
+ .unwrap()
+ .0
+ .chars()
+ .next()
+ .unwrap();
+ Ok(ClassUnicodeKind::OneLetter(value))
+ }
+ 1 => {
+ let all = PROPERTY_VALUES
+ .iter()
+ .map(|e| e.1.len())
+ .sum::<usize>()
+ + PROPERTY_NAMES.len();
+ let idx = u.choose_index(all)?;
+ let name = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| e.1.iter())
+ .chain(PROPERTY_NAMES)
+ .map(|(_, e)| e)
+ .take(idx + 1)
+ .last()
+ .unwrap();
+ Ok(ClassUnicodeKind::Named(name.to_string()))
+ }
+ 2 => {
+ let all = PROPERTY_VALUES
+ .iter()
+ .map(|e| e.1.len())
+ .sum::<usize>();
+ let idx = u.choose_index(all)?;
+ let (prop, value) = PROPERTY_VALUES
+ .iter()
+ .flat_map(|e| {
+ e.1.iter().map(|(_, value)| (e.0, value))
+ })
+ .take(idx + 1)
+ .last()
+ .unwrap();
+ Ok(ClassUnicodeKind::NamedValue {
+ op: u.arbitrary()?,
+ name: prop.to_string(),
+ value: value.to_string(),
+ })
+ }
+ _ => unreachable!("index chosen is impossible"),
+ }
+ }
+ #[cfg(not(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ )))]
+ {
+ match u.choose_index(3)? {
+ 0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)),
+ 1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)),
+ 2 => Ok(ClassUnicodeKind::NamedValue {
+ op: u.arbitrary()?,
+ name: u.arbitrary()?,
+ value: u.arbitrary()?,
+ }),
+ _ => unreachable!("index chosen is impossible"),
+ }
+ }
+ }
+
+ fn size_hint(depth: usize) -> (usize, Option<usize>) {
+ #[cfg(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ ))]
+ {
+ arbitrary::size_hint::and_all(&[
+ usize::size_hint(depth),
+ usize::size_hint(depth),
+ arbitrary::size_hint::or(
+ (0, Some(0)),
+ ClassUnicodeOpKind::size_hint(depth),
+ ),
+ ])
+ }
+ #[cfg(not(any(
+ feature = "unicode-age",
+ feature = "unicode-bool",
+ feature = "unicode-gencat",
+ feature = "unicode-perl",
+ feature = "unicode-script",
+ feature = "unicode-segment",
+ )))]
+ {
+ arbitrary::size_hint::and(
+ usize::size_hint(depth),
+ arbitrary::size_hint::or_all(&[
+ char::size_hint(depth),
+ String::size_hint(depth),
+ arbitrary::size_hint::and_all(&[
+ String::size_hint(depth),
+ String::size_hint(depth),
+ ClassUnicodeOpKind::size_hint(depth),
+ ]),
+ ]),
+ )
+ }
+ }
+}
+
/// The type of op used in a Unicode character class.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassUnicodeOpKind {
/// A property set to a specific value, e.g., `\p{scx=Katakana}`.
Equal,
@@ -862,6 +1108,7 @@ impl ClassUnicodeOpKind {
/// A bracketed character class, e.g., `[a-z0-9]`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassBracketed {
/// The span of this class.
pub span: Span,
@@ -880,6 +1127,7 @@ pub struct ClassBracketed {
/// items (literals, ranges, other bracketed classes) or a tree of binary set
/// operations.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassSet {
/// An item, which can be a single literal, range, nested character class
/// or a union of items.
@@ -913,6 +1161,7 @@ impl ClassSet {
/// A single component of a character class set.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassSetItem {
/// An empty item.
///
@@ -956,6 +1205,7 @@ impl ClassSetItem {
/// A single character class range in a set.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassSetRange {
/// The span of this range.
pub span: Span,
@@ -977,6 +1227,7 @@ impl ClassSetRange {
/// A union of items inside a character class set.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassSetUnion {
/// The span of the items in this operation. e.g., the `a-z0-9` in
/// `[^a-z0-9]`
@@ -1021,6 +1272,7 @@ impl ClassSetUnion {
/// A Unicode character class set operation.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ClassSetBinaryOp {
/// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
pub span: Span,
@@ -1038,6 +1290,7 @@ pub struct ClassSetBinaryOp {
/// explicit union operator. Concatenation inside a character class corresponds
/// to the union operation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ClassSetBinaryOpKind {
/// The intersection of two sets, e.g., `\pN&&[a-z]`.
Intersection,
@@ -1051,6 +1304,7 @@ pub enum ClassSetBinaryOpKind {
/// A single zero-width assertion.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Assertion {
/// The span of this assertion.
pub span: Span,
@@ -1060,6 +1314,7 @@ pub struct Assertion {
/// An assertion kind.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum AssertionKind {
/// `^`
StartLine,
@@ -1073,10 +1328,23 @@ pub enum AssertionKind {
WordBoundary,
/// `\B`
NotWordBoundary,
+ /// `\b{start}`
+ WordBoundaryStart,
+ /// `\b{end}`
+ WordBoundaryEnd,
+ /// `\<` (alias for `\b{start}`)
+ WordBoundaryStartAngle,
+ /// `\>` (alias for `\b{end}`)
+ WordBoundaryEndAngle,
+ /// `\b{start-half}`
+ WordBoundaryStartHalf,
+ /// `\b{end-half}`
+ WordBoundaryEndHalf,
}
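
Note: the six new AssertionKind variants correspond to the `\b{start}`, `\b{end}`, `\b{start-half}` and `\b{end-half}` assertions and the `\<`/`\>` aliases handled by the parser changes below. A small sketch of the expected shape of a parse, using only items visible in this diff (the concrete pattern is illustrative, not one of the patch's own tests):

use regex_syntax::ast::{parse::Parser, Ast, AssertionKind};

fn main() {
    // `\b{start}` should now parse as a single zero-width assertion node.
    let ast = Parser::new().parse(r"\b{start}").unwrap();
    match ast {
        Ast::Assertion(a) => assert_eq!(a.kind, AssertionKind::WordBoundaryStart),
        _ => unreachable!("expected a lone assertion"),
    }
}
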
/// A repetition operation applied to a regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Repetition {
/// The span of this operation.
pub span: Span,
@@ -1090,6 +1358,7 @@ pub struct Repetition {
/// The repetition operator itself.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct RepetitionOp {
/// The span of this operator. This includes things like `+`, `*?` and
/// `{m,n}`.
@@ -1100,6 +1369,7 @@ pub struct RepetitionOp {
/// The kind of a repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum RepetitionKind {
/// `?`
ZeroOrOne,
@@ -1113,6 +1383,7 @@ pub enum RepetitionKind {
/// A range repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum RepetitionRange {
/// `{m}`
Exactly(u32),
@@ -1142,6 +1413,7 @@ impl RepetitionRange {
/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
/// `(?is:a)`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Group {
/// The span of this group.
pub span: Span,
@@ -1183,6 +1455,7 @@ impl Group {
/// The kind of a group.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum GroupKind {
/// `(a)`
CaptureIndex(u32),
@@ -1211,8 +1484,38 @@ pub struct CaptureName {
pub index: u32,
}
+#[cfg(feature = "arbitrary")]
+impl arbitrary::Arbitrary<'_> for CaptureName {
+ fn arbitrary(
+ u: &mut arbitrary::Unstructured,
+ ) -> arbitrary::Result<CaptureName> {
+ let len = u.arbitrary_len::<char>()?;
+ if len == 0 {
+ return Err(arbitrary::Error::NotEnoughData);
+ }
+ let mut name: String = String::new();
+ for _ in 0..len {
+ let ch: char = u.arbitrary()?;
+ let cp = u32::from(ch);
+ let ascii_letter_offset = u8::try_from(cp % 26).unwrap();
+ let ascii_letter = b'a' + ascii_letter_offset;
+ name.push(char::from(ascii_letter));
+ }
+ Ok(CaptureName { span: u.arbitrary()?, name, index: u.arbitrary()? })
+ }
+
+ fn size_hint(depth: usize) -> (usize, Option<usize>) {
+ arbitrary::size_hint::and_all(&[
+ Span::size_hint(depth),
+ usize::size_hint(depth),
+ u32::size_hint(depth),
+ ])
+ }
+}
+
/// A group of flags that is not applied to a particular regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct SetFlags {
/// The span of these flags, including the grouping parentheses.
pub span: Span,
@@ -1224,6 +1527,7 @@ pub struct SetFlags {
///
/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Flags {
/// The span of this group of flags.
pub span: Span,
@@ -1276,6 +1580,7 @@ impl Flags {
/// A single item in a group of flags.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct FlagsItem {
/// The span of this item.
pub span: Span,
@@ -1285,6 +1590,7 @@ pub struct FlagsItem {
/// The kind of an item in a group of flags.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum FlagsItemKind {
/// A negation operator applied to all subsequent flags in the enclosing
/// group.
@@ -1305,6 +1611,7 @@ impl FlagsItemKind {
/// A single flag.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Flag {
/// `i`
CaseInsensitive,
@@ -1334,8 +1641,10 @@ impl Drop for Ast {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- // Classes are recursive, so they get their own Drop impl.
- | Ast::Class(_) => return,
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_)
+ // Bracketed classes are recursive, so they get their own Drop impl.
+ | Ast::ClassBracketed(_) => return,
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
Ast::Alternation(ref x) if x.asts.is_empty() => return,
@@ -1344,7 +1653,7 @@ impl Drop for Ast {
}
let empty_span = || Span::splat(Position::new(0, 0, 0));
- let empty_ast = || Ast::Empty(empty_span());
+ let empty_ast = || Ast::empty(empty_span());
let mut stack = vec![mem::replace(self, empty_ast())];
while let Some(mut ast) = stack.pop() {
match ast {
@@ -1353,8 +1662,11 @@ impl Drop for Ast {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- // Classes are recursive, so they get their own Drop impl.
- | Ast::Class(_) => {}
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_)
+ // Bracketed classes are recursive, so they get their own Drop
+ // impl.
+ | Ast::ClassBracketed(_) => {}
Ast::Repetition(ref mut x) => {
stack.push(mem::replace(&mut x.ast, empty_ast()));
}
@@ -1447,9 +1759,9 @@ mod tests {
let run = || {
let span = || Span::splat(Position::new(0, 0, 0));
- let mut ast = Ast::Empty(span());
+ let mut ast = Ast::empty(span());
for i in 0..200 {
- ast = Ast::Group(Group {
+ ast = Ast::group(Group {
span: span(),
kind: GroupKind::CaptureIndex(i),
ast: Box::new(ast),
@@ -1478,4 +1790,20 @@ mod tests {
.join()
.unwrap();
}
+
+ // This tests that our `Ast` has a reasonable size. This isn't a hard rule
+ // and it can be increased if given a good enough reason. But this test
+ // exists because the size of `Ast` was at one point over 200 bytes on a
+ // 64-bit target. Wow.
+ #[test]
+ fn ast_size() {
+ let max = 2 * core::mem::size_of::<usize>();
+ let size = core::mem::size_of::<Ast>();
+ assert!(
+ size <= max,
+ "Ast size of {} bytes is bigger than suggested max {}",
+ size,
+ max
+ );
+ }
}
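
Note on the mod.rs changes as a whole: every `Ast` variant now carries a `Box`, and the new `Ast::empty`, `Ast::literal`, etc. constructors do the boxing, which is what lets the `ast_size` test above pin the enum at two machine words. A minimal sketch of the new calling convention, using only items visible in this diff:

use regex_syntax::ast::{Ast, Position, Span};

fn main() {
    let span = Span::splat(Position::new(0, 0, 0));
    // Constructor helpers box the payload; callers no longer write
    // Ast::Empty(span) or box variant payloads by hand.
    let ast = Ast::empty(span);
    assert!(matches!(ast, Ast::Empty(_)));
    // Matches the ast_size test: the enum stays at most two words wide.
    assert!(core::mem::size_of::<Ast>() <= 2 * core::mem::size_of::<usize>());
}
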
diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs
index 9cf64e9ec..593b14fbc 100644
--- a/vendor/regex-syntax/src/ast/parse.rs
+++ b/vendor/regex-syntax/src/ast/parse.rs
@@ -53,11 +53,11 @@ impl Primitive {
/// Convert this primitive into a proper AST.
fn into_ast(self) -> Ast {
match self {
- Primitive::Literal(lit) => Ast::Literal(lit),
- Primitive::Assertion(assert) => Ast::Assertion(assert),
- Primitive::Dot(span) => Ast::Dot(span),
- Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
- Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
+ Primitive::Literal(lit) => Ast::literal(lit),
+ Primitive::Assertion(assert) => Ast::assertion(assert),
+ Primitive::Dot(span) => Ast::dot(span),
+ Primitive::Perl(cls) => Ast::class_perl(cls),
+ Primitive::Unicode(cls) => Ast::class_unicode(cls),
}
}
@@ -383,7 +383,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
/// Return a reference to the pattern being parsed.
fn pattern(&self) -> &str {
- self.pattern.borrow()
+ self.pattern
}
/// Create a new error with the given span and error type.
@@ -691,7 +691,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
self.parser().ignore_whitespace.set(v);
}
- concat.asts.push(Ast::Flags(set));
+ concat.asts.push(Ast::flags(set));
Ok(concat)
}
Either::Right(group) => {
@@ -764,7 +764,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
group.ast = Box::new(group_concat.into_ast());
}
}
- prior_concat.asts.push(Ast::Group(group));
+ prior_concat.asts.push(Ast::group(group));
Ok(prior_concat)
}
@@ -783,7 +783,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Some(GroupState::Alternation(mut alt)) => {
alt.span.end = self.pos();
alt.asts.push(concat.into_ast());
- Ok(Ast::Alternation(alt))
+ Ok(Ast::alternation(alt))
}
Some(GroupState::Group { group, .. }) => {
return Err(
@@ -850,7 +850,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
fn pop_class(
&self,
nested_union: ast::ClassSetUnion,
- ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
+ ) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
assert_eq!(self.char(), ']');
let item = ast::ClassSet::Item(nested_union.into_item());
@@ -882,7 +882,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
set.span.end = self.pos();
set.kind = prevset;
if stack.is_empty() {
- Ok(Either::Right(ast::Class::Bracketed(set)))
+ Ok(Either::Right(set))
} else {
union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
Ok(Either::Left(union))
@@ -976,7 +976,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
'|' => concat = self.push_alternate(concat)?,
'[' => {
let class = self.parse_set_class()?;
- concat.asts.push(Ast::Class(class));
+ concat.asts.push(Ast::class_bracketed(class));
}
'?' => {
concat = self.parse_uncounted_repetition(
@@ -1057,7 +1057,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
greedy = false;
self.bump();
}
- concat.asts.push(Ast::Repetition(ast::Repetition {
+ concat.asts.push(Ast::repetition(ast::Repetition {
span: ast.span().with_end(self.pos()),
op: ast::RepetitionOp {
span: Span::new(op_start, self.pos()),
@@ -1159,7 +1159,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
);
}
- concat.asts.push(Ast::Repetition(ast::Repetition {
+ concat.asts.push(Ast::repetition(ast::Repetition {
span: ast.span().with_end(self.pos()),
op: ast::RepetitionOp {
span: op_span,
@@ -1212,7 +1212,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Ok(Either::Right(ast::Group {
span: open_span,
kind: ast::GroupKind::CaptureName { starts_with_p, name },
- ast: Box::new(Ast::Empty(self.span())),
+ ast: Box::new(Ast::empty(self.span())),
}))
} else if self.bump_if("?") {
if self.is_eof() {
@@ -1241,7 +1241,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Ok(Either::Right(ast::Group {
span: open_span,
kind: ast::GroupKind::NonCapturing(flags),
- ast: Box::new(Ast::Empty(self.span())),
+ ast: Box::new(Ast::empty(self.span())),
}))
}
} else {
@@ -1249,7 +1249,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
Ok(Either::Right(ast::Group {
span: open_span,
kind: ast::GroupKind::CaptureIndex(capture_index),
- ast: Box::new(Ast::Empty(self.span())),
+ ast: Box::new(Ast::empty(self.span())),
}))
}
}
@@ -1528,18 +1528,115 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
span,
kind: ast::AssertionKind::EndText,
})),
- 'b' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::WordBoundary,
- })),
+ 'b' => {
+ let mut wb = ast::Assertion {
+ span,
+ kind: ast::AssertionKind::WordBoundary,
+ };
+ // After a \b, we "try" to parse things like \b{start} for
+ // special word boundary assertions.
+ if !self.is_eof() && self.char() == '{' {
+ if let Some(kind) =
+ self.maybe_parse_special_word_boundary(start)?
+ {
+ wb.kind = kind;
+ wb.span.end = self.pos();
+ }
+ }
+ Ok(Primitive::Assertion(wb))
+ }
'B' => Ok(Primitive::Assertion(ast::Assertion {
span,
kind: ast::AssertionKind::NotWordBoundary,
})),
+ '<' => Ok(Primitive::Assertion(ast::Assertion {
+ span,
+ kind: ast::AssertionKind::WordBoundaryStartAngle,
+ })),
+ '>' => Ok(Primitive::Assertion(ast::Assertion {
+ span,
+ kind: ast::AssertionKind::WordBoundaryEndAngle,
+ })),
_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
}
}
+ /// Attempt to parse a specialty word boundary. That is, `\b{start}`,
+ /// `\b{end}`, `\b{start-half}` or `\b{end-half}`.
+ ///
+ /// This is similar to `maybe_parse_ascii_class` in that, in most cases,
+ /// if it fails it will just return `None` with no error. This is done
+ /// because `\b{5}` is a valid expression and we want to let that be parsed
+ /// by the existing counted repetition parsing code. (I thought about just
+ /// invoking the counted repetition code from here, but it seemed a little
+ /// ham-fisted.)
+ ///
+ /// Unlike `maybe_parse_ascii_class` though, this can return an error.
+ /// Namely, if we definitely know it isn't a counted repetition, then we
+ /// return an error specific to the specialty word boundaries.
+ ///
+ /// This assumes the parser is positioned at a `{` immediately following
+ /// a `\b`. When `None` is returned, the parser is returned to the position
+ /// at which it started: pointing at a `{`.
+ ///
+ /// The position given should correspond to the start of the `\b`.
+ fn maybe_parse_special_word_boundary(
+ &self,
+ wb_start: Position,
+ ) -> Result<Option<ast::AssertionKind>> {
+ assert_eq!(self.char(), '{');
+
+ let is_valid_char = |c| match c {
+ 'A'..='Z' | 'a'..='z' | '-' => true,
+ _ => false,
+ };
+ let start = self.pos();
+ if !self.bump_and_bump_space() {
+ return Err(self.error(
+ Span::new(wb_start, self.pos()),
+ ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
+ ));
+ }
+ let start_contents = self.pos();
+ // This is one of the critical bits: if the first non-whitespace
+ // character isn't in [-A-Za-z] (i.e., this can't be a special word
+ // boundary), then we bail and let the counted repetition parser deal
+ // with this.
+ if !is_valid_char(self.char()) {
+ self.parser().pos.set(start);
+ return Ok(None);
+ }
+
+ // Now collect up our chars until we see a '}'.
+ let mut scratch = self.parser().scratch.borrow_mut();
+ scratch.clear();
+ while !self.is_eof() && is_valid_char(self.char()) {
+ scratch.push(self.char());
+ self.bump_and_bump_space();
+ }
+ if self.is_eof() || self.char() != '}' {
+ return Err(self.error(
+ Span::new(start, self.pos()),
+ ast::ErrorKind::SpecialWordBoundaryUnclosed,
+ ));
+ }
+ let end = self.pos();
+ self.bump();
+ let kind = match scratch.as_str() {
+ "start" => ast::AssertionKind::WordBoundaryStart,
+ "end" => ast::AssertionKind::WordBoundaryEnd,
+ "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
+ "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
+ _ => {
+ return Err(self.error(
+ Span::new(start_contents, end),
+ ast::ErrorKind::SpecialWordBoundaryUnrecognized,
+ ))
+ }
+ };
+ Ok(Some(kind))
+ }
+
/// Parse an octal representation of a Unicode codepoint up to 3 digits
/// long. This expects the parser to be positioned at the first octal
/// digit and advances the parser to the first character immediately
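
Note: the doc comment on maybe_parse_special_word_boundary above describes how a `{` after `\b` is disambiguated. A quick sketch of the two outcomes from the caller's point of view, hedged and with illustrative patterns rather than the patch's own tests:

use regex_syntax::ast::{parse::Parser, Ast};

fn main() {
    // Digits after the brace: the `{` is handed back to the counted
    // repetition parser, so `\b{5}` is a bounded repetition of `\b`.
    assert!(matches!(Parser::new().parse(r"\b{5}").unwrap(), Ast::Repetition(_)));
    // A recognized name after the brace: a special word boundary assertion.
    assert!(matches!(Parser::new().parse(r"\b{start}").unwrap(), Ast::Assertion(_)));
}
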
@@ -1743,7 +1840,7 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
/// is successful, then the parser is advanced to the position immediately
/// following the closing `]`.
#[inline(never)]
- fn parse_set_class(&self) -> Result<ast::Class> {
+ fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
assert_eq!(self.char(), '[');
let mut union =
@@ -1967,9 +2064,9 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
// because parsing cannot fail with any interesting error. For example,
// in order to use an ASCII character class, it must be enclosed in
// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
- // of it as "ASCII character characters have the syntax `[:NAME:]`
- // which can only appear within character brackets." This means that
- // things like `[[:lower:]A]` are legal constructs.
+ // of it as "ASCII character classes have the syntax `[:NAME:]` which
+ // can only appear within character brackets." This means that things
+ // like `[[:lower:]A]` are legal constructs.
//
// However, if one types an incorrect ASCII character class, e.g.,
// `[[:loower:]]`, then we treat that as a normal nested character
@@ -2189,12 +2286,12 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- | Ast::Class(ast::Class::Unicode(_))
- | Ast::Class(ast::Class::Perl(_)) => {
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_) => {
// These are all base cases, so we don't increment depth.
return Ok(());
}
- Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
+ Ast::ClassBracketed(ref x) => &x.span,
Ast::Repetition(ref x) => &x.span,
Ast::Group(ref x) => &x.span,
Ast::Alternation(ref x) => &x.span,
@@ -2210,12 +2307,12 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
| Ast::Literal(_)
| Ast::Dot(_)
| Ast::Assertion(_)
- | Ast::Class(ast::Class::Unicode(_))
- | Ast::Class(ast::Class::Perl(_)) => {
+ | Ast::ClassUnicode(_)
+ | Ast::ClassPerl(_) => {
// These are all base cases, so we don't decrement depth.
Ok(())
}
- Ast::Class(ast::Class::Bracketed(_))
+ Ast::ClassBracketed(_)
| Ast::Repetition(_)
| Ast::Group(_)
| Ast::Alternation(_)
@@ -2426,12 +2523,12 @@ mod tests {
/// Create a meta literal starting at the given position.
fn meta_lit(c: char, span: Span) -> Ast {
- Ast::Literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
+ Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
}
/// Create a verbatim literal with the given span.
fn lit_with(c: char, span: Span) -> Ast {
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span,
kind: ast::LiteralKind::Verbatim,
c,
@@ -2445,17 +2542,17 @@ mod tests {
/// Create a concatenation with the given span.
fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
- Ast::Concat(ast::Concat { span, asts })
+ Ast::concat(ast::Concat { span, asts })
}
/// Create an alternation with the given span.
fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
- Ast::Alternation(ast::Alternation { span: span(range), asts })
+ Ast::alternation(ast::Alternation { span: span(range), asts })
}
/// Create a capturing group with the given span.
fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span(range),
kind: ast::GroupKind::CaptureIndex(index),
ast: Box::new(ast),
@@ -2488,7 +2585,7 @@ mod tests {
},
);
}
- Ast::Flags(ast::SetFlags {
+ Ast::flags(ast::SetFlags {
span: span_range(pat, range.clone()),
flags: ast::Flags {
span: span_range(pat, (range.start + 2)..(range.end - 1)),
@@ -2502,7 +2599,7 @@ mod tests {
// A nest limit of 0 still allows some types of regexes.
assert_eq!(
parser_nest_limit("", 0).parse(),
- Ok(Ast::Empty(span(0..0)))
+ Ok(Ast::empty(span(0..0)))
);
assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
@@ -2516,7 +2613,7 @@ mod tests {
);
assert_eq!(
parser_nest_limit("a+", 1).parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2542,14 +2639,14 @@ mod tests {
);
assert_eq!(
parser_nest_limit("a+*", 2).parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..3),
op: ast::RepetitionOp {
span: span(2..3),
kind: ast::RepetitionKind::ZeroOrMore,
},
greedy: true,
- ast: Box::new(Ast::Repetition(ast::Repetition {
+ ast: Box::new(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2606,7 +2703,7 @@ mod tests {
);
assert_eq!(
parser_nest_limit("[a]", 1).parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..3),
negated: false,
kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
@@ -2616,7 +2713,7 @@ mod tests {
c: 'a',
}
)),
- })))
+ }))
);
assert_eq!(
parser_nest_limit("[ab]", 1).parse().unwrap_err(),
@@ -2776,7 +2873,7 @@ bar
vec![
lit_with('a', span_range(pat, 0..1)),
lit_with(' ', span_range(pat, 1..2)),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 2..9),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span_range(pat, 4..5),
@@ -2803,7 +2900,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 4..pat.len()),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -2825,7 +2922,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 4..pat.len()),
kind: ast::GroupKind::CaptureIndex(1),
ast: Box::new(lit_with('a', span_range(pat, 7..8))),
@@ -2840,7 +2937,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Group(ast::Group {
+ Ast::group(ast::Group {
span: span_range(pat, 4..pat.len()),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span_range(pat, 8..8),
@@ -2858,7 +2955,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(4..13),
kind: ast::LiteralKind::HexBrace(
ast::HexLiteralKind::X
@@ -2877,7 +2974,7 @@ bar
span_range(pat, 0..pat.len()),
vec![
flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span_range(pat, 4..6),
kind: ast::LiteralKind::Superfluous,
c: ' ',
@@ -2895,9 +2992,9 @@ bar
Ok(concat_with(
span_range(pat, 0..3),
vec![
- Ast::Dot(span_range(pat, 0..1)),
+ Ast::dot(span_range(pat, 0..1)),
lit_with('\n', span_range(pat, 1..2)),
- Ast::Dot(span_range(pat, 2..3)),
+ Ast::dot(span_range(pat, 2..3)),
]
))
);
@@ -2933,7 +3030,7 @@ bar
fn parse_uncounted_repetition() {
assert_eq!(
parser(r"a*").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2945,7 +3042,7 @@ bar
);
assert_eq!(
parser(r"a+").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2958,7 +3055,7 @@ bar
assert_eq!(
parser(r"a?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2970,7 +3067,7 @@ bar
);
assert_eq!(
parser(r"a??").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..3),
op: ast::RepetitionOp {
span: span(1..3),
@@ -2982,7 +3079,7 @@ bar
);
assert_eq!(
parser(r"a?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -2997,7 +3094,7 @@ bar
Ok(concat(
0..3,
vec![
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(0..2),
op: ast::RepetitionOp {
span: span(1..2),
@@ -3015,7 +3112,7 @@ bar
Ok(concat(
0..4,
vec![
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(0..3),
op: ast::RepetitionOp {
span: span(1..3),
@@ -3034,7 +3131,7 @@ bar
0..3,
vec![
lit('a', 0),
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(1..3),
op: ast::RepetitionOp {
span: span(2..3),
@@ -3048,7 +3145,7 @@ bar
);
assert_eq!(
parser(r"(ab)?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(4..5),
@@ -3067,8 +3164,8 @@ bar
Ok(alt(
0..3,
vec![
- Ast::Empty(span(0..0)),
- Ast::Repetition(ast::Repetition {
+ Ast::empty(span(0..0)),
+ Ast::repetition(ast::Repetition {
span: span(1..3),
op: ast::RepetitionOp {
span: span(2..3),
@@ -3157,7 +3254,7 @@ bar
fn parse_counted_repetition() {
assert_eq!(
parser(r"a{5}").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..4),
op: ast::RepetitionOp {
span: span(1..4),
@@ -3171,7 +3268,7 @@ bar
);
assert_eq!(
parser(r"a{5,}").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(1..5),
@@ -3185,7 +3282,7 @@ bar
);
assert_eq!(
parser(r"a{5,9}").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..6),
op: ast::RepetitionOp {
span: span(1..6),
@@ -3199,7 +3296,7 @@ bar
);
assert_eq!(
parser(r"a{5}?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(1..5),
@@ -3217,7 +3314,7 @@ bar
0..5,
vec![
lit('a', 0),
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(1..5),
op: ast::RepetitionOp {
span: span(2..5),
@@ -3237,7 +3334,7 @@ bar
0..6,
vec![
lit('a', 0),
- Ast::Repetition(ast::Repetition {
+ Ast::repetition(ast::Repetition {
span: span(1..5),
op: ast::RepetitionOp {
span: span(2..5),
@@ -3255,7 +3352,7 @@ bar
assert_eq!(
parser(r"a{ 5 }").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..6),
op: ast::RepetitionOp {
span: span(1..6),
@@ -3269,7 +3366,7 @@ bar
);
assert_eq!(
parser(r"a{ 5 , 9 }").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..10),
op: ast::RepetitionOp {
span: span(1..10),
@@ -3283,7 +3380,7 @@ bar
);
assert_eq!(
parser_ignore_whitespace(r"a{5,9} ?").parse(),
- Ok(Ast::Repetition(ast::Repetition {
+ Ok(Ast::repetition(ast::Repetition {
span: span(0..8),
op: ast::RepetitionOp {
span: span(1..8),
@@ -3295,6 +3392,23 @@ bar
ast: Box::new(lit('a', 0)),
}))
);
+ assert_eq!(
+ parser(r"\b{5,9}").parse(),
+ Ok(Ast::repetition(ast::Repetition {
+ span: span(0..7),
+ op: ast::RepetitionOp {
+ span: span(2..7),
+ kind: ast::RepetitionKind::Range(
+ ast::RepetitionRange::Bounded(5, 9)
+ ),
+ },
+ greedy: true,
+ ast: Box::new(Ast::assertion(ast::Assertion {
+ span: span(0..2),
+ kind: ast::AssertionKind::WordBoundary,
+ })),
+ }))
+ );
assert_eq!(
parser(r"(?i){0}").parse().unwrap_err(),
@@ -3414,7 +3528,7 @@ bar
fn parse_alternate() {
assert_eq!(
parser(r"a|b").parse(),
- Ok(Ast::Alternation(ast::Alternation {
+ Ok(Ast::alternation(ast::Alternation {
span: span(0..3),
asts: vec![lit('a', 0), lit('b', 2)],
}))
@@ -3424,7 +3538,7 @@ bar
Ok(group(
0..5,
1,
- Ast::Alternation(ast::Alternation {
+ Ast::alternation(ast::Alternation {
span: span(1..4),
asts: vec![lit('a', 1), lit('b', 3)],
})
@@ -3433,14 +3547,14 @@ bar
assert_eq!(
parser(r"a|b|c").parse(),
- Ok(Ast::Alternation(ast::Alternation {
+ Ok(Ast::alternation(ast::Alternation {
span: span(0..5),
asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
}))
);
assert_eq!(
parser(r"ax|by|cz").parse(),
- Ok(Ast::Alternation(ast::Alternation {
+ Ok(Ast::alternation(ast::Alternation {
span: span(0..8),
asts: vec![
concat(0..2, vec![lit('a', 0), lit('x', 1)]),
@@ -3454,7 +3568,7 @@ bar
Ok(group(
0..10,
1,
- Ast::Alternation(ast::Alternation {
+ Ast::alternation(ast::Alternation {
span: span(1..9),
asts: vec![
concat(1..3, vec![lit('a', 1), lit('x', 2)]),
@@ -3503,7 +3617,7 @@ bar
parser(r"|").parse(),
Ok(alt(
0..1,
- vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
+ vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),]
))
);
assert_eq!(
@@ -3511,19 +3625,19 @@ bar
Ok(alt(
0..2,
vec![
- Ast::Empty(span(0..0)),
- Ast::Empty(span(1..1)),
- Ast::Empty(span(2..2)),
+ Ast::empty(span(0..0)),
+ Ast::empty(span(1..1)),
+ Ast::empty(span(2..2)),
]
))
);
assert_eq!(
parser(r"a|").parse(),
- Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
+ Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),]))
);
assert_eq!(
parser(r"|a").parse(),
- Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
+ Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),]))
);
assert_eq!(
@@ -3533,7 +3647,7 @@ bar
1,
alt(
1..2,
- vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
+ vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),]
)
))
);
@@ -3542,7 +3656,7 @@ bar
Ok(group(
0..4,
1,
- alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
+ alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),])
))
);
assert_eq!(
@@ -3550,7 +3664,7 @@ bar
Ok(group(
0..4,
1,
- alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
+ alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),])
))
);
@@ -3606,7 +3720,7 @@ bar
fn parse_group() {
assert_eq!(
parser("(?i)").parse(),
- Ok(Ast::Flags(ast::SetFlags {
+ Ok(Ast::flags(ast::SetFlags {
span: span(0..4),
flags: ast::Flags {
span: span(2..3),
@@ -3621,7 +3735,7 @@ bar
);
assert_eq!(
parser("(?iU)").parse(),
- Ok(Ast::Flags(ast::SetFlags {
+ Ok(Ast::flags(ast::SetFlags {
span: span(0..5),
flags: ast::Flags {
span: span(2..4),
@@ -3644,7 +3758,7 @@ bar
);
assert_eq!(
parser("(?i-U)").parse(),
- Ok(Ast::Flags(ast::SetFlags {
+ Ok(Ast::flags(ast::SetFlags {
span: span(0..6),
flags: ast::Flags {
span: span(2..5),
@@ -3672,15 +3786,15 @@ bar
assert_eq!(
parser("()").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..2),
kind: ast::GroupKind::CaptureIndex(1),
- ast: Box::new(Ast::Empty(span(1..1))),
+ ast: Box::new(Ast::empty(span(1..1))),
}))
);
assert_eq!(
parser("(a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..3),
kind: ast::GroupKind::CaptureIndex(1),
ast: Box::new(lit('a', 1)),
@@ -3688,20 +3802,20 @@ bar
);
assert_eq!(
parser("(())").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..4),
kind: ast::GroupKind::CaptureIndex(1),
- ast: Box::new(Ast::Group(ast::Group {
+ ast: Box::new(Ast::group(ast::Group {
span: span(1..3),
kind: ast::GroupKind::CaptureIndex(2),
- ast: Box::new(Ast::Empty(span(2..2))),
+ ast: Box::new(Ast::empty(span(2..2))),
})),
}))
);
assert_eq!(
parser("(?:a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..5),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span(2..2),
@@ -3713,7 +3827,7 @@ bar
assert_eq!(
parser("(?i:a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..6),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span(2..3),
@@ -3729,7 +3843,7 @@ bar
);
assert_eq!(
parser("(?i-U:a)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..8),
kind: ast::GroupKind::NonCapturing(ast::Flags {
span: span(2..5),
@@ -3818,7 +3932,7 @@ bar
fn parse_capture_name() {
assert_eq!(
parser("(?<a>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..7),
kind: ast::GroupKind::CaptureName {
starts_with_p: false,
@@ -3833,7 +3947,7 @@ bar
);
assert_eq!(
parser("(?P<a>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..8),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3848,7 +3962,7 @@ bar
);
assert_eq!(
parser("(?P<abc>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3864,7 +3978,7 @@ bar
assert_eq!(
parser("(?P<a_1>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3880,7 +3994,7 @@ bar
assert_eq!(
parser("(?P<a.1>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..10),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3896,7 +4010,7 @@ bar
assert_eq!(
parser("(?P<a[1]>z)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: span(0..11),
kind: ast::GroupKind::CaptureName {
starts_with_p: true,
@@ -3912,7 +4026,7 @@ bar
assert_eq!(
parser("(?P<a¾>)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: Span::new(
Position::new(0, 1, 1),
Position::new(9, 1, 9),
@@ -3928,7 +4042,7 @@ bar
index: 1,
}
},
- ast: Box::new(Ast::Empty(Span::new(
+ ast: Box::new(Ast::empty(Span::new(
Position::new(8, 1, 8),
Position::new(8, 1, 8),
))),
@@ -3936,7 +4050,7 @@ bar
);
assert_eq!(
parser("(?P<名字>)").parse(),
- Ok(Ast::Group(ast::Group {
+ Ok(Ast::group(ast::Group {
span: Span::new(
Position::new(0, 1, 1),
Position::new(12, 1, 9),
@@ -3952,7 +4066,7 @@ bar
index: 1,
}
},
- ast: Box::new(Ast::Empty(Span::new(
+ ast: Box::new(Ast::empty(Span::new(
Position::new(11, 1, 8),
Position::new(11, 1, 8),
))),
@@ -4382,6 +4496,48 @@ bar
}))
);
assert_eq!(
+ parser(r"\b{start}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..9),
+ kind: ast::AssertionKind::WordBoundaryStart,
+ }))
+ );
+ assert_eq!(
+ parser(r"\b{end}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..7),
+ kind: ast::AssertionKind::WordBoundaryEnd,
+ }))
+ );
+ assert_eq!(
+ parser(r"\b{start-half}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..14),
+ kind: ast::AssertionKind::WordBoundaryStartHalf,
+ }))
+ );
+ assert_eq!(
+ parser(r"\b{end-half}").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..12),
+ kind: ast::AssertionKind::WordBoundaryEndHalf,
+ }))
+ );
+ assert_eq!(
+ parser(r"\<").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..2),
+ kind: ast::AssertionKind::WordBoundaryStartAngle,
+ }))
+ );
+ assert_eq!(
+ parser(r"\>").parse_primitive(),
+ Ok(Primitive::Assertion(ast::Assertion {
+ span: span(0..2),
+ kind: ast::AssertionKind::WordBoundaryEndAngle,
+ }))
+ );
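
These added tests cover the new special word boundary assertions (`\b{start}`, `\b{end}`, `\b{start-half}`, `\b{end-half}`) plus the `\<`/`\>` aliases, which used to be rejected escapes. A hedged sketch of parsing them through the public AST parser (assuming the `ast::parse::Parser` API of the vendored version):

    use regex_syntax::ast::parse::Parser;

    fn main() {
        // Each of these now parses into an Ast::Assertion with the
        // corresponding AssertionKind instead of returning an error.
        let pats = [
            r"\b{start}", r"\b{end}",
            r"\b{start-half}", r"\b{end-half}",
            r"\<", r"\>",
        ];
        for pat in pats {
            let mut parser = Parser::new();
            assert!(parser.parse(pat).is_ok());
        }
    }
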
+ assert_eq!(
parser(r"\B").parse_primitive(),
Ok(Primitive::Assertion(ast::Assertion {
span: span(0..2),
@@ -4418,20 +4574,60 @@ bar
kind: ast::ErrorKind::EscapeUnrecognized,
}
);
- // But also, < and > are banned, so that we may evolve them into
- // start/end word boundary assertions. (Not sure if we will...)
+
+ // Starting a special word boundary without any non-whitespace chars
+ // after the brace makes it ambiguous whether the user meant to write
+ // a counted repetition (probably not?) or an actual special word
+ // boundary assertion.
assert_eq!(
- parser(r"\<").parse_escape().unwrap_err(),
+ parser(r"\b{").parse_escape().unwrap_err(),
TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
+ span: span(0..3),
+ kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
}
);
assert_eq!(
- parser(r"\>").parse_escape().unwrap_err(),
+ parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(),
TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
+ span: span(0..4),
+ kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
+ }
+ );
+ // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char,
+ // and thus causes the parser to treat it as a counted repetition.
+ assert_eq!(
+ parser(r"\b{ ").parse().unwrap_err(),
+ TestError {
+ span: span(4..4),
+ kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
+ }
+ );
+ // In this case, we got some valid chars that make it look like the
+ // user is writing one of the special word boundary assertions, but
+ // we forgot to close the brace.
+ assert_eq!(
+ parser(r"\b{foo").parse_escape().unwrap_err(),
+ TestError {
+ span: span(2..6),
+ kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
+ }
+ );
+ // We get the same error as above, except it is provoked by seeing a
+ // char that we know is invalid before seeing a closing brace.
+ assert_eq!(
+ parser(r"\b{foo!}").parse_escape().unwrap_err(),
+ TestError {
+ span: span(2..6),
+ kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
+ }
+ );
+ // And this one occurs when, syntactically, everything looks okay, but
+ // we don't use a valid spelling of a word boundary assertion.
+ assert_eq!(
+ parser(r"\b{foo}").parse_escape().unwrap_err(),
+ TestError {
+ span: span(3..6),
+ kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized,
}
);
@@ -4494,15 +4690,15 @@ bar
);
assert_eq!(
parser_octal(r"\778").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..4),
asts: vec![
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(0..3),
kind: ast::LiteralKind::Octal,
c: '?',
}),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(3..4),
kind: ast::LiteralKind::Verbatim,
c: '8',
@@ -4512,15 +4708,15 @@ bar
);
assert_eq!(
parser_octal(r"\7777").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..5),
asts: vec![
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(0..4),
kind: ast::LiteralKind::Octal,
c: '\u{01FF}',
}),
- Ast::Literal(ast::Literal {
+ Ast::literal(ast::Literal {
span: span(4..5),
kind: ast::LiteralKind::Verbatim,
c: '7',
@@ -4965,15 +5161,15 @@ bar
assert_eq!(
parser("[[:alnum:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..11),
negated: false,
kind: itemset(item_ascii(alnum(span(1..10), false))),
- })))
+ }))
);
assert_eq!(
parser("[[[:alnum:]]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..13),
negated: false,
kind: itemset(item_bracket(ast::ClassBracketed {
@@ -4981,11 +5177,11 @@ bar
negated: false,
kind: itemset(item_ascii(alnum(span(2..11), false))),
})),
- })))
+ }))
);
assert_eq!(
parser("[[:alnum:]&&[:lower:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..22),
negated: false,
kind: intersection(
@@ -4993,11 +5189,11 @@ bar
itemset(item_ascii(alnum(span(1..10), false))),
itemset(item_ascii(lower(span(12..21), false))),
),
- })))
+ }))
);
assert_eq!(
parser("[[:alnum:]--[:lower:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..22),
negated: false,
kind: difference(
@@ -5005,11 +5201,11 @@ bar
itemset(item_ascii(alnum(span(1..10), false))),
itemset(item_ascii(lower(span(12..21), false))),
),
- })))
+ }))
);
assert_eq!(
parser("[[:alnum:]~~[:lower:]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..22),
negated: false,
kind: symdifference(
@@ -5017,20 +5213,20 @@ bar
itemset(item_ascii(alnum(span(1..10), false))),
itemset(item_ascii(lower(span(12..21), false))),
),
- })))
+ }))
);
assert_eq!(
parser("[a]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..3),
negated: false,
kind: itemset(lit(span(1..2), 'a')),
- })))
+ }))
);
assert_eq!(
parser(r"[a\]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: union(
@@ -5044,11 +5240,11 @@ bar
}),
]
),
- })))
+ }))
);
assert_eq!(
parser(r"[a\-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..6),
negated: false,
kind: union(
@@ -5063,44 +5259,44 @@ bar
lit(span(4..5), 'z'),
]
),
- })))
+ }))
);
assert_eq!(
parser("[ab]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: union(
span(1..3),
vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
),
- })))
+ }))
);
assert_eq!(
parser("[a-]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: union(
span(1..3),
vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
),
- })))
+ }))
);
assert_eq!(
parser("[-a]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: union(
span(1..3),
vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
),
- })))
+ }))
);
assert_eq!(
parser(r"[\pL]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: itemset(item_unicode(ast::ClassUnicode {
@@ -5108,11 +5304,11 @@ bar
negated: false,
kind: ast::ClassUnicodeKind::OneLetter('L'),
})),
- })))
+ }))
);
assert_eq!(
parser(r"[\w]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: itemset(item_perl(ast::ClassPerl {
@@ -5120,11 +5316,11 @@ bar
kind: ast::ClassPerlKind::Word,
negated: false,
})),
- })))
+ }))
);
assert_eq!(
parser(r"[a\wz]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..6),
negated: false,
kind: union(
@@ -5139,20 +5335,20 @@ bar
lit(span(4..5), 'z'),
]
),
- })))
+ }))
);
assert_eq!(
parser("[a-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: itemset(range(span(1..4), 'a', 'z')),
- })))
+ }))
);
assert_eq!(
parser("[a-cx-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..8),
negated: false,
kind: union(
@@ -5162,11 +5358,11 @@ bar
range(span(4..7), 'x', 'z'),
]
),
- })))
+ }))
);
assert_eq!(
parser(r"[\w&&a-cx-z]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..12),
negated: false,
kind: intersection(
@@ -5184,11 +5380,11 @@ bar
]
),
),
- })))
+ }))
);
assert_eq!(
parser(r"[a-cx-z&&\w]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..12),
negated: false,
kind: intersection(
@@ -5206,11 +5402,11 @@ bar
negated: false,
})),
),
- })))
+ }))
);
assert_eq!(
parser(r"[a--b--c]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..9),
negated: false,
kind: difference(
@@ -5222,11 +5418,11 @@ bar
),
itemset(lit(span(7..8), 'c')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[a~~b~~c]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..9),
negated: false,
kind: symdifference(
@@ -5238,11 +5434,11 @@ bar
),
itemset(lit(span(7..8), 'c')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[\^&&^]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..7),
negated: false,
kind: intersection(
@@ -5254,11 +5450,11 @@ bar
})),
itemset(lit(span(5..6), '^')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[\&&&&]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..7),
negated: false,
kind: intersection(
@@ -5270,11 +5466,11 @@ bar
})),
itemset(lit(span(5..6), '&')),
),
- })))
+ }))
);
assert_eq!(
parser(r"[&&&&]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..6),
negated: false,
kind: intersection(
@@ -5286,13 +5482,13 @@ bar
),
itemset(empty(span(5..5))),
),
- })))
+ }))
);
let pat = "[☃-⛄]";
assert_eq!(
parser(pat).parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span_range(pat, 0..9),
negated: false,
kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
@@ -5308,20 +5504,20 @@ bar
c: '⛄',
},
})),
- })))
+ }))
);
assert_eq!(
parser(r"[]]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..3),
negated: false,
kind: itemset(lit(span(1..2), ']')),
- })))
+ }))
);
assert_eq!(
parser(r"[]\[]").parse(),
- Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ok(Ast::class_bracketed(ast::ClassBracketed {
span: span(0..5),
negated: false,
kind: union(
@@ -5335,14 +5531,14 @@ bar
}),
]
),
- })))
+ }))
);
assert_eq!(
parser(r"[\[]]").parse(),
Ok(concat(
0..5,
vec![
- Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+ Ast::class_bracketed(ast::ClassBracketed {
span: span(0..4),
negated: false,
kind: itemset(ast::ClassSetItem::Literal(
@@ -5352,8 +5548,8 @@ bar
c: '[',
}
)),
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(4..5),
kind: ast::LiteralKind::Verbatim,
c: ']',
@@ -5914,15 +6110,15 @@ bar
assert_eq!(
parser(r"\pNz").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..4),
asts: vec![
- Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+ Ast::class_unicode(ast::ClassUnicode {
span: span(0..3),
negated: false,
kind: ast::ClassUnicodeKind::OneLetter('N'),
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(3..4),
kind: ast::LiteralKind::Verbatim,
c: 'z',
@@ -5932,15 +6128,15 @@ bar
);
assert_eq!(
parser(r"\p{Greek}z").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..10),
asts: vec![
- Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+ Ast::class_unicode(ast::ClassUnicode {
span: span(0..9),
negated: false,
kind: ast::ClassUnicodeKind::Named(s("Greek")),
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(9..10),
kind: ast::LiteralKind::Verbatim,
c: 'z',
@@ -6017,23 +6213,23 @@ bar
assert_eq!(
parser(r"\d").parse(),
- Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
+ Ok(Ast::class_perl(ast::ClassPerl {
span: span(0..2),
kind: ast::ClassPerlKind::Digit,
negated: false,
- })))
+ }))
);
assert_eq!(
parser(r"\dz").parse(),
- Ok(Ast::Concat(ast::Concat {
+ Ok(Ast::concat(ast::Concat {
span: span(0..3),
asts: vec![
- Ast::Class(ast::Class::Perl(ast::ClassPerl {
+ Ast::class_perl(ast::ClassPerl {
span: span(0..2),
kind: ast::ClassPerlKind::Digit,
negated: false,
- })),
- Ast::Literal(ast::Literal {
+ }),
+ Ast::literal(ast::Literal {
span: span(2..3),
kind: ast::LiteralKind::Verbatim,
c: 'z',
diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs
index 86a87e143..1ceb3c7fa 100644
--- a/vendor/regex-syntax/src/ast/print.rs
+++ b/vendor/regex-syntax/src/ast/print.rs
@@ -80,27 +80,21 @@ impl<W: fmt::Write> Visitor for Writer<W> {
fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
match *ast {
Ast::Group(ref x) => self.fmt_group_pre(x),
- Ast::Class(ast::Class::Bracketed(ref x)) => {
- self.fmt_class_bracketed_pre(x)
- }
+ Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
_ => Ok(()),
}
}
fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
- use crate::ast::Class;
-
match *ast {
Ast::Empty(_) => Ok(()),
Ast::Flags(ref x) => self.fmt_set_flags(x),
Ast::Literal(ref x) => self.fmt_literal(x),
Ast::Dot(_) => self.wtr.write_str("."),
Ast::Assertion(ref x) => self.fmt_assertion(x),
- Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
- Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
- Ast::Class(Class::Bracketed(ref x)) => {
- self.fmt_class_bracketed_post(x)
- }
+ Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
+ Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
+ Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
Ast::Repetition(ref x) => self.fmt_repetition(x),
Ast::Group(ref x) => self.fmt_group_post(x),
Ast::Alternation(_) => Ok(()),
@@ -267,6 +261,12 @@ impl<W: fmt::Write> Writer<W> {
EndText => self.wtr.write_str(r"\z"),
WordBoundary => self.wtr.write_str(r"\b"),
NotWordBoundary => self.wtr.write_str(r"\B"),
+ WordBoundaryStart => self.wtr.write_str(r"\b{start}"),
+ WordBoundaryEnd => self.wtr.write_str(r"\b{end}"),
+ WordBoundaryStartAngle => self.wtr.write_str(r"\<"),
+ WordBoundaryEndAngle => self.wtr.write_str(r"\>"),
+ WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"),
+ WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"),
}
}
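
The printer gains arms for the new assertion kinds so an `Ast` containing them can be written back out as concrete syntax. A small sketch of a parse-then-print round trip (the `Printer` type here is assumed from this crate's `ast::print` module):

    use regex_syntax::ast::parse::Parser;
    use regex_syntax::ast::print::Printer;

    fn main() {
        let pattern = r"\b{start-half}\w+\b{end-half}";
        let ast = Parser::new().parse(pattern).unwrap();
        let mut out = String::new();
        Printer::new().print(&ast, &mut out).unwrap();
        // Printing the AST reproduces the original concrete syntax.
        assert_eq!(out, pattern);
    }
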
diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs
index ab136739e..c1bb24d97 100644
--- a/vendor/regex-syntax/src/ast/visitor.rs
+++ b/vendor/regex-syntax/src/ast/visitor.rs
@@ -48,6 +48,11 @@ pub trait Visitor {
Ok(())
}
+ /// This method is called between child nodes of a concatenation.
+ fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
/// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
/// before descending into child nodes.
fn visit_class_set_item_pre(
@@ -228,8 +233,14 @@ impl<'a> HeapVisitor<'a> {
// If this is a concat/alternate, then we might have additional
// inductive steps to process.
if let Some(x) = self.pop(frame) {
- if let Frame::Alternation { .. } = x {
- visitor.visit_alternation_in()?;
+ match x {
+ Frame::Alternation { .. } => {
+ visitor.visit_alternation_in()?;
+ }
+ Frame::Concat { .. } => {
+ visitor.visit_concat_in()?;
+ }
+ _ => {}
}
ast = x.child();
self.stack.push((post_ast, x));
@@ -253,7 +264,7 @@ impl<'a> HeapVisitor<'a> {
visitor: &mut V,
) -> Result<Option<Frame<'a>>, V::Err> {
Ok(match *ast {
- Ast::Class(ast::Class::Bracketed(ref x)) => {
+ Ast::ClassBracketed(ref x) => {
self.visit_class(x, visitor)?;
None
}
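
The new `visit_concat_in` hook mirrors the existing `visit_alternation_in`: it fires between the children of every concatenation. A minimal sketch of a visitor that counts those seams, assuming the `ast::visit` entry point and the default-method structure of the `Visitor` trait:

    use regex_syntax::ast::{self, parse::Parser, Visitor};

    struct ConcatSeams(usize);

    impl Visitor for ConcatSeams {
        type Output = usize;
        type Err = ();

        fn finish(self) -> Result<usize, ()> {
            Ok(self.0)
        }

        // Called between child nodes of a concatenation.
        fn visit_concat_in(&mut self) -> Result<(), ()> {
            self.0 += 1;
            Ok(())
        }
    }

    fn main() {
        let ast = Parser::new().parse("abc").unwrap();
        // "abc" is a concatenation of three literals, so there are two seams.
        assert_eq!(ast::visit(&ast, ConcatSeams(0)), Ok(2));
    }
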
diff --git a/vendor/regex-syntax/src/hir/literal.rs b/vendor/regex-syntax/src/hir/literal.rs
index bd3a2d143..a5a3737f6 100644
--- a/vendor/regex-syntax/src/hir/literal.rs
+++ b/vendor/regex-syntax/src/hir/literal.rs
@@ -23,7 +23,7 @@ effective literal optimizations:
to lead to substring search that is only a little faster than a regex search,
and thus the overhead of using literal optimizations in the first place might
make things slower overall.
-* The literals in your [`Seq`] shoudn't be too short. In general, longer is
+* The literals in your [`Seq`] shouldn't be too short. In general, longer is
better. A sequence corresponding to single bytes that occur frequently in the
haystack, for example, is probably a bad literal optimization because it's
likely to produce many false positive candidates. Longer literals are less
@@ -51,7 +51,7 @@ the "trickier" parts are how to combine literal sequences, and that is all
implemented on [`Seq`].
*/
-use core::{cmp, mem};
+use core::{cmp, mem, num::NonZeroUsize};
use alloc::{vec, vec::Vec};
@@ -477,7 +477,7 @@ impl Extractor {
}
seq
}
- hir::Repetition { min, max: Some(max), .. } if min < max => {
+ hir::Repetition { min, .. } => {
assert!(min > 0); // handled above
let limit =
u32::try_from(self.limit_repeat).unwrap_or(u32::MAX);
@@ -491,10 +491,6 @@ impl Extractor {
seq.make_inexact();
seq
}
- hir::Repetition { .. } => {
- subseq.make_inexact();
- subseq
- }
}
}
@@ -692,7 +688,7 @@ impl Default for ExtractKind {
/// from making assumptions about what literals are required in order to match
/// a particular [`Hir`] expression. Generally speaking, when a set is in this
/// state, literal optimizations are inhibited. A good example of a regex that
-/// will cause this sort of set to apppear is `[A-Za-z]`. The character class
+/// will cause this sort of set to appear is `[A-Za-z]`. The character class
/// is just too big (and also too narrow) to be usefully expanded into 52
/// different literals. (Note that the decision for when a seq should become
/// infinite is determined by the caller. A seq itself has no hard-coded
@@ -1571,7 +1567,7 @@ impl Seq {
/// unioning `self` with `other`. If either set is infinite, then this
/// returns `None`.
#[inline]
- fn max_union_len(&self, other: &Seq) -> Option<usize> {
+ pub fn max_union_len(&self, other: &Seq) -> Option<usize> {
let len1 = self.len()?;
let len2 = other.len()?;
Some(len1.saturating_add(len2))
@@ -1581,7 +1577,7 @@ impl Seq {
/// cross product of `self` with `other`. If either set is infinite, then
/// this returns `None`.
#[inline]
- fn max_cross_len(&self, other: &Seq) -> Option<usize> {
+ pub fn max_cross_len(&self, other: &Seq) -> Option<usize> {
let len1 = self.len()?;
let len2 = other.len()?;
Some(len1.saturating_mul(len2))
@@ -1841,6 +1837,14 @@ impl Seq {
None => return,
Some(len) => len,
};
+ // Just give up now if our sequence contains an empty string.
+ if self.min_literal_len().map_or(false, |len| len == 0) {
+ // We squash the sequence so that nobody else gets any bright
+ // ideas to try and use it. An empty string implies a match at
+ // every position. A prefilter cannot help you here.
+ self.make_infinite();
+ return;
+ }
// Make sure we start with the smallest sequence possible. We use a
// special version of preference minimization that retains exactness.
// This is legal because optimization is only expected to occur once
@@ -1910,34 +1914,41 @@ impl Seq {
// longest common prefix to be subject to the poison check.
}
}
- // Everything below this check is more-or-less about trying to
- // heuristically reduce the false positive rate of a prefilter. But
- // if our sequence is completely exact, then it's possible the regex
- // engine can be skipped entirely. In this case, the false positive
- // rate is zero because every literal match corresponds to a regex
- // match.
+ // If we have an exact sequence, we *probably* just want to keep it
+ // as-is. But there are some cases where we don't. So we save a copy of
+ // the exact sequence now, and then try to do some more optimizations
+ // below. If those don't work out, we go back to this exact sequence.
//
- // This is OK even if the sequence contains a poison literal. Remember,
- // a literal is only poisononous because of what we assume about its
- // impact on the false positive rate. However, we do still check for
- // an empty string. Empty strings are weird and it's best to let the
- // regex engine handle those.
+ // The specific motivation for this is that we sometimes wind up with
+ // an exact sequence with a hefty number of literals. Say, 100. If we
+ // stuck with that, it would be too big for Teddy and would result in
+ // using Aho-Corasick. Which is fine... but the lazy DFA is plenty
+ // suitable in such cases. The real issue is that we will wind up not
+ // using a fast prefilter at all. So in cases like this, even though
+ // we have an exact sequence, it would be better to try and shrink the
+ // sequence (which we do below) and use it as a prefilter that can
+ // produce false positive matches.
//
- // We do currently do this check after the longest common prefix (or
- // suffix) check, under the theory that single-substring search is so
- // fast that we want that even if we'd end up turning an exact sequence
- // into an inexact one. But this might be wrong...
- if self.is_exact()
- && self.min_literal_len().map_or(false, |len| len > 0)
- {
- return;
- }
+ // But if the shrinking below results in a sequence that "sucks," then
+ // we don't want to use that because we already have an exact sequence
+ // in hand.
+ let exact: Option<Seq> =
+ if self.is_exact() { Some(self.clone()) } else { None };
// Now we attempt to shorten the sequence. The idea here is that we
// don't want to look for too many literals, but we want to shorten
// our sequence enough to improve our odds of using better algorithms
// downstream (such as Teddy).
+ //
+ // The pair of numbers in this list corresponds to the maximal prefix
+ // (in bytes) to keep for all literals and the length of the sequence
+ // at which to do it.
+ //
+ // So for example, the pair (3, 500) would mean, "if we have more than
+ // 500 literals in our sequence, then truncate all of our literals
+ // such that they are at most 3 bytes in length and then minimize the
+ // sequence."
const ATTEMPTS: [(usize, usize); 5] =
- [(5, 64), (4, 64), (3, 64), (2, 64), (1, 10)];
+ [(5, 10), (4, 10), (3, 64), (2, 64), (1, 10)];
for (keep, limit) in ATTEMPTS {
let len = match self.len() {
None => break,
@@ -1951,7 +1962,11 @@ impl Seq {
} else {
self.keep_last_bytes(keep);
}
- self.minimize_by_preference();
+ if prefix {
+ if let Some(ref mut lits) = self.literals {
+ PreferenceTrie::minimize(lits, true);
+ }
+ }
}
// Check for a poison literal. A poison literal is one that is short
// and is believed to have a very high match count. These poisons
@@ -1968,6 +1983,30 @@ impl Seq {
self.make_infinite();
}
}
+ // OK, if we had an exact sequence before attempting more optimizations
+ // above and our post-optimized sequence sucks for some reason or
+ // another, then we go back to the exact sequence.
+ if let Some(exact) = exact {
+ // If optimizing resulted in dropping our literals, then certainly
+ // backup and use the exact sequence that we had.
+ if !self.is_finite() {
+ *self = exact;
+ return;
+ }
+ // If our optimized sequence contains a short literal, then it's
+ // *probably* not so great. So throw it away and revert to the
+ // exact sequence.
+ if self.min_literal_len().map_or(true, |len| len <= 2) {
+ *self = exact;
+ return;
+ }
+ // Finally, if our optimized sequence is "big" (i.e., can't use
+ // Teddy), then also don't use it and rely on the exact sequence.
+ if self.len().map_or(true, |len| len > 64) {
+ *self = exact;
+ return;
+ }
+ }
}
}
@@ -1977,7 +2016,7 @@ impl core::fmt::Debug for Seq {
if let Some(lits) = self.literals() {
f.debug_list().entries(lits.iter()).finish()
} else {
- write!(f, "[∅]")
+ write!(f, "[∞]")
}
}
}
@@ -2160,12 +2199,19 @@ impl core::fmt::Debug for Literal {
/// never seen this show up on a profile. Because of the heuristic limits
/// imposed on literal extractions, the size of the inputs here is usually
/// very small.)
-#[derive(Debug, Default)]
+#[derive(Debug)]
struct PreferenceTrie {
/// The states in this trie. The index of a state in this vector is its ID.
states: Vec<State>,
+ /// This vec indicates which states are match states. It always has
+ /// the same length as `states` and is indexed by the same state ID.
+ /// A state with identifier `sid` is a match state if and only if
+ /// `matches[sid].is_some()`. The option contains the index of the literal
+ /// corresponding to the match. The index is offset by 1 so that it fits in
+ /// a NonZeroUsize.
+ matches: Vec<Option<NonZeroUsize>>,
/// The index to allocate to the next literal added to this trie. Starts at
- /// 0 and increments by 1 for every literal successfully added to the trie.
+ /// 1 and increments by 1 for every literal successfully added to the trie.
next_literal_index: usize,
}
@@ -2176,9 +2222,6 @@ struct State {
/// are sorted by byte. There is at most one such transition for any
/// particular byte.
trans: Vec<(u8, usize)>,
- /// Whether this is a matching state or not. If it is, then it contains the
- /// index to the matching literal.
- literal_index: Option<usize>,
}
impl PreferenceTrie {
@@ -2192,20 +2235,19 @@ impl PreferenceTrie {
/// after them and because any removed literals are guaranteed to never
/// match.
fn minimize(literals: &mut Vec<Literal>, keep_exact: bool) {
- use core::cell::RefCell;
-
- // MSRV(1.61): Use retain_mut here to avoid interior mutability.
- let trie = RefCell::new(PreferenceTrie::default());
+ let mut trie = PreferenceTrie {
+ states: vec![],
+ matches: vec![],
+ next_literal_index: 1,
+ };
let mut make_inexact = vec![];
- literals.retain(|lit| {
- match trie.borrow_mut().insert(lit.as_bytes()) {
- Ok(_) => true,
- Err(i) => {
- if !keep_exact {
- make_inexact.push(i);
- }
- false
+ literals.retain_mut(|lit| match trie.insert(lit.as_bytes()) {
+ Ok(_) => true,
+ Err(i) => {
+ if !keep_exact {
+ make_inexact.push(i.checked_sub(1).unwrap());
}
+ false
}
});
for i in make_inexact {
@@ -2225,15 +2267,15 @@ impl PreferenceTrie {
/// search.
fn insert(&mut self, bytes: &[u8]) -> Result<usize, usize> {
let mut prev = self.root();
- if let Some(idx) = self.states[prev].literal_index {
- return Err(idx);
+ if let Some(idx) = self.matches[prev] {
+ return Err(idx.get());
}
for &b in bytes.iter() {
match self.states[prev].trans.binary_search_by_key(&b, |t| t.0) {
Ok(i) => {
prev = self.states[prev].trans[i].1;
- if let Some(idx) = self.states[prev].literal_index {
- return Err(idx);
+ if let Some(idx) = self.matches[prev] {
+ return Err(idx.get());
}
}
Err(i) => {
@@ -2245,7 +2287,7 @@ impl PreferenceTrie {
}
let idx = self.next_literal_index;
self.next_literal_index += 1;
- self.states[prev].literal_index = Some(idx);
+ self.matches[prev] = NonZeroUsize::new(idx);
Ok(idx)
}
@@ -2262,6 +2304,7 @@ impl PreferenceTrie {
fn create_state(&mut self) -> usize {
let id = self.states.len();
self.states.push(State::default());
+ self.matches.push(None);
id
}
}
@@ -2603,6 +2646,12 @@ mod tests {
]),
e(r"(ab|cd)(ef|gh)(ij|kl)")
);
+
+ assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}"));
+
+ assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}"));
+
+ assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}"));
}
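
The changed `hir::Repetition` arm above means bounded and unbounded repeats now both contribute (inexact) literals, which is what the three new `(ab){…}` tests assert. A hedged sketch of driving the extractor directly (assuming the public `Extractor`/`Seq` API of this version, including `optimize_for_prefix_by_preference`):

    use regex_syntax::hir::literal::Extractor;

    fn main() {
        let hir = regex_syntax::parse(r"(ab){2,}").unwrap();
        let mut seq = Extractor::new().extract(&hir);
        // Per the new test above, `(ab){2,}` yields the single inexact
        // prefix literal "abab".
        println!("before optimization: {:?}", seq);
        seq.optimize_for_prefix_by_preference();
        println!("after optimization:  {:?}", seq);
    }
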
#[test]
@@ -2815,13 +2864,13 @@ mod tests {
// repeats.
#[test]
fn crazy_repeats() {
- assert_eq!(inexact([I("")], [I("")]), e(r"(?:){4294967295}"));
+ assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}"));
assert_eq!(
- inexact([I("")], [I("")]),
+ inexact([E("")], [E("")]),
e(r"(?:){64}{64}{64}{64}{64}{64}")
);
- assert_eq!(inexact([I("")], [I("")]), e(r"x{0}{4294967295}"));
- assert_eq!(inexact([I("")], [I("")]), e(r"(?:|){4294967295}"));
+ assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}"));
+ assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}"));
assert_eq!(
inexact([E("")], [E("")]),
diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs
index 062d4dcab..ce38ead7b 100644
--- a/vendor/regex-syntax/src/hir/mod.rs
+++ b/vendor/regex-syntax/src/hir/mod.rs
@@ -88,6 +88,9 @@ pub enum ErrorKind {
/// This error occurs when translating a pattern that could match a byte
/// sequence that isn't UTF-8 and `utf8` was enabled.
InvalidUtf8,
+ /// This error occurs when one uses a non-ASCII byte for a line terminator,
+ /// but where Unicode mode is enabled and UTF-8 mode is disabled.
+ InvalidLineTerminator,
/// This occurs when an unrecognized Unicode property name could not
/// be found.
UnicodePropertyNotFound,
@@ -120,6 +123,7 @@ impl core::fmt::Display for ErrorKind {
let msg = match *self {
UnicodeNotAllowed => "Unicode not allowed here",
InvalidUtf8 => "pattern can match invalid UTF-8",
+ InvalidLineTerminator => "invalid line terminator, must be ASCII",
UnicodePropertyNotFound => "Unicode property not found",
UnicodePropertyValueNotFound => "Unicode property value not found",
UnicodePerlClassNotFound => {
@@ -180,7 +184,7 @@ impl core::fmt::Display for ErrorKind {
/// matches.
///
/// For empty matches, those can occur at any position. It is the
-/// repsonsibility of the regex engine to determine whether empty matches are
+/// responsibility of the regex engine to determine whether empty matches are
/// permitted between the code units of a single codepoint.
///
/// # Stack space
@@ -355,7 +359,13 @@ impl Hir {
/// Creates a repetition HIR expression.
#[inline]
- pub fn repetition(rep: Repetition) -> Hir {
+ pub fn repetition(mut rep: Repetition) -> Hir {
+ // If the sub-expression of a repetition can only match the empty
+ // string, then we force its maximum to be at most 1.
+ if rep.sub.properties().maximum_len() == Some(0) {
+ rep.min = cmp::min(rep.min, 1);
+ rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1));
+ }
// The regex 'a{0}' is always equivalent to the empty regex. This is
// true even when 'a' is an expression that never matches anything
// (like '\P{any}').
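
The new clamp means a repetition whose sub-expression can only match the empty string is reduced to at most one iteration, which lines up with the `crazy_repeats` expectations changing from inexact to exact in the literal tests above. A small illustrative sketch using the smart constructor (field names assumed from this version's `hir::Repetition`):

    use regex_syntax::hir::{Hir, Repetition};

    fn main() {
        // The sub-expression matches only the empty string, so min/max are
        // clamped to at most 1 by the smart constructor.
        let rep = Hir::repetition(Repetition {
            min: 5,
            max: None,
            greedy: true,
            sub: Box::new(Hir::empty()),
        });
        println!("{:?}", rep);
    }
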
@@ -547,7 +557,7 @@ impl Hir {
// We rebuild the alternation by simplifying it. We proceed similarly
// as the concatenation case. But in this case, there's no literal
// simplification happening. We're just flattening alternations.
- let mut new = vec![];
+ let mut new = Vec::with_capacity(subs.len());
for sub in subs {
let (kind, props) = sub.into_parts();
match kind {
@@ -642,6 +652,12 @@ impl Hir {
cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
Hir::class(Class::Bytes(cls))
}
+ Dot::AnyCharExcept(ch) => {
+ let mut cls =
+ ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]);
+ cls.negate();
+ Hir::class(Class::Unicode(cls))
+ }
Dot::AnyCharExceptLF => {
let mut cls = ClassUnicode::empty();
cls.push(ClassUnicodeRange::new('\0', '\x09'));
@@ -655,6 +671,12 @@ impl Hir {
cls.push(ClassUnicodeRange::new('\x0E', '\u{10FFFF}'));
Hir::class(Class::Unicode(cls))
}
+ Dot::AnyByteExcept(byte) => {
+ let mut cls =
+ ClassBytes::new([ClassBytesRange::new(byte, byte)]);
+ cls.negate();
+ Hir::class(Class::Bytes(cls))
+ }
Dot::AnyByteExceptLF => {
let mut cls = ClassBytes::empty();
cls.push(ClassBytesRange::new(b'\0', b'\x09'));
@@ -775,13 +797,18 @@ impl core::fmt::Debug for Literal {
/// The high-level intermediate representation of a character class.
///
/// A character class corresponds to a set of characters. A character is either
-/// defined by a Unicode scalar value or a byte. Unicode characters are used
-/// by default, while bytes are used when Unicode mode (via the `u` flag) is
-/// disabled.
+/// defined by a Unicode scalar value or a byte.
///
/// A character class, regardless of its character type, is represented by a
/// sequence of non-overlapping non-adjacent ranges of characters.
///
+/// There are no guarantees about which class variant is used. Generally
+/// speaking, the Unicode variant is used whenever a class needs to contain
+/// non-ASCII Unicode scalar values. But the Unicode variant can be used even
+/// when Unicode mode is disabled. For example, at the time of writing, the
+/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class
+/// `[a\u00A0]` due to optimizations.
+///
/// Note that `Bytes` variant may be produced even when it exclusively matches
/// valid UTF-8. This is because a `Bytes` variant represents an intention by
/// the author of the regular expression to disable Unicode mode, which in turn
@@ -1304,8 +1331,9 @@ impl ClassUnicodeRange {
}
}
-/// A set of characters represented by arbitrary bytes (where one byte
-/// corresponds to one character).
+/// A set of characters represented by arbitrary bytes.
+///
+/// Each byte corresponds to one character.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassBytes {
set: IntervalSet<ClassBytesRange>,
@@ -1607,6 +1635,42 @@ pub enum Look {
WordUnicode = 1 << 8,
/// Match a Unicode-aware negation of a word boundary.
WordUnicodeNegate = 1 << 9,
+ /// Match the start of an ASCII-only word boundary. That is, this matches a
+ /// position at either the beginning of the haystack or where the previous
+ /// character is not a word character and the following character is a word
+ /// character.
+ WordStartAscii = 1 << 10,
+ /// Match the end of an ASCII-only word boundary. That is, this matches
+ /// a position at either the end of the haystack or where the previous
+ /// character is a word character and the following character is not a word
+ /// character.
+ WordEndAscii = 1 << 11,
+ /// Match the start of a Unicode word boundary. That is, this matches a
+ /// position at either the beginning of the haystack or where the previous
+ /// character is not a word character and the following character is a word
+ /// character.
+ WordStartUnicode = 1 << 12,
+ /// Match the end of a Unicode word boundary. That is, this matches a
+ /// position at either the end of the haystack or where the previous
+ /// character is a word character and the following character is not a word
+ /// character.
+ WordEndUnicode = 1 << 13,
+ /// Match the start half of an ASCII-only word boundary. That is, this
+ /// matches a position at either the beginning of the haystack or where the
+ /// previous character is not a word character.
+ WordStartHalfAscii = 1 << 14,
+ /// Match the end half of an ASCII-only word boundary. That is, this
+ /// matches a position at either the end of the haystack or where the
+ /// following character is not a word character.
+ WordEndHalfAscii = 1 << 15,
+ /// Match the start half of a Unicode word boundary. That is, this matches
+ /// a position at either the beginning of the haystack or where the
+ /// previous character is not a word character.
+ WordStartHalfUnicode = 1 << 16,
+ /// Match the end half of a Unicode word boundary. That is, this matches
+ /// a position at either the end of the haystack or where the following
+ /// character is not a word character.
+ WordEndHalfUnicode = 1 << 17,
}
impl Look {
@@ -1628,6 +1692,14 @@ impl Look {
Look::WordAsciiNegate => Look::WordAsciiNegate,
Look::WordUnicode => Look::WordUnicode,
Look::WordUnicodeNegate => Look::WordUnicodeNegate,
+ Look::WordStartAscii => Look::WordEndAscii,
+ Look::WordEndAscii => Look::WordStartAscii,
+ Look::WordStartUnicode => Look::WordEndUnicode,
+ Look::WordEndUnicode => Look::WordStartUnicode,
+ Look::WordStartHalfAscii => Look::WordEndHalfAscii,
+ Look::WordEndHalfAscii => Look::WordStartHalfAscii,
+ Look::WordStartHalfUnicode => Look::WordEndHalfUnicode,
+ Look::WordEndHalfUnicode => Look::WordStartHalfUnicode,
}
}
@@ -1636,28 +1708,36 @@ impl Look {
/// constructor is guaranteed to return the same look-around variant that
/// one started with within a semver compatible release of this crate.
#[inline]
- pub const fn as_repr(self) -> u16 {
+ pub const fn as_repr(self) -> u32 {
// AFAIK, 'as' is the only way to zero-cost convert an int enum to an
// actual int.
- self as u16
+ self as u32
}
/// Given the underlying representation of a `Look` value, return the
/// corresponding `Look` value if the representation is valid. Otherwise
/// `None` is returned.
#[inline]
- pub const fn from_repr(repr: u16) -> Option<Look> {
+ pub const fn from_repr(repr: u32) -> Option<Look> {
match repr {
- 0b00_0000_0001 => Some(Look::Start),
- 0b00_0000_0010 => Some(Look::End),
- 0b00_0000_0100 => Some(Look::StartLF),
- 0b00_0000_1000 => Some(Look::EndLF),
- 0b00_0001_0000 => Some(Look::StartCRLF),
- 0b00_0010_0000 => Some(Look::EndCRLF),
- 0b00_0100_0000 => Some(Look::WordAscii),
- 0b00_1000_0000 => Some(Look::WordAsciiNegate),
- 0b01_0000_0000 => Some(Look::WordUnicode),
- 0b10_0000_0000 => Some(Look::WordUnicodeNegate),
+ 0b00_0000_0000_0000_0001 => Some(Look::Start),
+ 0b00_0000_0000_0000_0010 => Some(Look::End),
+ 0b00_0000_0000_0000_0100 => Some(Look::StartLF),
+ 0b00_0000_0000_0000_1000 => Some(Look::EndLF),
+ 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF),
+ 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF),
+ 0b00_0000_0000_0100_0000 => Some(Look::WordAscii),
+ 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate),
+ 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode),
+ 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate),
+ 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii),
+ 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii),
+ 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode),
+ 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode),
+ 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii),
+ 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii),
+ 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode),
+ 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode),
_ => None,
}
}
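
Because the eight new variants push the bit positions past 16, `as_repr`/`from_repr` now trade in `u32`. A quick sketch mirroring the bit assignments shown above:

    use regex_syntax::hir::Look;

    fn main() {
        let repr: u32 = Look::WordStartHalfUnicode.as_repr();
        assert_eq!(repr, 1 << 16);
        assert_eq!(Look::from_repr(repr), Some(Look::WordStartHalfUnicode));
        // Unused bits still map to None.
        assert_eq!(Look::from_repr(1 << 31), None);
    }
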
@@ -1682,6 +1762,14 @@ impl Look {
Look::WordAsciiNegate => 'B',
Look::WordUnicode => '𝛃',
Look::WordUnicodeNegate => '𝚩',
+ Look::WordStartAscii => '<',
+ Look::WordEndAscii => '>',
+ Look::WordStartUnicode => '〈',
+ Look::WordEndUnicode => '〉',
+ Look::WordStartHalfAscii => '◁',
+ Look::WordEndHalfAscii => '▷',
+ Look::WordStartHalfUnicode => '◀',
+ Look::WordEndHalfUnicode => '▶',
}
}
}
@@ -1766,6 +1854,18 @@ pub enum Dot {
///
/// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`.
AnyByte,
+ /// Matches the UTF-8 encoding of any Unicode scalar value except for the
+ /// `char` given.
+ ///
+ /// This is equivalent to using `(?u-s:.)` with the line terminator set
+ /// to a particular ASCII byte. (Because of peculiarities in the regex
+ /// engines, a line terminator must be a single byte. It follows that when
+ /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
+ /// value. That is, it must be ASCII.)
+ ///
+ /// (This and `AnyCharExceptLF` both exist because of legacy reasons.
+ /// `AnyCharExceptLF` will be dropped in the next breaking change release.)
+ AnyCharExcept(char),
/// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`.
///
/// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`.
@@ -1775,6 +1875,17 @@ pub enum Dot {
///
/// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`.
AnyCharExceptCRLF,
+ /// Matches any byte value except for the `u8` given.
+ ///
+ /// This is equivalent to using `(?-us:.)` with the line terminator set
+ /// to a particular ASCII byte. (Because of peculiarities in the regex
+ /// engines, a line terminator must be a single byte. It follows that when
+ /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
+ /// value. That is, it must be ASCII.)
+ ///
+ /// (This and `AnyByteExceptLF` both exist because of legacy reasons.
+ /// `AnyByteExceptLF` will be dropped in the next breaking change release.)
+ AnyByteExcept(u8),
/// Matches any byte value except for `\n`.
///
/// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`.
@@ -2410,10 +2521,10 @@ impl Properties {
inner.look_set_prefix = p.look_set_prefix();
inner.look_set_suffix = p.look_set_suffix();
}
- // If the static captures len of the sub-expression is not known or is
- // zero, then it automatically propagates to the repetition, regardless
- // of the repetition. Otherwise, it might change, but only when the
- // repetition can match 0 times.
+ // If the static captures len of the sub-expression is not known or
+ // is greater than zero, then it automatically propagates to the
+ // repetition, regardless of the repetition. Otherwise, it might
+ // change, but only when the repetition can match 0 times.
if rep.min == 0
&& inner.static_explicit_captures_len.map_or(false, |len| len > 0)
{
@@ -2549,7 +2660,7 @@ pub struct LookSet {
/// range of `u16` values to be represented. For example, even if the
/// current implementation only makes use of the 10 least significant bits,
/// it may use more bits in a future semver compatible release.
- pub bits: u16,
+ pub bits: u32,
}
impl LookSet {
@@ -2652,13 +2763,22 @@ impl LookSet {
pub fn contains_word_unicode(self) -> bool {
self.contains(Look::WordUnicode)
|| self.contains(Look::WordUnicodeNegate)
+ || self.contains(Look::WordStartUnicode)
+ || self.contains(Look::WordEndUnicode)
+ || self.contains(Look::WordStartHalfUnicode)
+ || self.contains(Look::WordEndHalfUnicode)
}
/// Returns true if and only if this set contains any ASCII word boundary
/// or negated ASCII word boundary assertions.
#[inline]
pub fn contains_word_ascii(self) -> bool {
- self.contains(Look::WordAscii) || self.contains(Look::WordAsciiNegate)
+ self.contains(Look::WordAscii)
+ || self.contains(Look::WordAsciiNegate)
+ || self.contains(Look::WordStartAscii)
+ || self.contains(Look::WordEndAscii)
+ || self.contains(Look::WordStartHalfAscii)
+ || self.contains(Look::WordEndHalfAscii)
}
/// Returns an iterator over all of the look-around assertions in this set.
@@ -2737,29 +2857,31 @@ impl LookSet {
*self = self.intersect(other);
}
- /// Return a `LookSet` from the slice given as a native endian 16-bit
+ /// Return a `LookSet` from the slice given as a native endian 32-bit
/// integer.
///
/// # Panics
///
- /// This panics if `slice.len() < 2`.
+ /// This panics if `slice.len() < 4`.
#[inline]
pub fn read_repr(slice: &[u8]) -> LookSet {
- let bits = u16::from_ne_bytes(slice[..2].try_into().unwrap());
+ let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap());
LookSet { bits }
}
- /// Write a `LookSet` as a native endian 16-bit integer to the beginning
+ /// Write a `LookSet` as a native endian 32-bit integer to the beginning
/// of the slice given.
///
/// # Panics
///
- /// This panics if `slice.len() < 2`.
+ /// This panics if `slice.len() < 4`.
#[inline]
pub fn write_repr(self, slice: &mut [u8]) {
let raw = self.bits.to_ne_bytes();
slice[0] = raw[0];
slice[1] = raw[1];
+ slice[2] = raw[2];
+ slice[3] = raw[3];
}
}
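
With the widened representation, the serialized form of a `LookSet` is now four bytes. A small sketch of the round trip through `write_repr`/`read_repr`:

    use regex_syntax::hir::{Look, LookSet};

    fn main() {
        let set = LookSet::empty()
            .insert(Look::WordStartAscii)
            .insert(Look::WordEndHalfUnicode);
        // The native-endian u32 now needs a 4-byte buffer; 2 bytes would panic.
        let mut buf = [0u8; 4];
        set.write_repr(&mut buf);
        assert_eq!(LookSet::read_repr(&buf), set);
    }
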
@@ -2792,9 +2914,9 @@ impl Iterator for LookSetIter {
return None;
}
// We'll never have more than u8::MAX distinct look-around assertions,
- // so 'repr' will always fit into a u16.
- let repr = u16::try_from(self.set.bits.trailing_zeros()).unwrap();
- let look = Look::from_repr(1 << repr)?;
+ // so 'bit' will always fit into a u16.
+ let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap();
+ let look = Look::from_repr(1 << bit)?;
self.set = self.set.remove(look);
Some(look)
}
@@ -3716,7 +3838,7 @@ mod tests {
assert_eq!(0, set.iter().count());
let set = LookSet::full();
- assert_eq!(10, set.iter().count());
+ assert_eq!(18, set.iter().count());
let set =
LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode);
@@ -3734,6 +3856,6 @@ mod tests {
let res = format!("{:?}", LookSet::empty());
assert_eq!("∅", res);
let res = format!("{:?}", LookSet::full());
- assert_eq!("Az^$rRbB𝛃𝚩", res);
+ assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res);
}
}
diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs
index fcb7cd252..dfa6d4032 100644
--- a/vendor/regex-syntax/src/hir/print.rs
+++ b/vendor/regex-syntax/src/hir/print.rs
@@ -89,9 +89,16 @@ impl<W: fmt::Write> Visitor for Writer<W> {
fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
match *hir.kind() {
- // Empty is represented by nothing in the concrete syntax, and
- // repetition operators are strictly suffix oriented.
- HirKind::Empty | HirKind::Repetition(_) => {}
+ HirKind::Empty => {
+ // Technically an empty sub-expression could be "printed" by
+ // just ignoring it, but in practice, you could have a
+ // repetition operator attached to an empty expression, and you
+ // really need something in the concrete syntax to make that
+ // work as you'd expect.
+ self.wtr.write_str(r"(?:)")?;
+ }
+ // Repetition operators are strictly suffix oriented.
+ HirKind::Repetition(_) => {}
HirKind::Literal(hir::Literal(ref bytes)) => {
// See the comment on the 'Concat' and 'Alternation' case below
// for why we put parens here. Literals are, conceptually,
@@ -195,6 +202,30 @@ impl<W: fmt::Write> Visitor for Writer<W> {
hir::Look::WordUnicodeNegate => {
self.wtr.write_str(r"\B")?;
}
+ hir::Look::WordStartAscii => {
+ self.wtr.write_str(r"(?-u:\b{start})")?;
+ }
+ hir::Look::WordEndAscii => {
+ self.wtr.write_str(r"(?-u:\b{end})")?;
+ }
+ hir::Look::WordStartUnicode => {
+ self.wtr.write_str(r"\b{start}")?;
+ }
+ hir::Look::WordEndUnicode => {
+ self.wtr.write_str(r"\b{end}")?;
+ }
+ hir::Look::WordStartHalfAscii => {
+ self.wtr.write_str(r"(?-u:\b{start-half})")?;
+ }
+ hir::Look::WordEndHalfAscii => {
+ self.wtr.write_str(r"(?-u:\b{end-half})")?;
+ }
+ hir::Look::WordStartHalfUnicode => {
+ self.wtr.write_str(r"\b{start-half}")?;
+ }
+ hir::Look::WordEndHalfUnicode => {
+ self.wtr.write_str(r"\b{end-half}")?;
+ }
},
HirKind::Capture(hir::Capture { ref name, .. }) => {
self.wtr.write_str("(")?;
@@ -424,20 +455,20 @@ mod tests {
// Test that various zero-length repetitions always translate to an
// empty regex. This is more a property of HIR's smart constructors
// than the printer though.
- roundtrip("a{0}", "");
- roundtrip("(?:ab){0}", "");
+ roundtrip("a{0}", "(?:)");
+ roundtrip("(?:ab){0}", "(?:)");
#[cfg(feature = "unicode-gencat")]
{
- roundtrip(r"\p{any}{0}", "");
- roundtrip(r"\P{any}{0}", "");
+ roundtrip(r"\p{any}{0}", "(?:)");
+ roundtrip(r"\P{any}{0}", "(?:)");
}
}
#[test]
fn print_group() {
- roundtrip("()", "()");
- roundtrip("(?P<foo>)", "(?P<foo>)");
- roundtrip("(?:)", "");
+ roundtrip("()", "((?:))");
+ roundtrip("(?P<foo>)", "(?P<foo>(?:))");
+ roundtrip("(?:)", "(?:)");
roundtrip("(a)", "(a)");
roundtrip("(?P<foo>a)", "(?P<foo>a)");
@@ -448,8 +479,8 @@ mod tests {
#[test]
fn print_alternation() {
- roundtrip("|", "(?:|)");
- roundtrip("||", "(?:||)");
+ roundtrip("|", "(?:(?:)|(?:))");
+ roundtrip("||", "(?:(?:)|(?:)|(?:))");
roundtrip("a|b", "[ab]");
roundtrip("ab|cd", "(?:(?:ab)|(?:cd))");
@@ -503,7 +534,7 @@ mod tests {
}),
Hir::look(hir::Look::End),
]);
- assert_eq!(r"(?:\A(?:\A\z)+\z)", expr.to_string());
+ assert_eq!(r"(?:\A\A\z\z)", expr.to_string());
}
// Just like regression_repetition_concat, but with the repetition using
@@ -540,7 +571,7 @@ mod tests {
}),
Hir::look(hir::Look::End),
]);
- assert_eq!(r"(?:\A(?:\A|\z)+\z)", expr.to_string());
+ assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string());
}
// This regression test is very similar in flavor to
diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs
index ff9c5ee91..313a1e9e8 100644
--- a/vendor/regex-syntax/src/hir/translate.rs
+++ b/vendor/regex-syntax/src/hir/translate.rs
@@ -19,6 +19,7 @@ type Result<T> = core::result::Result<T, Error>;
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
utf8: bool,
+ line_terminator: u8,
flags: Flags,
}
@@ -31,7 +32,11 @@ impl Default for TranslatorBuilder {
impl TranslatorBuilder {
/// Create a new translator builder with a default configuration.
pub fn new() -> TranslatorBuilder {
- TranslatorBuilder { utf8: true, flags: Flags::default() }
+ TranslatorBuilder {
+ utf8: true,
+ line_terminator: b'\n',
+ flags: Flags::default(),
+ }
}
/// Build a translator using the current configuration.
@@ -40,6 +45,7 @@ impl TranslatorBuilder {
stack: RefCell::new(vec![]),
flags: Cell::new(self.flags),
utf8: self.utf8,
+ line_terminator: self.line_terminator,
}
}
@@ -63,6 +69,31 @@ impl TranslatorBuilder {
self
}
+ /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
+ ///
+ /// Namely, instead of `.` (by default) matching everything except for `\n`,
+ /// this will cause `.` to match everything except for the byte given.
+ ///
+ /// If `.` is used in a context where Unicode mode is enabled and this byte
+ /// isn't ASCII, then an error will be returned. When Unicode mode is
+ /// disabled, then any byte is permitted, but will return an error if UTF-8
+ /// mode is enabled and it is a non-ASCII byte.
+ ///
+ /// In short, any ASCII value for a line terminator is always okay. But a
+ /// non-ASCII byte might result in an error depending on whether Unicode
+ /// mode or UTF-8 mode are enabled.
+ ///
+ /// Note that if `R` mode is enabled then it always takes precedence and
+ /// the line terminator will be treated as `\r` and `\n` simultaneously.
+ ///
+ /// Note also that this *doesn't* impact the look-around assertions
+ /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
+ /// configuration in the regex engine itself.
+ pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
+ self.line_terminator = byte;
+ self
+ }
+
/// Enable or disable the case insensitive flag (`i`) by default.
pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
self.flags.case_insensitive = if yes { Some(true) } else { None };
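
The new `line_terminator` knob changes which byte `.` excludes when the `s` flag is not set. A hedged sketch of wiring it through the AST parser and translator (module paths assumed from this crate's `ast::parse` and `hir::translate`):

    use regex_syntax::ast::parse::Parser;
    use regex_syntax::hir::translate::TranslatorBuilder;

    fn main() {
        let pattern = ".";
        let ast = Parser::new().parse(pattern).unwrap();
        let hir = TranslatorBuilder::new()
            .line_terminator(b'\x00')
            .build()
            .translate(pattern, &ast)
            .unwrap();
        // With a NUL line terminator, `.` compiles to "any char except '\0'"
        // rather than the default "any char except '\n'".
        println!("{}", hir);
    }
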
@@ -120,6 +151,8 @@ pub struct Translator {
flags: Cell<Flags>,
/// Whether we're allowed to produce HIR that can match arbitrary bytes.
utf8: bool,
+ /// The line terminator to use for `.`.
+ line_terminator: u8,
}
impl Translator {
@@ -304,7 +337,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
match *ast {
- Ast::Class(ast::Class::Bracketed(_)) => {
+ Ast::ClassBracketed(_) => {
if self.flags().unicode() {
let cls = hir::ClassUnicode::empty();
self.push(HirFrame::ClassUnicode(cls));
@@ -321,14 +354,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
.unwrap_or_else(|| self.flags());
self.push(HirFrame::Group { old_flags });
}
- Ast::Concat(ref x) if x.asts.is_empty() => {}
Ast::Concat(_) => {
self.push(HirFrame::Concat);
}
- Ast::Alternation(ref x) if x.asts.is_empty() => {}
- Ast::Alternation(_) => {
+ Ast::Alternation(ref x) => {
self.push(HirFrame::Alternation);
- self.push(HirFrame::AlternationBranch);
+ if !x.asts.is_empty() {
+ self.push(HirFrame::AlternationBranch);
+ }
}
_ => {}
}
@@ -353,29 +386,20 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
// consistency sake.
self.push(HirFrame::Expr(Hir::empty()));
}
- Ast::Literal(ref x) => {
- match self.ast_literal_to_scalar(x)? {
- Either::Right(byte) => self.push_byte(byte),
- Either::Left(ch) => {
- if !self.flags().unicode() && ch.len_utf8() > 1 {
- return Err(self
- .error(x.span, ErrorKind::UnicodeNotAllowed));
- }
- match self.case_fold_char(x.span, ch)? {
- None => self.push_char(ch),
- Some(expr) => self.push(HirFrame::Expr(expr)),
- }
- }
- }
- // self.push(HirFrame::Expr(self.hir_literal(x)?));
- }
- Ast::Dot(span) => {
- self.push(HirFrame::Expr(self.hir_dot(span)?));
+ Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
+ Either::Right(byte) => self.push_byte(byte),
+ Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
+ None => self.push_char(ch),
+ Some(expr) => self.push(HirFrame::Expr(expr)),
+ },
+ },
+ Ast::Dot(ref span) => {
+ self.push(HirFrame::Expr(self.hir_dot(**span)?));
}
Ast::Assertion(ref x) => {
self.push(HirFrame::Expr(self.hir_assertion(x)?));
}
- Ast::Class(ast::Class::Perl(ref x)) => {
+ Ast::ClassPerl(ref x) => {
if self.flags().unicode() {
let cls = self.hir_perl_unicode_class(x)?;
let hcls = hir::Class::Unicode(cls);
@@ -386,11 +410,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
self.push(HirFrame::Expr(Hir::class(hcls)));
}
}
- Ast::Class(ast::Class::Unicode(ref x)) => {
+ Ast::ClassUnicode(ref x) => {
let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
self.push(HirFrame::Expr(Hir::class(cls)));
}
- Ast::Class(ast::Class::Bracketed(ref ast)) => {
+ Ast::ClassBracketed(ref ast) => {
if self.flags().unicode() {
let mut cls = self.pop().unwrap().unwrap_class_unicode();
self.unicode_fold_and_negate(
@@ -841,8 +865,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
})?;
Ok(Some(Hir::class(hir::Class::Unicode(cls))))
} else {
- if c.len_utf8() > 1 {
- return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+ if !c.is_ascii() {
+ return Ok(None);
}
// If case folding won't do anything, then don't bother trying.
match c {
@@ -862,10 +886,38 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
}
fn hir_dot(&self, span: Span) -> Result<Hir> {
- if !self.flags().unicode() && self.trans().utf8 {
+ let (utf8, lineterm, flags) =
+ (self.trans().utf8, self.trans().line_terminator, self.flags());
+ if utf8 && (!flags.unicode() || !lineterm.is_ascii()) {
return Err(self.error(span, ErrorKind::InvalidUtf8));
}
- Ok(Hir::dot(self.flags().dot()))
+ let dot = if flags.dot_matches_new_line() {
+ if flags.unicode() {
+ hir::Dot::AnyChar
+ } else {
+ hir::Dot::AnyByte
+ }
+ } else {
+ if flags.unicode() {
+ if flags.crlf() {
+ hir::Dot::AnyCharExceptCRLF
+ } else {
+ if !lineterm.is_ascii() {
+ return Err(
+ self.error(span, ErrorKind::InvalidLineTerminator)
+ );
+ }
+ hir::Dot::AnyCharExcept(char::from(lineterm))
+ }
+ } else {
+ if flags.crlf() {
+ hir::Dot::AnyByteExceptCRLF
+ } else {
+ hir::Dot::AnyByteExcept(lineterm)
+ }
+ }
+ };
+ Ok(Hir::dot(dot))
}
fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
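The rewritten `hir_dot` folds the old `Flags::dot` logic together with the configured line terminator. A hedged sketch of the resulting flag-to-`Dot` mapping via the crate-level parser, assuming default settings and that the `s` and `R` inline flags behave as described in the doc comments above:

```rust
use regex_syntax::{
    hir::{Dot, Hir},
    Parser,
};

fn main() {
    // Default flags: `.` is any char except `\n`.
    assert_eq!(Parser::new().parse(".").unwrap(), Hir::dot(Dot::AnyCharExceptLF));
    // `s` flag: `.` is truly any char.
    assert_eq!(Parser::new().parse("(?s).").unwrap(), Hir::dot(Dot::AnyChar));
    // `R` (CRLF) mode takes precedence over any configured line terminator.
    assert_eq!(
        Parser::new().parse("(?R).").unwrap(),
        Hir::dot(Dot::AnyCharExceptCRLF)
    );
}
```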
@@ -903,6 +955,34 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
} else {
hir::Look::WordAsciiNegate
}),
+ ast::AssertionKind::WordBoundaryStart
+ | ast::AssertionKind::WordBoundaryStartAngle => {
+ Hir::look(if unicode {
+ hir::Look::WordStartUnicode
+ } else {
+ hir::Look::WordStartAscii
+ })
+ }
+ ast::AssertionKind::WordBoundaryEnd
+ | ast::AssertionKind::WordBoundaryEndAngle => {
+ Hir::look(if unicode {
+ hir::Look::WordEndUnicode
+ } else {
+ hir::Look::WordEndAscii
+ })
+ }
+ ast::AssertionKind::WordBoundaryStartHalf => {
+ Hir::look(if unicode {
+ hir::Look::WordStartHalfUnicode
+ } else {
+ hir::Look::WordStartHalfAscii
+ })
+ }
+ ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
+ hir::Look::WordEndHalfUnicode
+ } else {
+ hir::Look::WordEndHalfAscii
+ }),
})
}
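The new assertion kinds map one-to-one onto the `hir::Look` word-boundary variants. A hedged sketch, assuming default Unicode mode and the `\<`/`\>` spellings noted in the `lib.rs` hunk further down:

```rust
use regex_syntax::{
    hir::{Hir, Look},
    Parser,
};

fn main() {
    // Start-of-word and end-of-word boundary assertions.
    assert_eq!(Parser::new().parse(r"\<").unwrap(), Hir::look(Look::WordStartUnicode));
    assert_eq!(Parser::new().parse(r"\>").unwrap(), Hir::look(Look::WordEndUnicode));
}
```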
@@ -1124,9 +1204,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
match self.ast_literal_to_scalar(ast)? {
Either::Right(byte) => Ok(byte),
Either::Left(ch) => {
- let cp = u32::from(ch);
- if cp <= 0x7F {
- Ok(u8::try_from(cp).unwrap())
+ if ch.is_ascii() {
+ Ok(u8::try_from(ch).unwrap())
} else {
// We can't feasibly support Unicode in
// byte oriented classes. Byte classes don't
@@ -1209,30 +1288,6 @@ impl Flags {
}
}
- fn dot(&self) -> hir::Dot {
- if self.dot_matches_new_line() {
- if self.unicode() {
- hir::Dot::AnyChar
- } else {
- hir::Dot::AnyByte
- }
- } else {
- if self.unicode() {
- if self.crlf() {
- hir::Dot::AnyCharExceptCRLF
- } else {
- hir::Dot::AnyCharExceptLF
- }
- } else {
- if self.crlf() {
- hir::Dot::AnyByteExceptCRLF
- } else {
- hir::Dot::AnyByteExceptLF
- }
- }
- }
- }
-
fn case_insensitive(&self) -> bool {
self.case_insensitive.unwrap_or(false)
}
@@ -1598,16 +1653,7 @@ mod tests {
assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
- assert_eq!(
- t_err("(?-u)☃"),
- TestError {
- kind: hir::ErrorKind::UnicodeNotAllowed,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(8, 1, 7)
- ),
- }
- );
+ assert_eq!(t("(?-u)☃"), hir_lit("☃"));
assert_eq!(
t_err(r"(?-u)\xFF"),
TestError {
@@ -1685,16 +1731,7 @@ mod tests {
);
assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
- assert_eq!(
- t_err("(?i-u)β"),
- TestError {
- kind: hir::ErrorKind::UnicodeNotAllowed,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(8, 1, 8),
- ),
- }
- );
+ assert_eq!(t("(?i-u)β"), hir_lit("β"),);
}
#[test]
@@ -3489,6 +3526,15 @@ mod tests {
assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal());
}
+ // This tests that the smart Hir::repetition constructor does some basic
+ // simplifications.
+ #[test]
+ fn smart_repetition() {
+ assert_eq!(t(r"a{0}"), Hir::empty());
+ assert_eq!(t(r"a{1}"), hir_lit("a"));
+ assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate));
+ }
+
// This tests that the smart Hir::concat constructor simplifies the given
// exprs in a way we expect.
#[test]
@@ -3580,4 +3626,99 @@ mod tests {
]),
);
}
+
+ #[test]
+ fn regression_alt_empty_concat() {
+ use crate::ast::{self, Ast};
+
+ let span = Span::splat(Position::new(0, 0, 0));
+ let ast = Ast::alternation(ast::Alternation {
+ span,
+ asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })],
+ });
+
+ let mut t = Translator::new();
+ assert_eq!(Ok(Hir::empty()), t.translate("", &ast));
+ }
+
+ #[test]
+ fn regression_empty_alt() {
+ use crate::ast::{self, Ast};
+
+ let span = Span::splat(Position::new(0, 0, 0));
+ let ast = Ast::concat(ast::Concat {
+ span,
+ asts: vec![Ast::alternation(ast::Alternation {
+ span,
+ asts: vec![],
+ })],
+ });
+
+ let mut t = Translator::new();
+ assert_eq!(Ok(Hir::fail()), t.translate("", &ast));
+ }
+
+ #[test]
+ fn regression_singleton_alt() {
+ use crate::{
+ ast::{self, Ast},
+ hir::Dot,
+ };
+
+ let span = Span::splat(Position::new(0, 0, 0));
+ let ast = Ast::concat(ast::Concat {
+ span,
+ asts: vec![Ast::alternation(ast::Alternation {
+ span,
+ asts: vec![Ast::dot(span)],
+ })],
+ });
+
+ let mut t = Translator::new();
+ assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast));
+ }
+
+ // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
+ #[test]
+ fn regression_fuzz_match() {
+ let pat = "[(\u{6} \0-\u{afdf5}] \0 ";
+ let ast = ParserBuilder::new()
+ .octal(false)
+ .ignore_whitespace(true)
+ .build()
+ .parse(pat)
+ .unwrap();
+ let hir = TranslatorBuilder::new()
+ .utf8(true)
+ .case_insensitive(false)
+ .multi_line(false)
+ .dot_matches_new_line(false)
+ .swap_greed(true)
+ .unicode(true)
+ .build()
+ .translate(pat, &ast)
+ .unwrap();
+ assert_eq!(
+ hir,
+ Hir::concat(vec![
+ hir_uclass(&[('\0', '\u{afdf5}')]),
+ hir_lit("\0"),
+ ])
+ );
+ }
+
+ // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
+ #[cfg(feature = "unicode")]
+ #[test]
+ fn regression_fuzz_difference1() {
+ let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*";
+ let _ = t(pat); // shouldn't panic
+ }
+
+ // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
+ #[test]
+ fn regression_fuzz_char_decrement1() {
+ let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0]<D\0\0\0\0\0\0\u{1}]\0\0\0\0]\0\0-*\0]\0\0 ";
+ let _ = t(pat); // shouldn't panic
+ }
}
diff --git a/vendor/regex-syntax/src/hir/visitor.rs b/vendor/regex-syntax/src/hir/visitor.rs
index e5f15cf1c..f30f0a163 100644
--- a/vendor/regex-syntax/src/hir/visitor.rs
+++ b/vendor/regex-syntax/src/hir/visitor.rs
@@ -41,6 +41,11 @@ pub trait Visitor {
fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
Ok(())
}
+
+ /// This method is called between child nodes of a concatenation.
+ fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
+ Ok(())
+ }
}
/// Executes an implementation of `Visitor` in constant stack space.
@@ -131,8 +136,14 @@ impl<'a> HeapVisitor<'a> {
// If this is a concat/alternate, then we might have additional
// inductive steps to process.
if let Some(x) = self.pop(frame) {
- if let Frame::Alternation { .. } = x {
- visitor.visit_alternation_in()?;
+ match x {
+ Frame::Alternation { .. } => {
+ visitor.visit_alternation_in()?;
+ }
+ Frame::Concat { .. } => {
+ visitor.visit_concat_in()?;
+ }
+ _ => {}
}
hir = x.child();
self.stack.push((post_hir, x));
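The new `visit_concat_in` hook mirrors `visit_alternation_in`. A hedged sketch of a `Visitor` implementation that uses both hooks; the counter itself is illustrative only:

```rust
use regex_syntax::{
    hir::{visit, Hir, Visitor},
    Parser,
};

/// Counts how many times the walker steps between children of
/// concatenations and alternations.
#[derive(Default)]
struct SeparatorCounter {
    concat_seps: usize,
    alt_seps: usize,
}

impl Visitor for SeparatorCounter {
    type Output = (usize, usize);
    type Err = ();

    fn finish(self) -> Result<Self::Output, Self::Err> {
        Ok((self.concat_seps, self.alt_seps))
    }

    fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
        self.concat_seps += 1;
        Ok(())
    }

    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
        self.alt_seps += 1;
        Ok(())
    }
}

fn main() {
    let hir: Hir = Parser::new().parse("a.b|cd").unwrap();
    let (concats, alts) = visit(&hir, SeparatorCounter::default()).unwrap();
    println!("concat separators: {}, alternation separators: {}", concats, alts);
}
```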
diff --git a/vendor/regex-syntax/src/lib.rs b/vendor/regex-syntax/src/lib.rs
index 754858900..20f25db71 100644
--- a/vendor/regex-syntax/src/lib.rs
+++ b/vendor/regex-syntax/src/lib.rs
@@ -157,6 +157,11 @@ The following features are available:
[Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
`\p{sb=ATerm}`.
+* **arbitrary** -
+ Enabling this feature introduces a public dependency on the
+ [`arbitrary`](https://crates.io/crates/arbitrary)
+ crate. Namely, it implements the `Arbitrary` trait from that crate for the
+ [`Ast`](crate::ast::Ast) type. This feature is disabled by default.
*/
#![no_std]
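The `Arbitrary` impl is primarily useful for fuzzing. A hedged sketch of what the feature enables, assuming the `arbitrary` feature is turned on and the `arbitrary` crate is a direct dependency of the caller; `ast_from_fuzz_input` is a made-up helper name:

```rust
use arbitrary::{Arbitrary, Unstructured};
use regex_syntax::ast::Ast;

/// Builds a structured `Ast` directly from raw fuzzer bytes.
fn ast_from_fuzz_input(fuzz_input: &[u8]) -> arbitrary::Result<Ast> {
    let mut u = Unstructured::new(fuzz_input);
    Ast::arbitrary(&mut u)
}

fn main() {
    println!("{:?}", ast_from_fuzz_input(b"some arbitrary bytes"));
}
```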
@@ -317,6 +322,9 @@ pub fn is_escapeable_character(c: char) -> bool {
// escapeable, \< and \> will result in a parse error. Thus, we can
// turn them into something else in the future without it being a
// backwards incompatible change.
+ //
+ // OK, now we support \< and \>, and we need to retain them as *not*
+ // escapeable here since the escape sequence is significant.
'<' | '>' => false,
_ => true,
}
@@ -364,7 +372,7 @@ pub fn try_is_word_character(
/// Returns true if and only if the given character is an ASCII word character.
///
/// An ASCII word character is defined by the following character class:
-/// `[_0-9a-zA-Z]'.
+/// `[_0-9a-zA-Z]`.
pub fn is_word_byte(c: u8) -> bool {
match c {
b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true,
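For reference, a minimal sketch of the doc comment's character class in use:

```rust
use regex_syntax::is_word_byte;

fn main() {
    assert!(is_word_byte(b'_'));
    assert!(is_word_byte(b'7'));
    assert!(is_word_byte(b'Z'));
    assert!(!is_word_byte(b'-'));
    assert!(!is_word_byte(b' '));
}
```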
diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs
index 2e7a2bb80..f482b8466 100644
--- a/vendor/regex-syntax/src/parser.rs
+++ b/vendor/regex-syntax/src/parser.rs
@@ -165,6 +165,31 @@ impl ParserBuilder {
self
}
+ /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
+ ///
+ /// Namely, instead of `.` (by default) matching everything except for `\n`,
+ /// this will cause `.` to match everything except for the byte given.
+ ///
+ /// If `.` is used in a context where Unicode mode is enabled and this byte
+ /// isn't ASCII, then an error will be returned. When Unicode mode is
+ /// disabled, any byte is permitted, but an error will still be returned
+ /// if UTF-8 mode is enabled and the byte is not ASCII.
+ ///
+ /// In short, any ASCII value for a line terminator is always okay. But a
+ /// non-ASCII byte might result in an error depending on whether Unicode
+ /// mode or UTF-8 mode is enabled.
+ ///
+ /// Note that if `R` mode is enabled then it always takes precedence and
+ /// the line terminator will be treated as `\r` and `\n` simultaneously.
+ ///
+ /// Note also that this *doesn't* impact the look-around assertions
+ /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
+ /// configuration in the regex engine itself.
+ pub fn line_terminator(&mut self, byte: u8) -> &mut ParserBuilder {
+ self.hir.line_terminator(byte);
+ self
+ }
+
/// Enable or disable the "swap greed" flag by default.
///
/// By default this is disabled. It may alternatively be selectively
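`ParserBuilder::line_terminator` simply forwards to the HIR translator option above. A hedged end-to-end sketch, assuming the crate-level parser routes `.` through the new `Dot::AnyCharExcept` variant when given a custom ASCII terminator:

```rust
use regex_syntax::{
    hir::{Dot, Hir},
    ParserBuilder,
};

fn main() {
    // With a NUL line terminator, `.` translates to "any char except NUL"
    // rather than "any char except \n".
    let hir = ParserBuilder::new()
        .line_terminator(b'\x00')
        .build()
        .parse(".")
        .unwrap();
    assert_eq!(hir, Hir::dot(Dot::AnyCharExcept('\0')));
}
```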
diff --git a/vendor/regex-syntax/test b/vendor/regex-syntax/test
index a4d6cfaba..8626c3bfc 100755
--- a/vendor/regex-syntax/test
+++ b/vendor/regex-syntax/test
@@ -2,6 +2,10 @@
set -e
+# cd to the directory containing this crate's Cargo.toml so that we don't need
+# to pass --manifest-path to every `cargo` command.
+cd "$(dirname "$0")"
+
# This is a convenience script for running a broad swath of the syntax tests.
echo "===== DEFAULT FEATURES ==="
cargo test