13 files changed, 700 insertions, 227 deletions
diff --git a/vendor/regex/src/compile.rs b/vendor/regex/src/compile.rs
index 90ca25015..23e63ec89 100644
--- a/vendor/regex/src/compile.rs
+++ b/vendor/regex/src/compile.rs
@@ -4,7 +4,7 @@ use std::iter;
 use std::result;
 use std::sync::Arc;
 
-use regex_syntax::hir::{self, Hir};
+use regex_syntax::hir::{self, Hir, Look};
 use regex_syntax::is_word_byte;
 use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences};
 
@@ -137,13 +137,24 @@ impl Compiler {
     }
 
     fn compile_one(mut self, expr: &Hir) -> result::Result<Program, Error> {
+        if self.compiled.only_utf8
+            && expr.properties().look_set().contains(Look::WordAsciiNegate)
+        {
+            return Err(Error::Syntax(
+                "ASCII-only \\B is not allowed in Unicode regexes \
+                 because it may result in invalid UTF-8 matches"
+                    .to_string(),
+            ));
+        }
         // If we're compiling a forward DFA and we aren't anchored, then
         // add a `.*?` before the first capture group.
         // Other matching engines handle this by baking the logic into the
         // matching engine itself.
         let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
-        self.compiled.is_anchored_start = expr.is_anchored_start();
-        self.compiled.is_anchored_end = expr.is_anchored_end();
+        self.compiled.is_anchored_start =
+            expr.properties().look_set_prefix().contains(Look::Start);
+        self.compiled.is_anchored_end =
+            expr.properties().look_set_suffix().contains(Look::End);
         if self.compiled.needs_dotstar() {
             dotstar_patch = self.c_dotstar()?;
             self.compiled.start = dotstar_patch.entry;
@@ -159,6 +170,8 @@ impl Compiler {
         self.fill_to_next(patch.hole);
         self.compiled.matches = vec![self.insts.len()];
         self.push_compiled(Inst::Match(0));
+        self.compiled.static_captures_len =
+            expr.properties().static_explicit_captures_len();
         self.compile_finish()
     }
 
@@ -168,10 +181,12 @@ impl Compiler {
     ) -> result::Result<Program, Error> {
         debug_assert!(exprs.len() > 1);
 
-        self.compiled.is_anchored_start =
-            exprs.iter().all(|e| e.is_anchored_start());
-        self.compiled.is_anchored_end =
-            exprs.iter().all(|e| e.is_anchored_end());
+        self.compiled.is_anchored_start = exprs
+            .iter()
+            .all(|e| e.properties().look_set_prefix().contains(Look::Start));
+        self.compiled.is_anchored_end = exprs
+            .iter()
+            .all(|e| e.properties().look_set_suffix().contains(Look::End));
         let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
         if self.compiled.needs_dotstar() {
             dotstar_patch = self.c_dotstar()?;
@@ -272,17 +287,21 @@ impl Compiler {
         self.check_size()?;
         match *expr.kind() {
             Empty => self.c_empty(),
-            Literal(hir::Literal::Unicode(c)) => self.c_char(c),
-            Literal(hir::Literal::Byte(b)) => {
-                assert!(self.compiled.uses_bytes());
-                self.c_byte(b)
+            Literal(hir::Literal(ref bytes)) => {
+                if self.compiled.is_reverse {
+                    let mut bytes = bytes.to_vec();
+                    bytes.reverse();
+                    self.c_literal(&bytes)
+                } else {
+                    self.c_literal(bytes)
+                }
             }
             Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()),
             Class(hir::Class::Bytes(ref cls)) => {
                 if self.compiled.uses_bytes() {
                     self.c_class_bytes(cls.ranges())
                 } else {
-                    assert!(cls.is_all_ascii());
+                    assert!(cls.is_ascii());
                     let mut char_ranges = vec![];
                     for r in cls.iter() {
                         let (s, e) = (r.start() as char, r.end() as char);
@@ -291,92 +310,94 @@ impl Compiler {
                     self.c_class(&char_ranges)
                 }
             }
-            Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => {
-                self.byte_classes.set_range(b'\n', b'\n');
-                self.c_empty_look(prog::EmptyLook::EndLine)
-            }
-            Anchor(hir::Anchor::StartLine) => {
-                self.byte_classes.set_range(b'\n', b'\n');
-                self.c_empty_look(prog::EmptyLook::StartLine)
-            }
-            Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => {
-                self.byte_classes.set_range(b'\n', b'\n');
-                self.c_empty_look(prog::EmptyLook::StartLine)
-            }
-            Anchor(hir::Anchor::EndLine) => {
-                self.byte_classes.set_range(b'\n', b'\n');
-                self.c_empty_look(prog::EmptyLook::EndLine)
-            }
-            Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => {
-                self.c_empty_look(prog::EmptyLook::EndText)
-            }
-            Anchor(hir::Anchor::StartText) => {
-                self.c_empty_look(prog::EmptyLook::StartText)
-            }
-            Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => {
-                self.c_empty_look(prog::EmptyLook::StartText)
-            }
-            Anchor(hir::Anchor::EndText) => {
-                self.c_empty_look(prog::EmptyLook::EndText)
-            }
-            WordBoundary(hir::WordBoundary::Unicode) => {
-                if !cfg!(feature = "unicode-perl") {
-                    return Err(Error::Syntax(
-                        "Unicode word boundaries are unavailable when \
-                         the unicode-perl feature is disabled"
-                            .to_string(),
-                    ));
+            Look(ref look) => match *look {
+                hir::Look::Start if self.compiled.is_reverse => {
+                    self.c_empty_look(prog::EmptyLook::EndText)
                 }
-                self.compiled.has_unicode_word_boundary = true;
-                self.byte_classes.set_word_boundary();
-                // We also make sure that all ASCII bytes are in a different
-                // class from non-ASCII bytes. Otherwise, it's possible for
-                // ASCII bytes to get lumped into the same class as non-ASCII
-                // bytes. This in turn may cause the lazy DFA to falsely start
-                // when it sees an ASCII byte that maps to a byte class with
-                // non-ASCII bytes. This ensures that never happens.
-                self.byte_classes.set_range(0, 0x7F);
-                self.c_empty_look(prog::EmptyLook::WordBoundary)
-            }
-            WordBoundary(hir::WordBoundary::UnicodeNegate) => {
-                if !cfg!(feature = "unicode-perl") {
+                hir::Look::Start => {
+                    self.c_empty_look(prog::EmptyLook::StartText)
+                }
+                hir::Look::End if self.compiled.is_reverse => {
+                    self.c_empty_look(prog::EmptyLook::StartText)
+                }
+                hir::Look::End => self.c_empty_look(prog::EmptyLook::EndText),
+                hir::Look::StartLF if self.compiled.is_reverse => {
+                    self.byte_classes.set_range(b'\n', b'\n');
+                    self.c_empty_look(prog::EmptyLook::EndLine)
+                }
+                hir::Look::StartLF => {
+                    self.byte_classes.set_range(b'\n', b'\n');
+                    self.c_empty_look(prog::EmptyLook::StartLine)
+                }
+                hir::Look::EndLF if self.compiled.is_reverse => {
+                    self.byte_classes.set_range(b'\n', b'\n');
+                    self.c_empty_look(prog::EmptyLook::StartLine)
+                }
+                hir::Look::EndLF => {
+                    self.byte_classes.set_range(b'\n', b'\n');
+                    self.c_empty_look(prog::EmptyLook::EndLine)
+                }
+                hir::Look::StartCRLF | hir::Look::EndCRLF => {
                     return Err(Error::Syntax(
-                        "Unicode word boundaries are unavailable when \
-                         the unicode-perl feature is disabled"
+                        "CRLF-aware line anchors are not supported yet"
                             .to_string(),
                     ));
                 }
-                self.compiled.has_unicode_word_boundary = true;
-                self.byte_classes.set_word_boundary();
-                // See comments above for why we set the ASCII range here.
-                self.byte_classes.set_range(0, 0x7F);
-                self.c_empty_look(prog::EmptyLook::NotWordBoundary)
-            }
-            WordBoundary(hir::WordBoundary::Ascii) => {
-                self.byte_classes.set_word_boundary();
-                self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
-            }
-            WordBoundary(hir::WordBoundary::AsciiNegate) => {
-                self.byte_classes.set_word_boundary();
-                self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
-            }
-            Group(ref g) => match g.kind {
-                hir::GroupKind::NonCapturing => self.c(&g.hir),
-                hir::GroupKind::CaptureIndex(index) => {
-                    if index as usize >= self.compiled.captures.len() {
-                        self.compiled.captures.push(None);
+                hir::Look::WordAscii => {
+                    self.byte_classes.set_word_boundary();
+                    self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
+                }
+                hir::Look::WordAsciiNegate => {
+                    self.byte_classes.set_word_boundary();
+                    self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
+                }
+                hir::Look::WordUnicode => {
+                    if !cfg!(feature = "unicode-perl") {
+                        return Err(Error::Syntax(
+                            "Unicode word boundaries are unavailable when \
+                         the unicode-perl feature is disabled"
+                                .to_string(),
+                        ));
                     }
-                    self.c_capture(2 * index as usize, &g.hir)
+                    self.compiled.has_unicode_word_boundary = true;
+                    self.byte_classes.set_word_boundary();
+                    // We also make sure that all ASCII bytes are in a different
+                    // class from non-ASCII bytes. Otherwise, it's possible for
+                    // ASCII bytes to get lumped into the same class as non-ASCII
+                    // bytes. This in turn may cause the lazy DFA to falsely start
+                    // when it sees an ASCII byte that maps to a byte class with
+                    // non-ASCII bytes. This ensures that never happens.
+                    self.byte_classes.set_range(0, 0x7F);
+                    self.c_empty_look(prog::EmptyLook::WordBoundary)
                 }
-                hir::GroupKind::CaptureName { index, ref name } => {
-                    if index as usize >= self.compiled.captures.len() {
-                        let n = name.to_string();
-                        self.compiled.captures.push(Some(n.clone()));
-                        self.capture_name_idx.insert(n, index as usize);
+                hir::Look::WordUnicodeNegate => {
+                    if !cfg!(feature = "unicode-perl") {
+                        return Err(Error::Syntax(
+                            "Unicode word boundaries are unavailable when \
+                         the unicode-perl feature is disabled"
+                                .to_string(),
+                        ));
                     }
-                    self.c_capture(2 * index as usize, &g.hir)
+                    self.compiled.has_unicode_word_boundary = true;
+                    self.byte_classes.set_word_boundary();
+                    // See comments above for why we set the ASCII range here.
+                    self.byte_classes.set_range(0, 0x7F);
+                    self.c_empty_look(prog::EmptyLook::NotWordBoundary)
                 }
             },
+            Capture(hir::Capture { index, ref name, ref sub }) => {
+                if index as usize >= self.compiled.captures.len() {
+                    let name = match *name {
+                        None => None,
+                        Some(ref boxed_str) => Some(boxed_str.to_string()),
+                    };
+                    self.compiled.captures.push(name.clone());
+                    if let Some(name) = name {
+                        self.capture_name_idx.insert(name, index as usize);
+                    }
+                }
+                self.c_capture(2 * index as usize, sub)
+            }
             Concat(ref es) => {
                 if self.compiled.is_reverse {
                     self.c_concat(es.iter().rev())
@@ -420,21 +441,19 @@ impl Compiler {
     }
 
     fn c_dotstar(&mut self) -> Result {
-        Ok(if !self.compiled.only_utf8() {
-            self.c(&Hir::repetition(hir::Repetition {
-                kind: hir::RepetitionKind::ZeroOrMore,
-                greedy: false,
-                hir: Box::new(Hir::any(true)),
-            }))?
-            .unwrap()
+        let hir = if self.compiled.only_utf8() {
+            Hir::dot(hir::Dot::AnyChar)
         } else {
-            self.c(&Hir::repetition(hir::Repetition {
-                kind: hir::RepetitionKind::ZeroOrMore,
+            Hir::dot(hir::Dot::AnyByte)
+        };
+        Ok(self
+            .c(&Hir::repetition(hir::Repetition {
+                min: 0,
+                max: None,
                 greedy: false,
-                hir: Box::new(Hir::any(false)),
+                sub: Box::new(hir),
             }))?
-            .unwrap()
-        })
+            .unwrap())
     }
 
     fn c_char(&mut self, c: char) -> ResultOrEmpty {
@@ -457,7 +476,11 @@ impl Compiler {
     fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty {
         use std::mem::size_of;
 
-        assert!(!ranges.is_empty());
+        if ranges.is_empty() {
+            return Err(Error::Syntax(
+                "empty character classes are not allowed".to_string(),
+            ));
+        }
         if self.compiled.uses_bytes() {
             Ok(Some(CompileClass { c: self, ranges }.compile()?))
         } else {
@@ -482,7 +505,11 @@ impl Compiler {
         &mut self,
         ranges: &[hir::ClassBytesRange],
     ) -> ResultOrEmpty {
-        debug_assert!(!ranges.is_empty());
+        if ranges.is_empty() {
+            return Err(Error::Syntax(
+                "empty character classes are not allowed".to_string(),
+            ));
+        }
 
         let first_split_entry = self.insts.len();
         let mut holes = vec![];
@@ -513,6 +540,52 @@ impl Compiler {
         Ok(Some(Patch { hole, entry: self.insts.len() - 1 }))
     }
 
+    fn c_literal(&mut self, bytes: &[u8]) -> ResultOrEmpty {
+        match core::str::from_utf8(bytes) {
+            Ok(string) => {
+                let mut it = string.chars();
+                let Patch { mut hole, entry } = loop {
+                    match it.next() {
+                        None => return self.c_empty(),
+                        Some(ch) => {
+                            if let Some(p) = self.c_char(ch)? {
+                                break p;
+                            }
+                        }
+                    }
+                };
+                for ch in it {
+                    if let Some(p) = self.c_char(ch)? {
+                        self.fill(hole, p.entry);
+                        hole = p.hole;
+                    }
+                }
+                Ok(Some(Patch { hole, entry }))
+            }
+            Err(_) => {
+                assert!(self.compiled.uses_bytes());
+                let mut it = bytes.iter().copied();
+                let Patch { mut hole, entry } = loop {
+                    match it.next() {
+                        None => return self.c_empty(),
+                        Some(byte) => {
+                            if let Some(p) = self.c_byte(byte)? {
+                                break p;
+                            }
+                        }
+                    }
+                };
+                for byte in it {
+                    if let Some(p) = self.c_byte(byte)? {
+                        self.fill(hole, p.entry);
+                        hole = p.hole;
+                    }
+                }
+                Ok(Some(Patch { hole, entry }))
+            }
+        }
+    }
+
     fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty
     where
         I: IntoIterator<Item = &'a Hir>,
@@ -587,19 +660,15 @@ impl Compiler {
     }
 
     fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty {
-        use regex_syntax::hir::RepetitionKind::*;
-        match rep.kind {
-            ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
-            ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
-            OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy),
-            Range(hir::RepetitionRange::Exactly(min_max)) => {
-                self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max)
-            }
-            Range(hir::RepetitionRange::AtLeast(min)) => {
-                self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min)
+        match (rep.min, rep.max) {
+            (0, Some(1)) => self.c_repeat_zero_or_one(&rep.sub, rep.greedy),
+            (0, None) => self.c_repeat_zero_or_more(&rep.sub, rep.greedy),
+            (1, None) => self.c_repeat_one_or_more(&rep.sub, rep.greedy),
+            (min, None) => {
+                self.c_repeat_range_min_or_more(&rep.sub, rep.greedy, min)
             }
-            Range(hir::RepetitionRange::Bounded(min, max)) => {
-                self.c_repeat_range(&rep.hir, rep.greedy, min, max)
+            (min, Some(max)) => {
+                self.c_repeat_range(&rep.sub, rep.greedy, min, max)
             }
         }
     }
diff --git a/vendor/regex/src/dfa.rs b/vendor/regex/src/dfa.rs
index dc9952120..78ed71021 100644
--- a/vendor/regex/src/dfa.rs
+++ b/vendor/regex/src/dfa.rs
@@ -1576,7 +1576,7 @@ impl<'a> Fsm<'a> {
     /// inputs, a new state could be created for every byte of input. (This is
     /// bad for memory use, so we bound it with a cache.)
     fn approximate_size(&self) -> usize {
-        self.cache.size + self.prog.approximate_size()
+        self.cache.size
     }
 }
 
diff --git a/vendor/regex/src/error.rs b/vendor/regex/src/error.rs
index 3e0ec7521..6c341f604 100644
--- a/vendor/regex/src/error.rs
+++ b/vendor/regex/src/error.rs
@@ -6,8 +6,26 @@ use std::iter::repeat;
 pub enum Error {
     /// A syntax error.
     Syntax(String),
-    /// The compiled program exceeded the set size limit.
-    /// The argument is the size limit imposed.
+    /// The compiled program exceeded the set size
+    /// limit. The argument is the size limit imposed by
+    /// [`RegexBuilder::size_limit`](crate::RegexBuilder::size_limit). Even
+    /// when not configured explicitly, it defaults to a reasonable limit.
+    ///
+    /// If you're getting this error, it occurred because your regex has been
+    /// compiled to an intermediate state that is too big. It is important to
+    /// note that exceeding this limit does _not_ mean the regex is too big to
+    /// _work_, but rather, the regex is big enough that it may wind up being
+    /// surprisingly slow when used in a search. In other words, this error is
+    /// meant to be a practical heuristic for avoiding a performance footgun,
+    /// and especially so for the case where the regex pattern is coming from
+    /// an untrusted source.
+    ///
+    /// There are generally two ways to move forward if you hit this error.
+    /// The first is to find some way to use a smaller regex. The second is to
+    /// increase the size limit via `RegexBuilder::size_limit`. However, if
+    /// your regex pattern is not from a trusted source, then neither of these
+    /// approaches may be appropriate. Instead, you'll have to determine just
+    /// how big of a regex you want to allow.
     CompiledTooBig(usize),
     /// Hints that destructuring should not be exhaustive.
     ///
diff --git a/vendor/regex/src/exec.rs b/vendor/regex/src/exec.rs
index b9abcdc04..ee8b589d2 100644
--- a/vendor/regex/src/exec.rs
+++ b/vendor/regex/src/exec.rs
@@ -4,9 +4,9 @@ use std::panic::AssertUnwindSafe;
 use std::sync::Arc;
 
 #[cfg(feature = "perf-literal")]
-use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
-use regex_syntax::hir::literal::Literals;
-use regex_syntax::hir::Hir;
+use aho_corasick::{AhoCorasick, MatchKind};
+use regex_syntax::hir::literal;
+use regex_syntax::hir::{Hir, Look};
 use regex_syntax::ParserBuilder;
 
 use crate::backtrack;
@@ -78,15 +78,18 @@ struct ExecReadOnly {
     /// not supported.) Note that this program contains an embedded `.*?`
     /// preceding the first capture group, unless the regex is anchored at the
     /// beginning.
+    #[allow(dead_code)]
     dfa: Program,
     /// The same as above, except the program is reversed (and there is no
     /// preceding `.*?`). This is used by the DFA to find the starting location
     /// of matches.
+    #[allow(dead_code)]
     dfa_reverse: Program,
     /// A set of suffix literals extracted from the regex.
     ///
     /// Prefix literals are stored on the `Program`, since they are used inside
     /// the matching engines.
+    #[allow(dead_code)]
     suffixes: LiteralSearcher,
     /// An Aho-Corasick automaton with leftmost-first match semantics.
     ///
@@ -98,7 +101,7 @@ struct ExecReadOnly {
     /// if we were to exhaust the ID space, we probably would have long
     /// surpassed the compilation size limit.
     #[cfg(feature = "perf-literal")]
-    ac: Option<AhoCorasick<u32>>,
+    ac: Option<AhoCorasick>,
     /// match_type encodes as much upfront knowledge about how we're going to
     /// execute a search as possible.
     match_type: MatchType,
@@ -121,8 +124,8 @@ pub struct ExecBuilder {
 /// literals.
 struct Parsed {
     exprs: Vec<Hir>,
-    prefixes: Literals,
-    suffixes: Literals,
+    prefixes: literal::Seq,
+    suffixes: literal::Seq,
     bytes: bool,
 }
 
@@ -228,8 +231,8 @@ impl ExecBuilder {
     /// Parse the current set of patterns into their AST and extract literals.
     fn parse(&self) -> Result<Parsed, Error> {
         let mut exprs = Vec::with_capacity(self.options.pats.len());
-        let mut prefixes = Some(Literals::empty());
-        let mut suffixes = Some(Literals::empty());
+        let mut prefixes = Some(literal::Seq::empty());
+        let mut suffixes = Some(literal::Seq::empty());
         let mut bytes = false;
         let is_set = self.options.pats.len() > 1;
         // If we're compiling a regex set and that set has any anchored
@@ -243,54 +246,103 @@ impl ExecBuilder {
                 .swap_greed(self.options.swap_greed)
                 .ignore_whitespace(self.options.ignore_whitespace)
                 .unicode(self.options.unicode)
-                .allow_invalid_utf8(!self.only_utf8)
+                .utf8(self.only_utf8)
                 .nest_limit(self.options.nest_limit)
                 .build();
             let expr =
                 parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?;
-            bytes = bytes || !expr.is_always_utf8();
+            let props = expr.properties();
+            // This used to just check whether the HIR matched valid UTF-8
+            // or not, but in regex-syntax 0.7, we changed our definition of
+            // "matches valid UTF-8" to exclude zero-width matches. And in
+            // particular, previously, we considered WordAsciiNegate (that
+            // is '(?-u:\B)') to be capable of matching invalid UTF-8. Our
+            // matcher engines were built under this assumption and fixing
+            // them is not worth it with the imminent plan to switch over to
+            // regex-automata. So for now, we retain the previous behavior by
+            // just explicitly treating the presence of a negated ASCII word
+            // boundary as forcing us to use a byte oriented automaton.
+            bytes = bytes
+                || !props.is_utf8()
+                || props.look_set().contains(Look::WordAsciiNegate);
 
             if cfg!(feature = "perf-literal") {
-                if !expr.is_anchored_start() && expr.is_any_anchored_start() {
+                if !props.look_set_prefix().contains(Look::Start)
+                    && props.look_set().contains(Look::Start)
+                {
                     // Partial anchors unfortunately make it hard to use
                     // prefixes, so disable them.
                     prefixes = None;
-                } else if is_set && expr.is_anchored_start() {
+                } else if is_set
+                    && props.look_set_prefix_any().contains(Look::Start)
+                {
                     // Regex sets with anchors do not go well with literal
                     // optimizations.
                     prefixes = None;
+                } else if props.look_set_prefix_any().contains_word() {
+                    // The new literal extractor ignores look-around while
+                    // the old one refused to extract prefixes from regexes
+                    // that began with a \b. These old creaky regex internals
+                    // can't deal with it, so we drop it.
+                    prefixes = None;
+                } else if props.look_set_prefix_any().contains(Look::StartLF) {
+                    // Similar to the reasoning for word boundaries, this old
+                    // regex engine can't handle literal prefixes with '(?m:^)'
+                    // at the beginning of a regex.
+                    prefixes = None;
                 }
-                prefixes = prefixes.and_then(|mut prefixes| {
-                    if !prefixes.union_prefixes(&expr) {
-                        None
-                    } else {
-                        Some(prefixes)
-                    }
-                });
 
-                if !expr.is_anchored_end() && expr.is_any_anchored_end() {
+                if !props.look_set_suffix().contains(Look::End)
+                    && props.look_set().contains(Look::End)
+                {
                     // Partial anchors unfortunately make it hard to use
                     // suffixes, so disable them.
                     suffixes = None;
-                } else if is_set && expr.is_anchored_end() {
+                } else if is_set
+                    && props.look_set_suffix_any().contains(Look::End)
+                {
                     // Regex sets with anchors do not go well with literal
                     // optimizations.
                     suffixes = None;
+                } else if props.look_set_suffix_any().contains_word() {
+                    // See the prefix case for reasoning here.
+                    suffixes = None;
+                } else if props.look_set_suffix_any().contains(Look::EndLF) {
+                    // See the prefix case for reasoning here.
+                    suffixes = None;
                 }
-                suffixes = suffixes.and_then(|mut suffixes| {
-                    if !suffixes.union_suffixes(&expr) {
-                        None
+
+                let (mut pres, mut suffs) =
+                    if prefixes.is_none() && suffixes.is_none() {
+                        (literal::Seq::infinite(), literal::Seq::infinite())
                     } else {
-                        Some(suffixes)
-                    }
+                        literal_analysis(&expr)
+                    };
+                // These old creaky regex internals can't handle cases where
+                // the literal sequences are exact but there are look-around
+                // assertions. So we make sure the sequences are inexact if
+                // there are look-around assertions anywhere. This forces the
+                // regex engines to run instead of assuming that a literal
+                // match implies an overall match.
+                if !props.look_set().is_empty() {
+                    pres.make_inexact();
+                    suffs.make_inexact();
+                }
+                prefixes = prefixes.and_then(|mut prefixes| {
+                    prefixes.union(&mut pres);
+                    Some(prefixes)
+                });
+                suffixes = suffixes.and_then(|mut suffixes| {
+                    suffixes.union(&mut suffs);
+                    Some(suffixes)
                 });
             }
             exprs.push(expr);
         }
         Ok(Parsed {
             exprs,
-            prefixes: prefixes.unwrap_or_else(Literals::empty),
-            suffixes: suffixes.unwrap_or_else(Literals::empty),
+            prefixes: prefixes.unwrap_or_else(literal::Seq::empty),
+            suffixes: suffixes.unwrap_or_else(literal::Seq::empty),
             bytes,
         })
     }
@@ -356,7 +408,7 @@ impl ExecBuilder {
     }
 
     #[cfg(feature = "perf-literal")]
-    fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick<u32>> {
+    fn build_aho_corasick(&self, parsed: &Parsed) -> Option<AhoCorasick> {
         if parsed.exprs.len() != 1 {
             return None;
         }
@@ -370,10 +422,9 @@ impl ExecBuilder {
             return None;
         }
         Some(
-            AhoCorasickBuilder::new()
+            AhoCorasick::builder()
                 .match_kind(MatchKind::LeftmostFirst)
-                .auto_configure(&lits)
-                .build_with_size::<u32, _, _>(&lits)
+                .build(&lits)
                 // This should never happen because we'd long exceed the
                 // compilation limit for regexes first.
                 .expect("AC automaton too big"),
@@ -1311,6 +1362,12 @@ impl Exec {
     pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
         &self.ro.nfa.capture_name_idx
     }
+
+    /// If the number of capture groups in every match is always the same, then
+    /// return that number. Otherwise return `None`.
+    pub fn static_captures_len(&self) -> Option<usize> {
+        self.ro.nfa.static_captures_len
+    }
 }
 
 impl Clone for Exec {
@@ -1382,7 +1439,18 @@ impl ExecReadOnly {
                     // This case shouldn't happen. When the regex isn't
                     // anchored, then complete prefixes should imply complete
                     // suffixes.
-                    Some(MatchType::Literal(MatchLiteralType::Unanchored))
+                    //
+                    // The above is wrong! This case can happen. While
+                    // complete prefixes should imply complete suffixes
+                    // here, that doesn't necessarily mean we have a useful
+                    // prefix matcher! It could be the case that the literal
+                    // searcher decided the prefixes---even though they are
+                    // "complete"---weren't good enough and thus created an
+                    // empty matcher. If that happens and we return Unanchored
+                    // here, then we'll end up using that matcher, which is
+                    // very bad because it matches at every position. So...
+                    // return None.
+                    None
                 };
             }
             None
@@ -1557,7 +1625,7 @@ fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
     // optimization pipeline, because this is a terribly inflexible way to go
     // about things.
 
-    if !expr.is_alternation_literal() {
+    if !expr.properties().is_alternation_literal() {
         return None;
     }
     let alts = match *expr.kind() {
@@ -1565,25 +1633,19 @@ fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
         _ => return None, // one literal isn't worth it
     };
 
-    let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
-        Literal::Unicode(c) => {
-            let mut buf = [0; 4];
-            dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
-        }
-        Literal::Byte(b) => {
-            dst.push(b);
-        }
-    };
-
     let mut lits = vec![];
     for alt in alts {
         let mut lit = vec![];
         match *alt.kind() {
-            HirKind::Literal(ref x) => extendlit(x, &mut lit),
+            HirKind::Literal(Literal(ref bytes)) => {
+                lit.extend_from_slice(bytes)
+            }
             HirKind::Concat(ref exprs) => {
                 for e in exprs {
                     match *e.kind() {
-                        HirKind::Literal(ref x) => extendlit(x, &mut lit),
+                        HirKind::Literal(Literal(ref bytes)) => {
+                            lit.extend_from_slice(bytes);
+                        }
                         _ => unreachable!("expected literal, got {:?}", e),
                     }
                 }
@@ -1595,6 +1657,48 @@ fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
     Some(lits)
 }
 
+#[cfg(not(feature = "perf-literal"))]
+fn literal_analysis(_: &Hir) -> (literal::Seq, literal::Seq) {
+    (literal::Seq::infinite(), literal::Seq::infinite())
+}
+
+#[cfg(feature = "perf-literal")]
+fn literal_analysis(expr: &Hir) -> (literal::Seq, literal::Seq) {
+    const ATTEMPTS: [(usize, usize); 3] = [(5, 50), (4, 30), (3, 20)];
+
+    let mut prefixes = literal::Extractor::new()
+        .kind(literal::ExtractKind::Prefix)
+        .extract(expr);
+    for (keep, limit) in ATTEMPTS {
+        let len = match prefixes.len() {
+            None => break,
+            Some(len) => len,
+        };
+        if len <= limit {
+            break;
+        }
+        prefixes.keep_first_bytes(keep);
+        prefixes.minimize_by_preference();
+    }
+
+    let mut suffixes = literal::Extractor::new()
+        .kind(literal::ExtractKind::Suffix)
+        .extract(expr);
+    for (keep, limit) in ATTEMPTS {
+        let len = match suffixes.len() {
+            None => break,
+            Some(len) => len,
+        };
+        if len <= limit {
+            break;
+        }
+        suffixes.keep_last_bytes(keep);
+        suffixes.minimize_by_preference();
+    }
+
+    (prefixes, suffixes)
+}
+
 #[cfg(test)]
 mod test {
     #[test]
diff --git a/vendor/regex/src/expand.rs b/vendor/regex/src/expand.rs
index 67b514926..98fafc949 100644
--- a/vendor/regex/src/expand.rs
+++ b/vendor/regex/src/expand.rs
@@ -182,7 +182,8 @@ fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> {
     })
 }
 
-/// Returns true if and only if the given byte is allowed in a capture name.
+/// Returns true if and only if the given byte is allowed in a capture name
+/// written in non-brace form.
 fn is_valid_cap_letter(b: u8) -> bool {
     match b {
         b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
@@ -236,4 +237,11 @@ mod tests {
     find!(find_cap_ref17, "$x_$y", c!("x_", 3));
     find!(find_cap_ref18, "${#}", c!("#", 4));
     find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
+    find!(find_cap_ref20, "${¾}", c!("¾", 5));
+    find!(find_cap_ref21, "${¾a}", c!("¾a", 6));
+    find!(find_cap_ref22, "${a¾}", c!("a¾", 6));
+    find!(find_cap_ref23, "${☃}", c!("☃", 6));
+    find!(find_cap_ref24, "${a☃}", c!("a☃", 7));
+    find!(find_cap_ref25, "${☃a}", c!("☃a", 7));
+    find!(find_cap_ref26, "${名字}", c!("名字", 9));
 }
diff --git a/vendor/regex/src/lib.rs b/vendor/regex/src/lib.rs
index 6b95739c5..82c1b77ad 100644
--- a/vendor/regex/src/lib.rs
+++ b/vendor/regex/src/lib.rs
@@ -199,6 +199,8 @@ instead.)
 This implementation executes regular expressions **only** on valid UTF-8
 while exposing match locations as byte indices into the search string. (To
 relax this restriction, use the [`bytes`](bytes/index.html) sub-module.)
+Conceptually, the regex engine works by matching a haystack as if it were a
+sequence of Unicode scalar values.
 
 Only simple case folding is supported. Namely, when matching
 case-insensitively, the characters are first mapped using the "simple" case
@@ -285,9 +287,9 @@ a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax).
 .             any character except new line (includes new line with s flag)
 \d            digit (\p{Nd})
 \D            not digit
-\pN           One-letter name Unicode character class
+\pX           Unicode character class identified by a one-letter name
 \p{Greek}     Unicode character class (general category or script)
-\PN           Negated one-letter name Unicode character class
+\PX           Negated Unicode character class identified by a one-letter name
 \P{Greek}     negated Unicode character class (general category or script)
 </pre>
 
@@ -325,6 +327,25 @@ xy    concatenation (x followed by y)
 x|y   alternation (x or y, prefer x)
 </pre>
 
+This example shows how an alternation works, and what it means to prefer a
+branch in the alternation over subsequent branches.
+
+```
+use regex::Regex;
+
+let haystack = "samwise";
+// If 'samwise' comes first in our alternation, then it is
+// preferred as a match, even if the regex engine could
+// technically detect that 'sam' led to a match earlier.
+let re = Regex::new(r"samwise|sam").unwrap();
+assert_eq!("samwise", re.find(haystack).unwrap().as_str());
+// But if 'sam' comes first, then it will match instead.
+// In this case, it is impossible for 'samwise' to match
+// because 'sam' is a prefix of it.
+let re = Regex::new(r"sam|samwise").unwrap();
+assert_eq!("sam", re.find(haystack).unwrap().as_str());
+```
+
 ## Repetitions
 
 <pre class="rust">
@@ -360,12 +381,19 @@ regex matches `abc` at positions `0`, `1`, `2` and `3`.
 
 <pre class="rust">
 (exp)          numbered capture group (indexed by opening parenthesis)
-(?P&lt;name&gt;exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
+(?P&lt;name&gt;exp)  named (also numbered) capture group (names must be alpha-numeric)
+(?&lt;name&gt;exp)   named (also numbered) capture group (names must be alpha-numeric)
 (?:exp)        non-capturing group
 (?flags)       set flags within current group
 (?flags:exp)   set flags for exp (non-capturing)
 </pre>
 
+Capture group names must be any sequence of alpha-numeric Unicode codepoints,
+in addition to `.`, `_`, `[` and `]`. Names must start with either an `_` or
+an alphabetic codepoint. Alphabetic codepoints correspond to the `Alphabetic`
+Unicode property, while numeric codepoints correspond to the union of the
+`Decimal_Number`, `Letter_Number` and `Other_Number` general categories.
+
 Flags are each a single character. For example, `(?x)` sets the flag `x`
 and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at
 the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
@@ -379,9 +407,13 @@ m     multi-line mode: ^ and $ match begin/end of line
 s     allow . to match \n
 U     swap the meaning of x* and x*?
 u     Unicode support (enabled by default)
-x     ignore whitespace and allow line comments (starting with `#`)
+x     verbose mode, ignores whitespace and allows line comments (starting with `#`)
 </pre>
 
+Note that in verbose mode, whitespace is ignored everywhere, including within
+character classes. To insert whitespace, use its escaped form or a hex literal.
+For example, `\ ` or `\x20` for an ASCII space.
+
 Flags can be toggled within a pattern. Here's an example that matches
 case-insensitively for the first part but case-sensitively for the second part:
 
diff --git a/vendor/regex/src/literal/imp.rs b/vendor/regex/src/literal/imp.rs
index 90b2f1160..75fa6e37b 100644
--- a/vendor/regex/src/literal/imp.rs
+++ b/vendor/regex/src/literal/imp.rs
@@ -1,8 +1,8 @@
 use std::mem;
 
-use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder};
+use aho_corasick::{self, packed, AhoCorasick};
 use memchr::{memchr, memchr2, memchr3, memmem};
-use regex_syntax::hir::literal::{Literal, Literals};
+use regex_syntax::hir::literal::{Literal, Seq};
 
 /// A prefix extracted from a compiled regular expression.
 ///
@@ -26,7 +26,7 @@ enum Matcher {
     /// A single substring, using vector accelerated routines when available.
     Memmem(Memmem),
     /// An Aho-Corasick automaton.
-    AC { ac: AhoCorasick<u32>, lits: Vec<Literal> },
+    AC { ac: AhoCorasick, lits: Vec<Literal> },
     /// A packed multiple substring searcher, using SIMD.
     ///
     /// Note that Aho-Corasick will actually use this packed searcher
@@ -39,27 +39,26 @@ enum Matcher {
 impl LiteralSearcher {
     /// Returns a matcher that never matches and never advances the input.
     pub fn empty() -> Self {
-        Self::new(Literals::empty(), Matcher::Empty)
+        Self::new(Seq::infinite(), Matcher::Empty)
     }
 
     /// Returns a matcher for literal prefixes from the given set.
-    pub fn prefixes(lits: Literals) -> Self {
+    pub fn prefixes(lits: Seq) -> Self {
         let matcher = Matcher::prefixes(&lits);
         Self::new(lits, matcher)
     }
 
     /// Returns a matcher for literal suffixes from the given set.
-    pub fn suffixes(lits: Literals) -> Self {
+    pub fn suffixes(lits: Seq) -> Self {
         let matcher = Matcher::suffixes(&lits);
         Self::new(lits, matcher)
     }
 
-    fn new(lits: Literals, matcher: Matcher) -> Self {
-        let complete = lits.all_complete();
+    fn new(lits: Seq, matcher: Matcher) -> Self {
         LiteralSearcher {
-            complete,
-            lcp: Memmem::new(lits.longest_common_prefix()),
-            lcs: Memmem::new(lits.longest_common_suffix()),
+            complete: lits.is_exact(),
+            lcp: Memmem::new(lits.longest_common_prefix().unwrap_or(b"")),
+            lcs: Memmem::new(lits.longest_common_suffix().unwrap_or(b"")),
             matcher,
         }
     }
@@ -150,7 +149,7 @@ impl LiteralSearcher {
             Empty => 0,
             Bytes(ref sset) => sset.dense.len(),
             Memmem(_) => 1,
-            AC { ref ac, .. } => ac.pattern_count(),
+            AC { ref ac, .. } => ac.patterns_len(),
             Packed { ref lits, .. } => lits.len(),
         }
     }
@@ -162,27 +161,31 @@ impl LiteralSearcher {
             Empty => 0,
             Bytes(ref sset) => sset.approximate_size(),
             Memmem(ref single) => single.approximate_size(),
-            AC { ref ac, .. } => ac.heap_bytes(),
-            Packed { ref s, .. } => s.heap_bytes(),
+            AC { ref ac, .. } => ac.memory_usage(),
+            Packed { ref s, .. } => s.memory_usage(),
         }
     }
 }
 
 impl Matcher {
-    fn prefixes(lits: &Literals) -> Self {
+    fn prefixes(lits: &Seq) -> Self {
         let sset = SingleByteSet::prefixes(lits);
         Matcher::new(lits, sset)
     }
 
-    fn suffixes(lits: &Literals) -> Self {
+    fn suffixes(lits: &Seq) -> Self {
         let sset = SingleByteSet::suffixes(lits);
         Matcher::new(lits, sset)
     }
 
-    fn new(lits: &Literals, sset: SingleByteSet) -> Self {
-        if lits.literals().is_empty() {
+    fn new(lits: &Seq, sset: SingleByteSet) -> Self {
+        if lits.is_empty() || lits.min_literal_len() == Some(0) {
             return Matcher::Empty;
         }
+        let lits = match lits.literals() {
+            None => return Matcher::Empty,
+            Some(members) => members,
+        };
         if sset.dense.len() >= 26 {
             // Avoid trying to match a large number of single bytes.
             // This is *very* sensitive to a frequency analysis comparison
@@ -195,26 +198,26 @@ impl Matcher {
         if sset.complete {
             return Matcher::Bytes(sset);
         }
-        if lits.literals().len() == 1 {
-            return Matcher::Memmem(Memmem::new(&lits.literals()[0]));
+        if lits.len() == 1 {
+            return Matcher::Memmem(Memmem::new(lits[0].as_bytes()));
         }
 
-        let pats = lits.literals().to_owned();
+        let pats: Vec<&[u8]> = lits.iter().map(|lit| lit.as_bytes()).collect();
         let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii;
-        if lits.literals().len() <= 100 && !is_aho_corasick_fast {
+        if lits.len() <= 100 && !is_aho_corasick_fast {
             let mut builder = packed::Config::new()
                 .match_kind(packed::MatchKind::LeftmostFirst)
                 .builder();
             if let Some(s) = builder.extend(&pats).build() {
-                return Matcher::Packed { s, lits: pats };
+                return Matcher::Packed { s, lits: lits.to_owned() };
             }
         }
-        let ac = AhoCorasickBuilder::new()
+        let ac = AhoCorasick::builder()
             .match_kind(aho_corasick::MatchKind::LeftmostFirst)
-            .dfa(true)
-            .build_with_size::<u32, _, _>(&pats)
+            .kind(Some(aho_corasick::AhoCorasickKind::DFA))
+            .build(&pats)
             .unwrap();
-        Matcher::AC { ac, lits: pats }
+        Matcher::AC { ac, lits: lits.to_owned() }
     }
 }
 
@@ -257,7 +260,7 @@ impl<'a> Iterator for LiteralIter<'a> {
                 } else {
                     let next = &lits[0];
                     *lits = &lits[1..];
-                    Some(&**next)
+                    Some(next.as_bytes())
                 }
             }
             LiteralIter::Packed(ref mut lits) => {
@@ -266,7 +269,7 @@ impl<'a> Iterator for LiteralIter<'a> {
                 } else {
                     let next = &lits[0];
                     *lits = &lits[1..];
-                    Some(&**next)
+                    Some(next.as_bytes())
                 }
             }
         }
@@ -291,11 +294,15 @@ impl SingleByteSet {
         }
     }
 
-    fn prefixes(lits: &Literals) -> SingleByteSet {
+    fn prefixes(lits: &Seq) -> SingleByteSet {
         let mut sset = SingleByteSet::new();
-        for lit in lits.literals() {
+        let lits = match lits.literals() {
+            None => return sset,
+            Some(lits) => lits,
+        };
+        for lit in lits.iter() {
             sset.complete = sset.complete && lit.len() == 1;
-            if let Some(&b) = lit.get(0) {
+            if let Some(&b) = lit.as_bytes().get(0) {
                 if !sset.sparse[b as usize] {
                     if b > 0x7F {
                         sset.all_ascii = false;
@@ -308,11 +315,15 @@ impl SingleByteSet {
         sset
     }
 
-    fn suffixes(lits: &Literals) -> SingleByteSet {
+    fn suffixes(lits: &Seq) -> SingleByteSet {
         let mut sset = SingleByteSet::new();
-        for lit in lits.literals() {
+        let lits = match lits.literals() {
+            None => return sset,
+            Some(lits) => lits,
+        };
+        for lit in lits.iter() {
             sset.complete = sset.complete && lit.len() == 1;
-            if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) {
+            if let Some(&b) = lit.as_bytes().last() {
                 if !sset.sparse[b as usize] {
                     if b > 0x7F {
                         sset.all_ascii = false;
diff --git a/vendor/regex/src/literal/mod.rs b/vendor/regex/src/literal/mod.rs
index 980f52330..b9fb77aed 100644
--- a/vendor/regex/src/literal/mod.rs
+++ b/vendor/regex/src/literal/mod.rs
@@ -6,7 +6,7 @@ mod imp;
 #[allow(missing_docs)]
 #[cfg(not(feature = "perf-literal"))]
 mod imp {
-    use regex_syntax::hir::literal::Literals;
+    use regex_syntax::hir::literal::Seq;
 
     #[derive(Clone, Debug)]
     pub struct LiteralSearcher(());
@@ -16,11 +16,11 @@ mod imp {
             LiteralSearcher(())
         }
 
-        pub fn prefixes(_: Literals) -> Self {
+        pub fn prefixes(_: Seq) -> Self {
             LiteralSearcher(())
         }
 
-        pub fn suffixes(_: Literals) -> Self {
+        pub fn suffixes(_: Seq) -> Self {
             LiteralSearcher(())
         }
 
diff --git a/vendor/regex/src/prog.rs b/vendor/regex/src/prog.rs
index c211f71d8..100862cf1 100644
--- a/vendor/regex/src/prog.rs
+++ b/vendor/regex/src/prog.rs
@@ -27,6 +27,9 @@ pub struct Program {
     pub captures: Vec<Option<String>>,
     /// Pointers to all named capture groups into `captures`.
     pub capture_name_idx: Arc<HashMap<String, usize>>,
+    /// If the number of capture groups is the same for all possible matches,
+    /// then this is that number.
+    pub static_captures_len: Option<usize>,
     /// A pointer to the start instruction. This can vary depending on how
     /// the program was compiled. For example, programs for use with the DFA
     /// engine have a `.*?` inserted at the beginning of unanchored regular
@@ -83,6 +86,7 @@ impl Program {
             matches: vec![],
             captures: vec![],
             capture_name_idx: Arc::new(HashMap::new()),
+            static_captures_len: None,
             start: 0,
             byte_classes: vec![0; 256],
             only_utf8: true,
diff --git a/vendor/regex/src/re_bytes.rs b/vendor/regex/src/re_bytes.rs
index 07e9f98ac..e3a3b019b 100644
--- a/vendor/regex/src/re_bytes.rs
+++ b/vendor/regex/src/re_bytes.rs
@@ -17,7 +17,7 @@ use crate::re_trait::{self, RegularExpression, SubCapturesPosIter};
 /// Match represents a single match of a regex in a haystack.
 ///
 /// The lifetime parameter `'t` refers to the lifetime of the matched text.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[derive(Copy, Clone, Eq, PartialEq)]
 pub struct Match<'t> {
     text: &'t [u8],
     start: usize,
@@ -37,6 +37,18 @@ impl<'t> Match<'t> {
         self.end
     }
 
+    /// Returns true if and only if this match has a length of zero.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start == self.end
+    }
+
+    /// Returns the length, in bytes, of this match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.end - self.start
+    }
+
     /// Returns the range over the starting and ending byte offsets of the
     /// match in the haystack.
     #[inline]
@@ -57,6 +69,24 @@ impl<'t> Match<'t> {
     }
 }
 
+impl<'t> std::fmt::Debug for Match<'t> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let mut fmt = f.debug_struct("Match");
+        fmt.field("start", &self.start).field("end", &self.end);
+        if let Ok(s) = std::str::from_utf8(self.as_bytes()) {
+            fmt.field("bytes", &s);
+        } else {
+            // FIXME: It would be nice if this could be printed as a string
+            // with invalid UTF-8 replaced with hex escapes. An alloc would
+            // probably be okay if that makes it easier, but regex-automata
+            // does (at time of writing) have internal routines that do this.
+            // So maybe we should expose them.
+            fmt.field("bytes", &self.as_bytes());
+        }
+        fmt.finish()
+    }
+}
+
 impl<'t> From<Match<'t>> for Range<usize> {
     fn from(m: Match<'t>) -> Range<usize> {
         m.range()
@@ -253,12 +283,7 @@ impl Regex {
     /// The `0`th capture group is always unnamed, so it must always be
     /// accessed with `get(0)` or `[0]`.
     pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
-        let mut locs = self.capture_locations();
-        self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
-            text,
-            locs: locs.0,
-            named_groups: self.0.capture_name_idx().clone(),
-        })
+        self.captures_at(text, 0)
     }
 
     /// Returns an iterator over all the non-overlapping capture groups matched
@@ -537,7 +562,14 @@ impl Regex {
     /// This method may have the same performance characteristics as
     /// `is_match`, except it provides an end location for a match. In
     /// particular, the location returned *may be shorter* than the proper end
-    /// of the leftmost-first match.
+    /// of the leftmost-first match that you would find via `Regex::find`.
+    ///
+    /// Note that it is not guaranteed that this routine finds the shortest or
+    /// "earliest" possible match. Instead, the main idea of this API is that
+    /// it returns the offset at the point at which the internal regex engine
+    /// has determined that a match has occurred. This may vary depending on
+    /// which internal regex engine is used, and thus, the offset itself may
+    /// change.
     ///
     /// # Example
     ///
@@ -598,6 +630,25 @@ impl Regex {
             .map(|(s, e)| Match::new(text, s, e))
     }
 
+    /// Returns the same as [`Regex::captures`], but starts the search at the
+    /// given offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn captures_at<'t>(
+        &self,
+        text: &'t [u8],
+        start: usize,
+    ) -> Option<Captures<'t>> {
+        let mut locs = self.capture_locations();
+        self.captures_read_at(&mut locs, text, start).map(move |_| Captures {
+            text,
+            locs: locs.0,
+            named_groups: self.0.capture_name_idx().clone(),
+        })
+    }
+
     /// This is like `captures`, but uses
     /// [`CaptureLocations`](struct.CaptureLocations.html)
     /// instead of
@@ -667,6 +718,46 @@ impl Regex {
         self.0.capture_names().len()
     }
 
+    /// Returns the total number of capturing groups that appear in every
+    /// possible match.
+    ///
+    /// If the number of capture groups can vary depending on the match, then
+    /// this returns `None`. That is, a value is only returned when the number
+    /// of matching groups is invariant or "static."
+    ///
+    /// Note that like [`Regex::captures_len`], this **does** include the
+    /// implicit capturing group corresponding to the entire match. Therefore,
+    /// when a non-None value is returned, it is guaranteed to be at least `1`.
+    /// Stated differently, a return value of `Some(0)` is impossible.
+    ///
+    /// # Example
+    ///
+    /// This shows a few cases where a static number of capture groups is
+    /// available and a few cases where it is not.
+    ///
+    /// ```
+    /// use regex::bytes::Regex;
+    ///
+    /// let len = |pattern| {
+    ///     Regex::new(pattern).map(|re| re.static_captures_len())
+    /// };
+    ///
+    /// assert_eq!(Some(1), len("a")?);
+    /// assert_eq!(Some(2), len("(a)")?);
+    /// assert_eq!(Some(2), len("(a)|(b)")?);
+    /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
+    /// assert_eq!(None, len("(a)|b")?);
+    /// assert_eq!(None, len("a|(b)")?);
+    /// assert_eq!(None, len("(b)*")?);
+    /// assert_eq!(Some(2), len("(b)+")?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn static_captures_len(&self) -> Option<usize> {
+        self.0.static_captures_len().map(|len| len.saturating_add(1))
+    }
+
     /// Returns an empty set of capture locations that can be reused in
     /// multiple calls to `captures_read` or `captures_read_at`.
     pub fn capture_locations(&self) -> CaptureLocations {
@@ -856,6 +947,27 @@ impl<'r> FusedIterator for CaptureNames<'r> {}
 /// In order to build a value of this type, you'll need to call the
 /// `capture_locations` method on the `Regex` being used to execute the search.
 /// The value returned can then be reused in subsequent searches.
+///
+/// # Example
+///
+/// This example shows how to create and use `CaptureLocations` in a search.
+///
+/// ```
+/// use regex::bytes::Regex;
+///
+/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
+/// let mut locs = re.capture_locations();
+/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap();
+/// assert_eq!(0..17, m.range());
+/// assert_eq!(Some((0, 17)), locs.get(0));
+/// assert_eq!(Some((0, 5)), locs.get(1));
+/// assert_eq!(Some((6, 17)), locs.get(2));
+///
+/// // Asking for an invalid capture group always returns None.
+/// assert_eq!(None, locs.get(3));
+/// assert_eq!(None, locs.get(34973498648));
+/// assert_eq!(None, locs.get(9944060567225171988));
+/// ```
 #[derive(Clone, Debug)]
 pub struct CaptureLocations(re_trait::Locations);
 
diff --git a/vendor/regex/src/re_set.rs b/vendor/regex/src/re_set.rs
index a6d886d76..7c8253f0c 100644
--- a/vendor/regex/src/re_set.rs
+++ b/vendor/regex/src/re_set.rs
@@ -289,6 +289,12 @@ impl RegexSet {
     }
 }
 
+impl Default for RegexSet {
+    fn default() -> Self {
+        RegexSet::empty()
+    }
+}
+
 /// A set of matches returned by a regex set.
 #[derive(Clone, Debug)]
 pub struct SetMatches {
@@ -315,6 +321,11 @@ impl SetMatches {
     }
 
     /// The total number of regexes in the set that created these matches.
+    ///
+    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
+    /// In particular, it does *not* return the number of elements yielded by
+    /// [`SetMatches::iter`]. The only way to determine the total number of
+    /// matched regexes is to iterate over them.
     pub fn len(&self) -> usize {
         self.matches.len()
     }
diff --git a/vendor/regex/src/re_trait.rs b/vendor/regex/src/re_trait.rs
index d0c717df5..505810c84 100644
--- a/vendor/regex/src/re_trait.rs
+++ b/vendor/regex/src/re_trait.rs
@@ -20,7 +20,7 @@ impl Locations {
     /// not match anything. The positions returned are *always* byte indices
     /// with respect to the original string matched.
     pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
-        let (s, e) = (i * 2, i * 2 + 1);
+        let (s, e) = (i.checked_mul(2)?, i.checked_mul(2)?.checked_add(1)?);
         match (self.0.get(s), self.0.get(e)) {
             (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
             _ => None,
diff --git a/vendor/regex/src/re_unicode.rs b/vendor/regex/src/re_unicode.rs
index 197510ea0..57689086d 100644
--- a/vendor/regex/src/re_unicode.rs
+++ b/vendor/regex/src/re_unicode.rs
@@ -25,7 +25,7 @@ pub fn escape(text: &str) -> String {
 /// Match represents a single match of a regex in a haystack.
 ///
 /// The lifetime parameter `'t` refers to the lifetime of the matched text.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[derive(Copy, Clone, Eq, PartialEq)]
 pub struct Match<'t> {
     text: &'t str,
     start: usize,
@@ -45,6 +45,18 @@ impl<'t> Match<'t> {
         self.end
     }
 
+    /// Returns true if and only if this match has a length of zero.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start == self.end
+    }
+
+    /// Returns the length, in bytes, of this match.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.end - self.start
+    }
+
     /// Returns the range over the starting and ending byte offsets of the
     /// match in the haystack.
     #[inline]
@@ -65,6 +77,16 @@ impl<'t> Match<'t> {
     }
 }
 
+impl<'t> std::fmt::Debug for Match<'t> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("Match")
+            .field("start", &self.start)
+            .field("end", &self.end)
+            .field("string", &self.as_str())
+            .finish()
+    }
+}
+
 impl<'t> From<Match<'t>> for &'t str {
     fn from(m: Match<'t>) -> &'t str {
         m.as_str()
@@ -309,12 +331,7 @@ impl Regex {
     /// The `0`th capture group is always unnamed, so it must always be
     /// accessed with `get(0)` or `[0]`.
     pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
-        let mut locs = self.capture_locations();
-        self.captures_read_at(&mut locs, text, 0).map(move |_| Captures {
-            text,
-            locs: locs.0,
-            named_groups: self.0.capture_name_idx().clone(),
-        })
+        self.captures_at(text, 0)
     }
 
     /// Returns an iterator over all the non-overlapping capture groups matched
@@ -595,7 +612,14 @@ impl Regex {
     /// This method may have the same performance characteristics as
     /// `is_match`, except it provides an end location for a match. In
     /// particular, the location returned *may be shorter* than the proper end
-    /// of the leftmost-first match.
+    /// of the leftmost-first match that you would find via `Regex::find`.
+    ///
+    /// Note that it is not guaranteed that this routine finds the shortest or
+    /// "earliest" possible match. Instead, the main idea of this API is that
+    /// it returns the offset at the point at which the internal regex engine
+    /// has determined that a match has occurred. This may vary depending on
+    /// which internal regex engine is used, and thus, the offset itself may
+    /// change.
     ///
     /// # Example
     ///
@@ -615,12 +639,12 @@ impl Regex {
         self.shortest_match_at(text, 0)
     }
 
-    /// Returns the same as shortest_match, but starts the search at the given
-    /// offset.
+    /// Returns the same as `shortest_match`, but starts the search at the
+    /// given offset.
     ///
     /// The significance of the starting point is that it takes the surrounding
-    /// context into consideration. For example, the `\A` anchor can only
-    /// match when `start == 0`.
+    /// context into consideration. For example, the `\A` anchor can only match
+    /// when `start == 0`.
     pub fn shortest_match_at(
         &self,
         text: &str,
@@ -656,6 +680,25 @@ impl Regex {
             .map(|(s, e)| Match::new(text, s, e))
     }
 
+    /// Returns the same as [`Regex::captures`], but starts the search at the
+    /// given offset.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `start == 0`.
+    pub fn captures_at<'t>(
+        &self,
+        text: &'t str,
+        start: usize,
+    ) -> Option<Captures<'t>> {
+        let mut locs = self.capture_locations();
+        self.captures_read_at(&mut locs, text, start).map(move |_| Captures {
+            text,
+            locs: locs.0,
+            named_groups: self.0.capture_name_idx().clone(),
+        })
+    }
+
     /// This is like `captures`, but uses
     /// [`CaptureLocations`](struct.CaptureLocations.html)
     /// instead of
@@ -725,6 +768,46 @@ impl Regex {
         self.0.capture_names().len()
     }
 
+    /// Returns the total number of capturing groups that appear in every
+    /// possible match.
+    ///
+    /// If the number of capture groups can vary depending on the match, then
+    /// this returns `None`. That is, a value is only returned when the number
+    /// of matching groups is invariant or "static."
+    ///
+    /// Note that like [`Regex::captures_len`], this **does** include the
+    /// implicit capturing group corresponding to the entire match. Therefore,
+    /// when a non-None value is returned, it is guaranteed to be at least `1`.
+    /// Stated differently, a return value of `Some(0)` is impossible.
+    ///
+    /// # Example
+    ///
+    /// This shows a few cases where a static number of capture groups is
+    /// available and a few cases where it is not.
+    ///
+    /// ```
+    /// use regex::Regex;
+    ///
+    /// let len = |pattern| {
+    ///     Regex::new(pattern).map(|re| re.static_captures_len())
+    /// };
+    ///
+    /// assert_eq!(Some(1), len("a")?);
+    /// assert_eq!(Some(2), len("(a)")?);
+    /// assert_eq!(Some(2), len("(a)|(b)")?);
+    /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
+    /// assert_eq!(None, len("(a)|b")?);
+    /// assert_eq!(None, len("a|(b)")?);
+    /// assert_eq!(None, len("(b)*")?);
+    /// assert_eq!(Some(2), len("(b)+")?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn static_captures_len(&self) -> Option<usize> {
+        self.0.static_captures_len().map(|len| len.saturating_add(1))
+    }
+
     /// Returns an empty set of capture locations that can be reused in
     /// multiple calls to `captures_read` or `captures_read_at`.
     pub fn capture_locations(&self) -> CaptureLocations {
@@ -866,6 +949,27 @@ impl<'r, 't> FusedIterator for SplitN<'r, 't> {}
 /// In order to build a value of this type, you'll need to call the
 /// `capture_locations` method on the `Regex` being used to execute the search.
 /// The value returned can then be reused in subsequent searches.
+///
+/// # Example
+///
+/// This example shows how to create and use `CaptureLocations` in a search.
+///
+/// ```
+/// use regex::Regex;
+///
+/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
+/// let mut locs = re.capture_locations();
+/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
+/// assert_eq!(0..17, m.range());
+/// assert_eq!(Some((0, 17)), locs.get(0));
+/// assert_eq!(Some((0, 5)), locs.get(1));
+/// assert_eq!(Some((6, 17)), locs.get(2));
+///
+/// // Asking for an invalid capture group always returns None.
+/// assert_eq!(None, locs.get(3));
+/// assert_eq!(None, locs.get(34973498648));
+/// assert_eq!(None, locs.get(9944060567225171988));
+/// ```
 #[derive(Clone, Debug)]
 pub struct CaptureLocations(re_trait::Locations);