summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex/src/re_trait.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex/src/re_trait.rs')
-rw-r--r--third_party/rust/regex/src/re_trait.rs261
1 files changed, 261 insertions, 0 deletions
diff --git a/third_party/rust/regex/src/re_trait.rs b/third_party/rust/regex/src/re_trait.rs
new file mode 100644
index 0000000000..b56804ef01
--- /dev/null
+++ b/third_party/rust/regex/src/re_trait.rs
@@ -0,0 +1,261 @@
+/// Slot is a single saved capture location. Note that there are two slots for
+/// every capture in a regular expression (one slot each for the start and end
+/// of the capture).
+pub type Slot = Option<usize>;
+
+/// Locations represents the offsets of each capturing group in a regex for
+/// a single match.
+///
+/// Unlike `Captures`, a `Locations` value only stores offsets.
+#[doc(hidden)]
+#[derive(Clone, Debug)]
+pub struct Locations(Vec<Slot>);
+
+impl Locations {
+ /// Returns the start and end positions of the Nth capture group. Returns
+ /// `None` if `i` is not a valid capture group or if the capture group did
+ /// not match anything. The positions returned are *always* byte indices
+ /// with respect to the original string matched.
+ pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+ let (s, e) = (i * 2, i * 2 + 1);
+ match (self.0.get(s), self.0.get(e)) {
+ (Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
+ _ => None,
+ }
+ }
+
+ /// Creates an iterator of all the capture group positions in order of
+ /// appearance in the regular expression. Positions are byte indices
+ /// in terms of the original string matched.
+ pub fn iter(&self) -> SubCapturesPosIter {
+ SubCapturesPosIter { idx: 0, locs: self }
+ }
+
+ /// Returns the total number of capturing groups.
+ ///
+ /// This is always at least `1` since every regex has at least `1`
+ /// capturing group that corresponds to the entire match.
+ pub fn len(&self) -> usize {
+ self.0.len() / 2
+ }
+
+ /// Return the individual slots as a slice.
+ pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
+ &mut self.0
+ }
+}
+
+/// An iterator over capture group positions for a particular match of a
+/// regular expression.
+///
+/// Positions are byte indices in terms of the original string matched.
+///
+/// `'c` is the lifetime of the captures.
+pub struct SubCapturesPosIter<'c> {
+ idx: usize,
+ locs: &'c Locations,
+}
+
+impl<'c> Iterator for SubCapturesPosIter<'c> {
+ type Item = Option<(usize, usize)>;
+
+ fn next(&mut self) -> Option<Option<(usize, usize)>> {
+ if self.idx >= self.locs.len() {
+ return None;
+ }
+ let x = match self.locs.pos(self.idx) {
+ None => Some(None),
+ Some((s, e)) => Some(Some((s, e))),
+ };
+ self.idx += 1;
+ x
+ }
+}
+
+/// `RegularExpression` describes types that can implement regex searching.
+///
+/// This trait is my attempt at reducing code duplication and to standardize
+/// the internal API. Specific duplication that is avoided are the `find`
+/// and `capture` iterators, which are slightly tricky.
+///
+/// It's not clear whether this trait is worth it, and it also isn't
+/// clear whether it's useful as a public trait or not. Methods like
+/// `next_after_empty` reak of bad design, but the rest of the methods seem
+/// somewhat reasonable. One particular thing this trait would expose would be
+/// the ability to start the search of a regex anywhere in a haystack, which
+/// isn't possible in the current public API.
+pub trait RegularExpression: Sized {
+ /// The type of the haystack.
+ type Text: ?Sized;
+
+ /// The number of capture slots in the compiled regular expression. This is
+ /// always two times the number of capture groups (two slots per group).
+ fn slots_len(&self) -> usize;
+
+ /// Allocates fresh space for all capturing groups in this regex.
+ fn locations(&self) -> Locations {
+ Locations(vec![None; self.slots_len()])
+ }
+
+ /// Returns the position of the next character after `i`.
+ ///
+ /// For example, a haystack with type `&[u8]` probably returns `i+1`,
+ /// whereas a haystack with type `&str` probably returns `i` plus the
+ /// length of the next UTF-8 sequence.
+ fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
+
+ /// Returns the location of the shortest match.
+ fn shortest_match_at(
+ &self,
+ text: &Self::Text,
+ start: usize,
+ ) -> Option<usize>;
+
+ /// Returns whether the regex matches the text given.
+ fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
+
+ /// Returns the leftmost-first match location if one exists.
+ fn find_at(
+ &self,
+ text: &Self::Text,
+ start: usize,
+ ) -> Option<(usize, usize)>;
+
+ /// Returns the leftmost-first match location if one exists, and also
+ /// fills in any matching capture slot locations.
+ fn captures_read_at(
+ &self,
+ locs: &mut Locations,
+ text: &Self::Text,
+ start: usize,
+ ) -> Option<(usize, usize)>;
+
+ /// Returns an iterator over all non-overlapping successive leftmost-first
+ /// matches.
+ fn find_iter(self, text: &Self::Text) -> Matches<Self> {
+ Matches { re: self, text: text, last_end: 0, last_match: None }
+ }
+
+ /// Returns an iterator over all non-overlapping successive leftmost-first
+ /// matches with captures.
+ fn captures_iter(self, text: &Self::Text) -> CaptureMatches<Self> {
+ CaptureMatches(self.find_iter(text))
+ }
+}
+
+/// An iterator over all non-overlapping successive leftmost-first matches.
+pub struct Matches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't,
+{
+ re: R,
+ text: &'t R::Text,
+ last_end: usize,
+ last_match: Option<usize>,
+}
+
+impl<'t, R> Matches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't,
+{
+ /// Return the text being searched.
+ pub fn text(&self) -> &'t R::Text {
+ self.text
+ }
+
+ /// Return the underlying regex.
+ pub fn regex(&self) -> &R {
+ &self.re
+ }
+}
+
+impl<'t, R> Iterator for Matches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't + AsRef<[u8]>,
+{
+ type Item = (usize, usize);
+
+ fn next(&mut self) -> Option<(usize, usize)> {
+ if self.last_end > self.text.as_ref().len() {
+ return None;
+ }
+ let (s, e) = match self.re.find_at(self.text, self.last_end) {
+ None => return None,
+ Some((s, e)) => (s, e),
+ };
+ if s == e {
+ // This is an empty match. To ensure we make progress, start
+ // the next search at the smallest possible starting position
+ // of the next match following this one.
+ self.last_end = self.re.next_after_empty(self.text, e);
+ // Don't accept empty matches immediately following a match.
+ // Just move on to the next match.
+ if Some(e) == self.last_match {
+ return self.next();
+ }
+ } else {
+ self.last_end = e;
+ }
+ self.last_match = Some(e);
+ Some((s, e))
+ }
+}
+
+/// An iterator over all non-overlapping successive leftmost-first matches with
+/// captures.
+pub struct CaptureMatches<'t, R>(Matches<'t, R>)
+where
+ R: RegularExpression,
+ R::Text: 't;
+
+impl<'t, R> CaptureMatches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't,
+{
+ /// Return the text being searched.
+ pub fn text(&self) -> &'t R::Text {
+ self.0.text()
+ }
+
+ /// Return the underlying regex.
+ pub fn regex(&self) -> &R {
+ self.0.regex()
+ }
+}
+
+impl<'t, R> Iterator for CaptureMatches<'t, R>
+where
+ R: RegularExpression,
+ R::Text: 't + AsRef<[u8]>,
+{
+ type Item = Locations;
+
+ fn next(&mut self) -> Option<Locations> {
+ if self.0.last_end > self.0.text.as_ref().len() {
+ return None;
+ }
+ let mut locs = self.0.re.locations();
+ let (s, e) = match self.0.re.captures_read_at(
+ &mut locs,
+ self.0.text,
+ self.0.last_end,
+ ) {
+ None => return None,
+ Some((s, e)) => (s, e),
+ };
+ if s == e {
+ self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
+ if Some(e) == self.0.last_match {
+ return self.next();
+ }
+ } else {
+ self.0.last_end = e;
+ }
+ self.0.last_match = Some(e);
+ Some(locs)
+ }
+}