summaryrefslogtreecommitdiffstats
path: root/xpcom/string/RustRegex.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /xpcom/string/RustRegex.h
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--xpcom/string/RustRegex.h707
1 files changed, 707 insertions, 0 deletions
diff --git a/xpcom/string/RustRegex.h b/xpcom/string/RustRegex.h
new file mode 100644
index 0000000000..80b8140bb5
--- /dev/null
+++ b/xpcom/string/RustRegex.h
@@ -0,0 +1,707 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_RustRegex_h
+#define mozilla_RustRegex_h
+
+#include "nsPrintfCString.h"
+#include "nsTArray.h"
+#include "rure.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+
+// This header is a thin wrapper around the `rure.h` header file, which declares
+// the C API for interacting with the rust `regex` crate. This is intended to
+// make the type more ergonomic to use with mozilla types.
+
+class RustRegex;
+class RustRegexSet;
+class RustRegexOptions;
+class RustRegexCaptures;
+class RustRegexIter;
+class RustRegexIterCaptureNames;
+
+using RustRegexMatch = rure_match;
+
+/*
+ * RustRegexCaptures represents storage for sub-capture locations of a match.
+ *
+ * Computing the capture groups of a match can carry a significant performance
+ * penalty, so their use in the API is optional.
+ *
+ * A RustRegexCaptures value may outlive its corresponding RustRegex and can be
+ * freed independently.
+ *
+ * It is not safe to use from multiple threads simultaneously.
+ */
+class RustRegexCaptures final {
+ public:
+ RustRegexCaptures() = default;
+
+ // Check if the `RustRegexCaptures` object is valid.
+ bool IsValid() const { return mPtr != nullptr; }
+ explicit operator bool() const { return IsValid(); }
+
+ /*
+ * CaptureAt returns Some if and only if the capturing group at the
+ * index given was part of the match. If so, the returned RustRegexMatch
+ * object contains the start and end offsets (in bytes) of the match.
+ *
+ * If no capture group with the index aIdx exists, or the group was not part
+ * of the match, then Nothing is returned. (A capturing group exists if and
+ * only if aIdx is less than Length().)
+ *
+ * Note that index 0 corresponds to the full match.
+ */
+ Maybe<RustRegexMatch> CaptureAt(size_t aIdx) const {
+ RustRegexMatch match;
+ if (mPtr && rure_captures_at(mPtr.get(), aIdx, &match)) {
+ return Some(match);
+ }
+ return Nothing();
+ }
+ Maybe<RustRegexMatch> operator[](size_t aIdx) const {
+ return CaptureAt(aIdx);
+ }
+
+ /*
+ * Returns the number of capturing groups in this `RustRegexCaptures`.
+ */
+ size_t Length() const { return mPtr ? rure_captures_len(mPtr.get()) : 0; }
+
+ private:
+ friend class RustRegex;
+ friend class RustRegexIter;
+
+ explicit RustRegexCaptures(rure* aRe)
+ : mPtr(aRe ? rure_captures_new(aRe) : nullptr) {}
+
+ struct Deleter {
+ void operator()(rure_captures* ptr) const { rure_captures_free(ptr); }
+ };
+ UniquePtr<rure_captures, Deleter> mPtr;
+};
+
+/*
+ * RustRegexIterCaptureNames is an iterator over the list of capture group names
+ * in this particular RustRegex.
+ *
+ * A RustRegexIterCaptureNames value may not outlive its corresponding
+ * RustRegex, and should be destroyed before its corresponding RustRegex is
+ * destroyed.
+ *
+ * It is not safe to use from multiple threads simultaneously.
+ */
+class RustRegexIterCaptureNames {
+ public:
+ RustRegexIterCaptureNames() = delete;
+
+ // Check if the `RustRegexIterCaptureNames` object is valid.
+ bool IsValid() const { return mPtr != nullptr; }
+ explicit operator bool() const { return IsValid(); }
+
+ /*
+ * Advances the iterator and returns true if and only if another capture group
+ * name exists.
+ *
+ * The value of the capture group name is written to the provided pointer.
+ */
+ mozilla::Maybe<const char*> Next() {
+ char* next = nullptr;
+ if (mPtr && rure_iter_capture_names_next(mPtr.get(), &next)) {
+ return Some(next);
+ }
+ return Nothing();
+ }
+
+ private:
+ friend class RustRegex;
+
+ explicit RustRegexIterCaptureNames(rure* aRe)
+ : mPtr(aRe ? rure_iter_capture_names_new(aRe) : nullptr) {}
+
+ struct Deleter {
+ void operator()(rure_iter_capture_names* ptr) const {
+ rure_iter_capture_names_free(ptr);
+ }
+ };
+ UniquePtr<rure_iter_capture_names, Deleter> mPtr;
+};
+
+/*
+ * RustRegexIter is an iterator over successive non-overlapping matches in a
+ * particular haystack.
+ *
+ * A RustRegexIter value may not outlive its corresponding RustRegex and should
+ * be destroyed before its corresponding RustRegex is destroyed.
+ *
+ * It is not safe to use from multiple threads simultaneously.
+ */
+class RustRegexIter {
+ public:
+ RustRegexIter() = delete;
+
+ // Check if the `RustRegexIter` object is valid.
+ bool IsValid() const { return mPtr != nullptr; }
+ explicit operator bool() const { return IsValid(); }
+
+ /*
+ * Next() returns Some if and only if this regex matches anywhere in haystack.
+ * The returned RustRegexMatch object contains the start and end offsets (in
+ * bytes) of the match.
+ *
+ * If no match is found, then subsequent calls will return Nothing()
+ * indefinitely.
+ *
+ * Next() should be preferred to NextCaptures() since it may be faster.
+ *
+ * N.B. The performance of this search is not impacted by the presence of
+ * capturing groups in your regular expression.
+ */
+ mozilla::Maybe<RustRegexMatch> Next() {
+ RustRegexMatch match{};
+ if (mPtr &&
+ rure_iter_next(mPtr.get(), mHaystackPtr, mHaystackSize, &match)) {
+ return Some(match);
+ }
+ return Nothing();
+ }
+
+ /*
+ * NextCaptures returns a valid RustRegexCaptures if and only if this regex
+ * matches anywhere in haystack. If a match is found, then all of its capture
+ * locations are stored in the returned RustRegexCaptures object.
+ *
+ * If no match is found, then subsequent calls will return an invalid
+ * `RustRegexCaptures` indefinitely.
+ *
+ * Only use this function if you specifically need access to capture
+ * locations. It is not necessary to use this function just because your
+ * regular expression contains capturing groups.
+ *
+ * Capture locations can be accessed using the methods on RustRegexCaptures.
+ *
+ * N.B. The performance of this search can be impacted by the number of
+ * capturing groups. If you're using this function, it may be beneficial to
+ * use non-capturing groups (e.g., `(?:re)`) where possible.
+ */
+ RustRegexCaptures NextCaptures() {
+ RustRegexCaptures captures(mRe);
+ if (mPtr && rure_iter_next_captures(mPtr.get(), mHaystackPtr, mHaystackSize,
+ captures.mPtr.get())) {
+ return captures;
+ }
+ return {};
+ }
+
+ private:
+ friend class RustRegex;
+ RustRegexIter(rure* aRe, const std::string_view& aHaystack)
+ : mRe(aRe),
+ mHaystackPtr(reinterpret_cast<const uint8_t*>(aHaystack.data())),
+ mHaystackSize(aHaystack.size()),
+ mPtr(aRe ? rure_iter_new(aRe) : nullptr) {}
+
+ rure* MOZ_NON_OWNING_REF mRe;
+ const uint8_t* MOZ_NON_OWNING_REF mHaystackPtr;
+ size_t mHaystackSize;
+
+ struct Deleter {
+ void operator()(rure_iter* ptr) const { rure_iter_free(ptr); }
+ };
+ UniquePtr<rure_iter, Deleter> mPtr;
+};
+
+/*
+ * RustRegexOptions is the set of configuration options for compiling a regular
+ * expression.
+ *
+ * All flags on this type can be used to set default flags while compiling, and
+ * can be toggled in the expression itself using standard syntax, e.g. `(?i)`
+ * turns case-insensitive matching on, and `(?-i)` disables it.
+ *
+ * In addition, two non-flag options are available: setting the size limit of
+ * the compiled program and setting the size limit of the cache of states that
+ * the DFA uses while searching.
+ *
+ * For most uses, the default settings will work fine, and a default-constructed
+ * RustRegexOptions can be passed.
+ */
+class RustRegexOptions {
+ public:
+ RustRegexOptions() = default;
+
+ /*
+ * Set the value for the case insensitive (i) flag.
+ *
+ * When enabled, letters in the pattern will match both upper case and lower
+ * case variants.
+ */
+ RustRegexOptions& CaseInsensitive(bool aYes) {
+ return SetFlag(aYes, RURE_FLAG_CASEI);
+ }
+
+ /*
+ * Set the value for the multi-line matching (m) flag.
+ *
+ * When enabled, ^ matches the beginning of lines and $ matches the end of
+ * lines.
+ *
+ * By default, they match beginning/end of the input.
+ */
+ RustRegexOptions& MultiLine(bool aYes) {
+ return SetFlag(aYes, RURE_FLAG_MULTI);
+ }
+
+ /*
+ * Set the value for the any character (s) flag, where in . matches anything
+ * when s is set and matches anything except for new line when it is not set
+ * (the default).
+ *
+ * N.B. “matches anything” means “any byte” when Unicode is disabled and means
+ * “any valid UTF-8 encoding of any Unicode scalar value” when Unicode is
+ * enabled.
+ */
+ RustRegexOptions& DotMatchesNewLine(bool aYes) {
+ return SetFlag(aYes, RURE_FLAG_DOTNL);
+ }
+
+ /*
+ * Set the value for the greedy swap (U) flag.
+ *
+ * When enabled, a pattern like a* is lazy (tries to find shortest match) and
+ * a*? is greedy (tries to find longest match).
+ *
+ * By default, a* is greedy and a*? is lazy.
+ */
+ RustRegexOptions& SwapGreed(bool aYes) {
+ return SetFlag(aYes, RURE_FLAG_SWAP_GREED);
+ }
+
+ /*
+ * Set the value for the ignore whitespace (x) flag.
+ *
+ * When enabled, whitespace such as new lines and spaces will be ignored
+ * between expressions of the pattern, and # can be used to start a comment
+ * until the next new line.
+ */
+ RustRegexOptions& IgnoreWhitespace(bool aYes) {
+ return SetFlag(aYes, RURE_FLAG_SPACE);
+ }
+
+ /*
+ * Set the value for the Unicode (u) flag.
+ *
+ * Enabled by default. When disabled, character classes such as \w only match
+ * ASCII word characters instead of all Unicode word characters.
+ */
+ RustRegexOptions& Unicode(bool aYes) {
+ return SetFlag(aYes, RURE_FLAG_UNICODE);
+ }
+
+ /*
+ * SizeLimit sets the appoximate size limit of the compiled regular
+ * expression.
+ *
+ * This size limit roughly corresponds to the number of bytes occupied by
+ * a single compiled program. If the program would exceed this number,
+ * then an invalid RustRegex will be constructed.
+ */
+ RustRegexOptions& SizeLimit(size_t aLimit) {
+ mSizeLimit = Some(aLimit);
+ return *this;
+ }
+
+ /*
+ * DFASizeLimit sets the approximate size of the cache used by the DFA during
+ * search.
+ *
+ * This roughly corresponds to the number of bytes that the DFA will use while
+ * searching.
+ *
+ * Note that this is a *per thread* limit. There is no way to set a global
+ * limit. In particular, if a regular expression is used from multiple threads
+ * simultaneously, then each thread may use up to the number of bytes
+ * specified here.
+ */
+ RustRegexOptions& DFASizeLimit(size_t aLimit) {
+ mDFASizeLimit = Some(aLimit);
+ return *this;
+ }
+
+ private:
+ friend class RustRegex;
+ friend class RustRegexSet;
+
+ struct OptionsDeleter {
+ void operator()(rure_options* ptr) const { rure_options_free(ptr); }
+ };
+
+ UniquePtr<rure_options, OptionsDeleter> GetOptions() const {
+ UniquePtr<rure_options, OptionsDeleter> options;
+ if (mSizeLimit || mDFASizeLimit) {
+ options.reset(rure_options_new());
+ if (mSizeLimit) {
+ rure_options_size_limit(options.get(), *mSizeLimit);
+ }
+ if (mDFASizeLimit) {
+ rure_options_dfa_size_limit(options.get(), *mDFASizeLimit);
+ }
+ }
+ return options;
+ }
+
+ uint32_t GetFlags() const { return mFlags; }
+
+ RustRegexOptions& SetFlag(bool aYes, uint32_t aFlag) {
+ if (aYes) {
+ mFlags |= aFlag;
+ } else {
+ mFlags &= ~aFlag;
+ }
+ return *this;
+ }
+
+ uint32_t mFlags = RURE_DEFAULT_FLAGS;
+ Maybe<size_t> mSizeLimit;
+ Maybe<size_t> mDFASizeLimit;
+};
+
+/*
+ * RustRegex is the type of a compiled regular expression.
+ *
+ * A RustRegex can be safely used from multiple threads simultaneously.
+ *
+ * When calling the matching methods on this type, they will generally have the
+ * following parameters:
+ *
+ * aHaystack
+ * may contain arbitrary bytes, but ASCII compatible text is more useful.
+ * UTF-8 is even more useful. Other text encodings aren't supported.
+ *
+ * aStart
+ * the position in bytes at which to start searching. Note that setting the
+ * start position is distinct from using a substring for `aHaystack`, since
+ * the regex engine may look at bytes before the start position to determine
+ * match information. For example, if the start position is greater than 0,
+ * then the \A ("begin text") anchor can never match.
+ */
+class RustRegex final {
+ public:
+ // Create a new invalid RustRegex object
+ RustRegex() = default;
+
+ /*
+ * Compiles the given pattern into a regular expression. The pattern must be
+ * valid UTF-8 and the length corresponds to the number of bytes in the
+ * pattern.
+ *
+ * If an error occurs, the constructed RustRegex will be `!IsValid()`.
+ *
+ * The compiled expression returned may be used from multiple threads
+ * simultaneously.
+ */
+ explicit RustRegex(const std::string_view& aPattern,
+ const RustRegexOptions& aOptions = {}) {
+#ifdef DEBUG
+ rure_error* error = rure_error_new();
+#else
+ rure_error* error = nullptr;
+#endif
+ mPtr.reset(rure_compile(reinterpret_cast<const uint8_t*>(aPattern.data()),
+ aPattern.size(), aOptions.GetFlags(),
+ aOptions.GetOptions().get(), error));
+#ifdef DEBUG
+ if (!mPtr) {
+ NS_WARNING(nsPrintfCString("RustRegex compile failed: %s",
+ rure_error_message(error))
+ .get());
+ }
+ rure_error_free(error);
+#endif
+ }
+
+ // Check if the compiled `RustRegex` is valid.
+ bool IsValid() const { return mPtr != nullptr; }
+ explicit operator bool() const { return IsValid(); }
+
+ /*
+ * IsMatch returns true if and only if this regex matches anywhere in
+ * aHaystack.
+ *
+ * See the type-level comment for details on aHaystack and aStart.
+ *
+ * IsMatch() should be preferred to Find() since it may be faster.
+ *
+ * N.B. The performance of this search is not impacted by the presence of
+ * capturing groups in your regular expression.
+ */
+ bool IsMatch(const std::string_view& aHaystack, size_t aStart = 0) const {
+ return mPtr &&
+ rure_is_match(mPtr.get(),
+ reinterpret_cast<const uint8_t*>(aHaystack.data()),
+ aHaystack.size(), aStart);
+ }
+
+ /*
+ * Find returns Some if and only if this regex matches anywhere in
+ * haystack. The returned RustRegexMatch object contains the start and end
+ * offsets (in bytes) of the match.
+ *
+ * See the type-level comment for details on aHaystack and aStart.
+ *
+ * Find() should be preferred to FindCaptures() since it may be faster.
+ *
+ * N.B. The performance of this search is not impacted by the presence of
+ * capturing groups in your regular expression.
+ */
+ Maybe<RustRegexMatch> Find(const std::string_view& aHaystack,
+ size_t aStart = 0) const {
+ RustRegexMatch match{};
+ if (mPtr && rure_find(mPtr.get(),
+ reinterpret_cast<const uint8_t*>(aHaystack.data()),
+ aHaystack.size(), aStart, &match)) {
+ return Some(match);
+ }
+ return Nothing();
+ }
+
+ /*
+ * FindCaptures() returns a valid RustRegexCaptures if and only if this
+ * regex matches anywhere in haystack. If a match is found, then all of its
+ * capture locations are stored in the returned RustRegexCaptures object.
+ *
+ * See the type-level comment for details on aHaystack and aStart.
+ *
+ * Only use this function if you specifically need access to capture
+ * locations. It is not necessary to use this function just because your
+ * regular expression contains capturing groups.
+ *
+ * Capture locations can be accessed using the methods on RustRegexCaptures.
+ *
+ * N.B. The performance of this search can be impacted by the number of
+ * capturing groups. If you're using this function, it may be beneficial to
+ * use non-capturing groups (e.g., `(?:re)`) where possible.
+ */
+ RustRegexCaptures FindCaptures(const std::string_view& aHaystack,
+ size_t aStart = 0) const {
+ RustRegexCaptures captures(mPtr.get());
+ if (mPtr &&
+ rure_find_captures(mPtr.get(),
+ reinterpret_cast<const uint8_t*>(aHaystack.data()),
+ aHaystack.size(), aStart, captures.mPtr.get())) {
+ return captures;
+ }
+ return {};
+ }
+
+ /*
+ * ShortestMatch() returns Some if and only if this regex matches anywhere
+ * in haystack. If a match is found, then its end location is stored in the
+ * pointer given. The end location is the place at which the regex engine
+ * determined that a match exists, but may occur before the end of the
+ * proper leftmost-first match.
+ *
+ * See the type-level comment for details on aHaystack and aStart.
+ *
+ * ShortestMatch should be preferred to Find since it may be faster.
+ *
+ * N.B. The performance of this search is not impacted by the presence of
+ * capturing groups in your regular expression.
+ */
+ Maybe<size_t> ShortestMatch(const std::string_view& aHaystack,
+ size_t aStart = 0) const {
+ size_t end = 0;
+ if (mPtr &&
+ rure_shortest_match(mPtr.get(),
+ reinterpret_cast<const uint8_t*>(aHaystack.data()),
+ aHaystack.size(), aStart, &end)) {
+ return Some(end);
+ }
+ return Nothing();
+ }
+
+ /*
+ * Create an iterator over all successive non-overlapping matches of this
+ * regex in aHaystack.
+ *
+ * See the type-level comment for details on aHaystack.
+ *
+ * Both aHaystack and this regex must remain valid until the returned
+ * `RustRegexIter` is destroyed.
+ */
+ RustRegexIter IterMatches(const std::string_view& aHaystack) const {
+ return RustRegexIter(mPtr.get(), aHaystack);
+ }
+
+ /*
+ * Returns the capture index for the name given. If no such named capturing
+ * group exists in this regex, then -1 is returned.
+ *
+ * The capture index may be used with RustRegexCaptures::CaptureAt.
+ *
+ * This function never returns 0 since the first capture group always
+ * corresponds to the entire match and is always unnamed.
+ */
+ int32_t CaptureNameIndex(const char* aName) const {
+ return mPtr ? rure_capture_name_index(mPtr.get(), aName) : -1;
+ }
+
+ /*
+ * Create an iterator over the list of capture group names in this particular
+ * regex.
+ *
+ * This regex must remain valid until the returned `RustRegexIterCaptureNames`
+ * is destroyed.
+ */
+ RustRegexIterCaptureNames IterCaptureNames() const {
+ return RustRegexIterCaptureNames(mPtr.get());
+ }
+
+ /*
+ * Count the number of successive non-overlapping matches of this regex in
+ * aHaystack.
+ *
+ * See the type-level comment for details on aHaystack.
+ */
+ size_t CountMatches(const std::string_view& aHaystack) const {
+ size_t count = 0;
+ auto iter = IterMatches(aHaystack);
+ while (iter.Next()) {
+ count++;
+ }
+ return count;
+ }
+
+ private:
+ struct Deleter {
+ void operator()(rure* ptr) const { rure_free(ptr); }
+ };
+ UniquePtr<rure, Deleter> mPtr;
+};
+
+/*
+ * RustRegexSet is the type of a set of compiled regular expression.
+ *
+ * A RustRegexSet can be safely used from multiple threads simultaneously.
+ *
+ * When calling the matching methods on this type, they will generally have the
+ * following parameters:
+ *
+ * aHaystack
+ * may contain arbitrary bytes, but ASCII compatible text is more useful.
+ * UTF-8 is even more useful. Other text encodings aren't supported.
+ *
+ * aStart
+ * the position in bytes at which to start searching. Note that setting the
+ * start position is distinct from using a substring for `aHaystack`, since
+ * the regex engine may look at bytes before the start position to determine
+ * match information. For example, if the start position is greater than 0,
+ * then the \A ("begin text") anchor can never match.
+ */
+class RustRegexSet final {
+ public:
+ /*
+ * Compiles the given range of patterns into a single regular expression which
+ * can be matched in a linear-scan. Each pattern in aPatterns must be valid
+ * UTF-8, and implicitly coerce to `std::string_view`.
+ *
+ * If an error occurs, the constructed RustRegexSet will be `!IsValid()`.
+ *
+ * The compiled expression returned may be used from multiple threads
+ * simultaneously.
+ */
+ template <typename Patterns>
+ explicit RustRegexSet(Patterns&& aPatterns,
+ const RustRegexOptions& aOptions = {}) {
+#ifdef DEBUG
+ rure_error* error = rure_error_new();
+#else
+ rure_error* error = nullptr;
+#endif
+ AutoTArray<const uint8_t*, 4> patternPtrs;
+ AutoTArray<size_t, 4> patternSizes;
+ for (auto&& pattern : std::forward<Patterns>(aPatterns)) {
+ std::string_view view = pattern;
+ patternPtrs.AppendElement(reinterpret_cast<const uint8_t*>(view.data()));
+ patternSizes.AppendElement(view.size());
+ }
+ mPtr.reset(rure_compile_set(patternPtrs.Elements(), patternSizes.Elements(),
+ patternPtrs.Length(), aOptions.GetFlags(),
+ aOptions.GetOptions().get(), error));
+#ifdef DEBUG
+ if (!mPtr) {
+ NS_WARNING(nsPrintfCString("RustRegexSet compile failed: %s",
+ rure_error_message(error))
+ .get());
+ }
+ rure_error_free(error);
+#endif
+ }
+
+ // Check if the `RustRegexSet` object is valid.
+ bool IsValid() const { return mPtr != nullptr; }
+ explicit operator bool() const { return IsValid(); }
+
+ /*
+ * IsMatch returns true if and only if any regexes within the set
+ * match anywhere in the haystack. Once a match has been located, the
+ * matching engine will quit immediately.
+ *
+ * See the type-level comment for details on aHaystack and aStart.
+ */
+ bool IsMatch(const std::string_view& aHaystack, size_t aStart = 0) const {
+ return mPtr &&
+ rure_set_is_match(mPtr.get(),
+ reinterpret_cast<const uint8_t*>(aHaystack.data()),
+ aHaystack.size(), aStart);
+ }
+
+ struct SetMatches {
+ bool matchedAny = false;
+ nsTArray<bool> matches;
+ };
+
+ /*
+ * Matches() compares each regex in the set against the haystack and
+ * returns a list with the match result of each pattern. Match results are
+ * ordered in the same way as the regex set was compiled. For example, index 0
+ * of matches corresponds to the first pattern passed to the constructor.
+ *
+ * See the type-level comment for details on aHaystack and aStart.
+ *
+ * Only use this function if you specifically need to know which regexes
+ * matched within the set. To determine if any of the regexes matched without
+ * caring which, use IsMatch.
+ */
+ SetMatches Matches(const std::string_view& aHaystack,
+ size_t aStart = 0) const {
+ nsTArray<bool> matches;
+ matches.SetLength(Length());
+ bool any = mPtr && rure_set_matches(
+ mPtr.get(),
+ reinterpret_cast<const uint8_t*>(aHaystack.data()),
+ aHaystack.size(), aStart, matches.Elements());
+ return SetMatches{any, std::move(matches)};
+ }
+
+ /*
+ * Returns the number of patterns the regex set was compiled with.
+ */
+ size_t Length() const { return mPtr ? rure_set_len(mPtr.get()) : 0; }
+
+ private:
+ struct Deleter {
+ void operator()(rure_set* ptr) const { rure_set_free(ptr); }
+ };
+ UniquePtr<rure_set, Deleter> mPtr;
+};
+
+} // namespace mozilla
+
+#endif // mozilla_RustRegex_h