diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /js/src/irregexp/imported/regexp-ast.h | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/irregexp/imported/regexp-ast.h')
-rw-r--r-- | js/src/irregexp/imported/regexp-ast.h | 735 |
1 files changed, 735 insertions, 0 deletions
diff --git a/js/src/irregexp/imported/regexp-ast.h b/js/src/irregexp/imported/regexp-ast.h new file mode 100644 index 0000000000..997282e519 --- /dev/null +++ b/js/src/irregexp/imported/regexp-ast.h @@ -0,0 +1,735 @@ +// Copyright 2016 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_REGEXP_REGEXP_AST_H_ +#define V8_REGEXP_REGEXP_AST_H_ + +#ifdef V8_INTL_SUPPORT +#include "unicode/uniset.h" +#endif // V8_INTL_SUPPORT +#include "irregexp/RegExpShim.h" + +namespace v8 { +namespace internal { + +#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ + VISIT(Disjunction) \ + VISIT(Alternative) \ + VISIT(Assertion) \ + VISIT(ClassRanges) \ + VISIT(ClassSetOperand) \ + VISIT(ClassSetExpression) \ + VISIT(Atom) \ + VISIT(Quantifier) \ + VISIT(Capture) \ + VISIT(Group) \ + VISIT(Lookaround) \ + VISIT(BackReference) \ + VISIT(Empty) \ + VISIT(Text) + +#define FORWARD_DECLARE(Name) class RegExp##Name; +FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) +#undef FORWARD_DECLARE + +class RegExpCompiler; +class RegExpNode; +class RegExpTree; + +class RegExpVisitor { + public: + virtual ~RegExpVisitor() = default; +#define MAKE_CASE(Name) \ + virtual void* Visit##Name(RegExp##Name*, void* data) = 0; + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) +#undef MAKE_CASE +}; + +// A simple closed interval. +class Interval { + public: + Interval() : from_(kNone), to_(kNone - 1) {} // '- 1' for branchless size(). + Interval(int from, int to) : from_(from), to_(to) {} + Interval Union(Interval that) { + if (that.from_ == kNone) return *this; + if (from_ == kNone) return that; + return Interval(std::min(from_, that.from_), std::max(to_, that.to_)); + } + + static Interval Empty() { return Interval(); } + + bool Contains(int value) const { return (from_ <= value) && (value <= to_); } + bool is_empty() const { return from_ == kNone; } + int from() const { return from_; } + int to() const { return to_; } + int size() const { return to_ - from_ + 1; } + + static constexpr int kNone = -1; + + private: + int from_; + int to_; +}; + +// Named standard character sets. +enum class StandardCharacterSet : char { + kWhitespace = 's', // Like /\s/. + kNotWhitespace = 'S', // Like /\S/. + kWord = 'w', // Like /\w/. + kNotWord = 'W', // Like /\W/. + kDigit = 'd', // Like /\d/. + kNotDigit = 'D', // Like /\D/. + kLineTerminator = 'n', // The inverse of /./. + kNotLineTerminator = '.', // Like /./. + kEverything = '*', // Matches every character, like /./s. +}; + +// Represents code points (with values up to 0x10FFFF) in the range from from_ +// to to_, both ends are inclusive. +class CharacterRange { + public: + CharacterRange() = default; + // For compatibility with the CHECK_OK macro. + CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT + + static inline CharacterRange Singleton(base::uc32 value) { + return CharacterRange(value, value); + } + static inline CharacterRange Range(base::uc32 from, base::uc32 to) { + DCHECK(0 <= from && to <= kMaxCodePoint); + DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to)); + return CharacterRange(from, to); + } + static inline CharacterRange Everything() { + return CharacterRange(0, kMaxCodePoint); + } + + static inline ZoneList<CharacterRange>* List(Zone* zone, + CharacterRange range) { + ZoneList<CharacterRange>* list = + zone->New<ZoneList<CharacterRange>>(1, zone); + list->Add(range, zone); + return list; + } + + // Add class escapes. Add case equivalent closure for \w and \W if necessary. + V8_EXPORT_PRIVATE static void AddClassEscape( + StandardCharacterSet standard_character_set, + ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents, + Zone* zone); + // Add case equivalents to ranges. Only used for /i, not for /ui or /vi, as + // the semantics for unicode mode are slightly different. + // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4. + V8_EXPORT_PRIVATE static void AddCaseEquivalents( + Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges, + bool is_one_byte); + // Add case equivalent code points to ranges. Only used for /ui and /vi, not + // for /i, as the semantics for non-unicode mode are slightly different. + // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4. + static void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, + Zone* zone); + +#ifdef V8_INTL_SUPPORT + // Creates the closeOver of the given UnicodeSet, removing all + // characters/strings that can't be derived via simple case folding. + static void UnicodeSimpleCloseOver(icu::UnicodeSet& set); +#endif // V8_INTL_SUPPORT + + bool Contains(base::uc32 i) const { return from_ <= i && i <= to_; } + base::uc32 from() const { return from_; } + base::uc32 to() const { return to_; } + bool IsEverything(base::uc32 max) const { return from_ == 0 && to_ >= max; } + bool IsSingleton() const { return from_ == to_; } + + // Whether a range list is in canonical form: Ranges ordered by from value, + // and ranges non-overlapping and non-adjacent. + V8_EXPORT_PRIVATE static bool IsCanonical( + const ZoneList<CharacterRange>* ranges); + // Convert range list to canonical form. The characters covered by the ranges + // will still be the same, but no character is in more than one range, and + // adjacent ranges are merged. The resulting list may be shorter than the + // original, but cannot be longer. + static void Canonicalize(ZoneList<CharacterRange>* ranges); + // Negate the contents of a character range in canonical form. + static void Negate(const ZoneList<CharacterRange>* src, + ZoneList<CharacterRange>* dst, Zone* zone); + // Intersect the contents of two character ranges in canonical form. + static void Intersect(const ZoneList<CharacterRange>* lhs, + const ZoneList<CharacterRange>* rhs, + ZoneList<CharacterRange>* dst, Zone* zone); + // Subtract the contents of |to_remove| from the contents of |src|. + static void Subtract(const ZoneList<CharacterRange>* src, + const ZoneList<CharacterRange>* to_remove, + ZoneList<CharacterRange>* dst, Zone* zone); + // Remove all ranges outside the one-byte range. + static void ClampToOneByte(ZoneList<CharacterRange>* ranges); + // Checks if two ranges (both need to be canonical) are equal. + static bool Equals(const ZoneList<CharacterRange>* lhs, + const ZoneList<CharacterRange>* rhs); + + private: + CharacterRange(base::uc32 from, base::uc32 to) : from_(from), to_(to) {} + + static constexpr int kMaxCodePoint = 0x10ffff; + + base::uc32 from_ = 0; + base::uc32 to_ = 0; +}; + +inline bool operator==(const CharacterRange& lhs, const CharacterRange& rhs) { + return lhs.from() == rhs.from() && lhs.to() == rhs.to(); +} +inline bool operator!=(const CharacterRange& lhs, const CharacterRange& rhs) { + return !operator==(lhs, rhs); +} + +#define DECL_BOILERPLATE(Name) \ + void* Accept(RegExpVisitor* visitor, void* data) override; \ + RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) \ + override; \ + RegExp##Name* As##Name() override; \ + bool Is##Name() override + +class RegExpTree : public ZoneObject { + public: + static const int kInfinity = kMaxInt; + virtual ~RegExpTree() = default; + virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; + virtual RegExpNode* ToNode(RegExpCompiler* compiler, + RegExpNode* on_success) = 0; + virtual bool IsTextElement() { return false; } + virtual bool IsAnchoredAtStart() { return false; } + virtual bool IsAnchoredAtEnd() { return false; } + virtual int min_match() = 0; + virtual int max_match() = 0; + // Returns the interval of registers used for captures within this + // expression. + virtual Interval CaptureRegisters() { return Interval::Empty(); } + virtual void AppendToText(RegExpText* text, Zone* zone); + V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, Zone* zone); +#define MAKE_ASTYPE(Name) \ + virtual RegExp##Name* As##Name(); \ + virtual bool Is##Name(); + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) +#undef MAKE_ASTYPE +}; + + +class RegExpDisjunction final : public RegExpTree { + public: + explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); + + DECL_BOILERPLATE(Disjunction); + + Interval CaptureRegisters() override; + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + ZoneList<RegExpTree*>* alternatives() const { return alternatives_; } + + private: + bool SortConsecutiveAtoms(RegExpCompiler* compiler); + void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); + void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); + ZoneList<RegExpTree*>* alternatives_; + int min_match_; + int max_match_; +}; + + +class RegExpAlternative final : public RegExpTree { + public: + explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); + + DECL_BOILERPLATE(Alternative); + + Interval CaptureRegisters() override; + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + ZoneList<RegExpTree*>* nodes() const { return nodes_; } + + private: + ZoneList<RegExpTree*>* nodes_; + int min_match_; + int max_match_; +}; + + +class RegExpAssertion final : public RegExpTree { + public: + enum class Type { + START_OF_LINE = 0, + START_OF_INPUT = 1, + END_OF_LINE = 2, + END_OF_INPUT = 3, + BOUNDARY = 4, + NON_BOUNDARY = 5, + LAST_ASSERTION_TYPE = NON_BOUNDARY, + }; + explicit RegExpAssertion(Type type) : assertion_type_(type) {} + + DECL_BOILERPLATE(Assertion); + + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + int min_match() override { return 0; } + int max_match() override { return 0; } + Type assertion_type() const { return assertion_type_; } + + private: + const Type assertion_type_; +}; + +class CharacterSet final { + public: + explicit CharacterSet(StandardCharacterSet standard_set_type) + : standard_set_type_(standard_set_type) {} + explicit CharacterSet(ZoneList<CharacterRange>* ranges) : ranges_(ranges) {} + + ZoneList<CharacterRange>* ranges(Zone* zone); + StandardCharacterSet standard_set_type() const { + return standard_set_type_.value(); + } + void set_standard_set_type(StandardCharacterSet standard_set_type) { + standard_set_type_ = standard_set_type; + } + bool is_standard() const { return standard_set_type_.has_value(); } + V8_EXPORT_PRIVATE void Canonicalize(); + + private: + ZoneList<CharacterRange>* ranges_ = nullptr; + base::Optional<StandardCharacterSet> standard_set_type_; +}; + +class RegExpClassRanges final : public RegExpTree { + public: + // NEGATED: The character class is negated and should match everything but + // the specified ranges. + // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split + // surrogate and should not be unicode-desugared (crbug.com/641091). + enum Flag { + NEGATED = 1 << 0, + CONTAINS_SPLIT_SURROGATE = 1 << 1, + }; + using ClassRangesFlags = base::Flags<Flag>; + + RegExpClassRanges(Zone* zone, ZoneList<CharacterRange>* ranges, + ClassRangesFlags class_ranges_flags = ClassRangesFlags()) + : set_(ranges), class_ranges_flags_(class_ranges_flags) { + // Convert the empty set of ranges to the negated Everything() range. + if (ranges->is_empty()) { + ranges->Add(CharacterRange::Everything(), zone); + class_ranges_flags_ ^= NEGATED; + } + } + explicit RegExpClassRanges(StandardCharacterSet standard_set_type) + : set_(standard_set_type), class_ranges_flags_() {} + + DECL_BOILERPLATE(ClassRanges); + + bool IsTextElement() override { return true; } + int min_match() override { return 1; } + // The character class may match two code units for unicode regexps. + // TODO(yangguo): we should split this class for usage in TextElement, and + // make max_match() dependent on the character class content. + int max_match() override { return 2; } + + void AppendToText(RegExpText* text, Zone* zone) override; + + // TODO(lrn): Remove need for complex version if is_standard that + // recognizes a mangled standard set and just do { return set_.is_special(); } + bool is_standard(Zone* zone); + // Returns a value representing the standard character set if is_standard() + // returns true. + StandardCharacterSet standard_type() const { + return set_.standard_set_type(); + } + + CharacterSet character_set() const { return set_; } + ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } + + bool is_negated() const { return (class_ranges_flags_ & NEGATED) != 0; } + bool contains_split_surrogate() const { + return (class_ranges_flags_ & CONTAINS_SPLIT_SURROGATE) != 0; + } + + private: + CharacterSet set_; + ClassRangesFlags class_ranges_flags_; +}; + +struct CharacterClassStringLess { + bool operator()(const base::Vector<const base::uc32>& lhs, + const base::Vector<const base::uc32>& rhs) const { + // Longer strings first so we generate matches for the largest string + // possible. + if (lhs.length() != rhs.length()) { + return lhs.length() > rhs.length(); + } + for (int i = 0; i < lhs.length(); i++) { + if (lhs[i] != rhs[i]) { + return lhs[i] < rhs[i]; + } + } + return false; + } +}; + +// A type used for strings as part of character classes (only possible in +// unicode sets mode). +// We use a ZoneMap instead of an UnorderedZoneMap because we need to match +// the longest alternatives first. By using a ZoneMap with the custom comparator +// we can avoid sorting before assembling the code. +// Strings are likely short (the largest string in current unicode properties +// consists of 10 code points). +using CharacterClassStrings = ZoneMap<base::Vector<const base::uc32>, + RegExpTree*, CharacterClassStringLess>; + +// TODO(pthier): If we are sure we don't want to use icu::UnicodeSets +// (performance evaluation pending), this class can be merged with +// RegExpClassRanges. +class RegExpClassSetOperand final : public RegExpTree { + public: + RegExpClassSetOperand(ZoneList<CharacterRange>* ranges, + CharacterClassStrings* strings); + + DECL_BOILERPLATE(ClassSetOperand); + + bool IsTextElement() override { return true; } + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + + void Union(RegExpClassSetOperand* other, Zone* zone); + void Intersect(RegExpClassSetOperand* other, + ZoneList<CharacterRange>* temp_ranges, Zone* zone); + void Subtract(RegExpClassSetOperand* other, + ZoneList<CharacterRange>* temp_ranges, Zone* zone); + + bool has_strings() const { return strings_ != nullptr && !strings_->empty(); } + ZoneList<CharacterRange>* ranges() { return ranges_; } + CharacterClassStrings* strings() { + DCHECK_NOT_NULL(strings_); + return strings_; + } + + private: + ZoneList<CharacterRange>* ranges_; + CharacterClassStrings* strings_; + int min_match_; + int max_match_; +}; + +class RegExpClassSetExpression final : public RegExpTree { + public: + enum class OperationType { kUnion, kIntersection, kSubtraction }; + + RegExpClassSetExpression(OperationType op, bool is_negated, + bool may_contain_strings, + ZoneList<RegExpTree*>* operands); + + DECL_BOILERPLATE(ClassSetExpression); + + // Create an empty class set expression (matches everything if |is_negated|, + // nothing otherwise). + static RegExpClassSetExpression* Empty(Zone* zone, bool is_negated); + + bool IsTextElement() override { return true; } + int min_match() override { return 0; } + int max_match() override { return max_match_; } + + OperationType operation() const { return operation_; } + bool is_negated() const { return is_negated_; } + bool may_contain_strings() const { return may_contain_strings_; } + const ZoneList<RegExpTree*>* operands() const { return operands_; } + ZoneList<RegExpTree*>* operands() { return operands_; } + + private: + // Recursively evaluates the tree rooted at |root|, computing the valid + // CharacterRanges and strings after applying all set operations. + // The original tree will be modified by this method, so don't store pointers + // to inner nodes of the tree somewhere else! + // Modifying the tree in-place saves memory and speeds up multiple calls of + // the method (e.g. when unrolling quantifiers). + // |temp_ranges| is used for intermediate results, passed as parameter to + // avoid allocating new lists all the time. + static RegExpClassSetOperand* ComputeExpression( + RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone); + + const OperationType operation_; + const bool is_negated_; + const bool may_contain_strings_; + ZoneList<RegExpTree*>* operands_ = nullptr; + int max_match_; +}; + +class RegExpAtom final : public RegExpTree { + public: + explicit RegExpAtom(base::Vector<const base::uc16> data) : data_(data) {} + + DECL_BOILERPLATE(Atom); + + bool IsTextElement() override { return true; } + int min_match() override { return data_.length(); } + int max_match() override { return data_.length(); } + void AppendToText(RegExpText* text, Zone* zone) override; + + base::Vector<const base::uc16> data() const { return data_; } + int length() const { return data_.length(); } + + private: + base::Vector<const base::uc16> data_; +}; + +class TextElement final { + public: + enum TextType { ATOM, CLASS_RANGES }; + + static TextElement Atom(RegExpAtom* atom); + static TextElement ClassRanges(RegExpClassRanges* class_ranges); + + int cp_offset() const { return cp_offset_; } + void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } + int length() const; + + TextType text_type() const { return text_type_; } + + RegExpTree* tree() const { return tree_; } + + RegExpAtom* atom() const { + DCHECK(text_type() == ATOM); + return reinterpret_cast<RegExpAtom*>(tree()); + } + + RegExpClassRanges* class_ranges() const { + DCHECK(text_type() == CLASS_RANGES); + return reinterpret_cast<RegExpClassRanges*>(tree()); + } + + private: + TextElement(TextType text_type, RegExpTree* tree) + : cp_offset_(-1), text_type_(text_type), tree_(tree) {} + + int cp_offset_; + TextType text_type_; + RegExpTree* tree_; +}; + +class RegExpText final : public RegExpTree { + public: + explicit RegExpText(Zone* zone) : elements_(2, zone) {} + + DECL_BOILERPLATE(Text); + + bool IsTextElement() override { return true; } + int min_match() override { return length_; } + int max_match() override { return length_; } + void AppendToText(RegExpText* text, Zone* zone) override; + void AddElement(TextElement elm, Zone* zone) { + elements_.Add(elm, zone); + length_ += elm.length(); + } + ZoneList<TextElement>* elements() { return &elements_; } + + private: + ZoneList<TextElement> elements_; + int length_ = 0; +}; + + +class RegExpQuantifier final : public RegExpTree { + public: + enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; + RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) + : body_(body), + min_(min), + max_(max), + quantifier_type_(type) { + if (min > 0 && body->min_match() > kInfinity / min) { + min_match_ = kInfinity; + } else { + min_match_ = min * body->min_match(); + } + if (max > 0 && body->max_match() > kInfinity / max) { + max_match_ = kInfinity; + } else { + max_match_ = max * body->max_match(); + } + } + + DECL_BOILERPLATE(Quantifier); + + static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, + RegExpCompiler* compiler, RegExpNode* on_success, + bool not_at_start = false); + Interval CaptureRegisters() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + int min() const { return min_; } + int max() const { return max_; } + QuantifierType quantifier_type() const { return quantifier_type_; } + bool is_possessive() const { return quantifier_type_ == POSSESSIVE; } + bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; } + bool is_greedy() const { return quantifier_type_ == GREEDY; } + RegExpTree* body() const { return body_; } + + private: + RegExpTree* body_; + int min_; + int max_; + int min_match_; + int max_match_; + QuantifierType quantifier_type_; +}; + + +class RegExpCapture final : public RegExpTree { + public: + explicit RegExpCapture(int index) + : body_(nullptr), + index_(index), + min_match_(0), + max_match_(0), + name_(nullptr) {} + + DECL_BOILERPLATE(Capture); + + static RegExpNode* ToNode(RegExpTree* body, int index, + RegExpCompiler* compiler, RegExpNode* on_success); + bool IsAnchoredAtStart() override; + bool IsAnchoredAtEnd() override; + Interval CaptureRegisters() override; + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + RegExpTree* body() { return body_; } + void set_body(RegExpTree* body) { + body_ = body; + min_match_ = body->min_match(); + max_match_ = body->max_match(); + } + int index() const { return index_; } + const ZoneVector<base::uc16>* name() const { return name_; } + void set_name(const ZoneVector<base::uc16>* name) { name_ = name; } + static int StartRegister(int index) { return index * 2; } + static int EndRegister(int index) { return index * 2 + 1; } + + private: + RegExpTree* body_ = nullptr; + int index_; + int min_match_ = 0; + int max_match_ = 0; + const ZoneVector<base::uc16>* name_ = nullptr; +}; + +class RegExpGroup final : public RegExpTree { + public: + explicit RegExpGroup(RegExpTree* body) + : body_(body), + min_match_(body->min_match()), + max_match_(body->max_match()) {} + + DECL_BOILERPLATE(Group); + + bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); } + bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); } + int min_match() override { return min_match_; } + int max_match() override { return max_match_; } + Interval CaptureRegisters() override { return body_->CaptureRegisters(); } + RegExpTree* body() const { return body_; } + + private: + RegExpTree* body_; + int min_match_; + int max_match_; +}; + +class RegExpLookaround final : public RegExpTree { + public: + enum Type { LOOKAHEAD, LOOKBEHIND }; + + RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count, + int capture_from, Type type) + : body_(body), + is_positive_(is_positive), + capture_count_(capture_count), + capture_from_(capture_from), + type_(type) {} + + DECL_BOILERPLATE(Lookaround); + + Interval CaptureRegisters() override; + bool IsAnchoredAtStart() override; + int min_match() override { return 0; } + int max_match() override { return 0; } + RegExpTree* body() const { return body_; } + bool is_positive() const { return is_positive_; } + int capture_count() const { return capture_count_; } + int capture_from() const { return capture_from_; } + Type type() const { return type_; } + + class Builder { + public: + Builder(bool is_positive, RegExpNode* on_success, + int stack_pointer_register, int position_register, + int capture_register_count = 0, int capture_register_start = 0); + RegExpNode* on_match_success() const { return on_match_success_; } + RegExpNode* ForMatch(RegExpNode* match); + + private: + bool is_positive_; + RegExpNode* on_match_success_; + RegExpNode* on_success_; + int stack_pointer_register_; + int position_register_; + }; + + private: + RegExpTree* body_; + bool is_positive_; + int capture_count_; + int capture_from_; + Type type_; +}; + + +class RegExpBackReference final : public RegExpTree { + public: + explicit RegExpBackReference(RegExpFlags flags) : flags_(flags) {} + RegExpBackReference(RegExpCapture* capture, RegExpFlags flags) + : capture_(capture), flags_(flags) {} + + DECL_BOILERPLATE(BackReference); + + int min_match() override { return 0; } + // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite + // recursion, we give up. Ignorance is bliss. + int max_match() override { return kInfinity; } + int index() const { return capture_->index(); } + RegExpCapture* capture() const { return capture_; } + void set_capture(RegExpCapture* capture) { capture_ = capture; } + const ZoneVector<base::uc16>* name() const { return name_; } + void set_name(const ZoneVector<base::uc16>* name) { name_ = name; } + + private: + RegExpCapture* capture_ = nullptr; + const ZoneVector<base::uc16>* name_ = nullptr; + const RegExpFlags flags_; +}; + + +class RegExpEmpty final : public RegExpTree { + public: + DECL_BOILERPLATE(Empty); + int min_match() override { return 0; } + int max_match() override { return 0; } +}; + +} // namespace internal +} // namespace v8 + +#undef DECL_BOILERPLATE + +#endif // V8_REGEXP_REGEXP_AST_H_ |