diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:14:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:14:29 +0000 |
commit | fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8 (patch) | |
tree | 4c1ccaf5486d4f2009f9a338a98a83e886e29c97 /js/src/irregexp/imported/regexp-compiler-tonode.cc | |
parent | Releasing progress-linux version 124.0.1-1~progress7.99u1. (diff) | |
download | firefox-fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8.tar.xz firefox-fbaf0bb26397aa498eb9156f06d5a6fe34dd7dd8.zip |
Merging upstream version 125.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/irregexp/imported/regexp-compiler-tonode.cc')
-rw-r--r-- | js/src/irregexp/imported/regexp-compiler-tonode.cc | 99 |
1 files changed, 59 insertions, 40 deletions
diff --git a/js/src/irregexp/imported/regexp-compiler-tonode.cc b/js/src/irregexp/imported/regexp-compiler-tonode.cc index f5087bdb08..b1340123d8 100644 --- a/js/src/irregexp/imported/regexp-compiler-tonode.cc +++ b/js/src/irregexp/imported/regexp-compiler-tonode.cc @@ -3,7 +3,6 @@ // found in the LICENSE file. #include "irregexp/imported/regexp-compiler.h" - #include "irregexp/imported/regexp.h" #ifdef V8_INTL_SUPPORT @@ -418,27 +417,6 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler, } // namespace -#ifdef V8_INTL_SUPPORT -// static -void CharacterRange::UnicodeSimpleCloseOver(icu::UnicodeSet& set) { - // Remove characters for which closeOver() adds full-case-folding equivalents - // because we should work only with simple case folding mappings. - icu::UnicodeSet non_simple = icu::UnicodeSet(set); - non_simple.retainAll(RegExpCaseFolding::UnicodeNonSimpleCloseOverSet()); - set.removeAll(non_simple); - - set.closeOver(USET_CASE_INSENSITIVE); - // Full case folding maps single characters to multiple characters. - // Those are represented as strings in the set. Remove them so that - // we end up with only simple and common case mappings. - set.removeAllStrings(); - - // Add characters that have non-simple case foldings again (they match - // themselves). - set.addAll(non_simple); -} -#endif // V8_INTL_SUPPORT - // static void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) { @@ -460,8 +438,7 @@ void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, } // Clear the ranges list without freeing the backing store. ranges->Rewind(0); - - UnicodeSimpleCloseOver(set); + set.closeOver(USET_SIMPLE_CASE_INSENSITIVE); for (int i = 0; i < set.getRangeCount(); i++) { ranges->Add(Range(set.getRangeStart(i), set.getRangeEnd(i)), zone); } @@ -476,7 +453,9 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler, Zone* const zone = compiler->zone(); ZoneList<CharacterRange>* ranges = this->ranges(zone); - if (NeedsUnicodeCaseEquivalents(compiler->flags())) { + const bool needs_case_folding = + NeedsUnicodeCaseEquivalents(compiler->flags()) && !is_case_folded(); + if (needs_case_folding) { CharacterRange::AddUnicodeCaseEquivalents(ranges, zone); } @@ -487,8 +466,7 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler, if (is_negated()) { // With /v, character classes are never negated. - // TODO(v8:11935): Change permalink once proposal is in stage 4. - // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-compileatom + // https://tc39.es/ecma262/#sec-compileatom // Atom :: CharacterClass // 4. Assert: cc.[[Invert]] is false. // Instead the complement is created when evaluating the class set. @@ -561,7 +539,12 @@ RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler, } } if (!ranges()->is_empty()) { - alternatives->Add(zone->template New<RegExpClassRanges>(zone, ranges()), + // In unicode sets mode case folding has to be done at precise locations + // (e.g. before building complements). + // It is therefore the parsers responsibility to case fold (sub-) ranges + // before creating ClassSetOperands. + alternatives->Add(zone->template New<RegExpClassRanges>( + zone, ranges(), RegExpClassRanges::IS_CASE_FOLDED), zone); } if (empty_string != nullptr) { @@ -1034,9 +1017,8 @@ namespace { // \B to (?<=\w)(?=\w)|(?<=\W)(?=\W) RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler, RegExpNode* on_success, - RegExpAssertion::Type type, - RegExpFlags flags) { - CHECK(NeedsUnicodeCaseEquivalents(flags)); + RegExpAssertion::Type type) { + CHECK(NeedsUnicodeCaseEquivalents(compiler->flags())); Zone* zone = compiler->zone(); ZoneList<CharacterRange>* word_range = zone->New<ZoneList<CharacterRange>>(2, zone); @@ -1080,14 +1062,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, return AssertionNode::AtStart(on_success); case Type::BOUNDARY: return NeedsUnicodeCaseEquivalents(compiler->flags()) - ? BoundaryAssertionAsLookaround( - compiler, on_success, Type::BOUNDARY, compiler->flags()) + ? BoundaryAssertionAsLookaround(compiler, on_success, + Type::BOUNDARY) : AssertionNode::AtBoundary(on_success); case Type::NON_BOUNDARY: return NeedsUnicodeCaseEquivalents(compiler->flags()) ? BoundaryAssertionAsLookaround(compiler, on_success, - Type::NON_BOUNDARY, - compiler->flags()) + Type::NON_BOUNDARY) : AssertionNode::AtNonBoundary(on_success); case Type::END_OF_INPUT: return AssertionNode::AtEnd(on_success); @@ -1130,10 +1111,17 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return compiler->zone()->New<BackReferenceNode>( - RegExpCapture::StartRegister(index()), - RegExpCapture::EndRegister(index()), flags_, compiler->read_backward(), - on_success); + RegExpNode* backref_node = on_success; + // Only one of the captures in the list can actually match. Since + // back-references to unmatched captures are treated as empty, we can simply + // create back-references to all possible captures. + for (auto capture : *captures()) { + backref_node = compiler->zone()->New<BackReferenceNode>( + RegExpCapture::StartRegister(capture->index()), + RegExpCapture::EndRegister(capture->index()), compiler->read_backward(), + backref_node); + } + return backref_node; } RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, @@ -1141,9 +1129,40 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, return on_success; } +namespace { + +class V8_NODISCARD ModifiersScope { + public: + ModifiersScope(RegExpCompiler* compiler, RegExpFlags flags) + : compiler_(compiler), previous_flags_(compiler->flags()) { + compiler->set_flags(flags); + } + ~ModifiersScope() { compiler_->set_flags(previous_flags_); } + + private: + RegExpCompiler* compiler_; + const RegExpFlags previous_flags_; +}; + +} // namespace + RegExpNode* RegExpGroup::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return body_->ToNode(compiler, on_success); + // If no flags are modified, simply convert and return the body. + if (flags() == compiler->flags()) { + return body_->ToNode(compiler, on_success); + } + // Reset flags for successor node. + const RegExpFlags old_flags = compiler->flags(); + on_success = ActionNode::ModifyFlags(old_flags, on_success); + + // Convert body using modifier. + ModifiersScope modifiers_scope(compiler, flags()); + RegExpNode* body = body_->ToNode(compiler, on_success); + + // Wrap body into modifier node. + RegExpNode* modified_body = ActionNode::ModifyFlags(flags(), body); + return modified_body; } RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success, |