summaryrefslogtreecommitdiffstats
path: root/js/src/irregexp/imported/regexp-compiler-tonode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/irregexp/imported/regexp-compiler-tonode.cc')
-rw-r--r--js/src/irregexp/imported/regexp-compiler-tonode.cc99
1 files changed, 59 insertions, 40 deletions
diff --git a/js/src/irregexp/imported/regexp-compiler-tonode.cc b/js/src/irregexp/imported/regexp-compiler-tonode.cc
index f5087bdb08..b1340123d8 100644
--- a/js/src/irregexp/imported/regexp-compiler-tonode.cc
+++ b/js/src/irregexp/imported/regexp-compiler-tonode.cc
@@ -3,7 +3,6 @@
// found in the LICENSE file.
#include "irregexp/imported/regexp-compiler.h"
-
#include "irregexp/imported/regexp.h"
#ifdef V8_INTL_SUPPORT
@@ -418,27 +417,6 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
} // namespace
-#ifdef V8_INTL_SUPPORT
-// static
-void CharacterRange::UnicodeSimpleCloseOver(icu::UnicodeSet& set) {
- // Remove characters for which closeOver() adds full-case-folding equivalents
- // because we should work only with simple case folding mappings.
- icu::UnicodeSet non_simple = icu::UnicodeSet(set);
- non_simple.retainAll(RegExpCaseFolding::UnicodeNonSimpleCloseOverSet());
- set.removeAll(non_simple);
-
- set.closeOver(USET_CASE_INSENSITIVE);
- // Full case folding maps single characters to multiple characters.
- // Those are represented as strings in the set. Remove them so that
- // we end up with only simple and common case mappings.
- set.removeAllStrings();
-
- // Add characters that have non-simple case foldings again (they match
- // themselves).
- set.addAll(non_simple);
-}
-#endif // V8_INTL_SUPPORT
-
// static
void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
Zone* zone) {
@@ -460,8 +438,7 @@ void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
}
// Clear the ranges list without freeing the backing store.
ranges->Rewind(0);
-
- UnicodeSimpleCloseOver(set);
+ set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
for (int i = 0; i < set.getRangeCount(); i++) {
ranges->Add(Range(set.getRangeStart(i), set.getRangeEnd(i)), zone);
}
@@ -476,7 +453,9 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
Zone* const zone = compiler->zone();
ZoneList<CharacterRange>* ranges = this->ranges(zone);
- if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
+ const bool needs_case_folding =
+ NeedsUnicodeCaseEquivalents(compiler->flags()) && !is_case_folded();
+ if (needs_case_folding) {
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone);
}
@@ -487,8 +466,7 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
if (is_negated()) {
// With /v, character classes are never negated.
- // TODO(v8:11935): Change permalink once proposal is in stage 4.
- // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-compileatom
+ // https://tc39.es/ecma262/#sec-compileatom
// Atom :: CharacterClass
// 4. Assert: cc.[[Invert]] is false.
// Instead the complement is created when evaluating the class set.
@@ -561,7 +539,12 @@ RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
}
}
if (!ranges()->is_empty()) {
- alternatives->Add(zone->template New<RegExpClassRanges>(zone, ranges()),
+ // In unicode sets mode case folding has to be done at precise locations
+ // (e.g. before building complements).
+ // It is therefore the parsers responsibility to case fold (sub-) ranges
+ // before creating ClassSetOperands.
+ alternatives->Add(zone->template New<RegExpClassRanges>(
+ zone, ranges(), RegExpClassRanges::IS_CASE_FOLDED),
zone);
}
if (empty_string != nullptr) {
@@ -1034,9 +1017,8 @@ namespace {
// \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
RegExpNode* on_success,
- RegExpAssertion::Type type,
- RegExpFlags flags) {
- CHECK(NeedsUnicodeCaseEquivalents(flags));
+ RegExpAssertion::Type type) {
+ CHECK(NeedsUnicodeCaseEquivalents(compiler->flags()));
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* word_range =
zone->New<ZoneList<CharacterRange>>(2, zone);
@@ -1080,14 +1062,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
return AssertionNode::AtStart(on_success);
case Type::BOUNDARY:
return NeedsUnicodeCaseEquivalents(compiler->flags())
- ? BoundaryAssertionAsLookaround(
- compiler, on_success, Type::BOUNDARY, compiler->flags())
+ ? BoundaryAssertionAsLookaround(compiler, on_success,
+ Type::BOUNDARY)
: AssertionNode::AtBoundary(on_success);
case Type::NON_BOUNDARY:
return NeedsUnicodeCaseEquivalents(compiler->flags())
? BoundaryAssertionAsLookaround(compiler, on_success,
- Type::NON_BOUNDARY,
- compiler->flags())
+ Type::NON_BOUNDARY)
: AssertionNode::AtNonBoundary(on_success);
case Type::END_OF_INPUT:
return AssertionNode::AtEnd(on_success);
@@ -1130,10 +1111,17 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return compiler->zone()->New<BackReferenceNode>(
- RegExpCapture::StartRegister(index()),
- RegExpCapture::EndRegister(index()), flags_, compiler->read_backward(),
- on_success);
+ RegExpNode* backref_node = on_success;
+ // Only one of the captures in the list can actually match. Since
+ // back-references to unmatched captures are treated as empty, we can simply
+ // create back-references to all possible captures.
+ for (auto capture : *captures()) {
+ backref_node = compiler->zone()->New<BackReferenceNode>(
+ RegExpCapture::StartRegister(capture->index()),
+ RegExpCapture::EndRegister(capture->index()), compiler->read_backward(),
+ backref_node);
+ }
+ return backref_node;
}
RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
@@ -1141,9 +1129,40 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
return on_success;
}
+namespace {
+
+class V8_NODISCARD ModifiersScope {
+ public:
+ ModifiersScope(RegExpCompiler* compiler, RegExpFlags flags)
+ : compiler_(compiler), previous_flags_(compiler->flags()) {
+ compiler->set_flags(flags);
+ }
+ ~ModifiersScope() { compiler_->set_flags(previous_flags_); }
+
+ private:
+ RegExpCompiler* compiler_;
+ const RegExpFlags previous_flags_;
+};
+
+} // namespace
+
RegExpNode* RegExpGroup::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return body_->ToNode(compiler, on_success);
+ // If no flags are modified, simply convert and return the body.
+ if (flags() == compiler->flags()) {
+ return body_->ToNode(compiler, on_success);
+ }
+ // Reset flags for successor node.
+ const RegExpFlags old_flags = compiler->flags();
+ on_success = ActionNode::ModifyFlags(old_flags, on_success);
+
+ // Convert body using modifier.
+ ModifiersScope modifiers_scope(compiler, flags());
+ RegExpNode* body = body_->ToNode(compiler, on_success);
+
+ // Wrap body into modifier node.
+ RegExpNode* modified_body = ActionNode::ModifyFlags(flags(), body);
+ return modified_body;
}
RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,