1 files changed, 59 insertions, 40 deletions
diff --git a/js/src/irregexp/imported/regexp-compiler-tonode.cc b/js/src/irregexp/imported/regexp-compiler-tonode.cc
index f5087bdb08..b1340123d8 100644
--- a/js/src/irregexp/imported/regexp-compiler-tonode.cc
+++ b/js/src/irregexp/imported/regexp-compiler-tonode.cc
@@ -3,7 +3,6 @@
 // found in the LICENSE file.
 
 #include "irregexp/imported/regexp-compiler.h"
-
 #include "irregexp/imported/regexp.h"
 
 #ifdef V8_INTL_SUPPORT
@@ -418,27 +417,6 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
 
 }  // namespace
 
-#ifdef V8_INTL_SUPPORT
-// static
-void CharacterRange::UnicodeSimpleCloseOver(icu::UnicodeSet& set) {
-  // Remove characters for which closeOver() adds full-case-folding equivalents
-  // because we should work only with simple case folding mappings.
-  icu::UnicodeSet non_simple = icu::UnicodeSet(set);
-  non_simple.retainAll(RegExpCaseFolding::UnicodeNonSimpleCloseOverSet());
-  set.removeAll(non_simple);
-
-  set.closeOver(USET_CASE_INSENSITIVE);
-  // Full case folding maps single characters to multiple characters.
-  // Those are represented as strings in the set. Remove them so that
-  // we end up with only simple and common case mappings.
-  set.removeAllStrings();
-
-  // Add characters that have non-simple case foldings again (they match
-  // themselves).
-  set.addAll(non_simple);
-}
-#endif  // V8_INTL_SUPPORT
-
 // static
 void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
                                                Zone* zone) {
@@ -460,8 +438,7 @@ void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
   }
   // Clear the ranges list without freeing the backing store.
   ranges->Rewind(0);
-
-  UnicodeSimpleCloseOver(set);
+  set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
   for (int i = 0; i < set.getRangeCount(); i++) {
     ranges->Add(Range(set.getRangeStart(i), set.getRangeEnd(i)), zone);
   }
@@ -476,7 +453,9 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
   Zone* const zone = compiler->zone();
   ZoneList<CharacterRange>* ranges = this->ranges(zone);
 
-  if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
+  const bool needs_case_folding =
+      NeedsUnicodeCaseEquivalents(compiler->flags()) && !is_case_folded();
+  if (needs_case_folding) {
     CharacterRange::AddUnicodeCaseEquivalents(ranges, zone);
   }
 
@@ -487,8 +466,7 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
 
   if (is_negated()) {
     // With /v, character classes are never negated.
-    // TODO(v8:11935): Change permalink once proposal is in stage 4.
-    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-compileatom
+    // https://tc39.es/ecma262/#sec-compileatom
     // Atom :: CharacterClass
     //   4. Assert: cc.[[Invert]] is false.
     // Instead the complement is created when evaluating the class set.
@@ -561,7 +539,12 @@ RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
     }
   }
   if (!ranges()->is_empty()) {
-    alternatives->Add(zone->template New<RegExpClassRanges>(zone, ranges()),
+    // In unicode sets mode case folding has to be done at precise locations
+    // (e.g. before building complements).
+    // It is therefore the parsers responsibility to case fold (sub-) ranges
+    // before creating ClassSetOperands.
+    alternatives->Add(zone->template New<RegExpClassRanges>(
+                          zone, ranges(), RegExpClassRanges::IS_CASE_FOLDED),
                       zone);
   }
   if (empty_string != nullptr) {
@@ -1034,9 +1017,8 @@ namespace {
 //         \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
 RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
                                           RegExpNode* on_success,
-                                          RegExpAssertion::Type type,
-                                          RegExpFlags flags) {
-  CHECK(NeedsUnicodeCaseEquivalents(flags));
+                                          RegExpAssertion::Type type) {
+  CHECK(NeedsUnicodeCaseEquivalents(compiler->flags()));
   Zone* zone = compiler->zone();
   ZoneList<CharacterRange>* word_range =
       zone->New<ZoneList<CharacterRange>>(2, zone);
@@ -1080,14 +1062,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
       return AssertionNode::AtStart(on_success);
     case Type::BOUNDARY:
       return NeedsUnicodeCaseEquivalents(compiler->flags())
-                 ? BoundaryAssertionAsLookaround(
-                       compiler, on_success, Type::BOUNDARY, compiler->flags())
+                 ? BoundaryAssertionAsLookaround(compiler, on_success,
+                                                 Type::BOUNDARY)
                  : AssertionNode::AtBoundary(on_success);
     case Type::NON_BOUNDARY:
       return NeedsUnicodeCaseEquivalents(compiler->flags())
                  ? BoundaryAssertionAsLookaround(compiler, on_success,
-                                                 Type::NON_BOUNDARY,
-                                                 compiler->flags())
+                                                 Type::NON_BOUNDARY)
                  : AssertionNode::AtNonBoundary(on_success);
     case Type::END_OF_INPUT:
       return AssertionNode::AtEnd(on_success);
@@ -1130,10 +1111,17 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
 
 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
                                         RegExpNode* on_success) {
-  return compiler->zone()->New<BackReferenceNode>(
-      RegExpCapture::StartRegister(index()),
-      RegExpCapture::EndRegister(index()), flags_, compiler->read_backward(),
-      on_success);
+  RegExpNode* backref_node = on_success;
+  // Only one of the captures in the list can actually match. Since
+  // back-references to unmatched captures are treated as empty, we can simply
+  // create back-references to all possible captures.
+  for (auto capture : *captures()) {
+    backref_node = compiler->zone()->New<BackReferenceNode>(
+        RegExpCapture::StartRegister(capture->index()),
+        RegExpCapture::EndRegister(capture->index()), compiler->read_backward(),
+        backref_node);
+  }
+  return backref_node;
 }
 
 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
@@ -1141,9 +1129,40 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
   return on_success;
 }
 
+namespace {
+
+class V8_NODISCARD ModifiersScope {
+ public:
+  ModifiersScope(RegExpCompiler* compiler, RegExpFlags flags)
+      : compiler_(compiler), previous_flags_(compiler->flags()) {
+    compiler->set_flags(flags);
+  }
+  ~ModifiersScope() { compiler_->set_flags(previous_flags_); }
+
+ private:
+  RegExpCompiler* compiler_;
+  const RegExpFlags previous_flags_;
+};
+
+}  // namespace
+
 RegExpNode* RegExpGroup::ToNode(RegExpCompiler* compiler,
                                 RegExpNode* on_success) {
-  return body_->ToNode(compiler, on_success);
+  // If no flags are modified, simply convert and return the body.
+  if (flags() == compiler->flags()) {
+    return body_->ToNode(compiler, on_success);
+  }
+  // Reset flags for successor node.
+  const RegExpFlags old_flags = compiler->flags();
+  on_success = ActionNode::ModifyFlags(old_flags, on_success);
+
+  // Convert body using modifier.
+  ModifiersScope modifiers_scope(compiler, flags());
+  RegExpNode* body = body_->ToNode(compiler, on_success);
+
+  // Wrap body into modifier node.
+  RegExpNode* modified_body = ActionNode::ModifyFlags(flags(), body);
+  return modified_body;
 }
 
 RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,