Merging upstream version 125.0.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 01:13:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 01:13:33 +0000
commit: 086c044dc34dfc0f74fbe41f4ecb402b2cd34884 (patch)
tree: a4f824bd33cb075dd5aa3eb5a0a94af221bbe83a /js/src/irregexp
parent: Adding debian version 124.0.1-1. (diff)
download: firefox-086c044dc34dfc0f74fbe41f4ecb402b2cd34884.tar.xz
firefox-086c044dc34dfc0f74fbe41f4ecb402b2cd34884.zip
25 files changed, 618 insertions, 427 deletions
diff --git a/js/src/irregexp/RegExpAPI.cpp b/js/src/irregexp/RegExpAPI.cpp
index f1ba1fbc4b..39a6f8ccc9 100644
--- a/js/src/irregexp/RegExpAPI.cpp
+++ b/js/src/irregexp/RegExpAPI.cpp
@@ -632,7 +632,7 @@ enum class AssembleResult {
     // RegExpShared.
     ByteArray bytecode =
         v8::internal::ByteArray::cast(*result.code).takeOwnership(cx->isolate);
-    uint32_t length = bytecode->length;
+    uint32_t length = bytecode->length();
     re->setByteCode(bytecode.release(), isLatin1);
     js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
   }
@@ -773,7 +773,7 @@ bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
   bool isLatin1 = input->hasLatin1Chars();
 
   SampleCharacters(input, compiler);
-  data.node = compiler.PreprocessRegExp(&data, flags, isLatin1);
+  data.node = compiler.PreprocessRegExp(&data, isLatin1);
   data.error = AnalyzeRegExp(cx->isolate, isLatin1, flags, data.node);
   if (data.error != RegExpError::kNone) {
     MOZ_ASSERT(data.error == RegExpError::kAnalysisStackOverflow);
diff --git a/js/src/irregexp/RegExpNativeMacroAssembler.cpp b/js/src/irregexp/RegExpNativeMacroAssembler.cpp
index 2a8b1749c2..99cfc31bfc 100644
--- a/js/src/irregexp/RegExpNativeMacroAssembler.cpp
+++ b/js/src/irregexp/RegExpNativeMacroAssembler.cpp
@@ -247,8 +247,8 @@ void SMRegExpMacroAssembler::CheckCharacterNotInRange(base::uc16 from,
 bool SMRegExpMacroAssembler::IsCharacterInRangeArray(uint32_t c,
                                                      ByteArrayData* ranges) {
   js::AutoUnsafeCallWithABI unsafe;
-  MOZ_ASSERT(ranges->length % sizeof(uint16_t) == 0);
-  uint32_t length = ranges->length / sizeof(uint16_t);
+  MOZ_ASSERT(ranges->length() % sizeof(uint16_t) == 0);
+  uint32_t length = ranges->length() / sizeof(uint16_t);
   MOZ_ASSERT(length > 0);
 
   // Fast paths.
diff --git a/js/src/irregexp/RegExpShim.cpp b/js/src/irregexp/RegExpShim.cpp
index 2b2c3cd4a0..da388e0057 100644
--- a/js/src/irregexp/RegExpShim.cpp
+++ b/js/src/irregexp/RegExpShim.cpp
@@ -227,13 +227,13 @@ Handle<ByteArray> Isolate::NewByteArray(int length, AllocationType alloc) {
 
   js::AutoEnterOOMUnsafeRegion oomUnsafe;
 
-  size_t alloc_size = sizeof(uint32_t) + length;
+  size_t alloc_size = sizeof(ByteArrayData) + length;
   ByteArrayData* data =
       static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size));
   if (!data) {
     oomUnsafe.crash("Irregexp NewByteArray");
   }
-  data->length = length;
+  new (data) ByteArrayData(length);
 
   return Handle<ByteArray>(JS::PrivateValue(data), this);
 }
@@ -261,7 +261,7 @@ Handle<FixedIntegerArray<T>> Isolate::NewFixedIntegerArray(uint32_t length) {
   if (!data) {
     oomUnsafe.crash("Irregexp NewFixedIntegerArray");
   }
-  data->length = rawLength;
+  new (data) ByteArrayData(rawLength);
 
   return Handle<FixedIntegerArray<T>>(JS::PrivateValue(data), this);
 }
diff --git a/js/src/irregexp/RegExpShim.h b/js/src/irregexp/RegExpShim.h
index 3f85413421..4d32c84920 100644
--- a/js/src/irregexp/RegExpShim.h
+++ b/js/src/irregexp/RegExpShim.h
@@ -586,15 +586,6 @@ class Object {
   // IsCharacterInRangeArray in regexp-macro-assembler.cc.
   Object(uintptr_t raw) : asBits_(raw) { MOZ_CRASH("unused"); }
 
-  // Used in regexp-interpreter.cc to check the return value of
-  // isolate->stack_guard()->HandleInterrupts(). We want to handle
-  // interrupts in the caller, so we always return false from
-  // HandleInterrupts and true here.
-  inline bool IsException(Isolate*) const {
-    MOZ_ASSERT(!value().toBoolean());
-    return true;
-  }
-
   JS::Value value() const { return JS::Value::fromRawBits(asBits_); }
 
   inline static Object cast(Object object) { return object; }
@@ -604,6 +595,14 @@ class Object {
   uint64_t asBits_;
 } JS_HAZ_GC_POINTER;
 
+// Used in regexp-interpreter.cc to check the return value of
+// isolate->stack_guard()->HandleInterrupts(). We want to handle
+// interrupts in the caller, so we return a magic value from
+// HandleInterrupts and check for it here.
+inline bool IsException(Object obj, Isolate*) {
+  return obj.value().isMagic(JS_INTERRUPT_REGEXP);
+}
+
 class Smi : public Object {
  public:
   static Smi FromInt(int32_t value) {
@@ -626,6 +625,27 @@ class HeapObject : public Object {
   }
 };
 
+// V8's values use low-bit tagging. If the LSB is 0, it's a small
+// integer. If the LSB is 1, it's a pointer to some GC thing. In V8,
+// this wrapper class is used to represent a pointer that has the low
+// bit set, or a small integer that has been shifted left by one
+// bit. We don't use the same tagging system, so all we need is a
+// transparent wrapper that automatically converts to/from the wrapped
+// type.
+template <typename T>
+class Tagged {
+ public:
+  Tagged() {}
+  MOZ_IMPLICIT Tagged(const T& value) : value_(value) {}
+  MOZ_IMPLICIT Tagged(T&& value) : value_(std::move(value)) {}
+
+  T* operator->() { return &value_; }
+  constexpr operator T() const { return value_; }
+
+ private:
+  T value_;
+};
+
 // A fixed-size array with Objects (aka Values) as element types.
 // Implemented using the dense elements of an ArrayObject.
 // Used for named captures.
@@ -668,13 +688,13 @@ T* ByteArrayData::typedData() {
 
 template <typename T>
 T ByteArrayData::getTyped(uint32_t index) {
-  MOZ_ASSERT(index < length / sizeof(T));
+  MOZ_ASSERT(index < length() / sizeof(T));
   return typedData<T>()[index];
 }
 
 template <typename T>
 void ByteArrayData::setTyped(uint32_t index, T value) {
-  MOZ_ASSERT(index < length / sizeof(T));
+  MOZ_ASSERT(index < length() / sizeof(T));
   typedData<T>()[index] = value;
 }
 
@@ -684,6 +704,7 @@ class ByteArray : public HeapObject {
   ByteArrayData* inner() const {
     return static_cast<ByteArrayData*>(value().toPrivate());
   }
+  friend bool IsByteArray(Object obj);
 
  public:
   PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
@@ -692,8 +713,8 @@ class ByteArray : public HeapObject {
   uint8_t get(uint32_t index) { return inner()->get(index); }
   void set(uint32_t index, uint8_t val) { inner()->set(index, val); }
 
-  uint32_t length() const { return inner()->length; }
-  uint8_t* GetDataStartAddress() { return inner()->data(); }
+  uint32_t length() const { return inner()->length(); }
+  uint8_t* begin() { return inner()->data(); }
 
   static ByteArray cast(Object object) {
     ByteArray b;
@@ -701,11 +722,17 @@ class ByteArray : public HeapObject {
     return b;
   }
 
-  bool IsByteArray() const { return true; }
-
   friend class SMRegExpMacroAssembler;
 };
 
+// This is only used in assertions. In debug builds, we put a magic value
+// in the header of each ByteArrayData, and assert here that it matches.
+inline bool IsByteArray(Object obj) {
+  MOZ_ASSERT(ByteArray::cast(obj).inner()->magic() ==
+             ByteArrayData::ExpectedMagic);
+  return true;
+}
+
 // This is a convenience class used in V8 for treating a ByteArray as an array
 // of fixed-size integers. This version supports integral types up to 32 bits.
 template <typename T>
@@ -1030,6 +1057,7 @@ class JSRegExp : public HeapObject {
 };
 
 using RegExpFlags = JS::RegExpFlags;
+using RegExpFlag = JS::RegExpFlags::Flag;
 
 inline bool IsUnicode(RegExpFlags flags) { return flags.unicode(); }
 inline bool IsGlobal(RegExpFlags flags) { return flags.global(); }
@@ -1042,6 +1070,22 @@ inline bool IsEitherUnicode(RegExpFlags flags) {
   return flags.unicode() || flags.unicodeSets();
 }
 
+inline base::Optional<RegExpFlag> TryRegExpFlagFromChar(char c) {
+  RegExpFlag flag;
+
+  // The parser only calls this after verifying that it's a supported flag.
+  MOZ_ALWAYS_TRUE(JS::MaybeParseRegExpFlag(c, &flag));
+
+  return base::Optional(flag);
+}
+
+inline bool operator==(const RegExpFlags& lhs, const int& rhs) {
+  return lhs.value() == rhs;
+}
+inline bool operator!=(const RegExpFlags& lhs, const int& rhs) {
+  return !(lhs == rhs);
+}
+
 class Histogram {
  public:
   inline void AddSample(int sample) {}
@@ -1126,9 +1170,11 @@ class Isolate {
 
   // This is called from inside no-GC code. V8 runs the interrupt
   // inside the no-GC code and then "manually relocates unhandlified
-  // references" afterwards. We just return false and let the caller
-  // handle interrupts.
-  Object HandleInterrupts() { return Object(JS::BooleanValue(false)); }
+  // references" afterwards. We just return a magic value and let the
+  // caller handle interrupts.
+  Object HandleInterrupts() {
+    return Object(JS::MagicValue(JS_INTERRUPT_REGEXP));
+  }
 
   JSContext* cx() const { return cx_; }
 
diff --git a/js/src/irregexp/RegExpTypes.h b/js/src/irregexp/RegExpTypes.h
index e2a619689c..620fac4ed5 100644
--- a/js/src/irregexp/RegExpTypes.h
+++ b/js/src/irregexp/RegExpTypes.h
@@ -21,15 +21,17 @@ namespace internal {
 
 class ByteArrayData {
  public:
-  uint32_t length;
+  ByteArrayData(uint32_t length) : length_(length) {}
+
+  uint32_t length() { return length_; };
   uint8_t* data();
 
   uint8_t get(uint32_t index) {
-    MOZ_ASSERT(index < length);
+    MOZ_ASSERT(index < length());
     return data()[index];
   }
   void set(uint32_t index, uint8_t val) {
-    MOZ_ASSERT(index < length);
+    MOZ_ASSERT(index < length());
     data()[index] = val;
   }
 
@@ -39,9 +41,19 @@ class ByteArrayData {
   template <typename T>
   void setTyped(uint32_t index, T value);
 
+#ifdef DEBUG
+  const static uint32_t ExpectedMagic = 0x12344321;
+  uint32_t magic() const { return magic_; }
+
+ private:
+  uint32_t magic_ = ExpectedMagic;
+#endif
+
  private:
   template <typename T>
   T* typedData();
+
+  uint32_t length_;
 };
 
 class Isolate;
diff --git a/js/src/irregexp/imported/gen-regexp-special-case.cc b/js/src/irregexp/imported/gen-regexp-special-case.cc
index 8f6557ed30..0875568250 100644
--- a/js/src/irregexp/imported/gen-regexp-special-case.cc
+++ b/js/src/irregexp/imported/gen-regexp-special-case.cc
@@ -8,7 +8,6 @@
 #include <sstream>
 
 #include "irregexp/imported/special-case.h"
-#include "unicode/usetiter.h"
 
 namespace v8 {
 namespace internal {
@@ -126,52 +125,6 @@ void PrintSpecial(std::ofstream& out) {
   PrintSet(out, "SpecialAddSet", special_add);
 }
 
-void PrintUnicodeSpecial(std::ofstream& out) {
-  icu::UnicodeSet non_simple_folding;
-  icu::UnicodeSet current;
-  UErrorCode status = U_ZERO_ERROR;
-  // Look at all characters except white spaces.
-  icu::UnicodeSet interestingCP(u"[^[:White_Space:]]", status);
-  CHECK_EQ(status, U_ZERO_ERROR);
-  icu::UnicodeSetIterator iter(interestingCP);
-  while (iter.next()) {
-    UChar32 c = iter.getCodepoint();
-    current.set(c, c);
-    current.closeOver(USET_CASE_INSENSITIVE).removeAllStrings();
-    CHECK(!current.isBogus());
-    // Remove characters from the closeover that have a simple case folding.
-    icu::UnicodeSet toRemove;
-    icu::UnicodeSetIterator closeOverIter(current);
-    while (closeOverIter.next()) {
-      UChar32 closeOverChar = closeOverIter.getCodepoint();
-      UChar32 closeOverSCF = u_foldCase(closeOverChar, U_FOLD_CASE_DEFAULT);
-      if (closeOverChar != closeOverSCF) {
-        toRemove.add(closeOverChar);
-      }
-    }
-    CHECK(!toRemove.isBogus());
-    current.removeAll(toRemove);
-
-    // The current character and its simple case folding are also always OK.
-    UChar32 scf = u_foldCase(c, U_FOLD_CASE_DEFAULT);
-    current.remove(c);
-    current.remove(scf);
-
-    // If there are any characters remaining, they were added due to full case
-    // foldings and shouldn't match the current charcter according to the spec.
-    if (!current.isEmpty()) {
-      // Ensure that the character doesn't have a simple case folding.
-      // Otherwise the current approach of simply removing the character from
-      // the set before calling closeOver won't work.
-      CHECK_EQ(c, scf);
-      non_simple_folding.add(c);
-    }
-  }
-  CHECK(!non_simple_folding.isBogus());
-
-  PrintSet(out, "UnicodeNonSimpleCloseOverSet", non_simple_folding);
-}
-
 void WriteHeader(const char* header_filename) {
   std::ofstream out(header_filename);
   out << std::hex << std::setfill('0') << std::setw(4);
@@ -192,7 +145,6 @@ void WriteHeader(const char* header_filename) {
       << "namespace internal {\n\n";
 
   PrintSpecial(out);
-  PrintUnicodeSpecial(out);
 
   out << "\n"
       << "}  // namespace internal\n"
diff --git a/js/src/irregexp/imported/regexp-ast.cc b/js/src/irregexp/imported/regexp-ast.cc
index 63eeb5c05d..34946bd80c 100644
--- a/js/src/irregexp/imported/regexp-ast.cc
+++ b/js/src/irregexp/imported/regexp-ast.cc
@@ -307,7 +307,7 @@ void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
 }
 
 void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
-  os_ << "(?: ";
+  os_ << "(?" << that->flags() << ": ";
   that->body()->Accept(this, data);
   os_ << ")";
   return nullptr;
@@ -325,7 +325,11 @@ void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
 
 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
                                          void* data) {
-  os_ << "(<- " << that->index() << ")";
+  os_ << "(<- " << that->captures()->first()->index();
+  for (int i = 1; i < that->captures()->length(); ++i) {
+    os_ << "," << that->captures()->at(i)->index();
+  }
+  os_ << ")";
   return nullptr;
 }
 
@@ -406,10 +410,17 @@ RegExpClassSetExpression::RegExpClassSetExpression(
       may_contain_strings_(may_contain_strings),
       operands_(operands) {
   DCHECK_NOT_NULL(operands);
-  DCHECK_IMPLIES(is_negated_, !may_contain_strings_);
-  max_match_ = 0;
-  for (auto op : *operands) {
-    max_match_ = std::max(max_match_, op->max_match());
+  if (is_negated) {
+    DCHECK(!may_contain_strings_);
+    // We don't know anything about max matches for negated classes.
+    // As there are no strings involved, assume that we can match a single
+    // Unicode character (up to 2 UTF-16 code units).
+    max_match_ = 2;
+  } else {
+    max_match_ = 0;
+    for (auto op : *operands) {
+      max_match_ = std::max(max_match_, op->max_match());
+    }
   }
 }
 
diff --git a/js/src/irregexp/imported/regexp-ast.h b/js/src/irregexp/imported/regexp-ast.h
index af90b1dda3..b2b88515d3 100644
--- a/js/src/irregexp/imported/regexp-ast.h
+++ b/js/src/irregexp/imported/regexp-ast.h
@@ -130,12 +130,6 @@ class CharacterRange {
   static void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
                                         Zone* zone);
 
-#ifdef V8_INTL_SUPPORT
-  // Creates the closeOver of the given UnicodeSet, removing all
-  // characters/strings that can't be derived via simple case folding.
-  static void UnicodeSimpleCloseOver(icu::UnicodeSet& set);
-#endif  // V8_INTL_SUPPORT
-
   bool Contains(base::uc32 i) const { return from_ <= i && i <= to_; }
   base::uc32 from() const { return from_; }
   base::uc32 to() const { return to_; }
@@ -311,9 +305,12 @@ class RegExpClassRanges final : public RegExpTree {
   //     the specified ranges.
   // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
   //     surrogate and should not be unicode-desugared (crbug.com/641091).
+  // IS_CASE_FOLDED: If case folding is required (/i), it was already
+  //     performed on individual ranges and should not be applied again.
   enum Flag {
     NEGATED = 1 << 0,
     CONTAINS_SPLIT_SURROGATE = 1 << 1,
+    IS_CASE_FOLDED = 1 << 2,
   };
   using ClassRangesFlags = base::Flags<Flag>;
 
@@ -356,6 +353,9 @@ class RegExpClassRanges final : public RegExpTree {
   bool contains_split_surrogate() const {
     return (class_ranges_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
   }
+  bool is_case_folded() const {
+    return (class_ranges_flags_ & IS_CASE_FOLDED) != 0;
+  }
 
  private:
   CharacterSet set_;
@@ -626,8 +626,9 @@ class RegExpCapture final : public RegExpTree {
 
 class RegExpGroup final : public RegExpTree {
  public:
-  explicit RegExpGroup(RegExpTree* body)
+  explicit RegExpGroup(RegExpTree* body, RegExpFlags flags)
       : body_(body),
+        flags_(flags),
         min_match_(body->min_match()),
         max_match_(body->max_match()) {}
 
@@ -639,9 +640,11 @@ class RegExpGroup final : public RegExpTree {
   int max_match() override { return max_match_; }
   Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
   RegExpTree* body() const { return body_; }
+  RegExpFlags flags() const { return flags_; }
 
  private:
   RegExpTree* body_;
+  const RegExpFlags flags_;
   int min_match_;
   int max_match_;
 };
@@ -651,12 +654,13 @@ class RegExpLookaround final : public RegExpTree {
   enum Type { LOOKAHEAD, LOOKBEHIND };
 
   RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
-                   int capture_from, Type type)
+                   int capture_from, Type type, int index)
       : body_(body),
         is_positive_(is_positive),
         capture_count_(capture_count),
         capture_from_(capture_from),
-        type_(type) {}
+        type_(type),
+        index_(index) {}
 
   DECL_BOILERPLATE(Lookaround);
 
@@ -669,6 +673,7 @@ class RegExpLookaround final : public RegExpTree {
   int capture_count() const { return capture_count_; }
   int capture_from() const { return capture_from_; }
   Type type() const { return type_; }
+  int index() const { return index_; }
 
   class Builder {
    public:
@@ -692,14 +697,17 @@ class RegExpLookaround final : public RegExpTree {
   int capture_count_;
   int capture_from_;
   Type type_;
+  int index_;
 };
 
 
 class RegExpBackReference final : public RegExpTree {
  public:
-  explicit RegExpBackReference(RegExpFlags flags) : flags_(flags) {}
-  RegExpBackReference(RegExpCapture* capture, RegExpFlags flags)
-      : capture_(capture), flags_(flags) {}
+  explicit RegExpBackReference(Zone* zone) : captures_(1, zone) {}
+  explicit RegExpBackReference(RegExpCapture* capture, Zone* zone)
+      : captures_(1, zone) {
+    captures_.Add(capture, zone);
+  }
 
   DECL_BOILERPLATE(BackReference);
 
@@ -707,16 +715,16 @@ class RegExpBackReference final : public RegExpTree {
   // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
   // recursion, we give up. Ignorance is bliss.
   int max_match() override { return kInfinity; }
-  int index() const { return capture_->index(); }
-  RegExpCapture* capture() const { return capture_; }
-  void set_capture(RegExpCapture* capture) { capture_ = capture; }
+  const ZoneList<RegExpCapture*>* captures() const { return &captures_; }
+  void add_capture(RegExpCapture* capture, Zone* zone) {
+    captures_.Add(capture, zone);
+  }
   const ZoneVector<base::uc16>* name() const { return name_; }
   void set_name(const ZoneVector<base::uc16>* name) { name_ = name; }
 
  private:
-  RegExpCapture* capture_ = nullptr;
+  ZoneList<RegExpCapture*> captures_;
   const ZoneVector<base::uc16>* name_ = nullptr;
-  const RegExpFlags flags_;
 };
 
 
diff --git a/js/src/irregexp/imported/regexp-bytecode-generator.cc b/js/src/irregexp/imported/regexp-bytecode-generator.cc
index c83e10a598..251ed1cda5 100644
--- a/js/src/irregexp/imported/regexp-bytecode-generator.cc
+++ b/js/src/irregexp/imported/regexp-bytecode-generator.cc
@@ -383,7 +383,7 @@ Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
         isolate_, zone(), source, buffer_.data(), length(), jump_edges_);
   } else {
     array = isolate_->factory()->NewByteArray(length());
-    Copy(array->GetDataStartAddress());
+    Copy(array->begin());
   }
 
   return array;
diff --git a/js/src/irregexp/imported/regexp-bytecode-peephole.cc b/js/src/irregexp/imported/regexp-bytecode-peephole.cc
index ec8dcf1108..0ef0bab702 100644
--- a/js/src/irregexp/imported/regexp-bytecode-peephole.cc
+++ b/js/src/irregexp/imported/regexp-bytecode-peephole.cc
@@ -1012,13 +1012,13 @@ Handle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode(
   RegExpBytecodePeephole peephole(zone, length, jump_edges);
   bool did_optimize = peephole.OptimizeBytecode(bytecode, length);
   Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length());
-  peephole.CopyOptimizedBytecode(array->GetDataStartAddress());
+  peephole.CopyOptimizedBytecode(array->begin());
 
   if (did_optimize && v8_flags.trace_regexp_peephole_optimization) {
     PrintF("Original Bytecode:\n");
     RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get());
     PrintF("Optimized Bytecode:\n");
-    RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(),
+    RegExpBytecodeDisassemble(array->begin(), peephole.Length(),
                               source->ToCString().get());
   }
 
diff --git a/js/src/irregexp/imported/regexp-compiler-tonode.cc b/js/src/irregexp/imported/regexp-compiler-tonode.cc
index f5087bdb08..b1340123d8 100644
--- a/js/src/irregexp/imported/regexp-compiler-tonode.cc
+++ b/js/src/irregexp/imported/regexp-compiler-tonode.cc
@@ -3,7 +3,6 @@
 // found in the LICENSE file.
 
 #include "irregexp/imported/regexp-compiler.h"
-
 #include "irregexp/imported/regexp.h"
 
 #ifdef V8_INTL_SUPPORT
@@ -418,27 +417,6 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
 
 }  // namespace
 
-#ifdef V8_INTL_SUPPORT
-// static
-void CharacterRange::UnicodeSimpleCloseOver(icu::UnicodeSet& set) {
-  // Remove characters for which closeOver() adds full-case-folding equivalents
-  // because we should work only with simple case folding mappings.
-  icu::UnicodeSet non_simple = icu::UnicodeSet(set);
-  non_simple.retainAll(RegExpCaseFolding::UnicodeNonSimpleCloseOverSet());
-  set.removeAll(non_simple);
-
-  set.closeOver(USET_CASE_INSENSITIVE);
-  // Full case folding maps single characters to multiple characters.
-  // Those are represented as strings in the set. Remove them so that
-  // we end up with only simple and common case mappings.
-  set.removeAllStrings();
-
-  // Add characters that have non-simple case foldings again (they match
-  // themselves).
-  set.addAll(non_simple);
-}
-#endif  // V8_INTL_SUPPORT
-
 // static
 void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
                                                Zone* zone) {
@@ -460,8 +438,7 @@ void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
   }
   // Clear the ranges list without freeing the backing store.
   ranges->Rewind(0);
-
-  UnicodeSimpleCloseOver(set);
+  set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
   for (int i = 0; i < set.getRangeCount(); i++) {
     ranges->Add(Range(set.getRangeStart(i), set.getRangeEnd(i)), zone);
   }
@@ -476,7 +453,9 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
   Zone* const zone = compiler->zone();
   ZoneList<CharacterRange>* ranges = this->ranges(zone);
 
-  if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
+  const bool needs_case_folding =
+      NeedsUnicodeCaseEquivalents(compiler->flags()) && !is_case_folded();
+  if (needs_case_folding) {
     CharacterRange::AddUnicodeCaseEquivalents(ranges, zone);
   }
 
@@ -487,8 +466,7 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
 
   if (is_negated()) {
     // With /v, character classes are never negated.
-    // TODO(v8:11935): Change permalink once proposal is in stage 4.
-    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-compileatom
+    // https://tc39.es/ecma262/#sec-compileatom
     // Atom :: CharacterClass
     //   4. Assert: cc.[[Invert]] is false.
     // Instead the complement is created when evaluating the class set.
@@ -561,7 +539,12 @@ RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
     }
   }
   if (!ranges()->is_empty()) {
-    alternatives->Add(zone->template New<RegExpClassRanges>(zone, ranges()),
+    // In unicode sets mode case folding has to be done at precise locations
+    // (e.g. before building complements).
+    // It is therefore the parser's responsibility to case fold (sub-) ranges
+    // before creating ClassSetOperands.
+    alternatives->Add(zone->template New<RegExpClassRanges>(
+                          zone, ranges(), RegExpClassRanges::IS_CASE_FOLDED),
                       zone);
   }
   if (empty_string != nullptr) {
@@ -1034,9 +1017,8 @@ namespace {
 //         \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
 RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
                                           RegExpNode* on_success,
-                                          RegExpAssertion::Type type,
-                                          RegExpFlags flags) {
-  CHECK(NeedsUnicodeCaseEquivalents(flags));
+                                          RegExpAssertion::Type type) {
+  CHECK(NeedsUnicodeCaseEquivalents(compiler->flags()));
   Zone* zone = compiler->zone();
   ZoneList<CharacterRange>* word_range =
       zone->New<ZoneList<CharacterRange>>(2, zone);
@@ -1080,14 +1062,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
       return AssertionNode::AtStart(on_success);
     case Type::BOUNDARY:
       return NeedsUnicodeCaseEquivalents(compiler->flags())
-                 ? BoundaryAssertionAsLookaround(
-                       compiler, on_success, Type::BOUNDARY, compiler->flags())
+                 ? BoundaryAssertionAsLookaround(compiler, on_success,
+                                                 Type::BOUNDARY)
                  : AssertionNode::AtBoundary(on_success);
     case Type::NON_BOUNDARY:
       return NeedsUnicodeCaseEquivalents(compiler->flags())
                  ? BoundaryAssertionAsLookaround(compiler, on_success,
-                                                 Type::NON_BOUNDARY,
-                                                 compiler->flags())
+                                                 Type::NON_BOUNDARY)
                  : AssertionNode::AtNonBoundary(on_success);
     case Type::END_OF_INPUT:
       return AssertionNode::AtEnd(on_success);
@@ -1130,10 +1111,17 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
 
 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
                                         RegExpNode* on_success) {
-  return compiler->zone()->New<BackReferenceNode>(
-      RegExpCapture::StartRegister(index()),
-      RegExpCapture::EndRegister(index()), flags_, compiler->read_backward(),
-      on_success);
+  RegExpNode* backref_node = on_success;
+  // Only one of the captures in the list can actually match. Since
+  // back-references to unmatched captures are treated as empty, we can simply
+  // create back-references to all possible captures.
+  for (auto capture : *captures()) {
+    backref_node = compiler->zone()->New<BackReferenceNode>(
+        RegExpCapture::StartRegister(capture->index()),
+        RegExpCapture::EndRegister(capture->index()), compiler->read_backward(),
+        backref_node);
+  }
+  return backref_node;
 }
 
 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
@@ -1141,9 +1129,40 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
   return on_success;
 }
 
+namespace {
+
+class V8_NODISCARD ModifiersScope {
+ public:
+  ModifiersScope(RegExpCompiler* compiler, RegExpFlags flags)
+      : compiler_(compiler), previous_flags_(compiler->flags()) {
+    compiler->set_flags(flags);
+  }
+  ~ModifiersScope() { compiler_->set_flags(previous_flags_); }
+
+ private:
+  RegExpCompiler* compiler_;
+  const RegExpFlags previous_flags_;
+};
+
+}  // namespace
+
 RegExpNode* RegExpGroup::ToNode(RegExpCompiler* compiler,
                                 RegExpNode* on_success) {
-  return body_->ToNode(compiler, on_success);
+  // If no flags are modified, simply convert and return the body.
+  if (flags() == compiler->flags()) {
+    return body_->ToNode(compiler, on_success);
+  }
+  // Reset flags for successor node.
+  const RegExpFlags old_flags = compiler->flags();
+  on_success = ActionNode::ModifyFlags(old_flags, on_success);
+
+  // Convert body using modifier.
+  ModifiersScope modifiers_scope(compiler, flags());
+  RegExpNode* body = body_->ToNode(compiler, on_success);
+
+  // Wrap body into modifier node.
+  RegExpNode* modified_body = ActionNode::ModifyFlags(flags(), body);
+  return modified_body;
 }
 
 RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
diff --git a/js/src/irregexp/imported/regexp-compiler.cc b/js/src/irregexp/imported/regexp-compiler.cc
index 514975d8ed..73dfe1d2ad 100644
--- a/js/src/irregexp/imported/regexp-compiler.cc
+++ b/js/src/irregexp/imported/regexp-compiler.cc
@@ -707,6 +707,13 @@ ActionNode* ActionNode::EmptyMatchCheck(int start_register,
   return result;
 }
 
+ActionNode* ActionNode::ModifyFlags(RegExpFlags flags, RegExpNode* on_success) {
+  ActionNode* result =
+      on_success->zone()->New<ActionNode>(MODIFY_FLAGS, on_success);
+  result->data_.u_modify_flags.flags = flags;
+  return result;
+}
+
 #define DEFINE_ACCEPT(Type) \
   void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
@@ -1377,6 +1384,9 @@ void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
     on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
                                                     filled_in, not_at_start);
   } else {
+    if (action_type() == MODIFY_FLAGS) {
+      compiler->set_flags(flags());
+    }
     on_success()->GetQuickCheckDetails(details, compiler, filled_in,
                                        not_at_start);
   }
@@ -2867,7 +2877,7 @@ int BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead,
   const int kSkipArrayEntry = 0;
   const int kDontSkipArrayEntry = 1;
 
-  std::memset(boolean_skip_table->GetDataStartAddress(), kSkipArrayEntry,
+  std::memset(boolean_skip_table->begin(), kSkipArrayEntry,
               boolean_skip_table->length());
 
   for (int i = max_lookahead; i >= min_lookahead; i--) {
@@ -3454,6 +3464,11 @@ void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
       assembler->Backtrack();
       return;
     }
+    case MODIFY_FLAGS: {
+      compiler->set_flags(flags());
+      on_success()->Emit(compiler, trace);
+      break;
+    }
     default:
       UNREACHABLE();
   }
@@ -3473,8 +3488,8 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
   RecursionCheck rc(compiler);
 
   DCHECK_EQ(start_reg_ + 1, end_reg_);
-  if (IsIgnoreCase(flags_)) {
-    bool unicode = IsEitherUnicode(flags_);
+  if (IsIgnoreCase(compiler->flags())) {
+    bool unicode = IsEitherUnicode(compiler->flags());
     assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
                                                unicode, trace->backtrack());
   } else {
@@ -3485,7 +3500,7 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
   if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
 
   // Check that the back reference does not end inside a surrogate pair.
-  if (IsEitherUnicode(flags_) && !compiler->one_byte()) {
+  if (IsEitherUnicode(compiler->flags()) && !compiler->one_byte()) {
     assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
   }
   on_success()->Emit(compiler, trace);
@@ -3707,7 +3722,7 @@ class Analysis : public NodeVisitor {
   } while (false)
 
   void VisitText(TextNode* that) override {
-    that->MakeCaseIndependent(isolate(), is_one_byte_, flags_);
+    that->MakeCaseIndependent(isolate(), is_one_byte_, flags());
     EnsureAnalyzed(that->on_success());
     if (has_failed()) return;
     that->CalculateOffsets();
@@ -3715,6 +3730,9 @@ class Analysis : public NodeVisitor {
   }
 
   void VisitAction(ActionNode* that) override {
+    if (that->action_type() == ActionNode::MODIFY_FLAGS) {
+      set_flags(that->flags());
+    }
     EnsureAnalyzed(that->on_success());
     if (has_failed()) return;
     STATIC_FOR_EACH(Propagators::VisitAction(that));
@@ -3773,9 +3791,12 @@ class Analysis : public NodeVisitor {
 #undef STATIC_FOR_EACH
 
  private:
+  RegExpFlags flags() const { return flags_; }
+  void set_flags(RegExpFlags flags) { flags_ = flags; }
+
   Isolate* isolate_;
   const bool is_one_byte_;
-  const RegExpFlags flags_;
+  RegExpFlags flags_;
   RegExpError error_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
@@ -3903,13 +3924,12 @@ RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
 }
 
 RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
-                                             RegExpFlags flags,
                                              bool is_one_byte) {
   // Wrap the body of the regexp in capture #0.
   RegExpNode* captured_body =
       RegExpCapture::ToNode(data->tree, 0, this, accept());
   RegExpNode* node = captured_body;
-  if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags)) {
+  if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags())) {
     // Add a .*? at the beginning, outside the body capture, unless
     // this expression is anchored at the beginning or sticky.
     RegExpNode* loop_node = RegExpQuantifier::ToNode(
@@ -3931,13 +3951,14 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
     }
   }
   if (is_one_byte) {
-    node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
+    node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
     // Do it again to propagate the new nodes to places where they were not
     // put because they had not been calculated yet.
     if (node != nullptr) {
-      node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
+      node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags());
     }
-  } else if (IsEitherUnicode(flags) && (IsGlobal(flags) || IsSticky(flags))) {
+  } else if (IsEitherUnicode(flags()) &&
+             (IsGlobal(flags()) || IsSticky(flags()))) {
     node = OptionallyStepBackToLeadSurrogate(node);
   }
 
diff --git a/js/src/irregexp/imported/regexp-compiler.h b/js/src/irregexp/imported/regexp-compiler.h
index 91dd43ab8a..7a369430bb 100644
--- a/js/src/irregexp/imported/regexp-compiler.h
+++ b/js/src/irregexp/imported/regexp-compiler.h
@@ -501,8 +501,7 @@ class RegExpCompiler {
   // - Inserting the implicit .* before/after the regexp if necessary.
   // - If the input is a one-byte string, filtering out nodes that can't match.
   // - Fixing up regexp matches that start within a surrogate pair.
-  RegExpNode* PreprocessRegExp(RegExpCompileData* data, RegExpFlags flags,
-                               bool is_one_byte);
+  RegExpNode* PreprocessRegExp(RegExpCompileData* data, bool is_one_byte);
 
   // If the regexp matching starts within a surrogate pair, step back to the
   // lead surrogate and start matching from there.
@@ -527,7 +526,8 @@ class RegExpCompiler {
   inline void IncrementRecursionDepth() { recursion_depth_++; }
   inline void DecrementRecursionDepth() { recursion_depth_--; }
 
-  RegExpFlags flags() const { return flags_; }
+  inline RegExpFlags flags() const { return flags_; }
+  inline void set_flags(RegExpFlags flags) { flags_ = flags; }
 
   void SetRegExpTooBig() { reg_exp_too_big_ = true; }
 
@@ -571,7 +571,7 @@ class RegExpCompiler {
   int unicode_lookaround_position_register_;
   ZoneVector<RegExpNode*>* work_list_;
   int recursion_depth_;
-  const RegExpFlags flags_;
+  RegExpFlags flags_;
   RegExpMacroAssembler* macro_assembler_;
   bool one_byte_;
   bool reg_exp_too_big_;
diff --git a/js/src/irregexp/imported/regexp-dotprinter.cc b/js/src/irregexp/imported/regexp-dotprinter.cc
index 6746992a0a..cd0ca5dea8 100644
--- a/js/src/irregexp/imported/regexp-dotprinter.cc
+++ b/js/src/irregexp/imported/regexp-dotprinter.cc
@@ -231,6 +231,10 @@ void DotPrinterImpl::VisitAction(ActionNode* that) {
           << "\", shape=septagon";
       break;
     }
+    case ActionNode::MODIFY_FLAGS: {
+      os_ << "label=\"flags $" << that->flags() << "\", shape=septagon";
+      break;
+    }
   }
   os_ << "];\n";
   PrintAttributes(that);
diff --git a/js/src/irregexp/imported/regexp-interpreter.cc b/js/src/irregexp/imported/regexp-interpreter.cc
index 43c8a4a5a4..2de1b12968 100644
--- a/js/src/irregexp/imported/regexp-interpreter.cc
+++ b/js/src/irregexp/imported/regexp-interpreter.cc
@@ -88,8 +88,7 @@ int32_t Load32Aligned(const uint8_t* pc) {
   return *reinterpret_cast<const int32_t*>(pc);
 }
 
-// TODO(jgruber): Rename to Load16AlignedUnsigned.
-uint32_t Load16Aligned(const uint8_t* pc) {
+uint32_t Load16AlignedUnsigned(const uint8_t* pc) {
   DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
   return *reinterpret_cast<const uint16_t*>(pc);
 }
@@ -221,17 +220,17 @@ IrregexpInterpreter::Result MaybeThrowStackOverflow(
 template <typename Char>
 void UpdateCodeAndSubjectReferences(
     Isolate* isolate, Handle<ByteArray> code_array,
-    Handle<String> subject_string, ByteArray* code_array_out,
+    Handle<String> subject_string, Tagged<ByteArray>* code_array_out,
     const uint8_t** code_base_out, const uint8_t** pc_out,
-    String* subject_string_out,
+    Tagged<String>* subject_string_out,
     base::Vector<const Char>* subject_string_vector_out) {
   DisallowGarbageCollection no_gc;
 
-  if (*code_base_out != code_array->GetDataStartAddress()) {
+  if (*code_base_out != code_array->begin()) {
     *code_array_out = *code_array;
     const intptr_t pc_offset = *pc_out - *code_base_out;
     DCHECK_GT(pc_offset, 0);
-    *code_base_out = code_array->GetDataStartAddress();
+    *code_base_out = code_array->begin();
     *pc_out = *code_base_out + pc_offset;
   }
 
@@ -244,8 +243,9 @@ void UpdateCodeAndSubjectReferences(
 // necessary.
 template <typename Char>
 IrregexpInterpreter::Result HandleInterrupts(
-    Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
-    String* subject_string_out, const uint8_t** code_base_out,
+    Isolate* isolate, RegExp::CallOrigin call_origin,
+    Tagged<ByteArray>* code_array_out, Tagged<String>* subject_string_out,
+    const uint8_t** code_base_out,
     base::Vector<const Char>* subject_string_vector_out,
     const uint8_t** pc_out) {
   DisallowGarbageCollection no_gc;
@@ -276,12 +276,12 @@ IrregexpInterpreter::Result HandleInterrupts(
     } else if (check.InterruptRequested()) {
       const bool was_one_byte =
           String::IsOneByteRepresentationUnderneath(*subject_string_out);
-      Object result;
+      Tagged<Object> result;
       {
         AllowGarbageCollection yes_gc;
         result = isolate->stack_guard()->HandleInterrupts();
       }
-      if (result.IsException(isolate)) {
+      if (IsException(result, isolate)) {
         return IrregexpInterpreter::EXCEPTION;
       }
 
@@ -375,10 +375,10 @@ bool IndexIsInBounds(int index, int length) {
 
 template <typename Char>
 IrregexpInterpreter::Result RawMatch(
-    Isolate* isolate, ByteArray code_array, String subject_string,
-    base::Vector<const Char> subject, int* output_registers,
-    int output_register_count, int total_register_count, int current,
-    uint32_t current_char, RegExp::CallOrigin call_origin,
+    Isolate* isolate, Tagged<ByteArray> code_array,
+    Tagged<String> subject_string, base::Vector<const Char> subject,
+    int* output_registers, int output_register_count, int total_register_count,
+    int current, uint32_t current_char, RegExp::CallOrigin call_origin,
     const uint32_t backtrack_limit) {
   DisallowGarbageCollection no_gc;
 
@@ -430,7 +430,7 @@ IrregexpInterpreter::Result RawMatch(
 
 #endif  // V8_USE_COMPUTED_GOTO
 
-  const uint8_t* pc = code_array.GetDataStartAddress();
+  const uint8_t* pc = code_array->begin();
   const uint8_t* code_base = pc;
 
   InterpreterRegisters registers(total_register_count, output_registers,
@@ -702,8 +702,8 @@ IrregexpInterpreter::Result RawMatch(
     }
     BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
       uint32_t c = LoadPacked24Unsigned(insn);
-      uint32_t minus = Load16Aligned(pc + 4);
-      uint32_t mask = Load16Aligned(pc + 6);
+      uint32_t minus = Load16AlignedUnsigned(pc + 4);
+      uint32_t mask = Load16AlignedUnsigned(pc + 6);
       if (c != ((current_char - minus) & mask)) {
         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
       } else {
@@ -712,8 +712,8 @@ IrregexpInterpreter::Result RawMatch(
       DISPATCH();
     }
     BYTECODE(CHECK_CHAR_IN_RANGE) {
-      uint32_t from = Load16Aligned(pc + 4);
-      uint32_t to = Load16Aligned(pc + 6);
+      uint32_t from = Load16AlignedUnsigned(pc + 4);
+      uint32_t to = Load16AlignedUnsigned(pc + 6);
       if (from <= current_char && current_char <= to) {
         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
       } else {
@@ -722,8 +722,8 @@ IrregexpInterpreter::Result RawMatch(
       DISPATCH();
     }
     BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
-      uint32_t from = Load16Aligned(pc + 4);
-      uint32_t to = Load16Aligned(pc + 6);
+      uint32_t from = Load16AlignedUnsigned(pc + 4);
+      uint32_t to = Load16AlignedUnsigned(pc + 6);
       if (from > current_char || current_char > to) {
         SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
       } else {
@@ -914,7 +914,7 @@ IrregexpInterpreter::Result RawMatch(
     BYTECODE(SKIP_UNTIL_CHAR) {
       int32_t load_offset = LoadPacked24Signed(insn);
       int32_t advance = Load16AlignedSigned(pc + 4);
-      uint32_t c = Load16Aligned(pc + 6);
+      uint32_t c = Load16AlignedUnsigned(pc + 6);
       while (IndexIsInBounds(current + load_offset, subject.length())) {
         current_char = subject[current + load_offset];
         if (c == current_char) {
@@ -929,7 +929,7 @@ IrregexpInterpreter::Result RawMatch(
     BYTECODE(SKIP_UNTIL_CHAR_AND) {
       int32_t load_offset = LoadPacked24Signed(insn);
       int32_t advance = Load16AlignedSigned(pc + 4);
-      uint16_t c = Load16Aligned(pc + 6);
+      uint16_t c = Load16AlignedUnsigned(pc + 6);
       uint32_t mask = Load32Aligned(pc + 8);
       int32_t maximum_offset = Load32Aligned(pc + 12);
       while (static_cast<uintptr_t>(current + maximum_offset) <=
@@ -947,7 +947,7 @@ IrregexpInterpreter::Result RawMatch(
     BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
       int32_t load_offset = LoadPacked24Signed(insn);
       int32_t advance = Load16AlignedSigned(pc + 4);
-      uint16_t c = Load16Aligned(pc + 6);
+      uint16_t c = Load16AlignedUnsigned(pc + 6);
       int32_t maximum_offset = Load32Aligned(pc + 8);
       while (static_cast<uintptr_t>(current + maximum_offset) <=
              static_cast<uintptr_t>(subject.length())) {
@@ -979,7 +979,7 @@ IrregexpInterpreter::Result RawMatch(
     BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
       int32_t load_offset = LoadPacked24Signed(insn);
       int32_t advance = Load16AlignedSigned(pc + 4);
-      uint16_t limit = Load16Aligned(pc + 6);
+      uint16_t limit = Load16AlignedUnsigned(pc + 6);
       const uint8_t* table = pc + 8;
       while (IndexIsInBounds(current + load_offset, subject.length())) {
         current_char = subject[current + load_offset];
@@ -999,8 +999,8 @@ IrregexpInterpreter::Result RawMatch(
     BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
       int32_t load_offset = LoadPacked24Signed(insn);
       int32_t advance = Load32Aligned(pc + 4);
-      uint16_t c = Load16Aligned(pc + 8);
-      uint16_t c2 = Load16Aligned(pc + 10);
+      uint16_t c = Load16AlignedUnsigned(pc + 8);
+      uint16_t c2 = Load16AlignedUnsigned(pc + 10);
       while (IndexIsInBounds(current + load_offset, subject.length())) {
         current_char = subject[current + load_offset];
         // The two if-statements below are split up intentionally, as combining
@@ -1047,29 +1047,29 @@ IrregexpInterpreter::Result RawMatch(
 
 // static
 IrregexpInterpreter::Result IrregexpInterpreter::Match(
-    Isolate* isolate, JSRegExp regexp, String subject_string,
+    Isolate* isolate, Tagged<JSRegExp> regexp, Tagged<String> subject_string,
     int* output_registers, int output_register_count, int start_position,
     RegExp::CallOrigin call_origin) {
-  if (v8_flags.regexp_tier_up) regexp.TierUpTick();
+  if (v8_flags.regexp_tier_up) regexp->TierUpTick();
 
   bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
-  ByteArray code_array = ByteArray::cast(regexp.bytecode(is_one_byte));
-  int total_register_count = regexp.max_register_count();
+  Tagged<ByteArray> code_array = ByteArray::cast(regexp->bytecode(is_one_byte));
+  int total_register_count = regexp->max_register_count();
 
   return MatchInternal(isolate, code_array, subject_string, output_registers,
                        output_register_count, total_register_count,
-                       start_position, call_origin, regexp.backtrack_limit());
+                       start_position, call_origin, regexp->backtrack_limit());
 }
 
 IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
-    Isolate* isolate, ByteArray code_array, String subject_string,
-    int* output_registers, int output_register_count, int total_register_count,
-    int start_position, RegExp::CallOrigin call_origin,
-    uint32_t backtrack_limit) {
-  DCHECK(subject_string.IsFlat());
+    Isolate* isolate, Tagged<ByteArray> code_array,
+    Tagged<String> subject_string, int* output_registers,
+    int output_register_count, int total_register_count, int start_position,
+    RegExp::CallOrigin call_origin, uint32_t backtrack_limit) {
+  DCHECK(subject_string->IsFlat());
 
   // TODO(chromium:1262676): Remove this CHECK once fixed.
-  CHECK(code_array.IsByteArray());
+  CHECK(IsByteArray(code_array));
 
   // Note: Heap allocation *is* allowed in two situations if calling from
   // Runtime:
@@ -1080,7 +1080,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
   DisallowGarbageCollection no_gc;
 
   base::uc16 previous_char = '\n';
-  String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
+  String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
   // Because interrupts can result in GC and string content relocation, the
   // checksum verification in FlatContent may fail even though this code is
   // safe. See (2) above.
@@ -1122,10 +1122,10 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
   DisallowHandleAllocation no_handles;
   DisallowHandleDereference no_deref;
 
-  String subject_string = String::cast(Object(subject));
-  JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
+  Tagged<String> subject_string = String::cast(Tagged<Object>(subject));
+  Tagged<JSRegExp> regexp_obj = JSRegExp::cast(Tagged<Object>(regexp));
 
-  if (regexp_obj.MarkedForTierUp()) {
+  if (regexp_obj->MarkedForTierUp()) {
     // Returning RETRY will re-enter through runtime, where actual recompilation
     // for tier-up takes place.
     return IrregexpInterpreter::RETRY;
diff --git a/js/src/irregexp/imported/regexp-interpreter.h b/js/src/irregexp/imported/regexp-interpreter.h
index bc55be2b8c..825916291f 100644
--- a/js/src/irregexp/imported/regexp-interpreter.h
+++ b/js/src/irregexp/imported/regexp-interpreter.h
@@ -49,17 +49,18 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
                                    RegExp::CallOrigin call_origin,
                                    Isolate* isolate, Address regexp);
 
-  static Result MatchInternal(Isolate* isolate, ByteArray code_array,
-                              String subject_string, int* output_registers,
-                              int output_register_count,
+  static Result MatchInternal(Isolate* isolate, Tagged<ByteArray> code_array,
+                              Tagged<String> subject_string,
+                              int* output_registers, int output_register_count,
                               int total_register_count, int start_position,
                               RegExp::CallOrigin call_origin,
                               uint32_t backtrack_limit);
 
  private:
-  static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
-                      int* output_registers, int output_register_count,
-                      int start_position, RegExp::CallOrigin call_origin);
+  static Result Match(Isolate* isolate, Tagged<JSRegExp> regexp,
+                      Tagged<String> subject_string, int* output_registers,
+                      int output_register_count, int start_position,
+                      RegExp::CallOrigin call_origin);
 };
 
 }  // namespace internal
diff --git a/js/src/irregexp/imported/regexp-macro-assembler.cc b/js/src/irregexp/imported/regexp-macro-assembler.cc
index b4d99bf775..b99c08424e 100644
--- a/js/src/irregexp/imported/regexp-macro-assembler.cc
+++ b/js/src/irregexp/imported/regexp-macro-assembler.cc
@@ -182,24 +182,25 @@ uint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char,
   static constexpr uint32_t kTrue = 1;
   static constexpr uint32_t kFalse = 0;
 
-  FixedUInt16Array ranges = FixedUInt16Array::cast(Object(raw_byte_array));
-  DCHECK_GE(ranges.length(), 1);
+  Tagged<FixedUInt16Array> ranges =
+      FixedUInt16Array::cast(Tagged<Object>(raw_byte_array));
+  DCHECK_GE(ranges->length(), 1);
 
   // Shortcut for fully out of range chars.
-  if (current_char < ranges.get(0)) return kFalse;
-  if (current_char >= ranges.get(ranges.length() - 1)) {
+  if (current_char < ranges->get(0)) return kFalse;
+  if (current_char >= ranges->get(ranges->length() - 1)) {
     // The last range may be open-ended.
-    return (ranges.length() % 2) == 0 ? kFalse : kTrue;
+    return (ranges->length() % 2) == 0 ? kFalse : kTrue;
   }
 
   // Binary search for the matching range. `ranges` is encoded as
   // [from0, to0, from1, to1, ..., fromN, toN], or
   // [from0, to0, from1, to1, ..., fromN] (open-ended last interval).
 
-  int mid, lower = 0, upper = ranges.length();
+  int mid, lower = 0, upper = ranges->length();
   do {
     mid = lower + (upper - lower) / 2;
-    const base::uc16 elem = ranges.get(mid);
+    const base::uc16 elem = ranges->get(mid);
     if (current_char < elem) {
       upper = mid;
     } else if (current_char > elem) {
@@ -210,7 +211,7 @@ uint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char,
     }
   } while (lower < upper);
 
-  const bool current_char_ge_last_elem = current_char >= ranges.get(mid);
+  const bool current_char_ge_last_elem = current_char >= ranges->get(mid);
   const int current_range_start_index =
       current_char_ge_last_elem ? mid : mid - 1;
 
@@ -277,15 +278,16 @@ bool NativeRegExpMacroAssembler::CanReadUnaligned() const {
 // static
 int NativeRegExpMacroAssembler::CheckStackGuardState(
     Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
-    Address* return_address, InstructionStream re_code, Address* subject,
-    const uint8_t** input_start, const uint8_t** input_end) {
+    Address* return_address, Tagged<InstructionStream> re_code,
+    Address* subject, const uint8_t** input_start, const uint8_t** input_end,
+    uintptr_t gap) {
   DisallowGarbageCollection no_gc;
   Address old_pc = PointerAuthentication::AuthenticatePC(return_address, 0);
-  DCHECK_LE(re_code.instruction_start(), old_pc);
-  DCHECK_LE(old_pc, re_code.code(kAcquireLoad).instruction_end());
+  DCHECK_LE(re_code->instruction_start(), old_pc);
+  DCHECK_LE(old_pc, re_code->code(kAcquireLoad)->instruction_end());
 
   StackLimitCheck check(isolate);
-  bool js_has_overflowed = check.JsHasOverflowed();
+  bool js_has_overflowed = check.JsHasOverflowed(gap);
 
   if (call_origin == RegExp::CallOrigin::kFromJs) {
     // Direct calls from JavaScript can be interrupted in two ways:
@@ -310,7 +312,8 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
   // Prepare for possible GC.
   HandleScope handles(isolate);
   Handle<InstructionStream> code_handle(re_code, isolate);
-  Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
+  Handle<String> subject_handle(String::cast(Tagged<Object>(*subject)),
+                                isolate);
   bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
   int return_value = 0;
 
@@ -322,8 +325,8 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
       return_value = EXCEPTION;
     } else if (check.InterruptRequested()) {
       AllowGarbageCollection yes_gc;
-      Object result = isolate->stack_guard()->HandleInterrupts();
-      if (result.IsException(isolate)) return_value = EXCEPTION;
+      Tagged<Object> result = isolate->stack_guard()->HandleInterrupts();
+      if (IsException(result, isolate)) return_value = EXCEPTION;
     }
 
     // We are not using operator == here because it does a slow DCHECK
@@ -371,34 +374,34 @@ int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
   // DisallowGarbageCollection, since regexps might be preempted, and another
   // thread might do allocation anyway.
 
-  String subject_ptr = *subject;
+  Tagged<String> subject_ptr = *subject;
   // Character offsets into string.
   int start_offset = previous_index;
-  int char_length = subject_ptr.length() - start_offset;
+  int char_length = subject_ptr->length() - start_offset;
   int slice_offset = 0;
 
   // The string has been flattened, so if it is a cons string it contains the
   // full string in the first part.
   if (StringShape(subject_ptr).IsCons()) {
-    DCHECK_EQ(0, ConsString::cast(subject_ptr).second().length());
-    subject_ptr = ConsString::cast(subject_ptr).first();
+    DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
+    subject_ptr = ConsString::cast(subject_ptr)->first();
   } else if (StringShape(subject_ptr).IsSliced()) {
-    SlicedString slice = SlicedString::cast(subject_ptr);
-    subject_ptr = slice.parent();
-    slice_offset = slice.offset();
+    Tagged<SlicedString> slice = SlicedString::cast(subject_ptr);
+    subject_ptr = slice->parent();
+    slice_offset = slice->offset();
   }
   if (StringShape(subject_ptr).IsThin()) {
-    subject_ptr = ThinString::cast(subject_ptr).actual();
+    subject_ptr = ThinString::cast(subject_ptr)->actual();
   }
   // Ensure that an underlying string has the same representation.
-  bool is_one_byte = subject_ptr.IsOneByteRepresentation();
-  DCHECK(subject_ptr.IsExternalString() || subject_ptr.IsSeqString());
+  bool is_one_byte = subject_ptr->IsOneByteRepresentation();
+  DCHECK(IsExternalString(subject_ptr) || IsSeqString(subject_ptr));
   // String is now either Sequential or External
   int char_size_shift = is_one_byte ? 0 : 1;
 
   DisallowGarbageCollection no_gc;
   const uint8_t* input_start =
-      subject_ptr.AddressOfCharacterAt(start_offset + slice_offset, no_gc);
+      subject_ptr->AddressOfCharacterAt(start_offset + slice_offset, no_gc);
   int byte_length = char_length << char_size_shift;
   const uint8_t* input_end = input_start + byte_length;
   return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
@@ -407,9 +410,9 @@ int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
 
 // static
 int NativeRegExpMacroAssembler::ExecuteForTesting(
-    String input, int start_offset, const uint8_t* input_start,
+    Tagged<String> input, int start_offset, const uint8_t* input_start,
     const uint8_t* input_end, int* output, int output_size, Isolate* isolate,
-    JSRegExp regexp) {
+    Tagged<JSRegExp> regexp) {
   return Execute(input, start_offset, input_start, input_end, output,
                  output_size, isolate, regexp);
 }
@@ -419,13 +422,14 @@ int NativeRegExpMacroAssembler::ExecuteForTesting(
 // the signature of the interpreter. We should get rid of JS objects passed to
 // internal methods.
 int NativeRegExpMacroAssembler::Execute(
-    String input,  // This needs to be the unpacked (sliced, cons) string.
+    Tagged<String>
+        input,  // This needs to be the unpacked (sliced, cons) string.
     int start_offset, const uint8_t* input_start, const uint8_t* input_end,
-    int* output, int output_size, Isolate* isolate, JSRegExp regexp) {
+    int* output, int output_size, Isolate* isolate, Tagged<JSRegExp> regexp) {
   RegExpStackScope stack_scope(isolate);
 
   bool is_one_byte = String::IsOneByteRepresentationUnderneath(input);
-  Code code = Code::cast(regexp.code(is_one_byte));
+  Tagged<Code> code = Code::cast(regexp->code(isolate, is_one_byte));
   RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
 
   using RegexpMatcherSig =
@@ -439,7 +443,7 @@ int NativeRegExpMacroAssembler::Execute(
                        output, output_size, call_origin, isolate, regexp.ptr());
   DCHECK_GE(result, SMALLEST_REGEXP_RESULT);
 
-  if (result == EXCEPTION && !isolate->has_pending_exception()) {
+  if (result == EXCEPTION && !isolate->has_exception()) {
     // We detected a stack overflow (on the backtrack stack) in RegExp code,
     // but haven't created the exception yet. Additionally, we allow heap
     // allocation because even though it invalidates {input_start} and
diff --git a/js/src/irregexp/imported/regexp-macro-assembler.h b/js/src/irregexp/imported/regexp-macro-assembler.h
index af7e4f5297..6863adbaff 100644
--- a/js/src/irregexp/imported/regexp-macro-assembler.h
+++ b/js/src/irregexp/imported/regexp-macro-assembler.h
@@ -301,12 +301,10 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
                    int* offsets_vector, int offsets_vector_length,
                    int previous_index, Isolate* isolate);
 
-  V8_EXPORT_PRIVATE static int ExecuteForTesting(String input, int start_offset,
-                                                 const uint8_t* input_start,
-                                                 const uint8_t* input_end,
-                                                 int* output, int output_size,
-                                                 Isolate* isolate,
-                                                 JSRegExp regexp);
+  V8_EXPORT_PRIVATE static int ExecuteForTesting(
+      Tagged<String> input, int start_offset, const uint8_t* input_start,
+      const uint8_t* input_end, int* output, int output_size, Isolate* isolate,
+      Tagged<JSRegExp> regexp);
 
   bool CanReadUnaligned() const override;
 
@@ -330,9 +328,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
   static int CheckStackGuardState(Isolate* isolate, int start_index,
                                   RegExp::CallOrigin call_origin,
                                   Address* return_address,
-                                  InstructionStream re_code, Address* subject,
-                                  const uint8_t** input_start,
-                                  const uint8_t** input_end);
+                                  Tagged<InstructionStream> re_code,
+                                  Address* subject, const uint8_t** input_start,
+                                  const uint8_t** input_end, uintptr_t gap);
 
   static Address word_character_map_address() {
     return reinterpret_cast<Address>(&word_character_map[0]);
@@ -348,9 +346,10 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
 
  private:
   // Returns a {Result} sentinel, or the number of successful matches.
-  static int Execute(String input, int start_offset, const uint8_t* input_start,
-                     const uint8_t* input_end, int* output, int output_size,
-                     Isolate* isolate, JSRegExp regexp);
+  static int Execute(Tagged<String> input, int start_offset,
+                     const uint8_t* input_start, const uint8_t* input_end,
+                     int* output, int output_size, Isolate* isolate,
+                     Tagged<JSRegExp> regexp);
 
   ZoneUnorderedMap<uint32_t, Handle<FixedUInt16Array>> range_array_cache_;
 };
diff --git a/js/src/irregexp/imported/regexp-nodes.h b/js/src/irregexp/imported/regexp-nodes.h
index 9407f1c5ec..f3d7e6c58f 100644
--- a/js/src/irregexp/imported/regexp-nodes.h
+++ b/js/src/irregexp/imported/regexp-nodes.h
@@ -318,7 +318,8 @@ class ActionNode : public SeqRegExpNode {
     BEGIN_NEGATIVE_SUBMATCH,
     POSITIVE_SUBMATCH_SUCCESS,
     EMPTY_MATCH_CHECK,
-    CLEAR_CAPTURES
+    CLEAR_CAPTURES,
+    MODIFY_FLAGS
   };
   static ActionNode* SetRegisterForLoop(int reg, int val,
                                         RegExpNode* on_success);
@@ -341,6 +342,7 @@ class ActionNode : public SeqRegExpNode {
                                      int repetition_register,
                                      int repetition_limit,
                                      RegExpNode* on_success);
+  static ActionNode* ModifyFlags(RegExpFlags flags, RegExpNode* on_success);
   void Accept(NodeVisitor* visitor) override;
   void Emit(RegExpCompiler* compiler, Trace* trace) override;
   void GetQuickCheckDetails(QuickCheckDetails* details,
@@ -353,6 +355,10 @@ class ActionNode : public SeqRegExpNode {
   int GreedyLoopTextLength() override {
     return kNodeIsTooComplexForGreedyLoops;
   }
+  RegExpFlags flags() {
+    DCHECK_EQ(action_type(), MODIFY_FLAGS);
+    return RegExpFlags{data_.u_modify_flags.flags};
+  }
 
  private:
   union {
@@ -382,9 +388,13 @@ class ActionNode : public SeqRegExpNode {
       int range_from;
       int range_to;
     } u_clear_captures;
+    struct {
+      int flags;
+    } u_modify_flags;
   } data_;
   ActionNode(ActionType action_type, RegExpNode* on_success)
       : SeqRegExpNode(on_success), action_type_(action_type) {}
+
   ActionType action_type_;
   friend class DotPrinterImpl;
   friend Zone;
@@ -499,12 +509,11 @@ class AssertionNode : public SeqRegExpNode {
 
 class BackReferenceNode : public SeqRegExpNode {
  public:
-  BackReferenceNode(int start_reg, int end_reg, RegExpFlags flags,
-                    bool read_backward, RegExpNode* on_success)
+  BackReferenceNode(int start_reg, int end_reg, bool read_backward,
+                    RegExpNode* on_success)
       : SeqRegExpNode(on_success),
         start_reg_(start_reg),
         end_reg_(end_reg),
-        flags_(flags),
         read_backward_(read_backward) {}
   void Accept(NodeVisitor* visitor) override;
   int start_register() { return start_reg_; }
@@ -522,7 +531,6 @@ class BackReferenceNode : public SeqRegExpNode {
  private:
   int start_reg_;
   int end_reg_;
-  RegExpFlags flags_;
   bool read_backward_;
 };
 
diff --git a/js/src/irregexp/imported/regexp-parser.cc b/js/src/irregexp/imported/regexp-parser.cc
index ea2a6c6d7a..965fc567b7 100644
--- a/js/src/irregexp/imported/regexp-parser.cc
+++ b/js/src/irregexp/imported/regexp-parser.cc
@@ -13,7 +13,7 @@
 #include "unicode/unistr.h"
 #include "unicode/usetiter.h"
 #include "unicode/utf16.h"  // For U16_NEXT
-#endif  // V8_INTL_SUPPORT
+#endif                      // V8_INTL_SUPPORT
 
 namespace v8 {
 namespace internal {
@@ -67,8 +67,7 @@ class RegExpTextBuilder {
   bool ignore_case() const { return IsIgnoreCase(flags_); }
   bool IsUnicodeMode() const {
     // Either /v or /u enable UnicodeMode
-    // TODO(v8:11935): Change permalink once proposal is in stage 4.
-    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
+    // https://tc39.es/ecma262/#sec-parsepattern
     return IsUnicode(flags_) || IsUnicodeSets(flags_);
   }
   Zone* zone() const { return zone_; }
@@ -264,7 +263,7 @@ RegExpTree* RegExpTextBuilder::PopLastAtom() {
     characters_ = nullptr;
     atom = zone()->New<RegExpAtom>(char_vector);
     return atom;
-  } else if (text_.size() > 0) {
+  } else if (!text_.empty()) {
     atom = text_.back();
     text_.pop_back();
     return atom;
@@ -315,8 +314,7 @@ class RegExpBuilder {
   void FlushTerms();
   bool IsUnicodeMode() const {
     // Either /v or /u enable UnicodeMode
-    // TODO(v8:11935): Change permalink once proposal is in stage 4.
-    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
+    // https://tc39.es/ecma262/#sec-parsepattern
     return IsUnicode(flags_) || IsUnicodeSets(flags_);
   }
   Zone* zone() const { return zone_; }
@@ -354,7 +352,12 @@ class RegExpParserState : public ZoneObject {
         group_type_(group_type),
         lookaround_type_(lookaround_type),
         disjunction_capture_index_(disjunction_capture_index),
-        capture_name_(capture_name) {}
+        capture_name_(capture_name) {
+    if (previous_state != nullptr) {
+      non_participating_capture_group_interval_ =
+          previous_state->non_participating_capture_group_interval();
+    }
+  }
   // Parser state of containing expression, if any.
   RegExpParserState* previous_state() const { return previous_state_; }
   bool IsSubexpression() { return previous_state_ != nullptr; }
@@ -371,6 +374,9 @@ class RegExpParserState : public ZoneObject {
   // The name of the current sub-expression, if group_type is CAPTURE. Only
   // used for named captures.
   const ZoneVector<base::uc16>* capture_name() const { return capture_name_; }
+  std::pair<int, int> non_participating_capture_group_interval() const {
+    return non_participating_capture_group_interval_;
+  }
 
   bool IsNamedCapture() const { return capture_name_ != nullptr; }
 
@@ -398,6 +404,18 @@ class RegExpParserState : public ZoneObject {
     return false;
   }
 
+  void NewAlternative(int captures_started) {
+    if (non_participating_capture_group_interval().second != 0) {
+      // Extend the non-participating interval.
+      non_participating_capture_group_interval_.second = captures_started;
+    } else {
+      // Create new non-participating interval from the start of the current
+      // enclosing group to all captures created within that group so far.
+      non_participating_capture_group_interval_ =
+          std::make_pair(capture_index(), captures_started);
+    }
+  }
+
  private:
   // Linked list implementation of stack of states.
   RegExpParserState* const previous_state_;
@@ -411,6 +429,11 @@ class RegExpParserState : public ZoneObject {
   const int disjunction_capture_index_;
   // Stored capture name (if any).
   const ZoneVector<base::uc16>* const capture_name_;
+  // Interval of (named) capture indices ]from, to] that are not participating
+  // in the current state (i.e. they cannot match).
+  // Capture indices are not participating if they were created in a different
+  // alternative.
+  std::pair<int, int> non_participating_capture_group_interval_;
 };
 
 template <class CharT>
@@ -463,17 +486,22 @@ class RegExpParserImpl final {
   RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder,
                                    ClassSetOperandType* type_out,
                                    ZoneList<CharacterRange>* ranges,
-                                   CharacterClassStrings* strings);
+                                   CharacterClassStrings* strings,
+                                   base::uc32* character);
   base::uc32 ParseClassSetCharacter();
   // Parses and returns a single escaped character.
   base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state,
                                   bool* is_escaped_unicode_character);
 
+  void AddMaybeSimpleCaseFoldedRange(ZoneList<CharacterRange>* ranges,
+                                     CharacterRange new_range);
+
   RegExpTree* ParseClassUnion(const RegExpBuilder* builder, bool is_negated,
                               RegExpTree* first_operand,
                               ClassSetOperandType first_operand_type,
                               ZoneList<CharacterRange>* ranges,
-                              CharacterClassStrings* strings);
+                              CharacterClassStrings* strings,
+                              base::uc32 first_character);
   RegExpTree* ParseClassIntersection(const RegExpBuilder* builder,
                                      bool is_negated, RegExpTree* first_operand,
                                      ClassSetOperandType first_operand_type);
@@ -504,11 +532,10 @@ class RegExpParserImpl final {
   int captures_started() const { return captures_started_; }
   int position() const { return next_pos_ - 1; }
   bool failed() const { return failed_; }
-  RegExpFlags flags() const { return top_level_flags_; }
+  RegExpFlags flags() const { return flags_; }
   bool IsUnicodeMode() const {
     // Either /v or /u enable UnicodeMode
-    // TODO(v8:11935): Change permalink once proposal is in stage 4.
-    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
+    // https://tc39.es/ecma262/#sec-parsepattern
     return IsUnicode(flags()) || IsUnicodeSets(flags()) || force_unicode_;
   }
   bool unicode_sets() const { return IsUnicodeSets(flags()); }
@@ -528,7 +555,7 @@ class RegExpParserImpl final {
   // Creates a new named capture at the specified index. Must be called exactly
   // once for each named capture. Fails if a capture with the same name is
   // encountered.
-  bool CreateNamedCaptureAtIndex(const ZoneVector<base::uc16>* name, int index);
+  bool CreateNamedCaptureAtIndex(const RegExpParserState* state, int index);
 
   // Parses the name of a capture group (?<name>pattern). The name must adhere
   // to IdentifierName in the ECMAScript standard.
@@ -543,7 +570,7 @@ class RegExpParserImpl final {
   // to avoid complicating cases in which references comes before the capture.
   void PatchNamedBackReferences();
 
-  ZoneVector<RegExpCapture*>* GetNamedCaptures() const;
+  ZoneVector<RegExpCapture*>* GetNamedCaptures();
 
   // Returns true iff the pattern contains named captures. May call
   // ScanForCaptures to look ahead at the remaining pattern.
@@ -593,16 +620,20 @@ class RegExpParserImpl final {
   RegExpError error_ = RegExpError::kNone;
   int error_pos_ = 0;
   ZoneList<RegExpCapture*>* captures_;
-  ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
+  // Maps capture names to a list of capture indices with this name.
+  ZoneMap<RegExpCapture*, ZoneList<int>*, RegExpCaptureNameLess>*
+      named_captures_;
   ZoneList<RegExpBackReference*>* named_back_references_;
+  ZoneList<CharacterRange>* temp_ranges_;
   const CharT* const input_;
   const int input_length_;
   base::uc32 current_;
-  const RegExpFlags top_level_flags_;
+  RegExpFlags flags_;
   bool force_unicode_ = false;  // Force parser to act as if unicode were set.
   int next_pos_;
   int captures_started_;
   int capture_count_;  // Only valid after we have scanned for captures.
+  int lookaround_count_;  // Only valid after we have scanned for lookbehinds.
   bool has_more_;
   bool simple_;
   bool contains_anchor_;
@@ -625,10 +656,11 @@ RegExpParserImpl<CharT>::RegExpParserImpl(
       input_(input),
       input_length_(input_length),
       current_(kEndMarker),
-      top_level_flags_(flags),
+      flags_(flags),
       next_pos_(0),
       captures_started_(0),
       capture_count_(0),
+      lookaround_count_(0),
       has_more_(true),
       simple_(false),
       contains_anchor_(false),
@@ -909,21 +941,21 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
         // Build result of subexpression.
         if (group_type == CAPTURE) {
           if (state->IsNamedCapture()) {
-            CreateNamedCaptureAtIndex(state->capture_name(),
-                                      capture_index CHECK_FAILED);
+            CreateNamedCaptureAtIndex(state, capture_index CHECK_FAILED);
           }
           RegExpCapture* capture = GetCapture(capture_index);
           capture->set_body(body);
           body = capture;
         } else if (group_type == GROUPING) {
-          body = zone()->template New<RegExpGroup>(body);
+          body = zone()->template New<RegExpGroup>(body, builder->flags());
         } else {
           DCHECK(group_type == POSITIVE_LOOKAROUND ||
                  group_type == NEGATIVE_LOOKAROUND);
           bool is_positive = (group_type == POSITIVE_LOOKAROUND);
           body = zone()->template New<RegExpLookaround>(
               body, is_positive, end_capture_index - capture_index,
-              capture_index, state->lookaround_type());
+              capture_index, state->lookaround_type(), lookaround_count_);
+          lookaround_count_++;
         }
 
         // Restore previous state.
@@ -937,6 +969,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
       }
       case '|': {
         Advance();
+        state->NewAlternative(captures_started());
         builder->NewAlternative();
         continue;
       }
@@ -984,6 +1017,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
       case '(': {
         state = ParseOpenParenthesis(state CHECK_FAILED);
         builder = state->builder();
+        flags_ = builder->flags();
         continue;
       }
       case '[': {
@@ -1037,8 +1071,8 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
                 builder->AddEmpty();
               } else {
                 RegExpCapture* capture = GetCapture(index);
-                RegExpTree* atom = zone()->template New<RegExpBackReference>(
-                    capture, builder->flags());
+                RegExpTree* atom =
+                    zone()->template New<RegExpBackReference>(capture, zone());
                 builder->AddAtom(atom);
               }
               break;
@@ -1246,43 +1280,91 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
   bool is_named_capture = false;
   const ZoneVector<base::uc16>* capture_name = nullptr;
   SubexpressionType subexpr_type = CAPTURE;
+  RegExpFlags flags = state->builder()->flags();
+  bool parsing_modifiers = false;
+  bool modifiers_polarity = true;
+  RegExpFlags modifiers;
   Advance();
   if (current() == '?') {
-    switch (Next()) {
-      case ':':
-        Advance(2);
-        subexpr_type = GROUPING;
-        break;
-      case '=':
-        Advance(2);
-        lookaround_type = RegExpLookaround::LOOKAHEAD;
-        subexpr_type = POSITIVE_LOOKAROUND;
-        break;
-      case '!':
-        Advance(2);
-        lookaround_type = RegExpLookaround::LOOKAHEAD;
-        subexpr_type = NEGATIVE_LOOKAROUND;
-        break;
-      case '<':
-        Advance();
-        if (Next() == '=') {
+    do {
+      switch (Next()) {
+        case '-':
+          if (!v8_flags.js_regexp_modifiers) {
+            ReportError(RegExpError::kInvalidGroup);
+            return nullptr;
+          }
+          Advance();
+          parsing_modifiers = true;
+          if (modifiers_polarity == false) {
+            ReportError(RegExpError::kMultipleFlagDashes);
+            return nullptr;
+          }
+          modifiers_polarity = false;
+          break;
+        case 'm':
+        case 'i':
+        case 's': {
+          if (!v8_flags.js_regexp_modifiers) {
+            ReportError(RegExpError::kInvalidGroup);
+            return nullptr;
+          }
+          Advance();
+          parsing_modifiers = true;
+          RegExpFlag flag = TryRegExpFlagFromChar(current()).value();
+          if ((modifiers & flag) != 0) {
+            ReportError(RegExpError::kRepeatedFlag);
+            return nullptr;
+          }
+          modifiers |= flag;
+          flags.set(flag, modifiers_polarity);
+          break;
+        }
+        case ':':
+          Advance(2);
+          parsing_modifiers = false;
+          subexpr_type = GROUPING;
+          break;
+        case '=':
           Advance(2);
-          lookaround_type = RegExpLookaround::LOOKBEHIND;
+          parsing_modifiers = false;
+          lookaround_type = RegExpLookaround::LOOKAHEAD;
           subexpr_type = POSITIVE_LOOKAROUND;
           break;
-        } else if (Next() == '!') {
+        case '!':
           Advance(2);
-          lookaround_type = RegExpLookaround::LOOKBEHIND;
+          parsing_modifiers = false;
+          lookaround_type = RegExpLookaround::LOOKAHEAD;
           subexpr_type = NEGATIVE_LOOKAROUND;
           break;
-        }
-        is_named_capture = true;
-        has_named_captures_ = true;
-        Advance();
-        break;
-      default:
-        ReportError(RegExpError::kInvalidGroup);
-        return nullptr;
+        case '<':
+          Advance();
+          parsing_modifiers = false;
+          if (Next() == '=') {
+            Advance(2);
+            lookaround_type = RegExpLookaround::LOOKBEHIND;
+            subexpr_type = POSITIVE_LOOKAROUND;
+            break;
+          } else if (Next() == '!') {
+            Advance(2);
+            lookaround_type = RegExpLookaround::LOOKBEHIND;
+            subexpr_type = NEGATIVE_LOOKAROUND;
+            break;
+          }
+          is_named_capture = true;
+          has_named_captures_ = true;
+          Advance();
+          break;
+        default:
+          ReportError(RegExpError::kInvalidGroup);
+          return nullptr;
+      }
+    } while (parsing_modifiers);
+  }
+  if (modifiers_polarity == false) {
+    // We encountered a dash.
+    if (modifiers == 0) {
+      ReportError(RegExpError::kInvalidFlagGroup);
+      return nullptr;
     }
   }
   if (subexpr_type == CAPTURE) {
@@ -1299,7 +1381,7 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
   // Store current state and begin new disjunction parsing.
   return zone()->template New<RegExpParserState>(
       state, subexpr_type, lookaround_type, captures_started_, capture_name,
-      state->builder()->flags(), zone());
+      flags, zone());
 }
 
 // In order to know whether an escape is a backreference or not we have to scan
@@ -1511,7 +1593,10 @@ const ZoneVector<base::uc16>* RegExpParserImpl<CharT>::ParseCaptureGroupName() {
 
 template <class CharT>
 bool RegExpParserImpl<CharT>::CreateNamedCaptureAtIndex(
-    const ZoneVector<base::uc16>* name, int index) {
+    const RegExpParserState* state, int index) {
+  const ZoneVector<base::uc16>* name = state->capture_name();
+  const std::pair<int, int> non_participating_capture_group_interval =
+      state->non_participating_capture_group_interval();
   DCHECK(0 < index && index <= captures_started_);
   DCHECK_NOT_NULL(name);
 
@@ -1521,21 +1606,33 @@ bool RegExpParserImpl<CharT>::CreateNamedCaptureAtIndex(
   capture->set_name(name);
 
   if (named_captures_ == nullptr) {
-    named_captures_ =
-        zone_->template New<ZoneSet<RegExpCapture*, RegExpCaptureNameLess>>(
-            zone());
+    named_captures_ = zone_->template New<
+        ZoneMap<RegExpCapture*, ZoneList<int>*, RegExpCaptureNameLess>>(zone());
   } else {
     // Check for duplicates and bail if we find any.
-
     const auto& named_capture_it = named_captures_->find(capture);
     if (named_capture_it != named_captures_->end()) {
-      ReportError(RegExpError::kDuplicateCaptureGroupName);
-      return false;
+      if (v8_flags.js_regexp_duplicate_named_groups) {
+        ZoneList<int>* named_capture_indices = named_capture_it->second;
+        DCHECK_NOT_NULL(named_capture_indices);
+        DCHECK(!named_capture_indices->is_empty());
+        for (int named_index : *named_capture_indices) {
+          if (named_index < non_participating_capture_group_interval.first ||
+              named_index > non_participating_capture_group_interval.second) {
+            ReportError(RegExpError::kDuplicateCaptureGroupName);
+            return false;
+          }
+        }
+      } else {
+        ReportError(RegExpError::kDuplicateCaptureGroupName);
+        return false;
+      }
     }
   }
 
-  named_captures_->emplace(capture);
-
+  auto entry = named_captures_->try_emplace(
+      capture, zone()->template New<ZoneList<int>>(1, zone()));
+  entry.first->second->Add(index, zone());
   return true;
 }
 
@@ -1558,7 +1655,7 @@ bool RegExpParserImpl<CharT>::ParseNamedBackReference(
     builder->AddEmpty();
   } else {
     RegExpBackReference* atom =
-        zone()->template New<RegExpBackReference>(builder->flags());
+        zone()->template New<RegExpBackReference>(zone());
     atom->set_name(name);
 
     builder->AddAtom(atom);
@@ -1595,16 +1692,17 @@ void RegExpParserImpl<CharT>::PatchNamedBackReferences() {
     DCHECK_NULL(search_capture->name());
     search_capture->set_name(ref->name());
 
-    int index = -1;
     const auto& capture_it = named_captures_->find(search_capture);
-    if (capture_it != named_captures_->end()) {
-      index = (*capture_it)->index();
-    } else {
+    if (capture_it == named_captures_->end()) {
       ReportError(RegExpError::kInvalidNamedCaptureReference);
       return;
     }
 
-    ref->set_capture(GetCapture(index));
+    DCHECK_IMPLIES(!v8_flags.js_regexp_duplicate_named_groups,
+                   capture_it->second->length() == 1);
+    for (int index : *capture_it->second) {
+      ref->add_capture(GetCapture(index), zone());
+    }
   }
 }
 
@@ -1627,13 +1725,22 @@ RegExpCapture* RegExpParserImpl<CharT>::GetCapture(int index) {
 }
 
 template <class CharT>
-ZoneVector<RegExpCapture*>* RegExpParserImpl<CharT>::GetNamedCaptures() const {
-  if (named_captures_ == nullptr || named_captures_->empty()) {
+ZoneVector<RegExpCapture*>* RegExpParserImpl<CharT>::GetNamedCaptures() {
+  if (named_captures_ == nullptr) {
     return nullptr;
   }
+  DCHECK(!named_captures_->empty());
 
-  return zone()->template New<ZoneVector<RegExpCapture*>>(
-      named_captures_->begin(), named_captures_->end(), zone());
+  ZoneVector<RegExpCapture*>* flattened_named_captures =
+      zone()->template New<ZoneVector<RegExpCapture*>>(zone());
+  for (auto capture : *named_captures_) {
+    DCHECK_IMPLIES(!v8_flags.js_regexp_duplicate_named_groups,
+                   capture.second->length() == 1);
+    for (int index : *capture.second) {
+      flattened_named_captures->push_back(GetCapture(index));
+    }
+  }
+  return flattened_named_captures;
 }
 
 template <class CharT>
@@ -1890,7 +1997,7 @@ bool LookupPropertyValueName(UProperty property,
       ExtractStringsFromUnicodeSet(set, result_strings, flags, zone);
     }
     const bool needs_case_folding = IsUnicodeSets(flags) && IsIgnoreCase(flags);
-    if (needs_case_folding) CharacterRange::UnicodeSimpleCloseOver(set);
+    if (needs_case_folding) set.closeOver(USET_SIMPLE_CASE_INSENSITIVE);
     set.removeAllStrings();
     if (negate) set.complement();
     for (int i = 0; i < set.getRangeCount(); i++) {
@@ -2096,13 +2203,22 @@ bool RegExpParserImpl<CharT>::AddPropertyClassRange(
     if (!IsSupportedBinaryProperty(property, unicode_sets())) return false;
     if (!IsExactPropertyAlias(name, property)) return false;
     // Negation of properties with strings is not allowed.
-    // TODO(v8:11935): Change permalink once proposal is in stage 4.
     // See
-    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-static-semantics-maycontainstrings
+    // https://tc39.es/ecma262/#sec-static-semantics-maycontainstrings
     if (negate && IsBinaryPropertyOfStrings(property)) return false;
-    return LookupPropertyValueName(property, negate ? "N" : "Y", false,
-                                   add_to_ranges, add_to_strings, flags(),
-                                   zone());
+    if (unicode_sets()) {
+      // In /v mode we can't simply look up the "false" binary property values,
+      // as the spec requires us to perform case folding before calculating the
+      // complement.
+      // See https://tc39.es/ecma262/#sec-compiletocharset
+      // UnicodePropertyValueExpression :: LoneUnicodePropertyNameOrValue
+      return LookupPropertyValueName(property, "Y", negate, add_to_ranges,
+                                     add_to_strings, flags(), zone());
+    } else {
+      return LookupPropertyValueName(property, negate ? "N" : "Y", false,
+                                     add_to_ranges, add_to_strings, flags(),
+                                     zone());
+    }
   } else {
     // Both property name and value name are specified. Attempt to interpret
     // the property name as enumerated property.
@@ -2325,8 +2441,7 @@ base::uc32 RegExpParserImpl<CharT>::ParseCharacterEscape(
   return c;
 }
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassRanges
+// https://tc39.es/ecma262/#prod-ClassRanges
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassRanges(
     ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents) {
@@ -2475,8 +2590,7 @@ void AddClassString(ZoneList<base::uc32>* normalized_string,
 
 }  // namespace
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassStringDisjunction
+// https://tc39.es/ecma262/#prod-ClassStringDisjunction
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction(
     ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
@@ -2526,8 +2640,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction(
   return nullptr;
 }
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
+// https://tc39.es/ecma262/#prod-ClassSetOperand
 // Tree returned based on type_out:
 //  * kNestedClass: RegExpClassSetExpression
 //  * For all other types: RegExpClassSetOperand
@@ -2538,12 +2651,13 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
       zone()->template New<ZoneList<CharacterRange>>(1, zone());
   CharacterClassStrings* strings =
       zone()->template New<CharacterClassStrings>(zone());
-  RegExpTree* tree =
-      ParseClassSetOperand(builder, type_out, ranges, strings CHECK_FAILED);
+  base::uc32 character;
+  RegExpTree* tree = ParseClassSetOperand(builder, type_out, ranges, strings,
+                                          &character CHECK_FAILED);
   DCHECK_IMPLIES(*type_out != ClassSetOperandType::kNestedClass,
                  tree == nullptr);
   DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
-                 ranges->length() == 1);
+                 ranges->is_empty());
   DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
                  strings->empty());
   DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
@@ -2558,21 +2672,27 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
   // CharacterClassEscape includes \p{}, which can contain ranges, strings or
   // both and \P{}, which could contain nothing (i.e. \P{Any}).
   if (tree == nullptr) {
+    if (*type_out == ClassSetOperandType::kClassSetCharacter) {
+      AddMaybeSimpleCaseFoldedRange(ranges,
+                                    CharacterRange::Singleton(character));
+    }
     tree = zone()->template New<RegExpClassSetOperand>(ranges, strings);
   }
   return tree;
 }
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
-// Based on |type_out| either a tree is returned or ranges/strings modified.
-// If a tree is returned, ranges/strings are not modified.
-// If |type_out| is kNestedClass, a tree of type RegExpClassSetExpression is
-// returned. For all other types, ranges is modified and nullptr is returned.
+// https://tc39.es/ecma262/#prod-ClassSetOperand
+// Based on |type_out| either a tree is returned or
+// |ranges|/|strings|/|character| are modified. If a tree is returned,
+// ranges/strings are not modified. If |type_out| is kNestedClass, a tree of
+// type RegExpClassSetExpression is returned. If |type_out| is
+// kClassSetCharacter, |character| is set and nullptr returned. For all other
+// types, |ranges|/|strings|/|character| is modified and nullptr is returned.
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
     const RegExpBuilder* builder, ClassSetOperandType* type_out,
-    ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
+    ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings,
+    base::uc32* character) {
   DCHECK(unicode_sets());
   base::uc32 c = current();
   if (c == '\\') {
@@ -2599,7 +2719,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
 
   *type_out = ClassSetOperandType::kClassSetCharacter;
   c = ParseClassSetCharacter(CHECK_FAILED);
-  ranges->Add(CharacterRange::Singleton(c), zone());
+  *character = c;
   return nullptr;
 }
 
@@ -2653,13 +2773,28 @@ bool MayContainStrings(ClassSetOperandType type, RegExpTree* operand) {
 
 }  // namespace
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassUnion
+template <class CharT>
+void RegExpParserImpl<CharT>::AddMaybeSimpleCaseFoldedRange(
+    ZoneList<CharacterRange>* ranges, CharacterRange new_range) {
+  DCHECK(unicode_sets());
+  if (ignore_case()) {
+    ZoneList<CharacterRange>* new_ranges =
+        zone()->template New<ZoneList<CharacterRange>>(2, zone());
+    new_ranges->Add(new_range, zone());
+    CharacterRange::AddUnicodeCaseEquivalents(new_ranges, zone());
+    ranges->AddAll(*new_ranges, zone());
+  } else {
+    ranges->Add(new_range, zone());
+  }
+  CharacterRange::Canonicalize(ranges);
+}
+
+// https://tc39.es/ecma262/#prod-ClassUnion
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
     const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
     ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges,
-    CharacterClassStrings* strings) {
+    CharacterClassStrings* strings, base::uc32 character) {
   DCHECK(unicode_sets());
   ZoneList<RegExpTree*>* operands =
       zone()->template New<ZoneList<RegExpTree*>>(2, zone());
@@ -2673,7 +2808,6 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
     operands->Add(first_operand, zone());
   }
   ClassSetOperandType last_type = first_operand_type;
-  const bool needs_case_folding = ignore_case();
   while (has_more() && current() != ']') {
     if (current() == '-') {
       // Mix of ClassSetRange and ClassSubtraction is not allowed.
@@ -2690,42 +2824,36 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
       // represent a character range.
       // In case one of them is not a ClassSetCharacter, it is a syntax error,
       // as '-' can not be used unescaped within a class with /v.
-      // TODO(v8:11935): Change permalink once proposal is in stage 4.
       // See
-      // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetRange
+      // https://tc39.es/ecma262/#prod-ClassSetRange
       if (last_type != ClassSetOperandType::kClassSetCharacter) {
         return ReportError(RegExpError::kInvalidCharacterClass);
       }
-      ParseClassSetOperand(builder, &last_type, ranges, strings CHECK_FAILED);
+      base::uc32 from = character;
+      ParseClassSetOperand(builder, &last_type, ranges, strings,
+                           &character CHECK_FAILED);
       if (last_type != ClassSetOperandType::kClassSetCharacter) {
         return ReportError(RegExpError::kInvalidCharacterClass);
       }
-      // Remove the last two singleton characters added to ranges, and combine
-      // them into a range.
-      auto rhs_ranges = ranges->RemoveLast();
-      auto lhs_ranges = ranges->RemoveLast();
-      DCHECK(lhs_ranges.IsSingleton());
-      DCHECK(rhs_ranges.IsSingleton());
-      base::uc32 from = lhs_ranges.from();
-      base::uc32 to = rhs_ranges.from();
-      if (from > to) {
+      if (from > character) {
         return ReportError(RegExpError::kOutOfOrderCharacterClass);
       }
-      ranges->Add(CharacterRange::Range(from, to), zone());
+      AddMaybeSimpleCaseFoldedRange(ranges,
+                                    CharacterRange::Range(from, character));
       last_type = ClassSetOperandType::kClassSetRange;
     } else {
       DCHECK_NE(current(), '-');
-      RegExpTree* operand = ParseClassSetOperand(builder, &last_type, ranges,
-                                                 strings CHECK_FAILED);
+      if (last_type == ClassSetOperandType::kClassSetCharacter) {
+        AddMaybeSimpleCaseFoldedRange(ranges,
+                                      CharacterRange::Singleton(character));
+      }
+      RegExpTree* operand = ParseClassSetOperand(
+          builder, &last_type, ranges, strings, &character CHECK_FAILED);
       if (operand != nullptr) {
         may_contain_strings |= MayContainStrings(last_type, operand);
         // Add the range we started building as operand and reset the current
         // range.
         if (!ranges->is_empty() || !strings->empty()) {
-          if (needs_case_folding) {
-            CharacterRange::Canonicalize(ranges);
-            CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
-          }
           may_contain_strings |= !strings->empty();
           operands->Add(
               zone()->template New<RegExpClassSetOperand>(ranges, strings),
@@ -2742,12 +2870,12 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
     return ReportError(RegExpError::kUnterminatedCharacterClass);
   }
 
+  if (last_type == ClassSetOperandType::kClassSetCharacter) {
+    AddMaybeSimpleCaseFoldedRange(ranges, CharacterRange::Singleton(character));
+  }
+
   // Add the range we started building as operand.
   if (!ranges->is_empty() || !strings->empty()) {
-    if (needs_case_folding) {
-      CharacterRange::Canonicalize(ranges);
-      CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
-    }
     may_contain_strings |= !strings->empty();
     operands->Add(zone()->template New<RegExpClassSetOperand>(ranges, strings),
                   zone());
@@ -2773,8 +2901,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
       may_contain_strings, operands);
 }
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassIntersection
+// https://tc39.es/ecma262/#prod-ClassIntersection
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
     const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
@@ -2815,8 +2942,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
       may_contain_strings, operands);
 }
 
-// TODO(v8:11935): Change permalink once proposal is in stage 4.
-// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSubtraction
+// https://tc39.es/ecma262/#prod-ClassSubtraction
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction(
     const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
@@ -2891,12 +3017,16 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
     ClassSetOperandType operand_type;
     CharacterClassStrings* strings =
         zone()->template New<CharacterClassStrings>(zone());
-    RegExpTree* operand = ParseClassSetOperand(builder, &operand_type, ranges,
-                                               strings CHECK_FAILED);
+    base::uc32 character;
+    RegExpTree* operand = ParseClassSetOperand(
+        builder, &operand_type, ranges, strings, &character CHECK_FAILED);
     switch (current()) {
       case '-':
         if (Next() == '-') {
           if (operand == nullptr) {
+            if (operand_type == ClassSetOperandType::kClassSetCharacter) {
+              ranges->Add(CharacterRange::Singleton(character), zone());
+            }
             operand =
                 zone()->template New<RegExpClassSetOperand>(ranges, strings);
           }
@@ -2908,6 +3038,9 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
       case '&':
         if (Next() == '&') {
           if (operand == nullptr) {
+            if (operand_type == ClassSetOperandType::kClassSetCharacter) {
+              ranges->Add(CharacterRange::Singleton(character), zone());
+            }
             operand =
                 zone()->template New<RegExpClassSetOperand>(ranges, strings);
           }
@@ -2916,7 +3049,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
         }
     }
     return ParseClassUnion(builder, is_negated, operand, operand_type, ranges,
-                           strings);
+                           strings, character);
   }
 }
 
@@ -3047,7 +3180,7 @@ bool RegExpBuilder::AddQuantifierToAtom(
   RegExpTree* atom = text_builder().PopLastAtom();
   if (atom != nullptr) {
     FlushText();
-  } else if (terms_.size() > 0) {
+  } else if (!terms_.empty()) {
     atom = terms_.back();
     terms_.pop_back();
     if (atom->IsLookaround()) {
diff --git a/js/src/irregexp/imported/regexp.h b/js/src/irregexp/imported/regexp.h
index 50269a4b71..5dc9070ed9 100644
--- a/js/src/irregexp/imported/regexp.h
+++ b/js/src/irregexp/imported/regexp.h
@@ -87,8 +87,8 @@ class RegExp final : public AllStatic {
       RegExpFlags flags, uint32_t backtrack_limit);
 
   // Ensures that a regexp is fully compiled and ready to be executed on a
-  // subject string.  Returns true on success. Return false on failure, and
-  // then an exception will be pending.
+  // subject string.  Returns true on success. Throw and return false on
+  // failure.
   V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
                                                         Handle<JSRegExp> re,
                                                         Handle<String> subject);
@@ -211,14 +211,16 @@ class RegExpResultsCache final : public AllStatic {
 
   // Attempt to retrieve a cached result.  On failure, 0 is returned as a Smi.
   // On success, the returned result is guaranteed to be a COW-array.
-  static Object Lookup(Heap* heap, String key_string, Object key_pattern,
-                       FixedArray* last_match_out, ResultsCacheType type);
+  static Tagged<Object> Lookup(Heap* heap, Tagged<String> key_string,
+                               Tagged<Object> key_pattern,
+                               Tagged<FixedArray>* last_match_out,
+                               ResultsCacheType type);
   // Attempt to add value_array to the cache specified by type.  On success,
   // value_array is turned into a COW-array.
   static void Enter(Isolate* isolate, Handle<String> key_string,
                     Handle<Object> key_pattern, Handle<FixedArray> value_array,
                     Handle<FixedArray> last_match_cache, ResultsCacheType type);
-  static void Clear(FixedArray cache);
+  static void Clear(Tagged<FixedArray> cache);
 
   static constexpr int kRegExpResultsCacheSize = 0x100;
 
diff --git a/js/src/irregexp/imported/special-case.cc b/js/src/irregexp/imported/special-case.cc
index f5a9928b3a..d40ada6bb9 100644
--- a/js/src/irregexp/imported/special-case.cc
+++ b/js/src/irregexp/imported/special-case.cc
@@ -82,29 +82,6 @@ const icu::UnicodeSet& RegExpCaseFolding::SpecialAddSet() {
   return set.Pointer()->set;
 }
 
-icu::UnicodeSet BuildUnicodeNonSimpleCloseOverSet() {
-  icu::UnicodeSet set;
-  set.add(0x390);
-  set.add(0x3b0);
-  set.add(0x1fd3);
-  set.add(0x1fe3);
-  set.add(0xfb05, 0xfb06);
-  set.freeze();
-  return set;
-}
-
-struct UnicodeNonSimpleCloseOverSetData {
-  UnicodeNonSimpleCloseOverSetData() : set(BuildUnicodeNonSimpleCloseOverSet()) {}
-  const icu::UnicodeSet set;
-};
-
-//static
-const icu::UnicodeSet& RegExpCaseFolding::UnicodeNonSimpleCloseOverSet() {
-  static base::LazyInstance<UnicodeNonSimpleCloseOverSetData>::type set =
-      LAZY_INSTANCE_INITIALIZER;
-  return set.Pointer()->set;
-}
-
 
 }  // namespace internal
 }  // namespace v8
diff --git a/js/src/irregexp/imported/special-case.h b/js/src/irregexp/imported/special-case.h
index ea511af5a4..050d72a064 100644
--- a/js/src/irregexp/imported/special-case.h
+++ b/js/src/irregexp/imported/special-case.h
@@ -70,21 +70,11 @@ namespace internal {
 // another character. Characters that match no other characters in
 // their equivalence class are added to IgnoreSet. Characters that
 // match at least one other character are added to SpecialAddSet.
-//
-// For unicode ignoreCase ("iu" and "iv"),
-// UnicodeSet::closeOver(USET_CASE_INSENSITIVE) adds all characters that are in
-// the same equivalence class. This includes characaters that are in the same
-// equivalence class using full case folding. According to the spec, only
-// simple case folding shall be considered. We therefore create
-// UnicodeNonSimpleCloseOverSet containing all characters for which
-// UnicodeSet::closeOver adds characters that are not simple case folds. This
-// set should be used similar to IgnoreSet described above.
 
 class RegExpCaseFolding final : public AllStatic {
  public:
   static const icu::UnicodeSet& IgnoreSet();
   static const icu::UnicodeSet& SpecialAddSet();
-  static const icu::UnicodeSet& UnicodeNonSimpleCloseOverSet();
 
   // This implements ECMAScript 2020 21.2.2.8.2 (Runtime Semantics:
   // Canonicalize) step 3, which is used to determine whether
diff --git a/js/src/irregexp/moz.build b/js/src/irregexp/moz.build
index ff030ad4bd..2c363ad349 100644
--- a/js/src/irregexp/moz.build
+++ b/js/src/irregexp/moz.build
@@ -14,9 +14,13 @@ include("../js-cxxflags.mozbuild")
 
 CXXFLAGS += ["-Wno-error=type-limits", "-Wno-error=return-type"]
 
-# Suppress spurious warnings in third-party code. See bug 1810584.
+# Suppress spurious warnings in third-party code.
+# See bug 1810584 and bug 1879225.
 if CONFIG["CC_TYPE"] == "gcc":
-    CXXFLAGS += ["-Wno-error=nonnull"]
+    CXXFLAGS += ["-Wno-error=nonnull", "-Wno-narrowing"]
+if CONFIG["CC_TYPE"] in ("clang", "clang-cl"):
+    CXXFLAGS += ["-Wno-c++11-narrowing"]
+
 
 UNIFIED_SOURCES += [
     "imported/regexp-bytecode-generator.cc",
diff --git a/js/src/irregexp/moz.yaml b/js/src/irregexp/moz.yaml
index e230a89cfd..ca44833c24 100644
--- a/js/src/irregexp/moz.yaml
+++ b/js/src/irregexp/moz.yaml
@@ -9,8 +9,8 @@ origin:
   description: A fast regular expression engine from V8
   url: https://v8.dev
 
-  release: 30a887aeb92153885619d8bb9fa57cda7adf9276 (Thu Jul 06 11:42:30 2023).
-  revision: 30a887aeb92153885619d8bb9fa57cda7adf9276
+  release: e50ab13bbfaaf72717fd73d9a01434e4c3c1a0a8 (Thu Feb 29 03:38:59 2024).
+  revision: e50ab13bbfaaf72717fd73d9a01434e4c3c1a0a8
 
   license: BSD-3-Clause
   license-file: LICENSE.v8
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 01:13:33 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 01:13:33 +0000
commit	086c044dc34dfc0f74fbe41f4ecb402b2cd34884 (patch)
tree	a4f824bd33cb075dd5aa3eb5a0a94af221bbe83a /js/src/irregexp
parent	Adding debian version 124.0.1-1. (diff)
download	firefox-086c044dc34dfc0f74fbe41f4ecb402b2cd34884.tar.xz firefox-086c044dc34dfc0f74fbe41f4ecb402b2cd34884.zip