summary | refs | log | tree | commit | diff | stats
path: root/js/src/vm/RegExpObject.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/vm/RegExpObject.cpp')
-rw-r--r-- js/src/vm/RegExpObject.cpp 1376
1 files changed, 1376 insertions, 0 deletions
diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp
new file mode 100644
index 0000000000..256aade5f8
--- /dev/null
+++ b/js/src/vm/RegExpObject.cpp
@@ -0,0 +1,1376 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "vm/RegExpObject.h"
+
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/PodOperations.h"
+
+#include <type_traits>
+
+#include "builtin/RegExp.h"
+#include "builtin/SelfHostingDefines.h" // REGEXP_*_FLAG
+#include "frontend/FrontendContext.h" // AutoReportFrontendContext
+#include "frontend/TokenStream.h"
+#include "gc/HashUtil.h"
+#include "irregexp/RegExpAPI.h"
+#include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
+#include "js/friend/StackLimits.h" // js::ReportOverRecursed
+#include "js/Object.h" // JS::GetBuiltinClass
+#include "js/Printer.h" // js::GenericPrinter
+#include "js/RegExp.h"
+#include "js/RegExpFlags.h" // JS::RegExpFlags
+#include "util/StringBuffer.h"
+#include "util/Unicode.h"
+#include "vm/JSONPrinter.h" // js::JSONPrinter
+#include "vm/MatchPairs.h"
+#include "vm/PlainObject.h"
+#include "vm/RegExpStatics.h"
+#include "vm/StringType.h"
+
+#include "vm/JSContext-inl.h"
+#include "vm/JSObject-inl.h"
+#include "vm/NativeObject-inl.h"
+#include "vm/Shape-inl.h"
+
+using namespace js;
+
+using JS::AutoStableStringChars;
+using JS::CompileOptions;
+using JS::RegExpFlag;
+using JS::RegExpFlags;
+using mozilla::DebugOnly;
+using mozilla::PodCopy;
+
+using JS::AutoCheckCannotGC;
+
+// Each RegExpFlag bit is mirrored by a REGEXP_*_FLAG constant that is visible
+// to self-hosted JS. Fail the build if any pair ever disagrees.
+static_assert(RegExpFlag::DotAll == REGEXP_DOTALL_FLAG,
+              "self-hosted JS and /s flag bits must agree");
+static_assert(RegExpFlag::Global == REGEXP_GLOBAL_FLAG,
+              "self-hosted JS and /g flag bits must agree");
+static_assert(RegExpFlag::HasIndices == REGEXP_HASINDICES_FLAG,
+              "self-hosted JS and /d flag bits must agree");
+static_assert(RegExpFlag::IgnoreCase == REGEXP_IGNORECASE_FLAG,
+              "self-hosted JS and /i flag bits must agree");
+static_assert(RegExpFlag::Multiline == REGEXP_MULTILINE_FLAG,
+              "self-hosted JS and /m flag bits must agree");
+static_assert(RegExpFlag::Sticky == REGEXP_STICKY_FLAG,
+              "self-hosted JS and /y flag bits must agree");
+static_assert(RegExpFlag::Unicode == REGEXP_UNICODE_FLAG,
+              "self-hosted JS and /u flag bits must agree");
+static_assert(RegExpFlag::UnicodeSets == REGEXP_UNICODESETS_FLAG,
+              "self-hosted JS and /v flag bits must agree");
+
+// Allocates a RegExpObject of kind |newKind| with prototype |proto| and
+// ensures it has the initial custom RegExp shape. Returns nullptr when either
+// the allocation or the shape setup fails.
+RegExpObject* js::RegExpAlloc(JSContext* cx, NewObjectKind newKind,
+                              HandleObject proto /* = nullptr */) {
+  Rooted<RegExpObject*> obj(
+      cx, NewObjectWithClassProtoAndKind<RegExpObject>(cx, proto, newKind));
+  if (!obj || !SharedShape::ensureInitialCustomShape<RegExpObject>(cx, obj)) {
+    return nullptr;
+  }
+
+  // The shape installed above must map lastIndex to its dedicated slot.
+  MOZ_ASSERT(obj->lookupPure(cx->names().lastIndex)->slot() ==
+             RegExpObject::lastIndexSlot());
+
+  return obj;
+}
+
+/* MatchPairs */
+
+// Resizes this object to hold as many pairs as |copyFrom| and copies them
+// over. Returns false if the backing storage could not be resized.
+bool VectorMatchPairs::initArrayFrom(VectorMatchPairs& copyFrom) {
+  const size_t wanted = copyFrom.pairCount();
+  MOZ_ASSERT(wanted > 0);
+
+  const bool resized = allocOrExpandArray(wanted);
+  if (resized) {
+    // allocOrExpandArray has just updated pairs_ and pairCount_.
+    PodCopy(pairs_, copyFrom.pairs_, pairCount_);
+  }
+  return resized;
+}
+
+// Resizes the backing vector to |pairCount| entries (left uninitialized) and
+// repoints pairs_/pairCount_ at it. On failure, returns false and leaves
+// pairs_ and pairCount_ untouched.
+bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) {
+  const bool resized = vec_.resizeUninitialized(pairCount);
+  if (resized) {
+    pairs_ = &vec_[0];
+    pairCount_ = pairCount;
+  }
+  return resized;
+}
+
+/* RegExpObject */
+
+/* static */
+// Returns the RegExpShared already attached to |regexp|, or creates and
+// attaches one via createShared (which may return nullptr on failure).
+RegExpShared* RegExpObject::getShared(JSContext* cx,
+                                      Handle<RegExpObject*> regexp) {
+  return regexp->hasShared() ? regexp->getShared() : createShared(cx, regexp);
+}
+
+/* static */
+// If |native| is one of the built-in RegExp flag getters, stores the matching
+// flag bit in |*mask| and returns true. Otherwise returns false and leaves
+// |*mask| unmodified.
+bool RegExpObject::isOriginalFlagGetter(JSNative native, RegExpFlags* mask) {
+  struct GetterFlag {
+    JSNative getter;
+    uint8_t flag;
+  };
+
+  static const GetterFlag kGetterFlags[] = {
+      {regexp_hasIndices, RegExpFlag::HasIndices},
+      {regexp_global, RegExpFlag::Global},
+      {regexp_ignoreCase, RegExpFlag::IgnoreCase},
+      {regexp_multiline, RegExpFlag::Multiline},
+      {regexp_dotAll, RegExpFlag::DotAll},
+      {regexp_sticky, RegExpFlag::Sticky},
+      {regexp_unicode, RegExpFlag::Unicode},
+      {regexp_unicodeSets, RegExpFlag::UnicodeSets},
+  };
+
+  for (const GetterFlag& entry : kGetterFlags) {
+    if (native == entry.getter) {
+      *mask = entry.flag;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Final class-initialization step referenced by RegExpObjectClassSpec. It
+// always succeeds; debug builds additionally verify how the prototype stores
+// its exec property.
+static bool FinishRegExpClassInit(JSContext* cx, JS::HandleObject ctor,
+                                  JS::HandleObject proto) {
+#ifdef DEBUG
+  // InlinableNativeIRGenerator::tryAttachIntrinsicRegExpExec relies on
+  // RegExp.prototype.exec being a data property kept in a dynamic slot.
+  Handle<NativeObject*> protoObj = proto.as<NativeObject>();
+  auto execProp = protoObj->lookupPure(cx->names().exec);
+  MOZ_ASSERT(execProp->isDataProperty());
+  MOZ_ASSERT(!protoObj->isFixedSlot(execProp->slot()));
+#endif
+  return true;
+}
+
+// Class specification shared by RegExpObject::class_ and
+// RegExpObject::protoClass_.
+static const ClassSpec RegExpObjectClassSpec = {
+    // Constructor factory built around js::regexp_construct.
+    GenericCreateConstructor<js::regexp_construct, 2, gc::AllocKind::FUNCTION>,
+    // Prototype factory.
+    GenericCreatePrototype<RegExpObject>,
+    // Intentionally left null.
+    nullptr,
+    js::regexp_static_props,
+    js::regexp_methods,
+    js::regexp_properties,
+    // Runs the debug-only layout checks in FinishRegExpClassInit.
+    FinishRegExpClassInit,
+};
+
+// JSClass for RegExp instances: reserves RESERVED_SLOTS slots and caches the
+// JSProto_RegExp prototype key.
+const JSClass RegExpObject::class_ = {
+    "RegExp",
+    JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS) |
+        JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
+    JS_NULL_CLASS_OPS,
+    &RegExpObjectClassSpec,
+};
+
+// JSClass for RegExp.prototype. Unlike class_, it requests no reserved slots.
+const JSClass RegExpObject::protoClass_ = {
+    "RegExp.prototype",
+    JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
+    JS_NULL_CLASS_OPS,
+    &RegExpObjectClassSpec,
+};
+
+// Atomizes |chars[0..length)| and forwards to the atom-based create()
+// overload. Returns nullptr if atomization or creation fails.
+template <typename CharT>
+RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
+                                   size_t length, RegExpFlags flags,
+                                   NewObjectKind newKind) {
+  static_assert(std::is_same_v<CharT, char16_t>,
+                "this code may need updating if/when CharT encodes UTF-8");
+
+  Rooted<JSAtom*> patternAtom(cx, AtomizeChars(cx, chars, length));
+  if (patternAtom) {
+    return create(cx, patternAtom, flags, newKind);
+  }
+  return nullptr;
+}
+
+// Only the char16_t flavor is instantiated.
+template RegExpObject* RegExpObject::create(JSContext* cx,
+                                            const char16_t* chars,
+                                            size_t length, RegExpFlags flags,
+                                            NewObjectKind newKind);
+
+// Creates a RegExpObject for |source| and |flags| with lastIndex zeroed.
+// This function does not run a pattern syntax check of its own.
+// Returns nullptr if allocation fails.
+RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
+                                                Handle<JSAtom*> source,
+                                                RegExpFlags flags,
+                                                NewObjectKind newKind) {
+  Rooted<RegExpObject*> result(cx, RegExpAlloc(cx, newKind));
+  if (result) {
+    result->initAndZeroLastIndex(source, flags, cx);
+  }
+  return result;
+}
+
+// Checks the syntax of |source| under |flags| and, if it is valid, allocates
+// and initializes a RegExpObject with lastIndex zeroed. Returns nullptr when
+// the syntax check or the allocation fails.
+RegExpObject* RegExpObject::create(JSContext* cx, Handle<JSAtom*> source,
+                                   RegExpFlags flags, NewObjectKind newKind) {
+  Rooted<RegExpObject*> result(cx);
+  {
+    // The frontend context, token stream and allocation scope live only for
+    // the duration of this inner block.
+    AutoReportFrontendContext fc(cx);
+    CompileOptions options(cx);
+    frontend::DummyTokenStream tokenStream(&fc, options);
+
+    LifoAllocScope allocScope(&cx->tempLifoAlloc());
+    const bool syntaxOk = irregexp::CheckPatternSyntax(
+        cx, cx->stackLimitForCurrentPrincipal(), tokenStream, source, flags);
+    if (!syntaxOk) {
+      return nullptr;
+    }
+
+    result = RegExpAlloc(cx, newKind);
+    if (!result) {
+      return nullptr;
+    }
+
+    result->initAndZeroLastIndex(source, flags, cx);
+
+    // A freshly initialized object has no RegExpShared attached yet.
+    MOZ_ASSERT(!result->hasShared());
+  }
+  return result;
+}
+
+/* static */
+// Obtains the zone's RegExpShared for |regexp|'s source and flags and attaches
+// it to |regexp|. |regexp| must not already have one. Returns nullptr if the
+// zone lookup/creation fails.
+RegExpShared* RegExpObject::createShared(JSContext* cx,
+                                         Handle<RegExpObject*> regexp) {
+  MOZ_ASSERT(!regexp->hasShared());
+
+  Rooted<JSAtom*> pattern(cx, regexp->getSource());
+  RegExpShared* result =
+      cx->zone()->regExps().get(cx, pattern, regexp->getFlags());
+  if (result) {
+    regexp->setShared(result);
+    MOZ_ASSERT(regexp->hasShared());
+  }
+
+  return result;
+}
+
+// Adds the lastIndex property to the still-empty |self| in reserved slot
+// LAST_INDEX_SLOT and returns the resulting shared shape, or nullptr if the
+// property could not be added.
+SharedShape* RegExpObject::assignInitialShape(JSContext* cx,
+                                              Handle<RegExpObject*> self) {
+  MOZ_ASSERT(self->empty());
+  static_assert(LAST_INDEX_SLOT == 0);
+
+  // Writable is the only property flag set on lastIndex.
+  const bool added = NativeObject::addPropertyInReservedSlot(
+      cx, self, cx->names().lastIndex, LAST_INDEX_SLOT,
+      {PropertyFlag::Writable});
+  if (!added) {
+    return nullptr;
+  }
+
+  return self->sharedShape();
+}
+
+// Stores |source| and |flags| on this object without touching lastIndex.
+void RegExpObject::initIgnoringLastIndex(JSAtom* source, RegExpFlags flags) {
+  // On re-initialization an attached RegExpShared may carry flags that differ
+  // from |flags|, so drop it before recording the new state.
+  clearShared();
+
+  setSource(source);
+  setFlags(flags);
+}
+
+// Stores |source| and |flags| on this object (see initIgnoringLastIndex) and
+// then resets lastIndex through zeroLastIndex.
+void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
+                                        JSContext* cx) {
+  initIgnoringLastIndex(source, flags);
+  zeroLastIndex(cx);
+}
+
+#if defined(DEBUG) || defined(JS_JITSPEW)
+// Walks the eight bits of |flags| from least to most significant. For every
+// set bit that corresponds to a named RegExpFlag, calls
+// |known(name, flagLetter)|; for any other set bit, calls |unknown(bit)|.
+template <typename KnownF, typename UnknownF>
+void ForEachRegExpFlag(JS::RegExpFlags flags, KnownF known, UnknownF unknown) {
+  uint8_t raw = flags.value();
+
+  // |i| becomes 0 (ending the loop) once the top bit is shifted out.
+  for (uint8_t i = 1; i; i = i << 1) {
+    if (!(raw & i)) {
+      continue;
+    }
+    switch (raw & i) {
+      case RegExpFlag::HasIndices:
+        known("HasIndices", "d");
+        break;
+      case RegExpFlag::Global:
+        known("Global", "g");
+        break;
+      case RegExpFlag::IgnoreCase:
+        known("IgnoreCase", "i");
+        break;
+      case RegExpFlag::Multiline:
+        known("Multiline", "m");
+        break;
+      case RegExpFlag::DotAll:
+        known("DotAll", "s");
+        break;
+      case RegExpFlag::Unicode:
+        known("Unicode", "u");
+        break;
+      case RegExpFlag::UnicodeSets:
+        // Without this case a /v regexp was reported as an unknown flag.
+        known("UnicodeSets", "v");
+        break;
+      case RegExpFlag::Sticky:
+        known("Sticky", "y");
+        break;
+      default:
+        unknown(i);
+        break;
+    }
+  }
+}
+
+// Writes this object's "source", "flags" and "lastIndex" properties to |json|.
+void RegExpObject::dumpOwnFields(js::JSONPrinter& json) const {
+  js::GenericPrinter& sourceOut = json.beginStringProperty("source");
+  getSource()->dumpPropertyName(sourceOut);
+  json.endStringProperty();
+
+  // Recognized flags are listed by name; any other set bit is shown in hex.
+  auto emitKnown = [&](const char* name, const char*) {
+    json.value("%s", name);
+  };
+  auto emitUnknown = [&](uint8_t bit) { json.value("Unknown(%02x)", bit); };
+
+  json.beginInlineListProperty("flags");
+  ForEachRegExpFlag(getFlags(), emitKnown, emitUnknown);
+  json.endInlineList();
+
+  js::GenericPrinter& lastIndexOut = json.beginStringProperty("lastIndex");
+  getLastIndex().dumpStringContent(lastIndexOut);
+  json.endStringProperty();
+}
+
+// Prints this regexp to |out| as "/source/" followed by the letters of the
+// flags that ForEachRegExpFlag recognizes. Unrecognized flag bits are skipped.
+void RegExpObject::dumpOwnStringContent(js::GenericPrinter& out) const {
+  out.put("/");
+  getSource()->dumpCharsNoQuote(out);
+  out.put("/");
+
+  auto emitLetter = [&](const char*, const char* letter) { out.put(letter); };
+  auto ignoreUnknown = [](uint8_t) {};
+  ForEachRegExpFlag(getFlags(), emitLetter, ignoreUnknown);
+}
+#endif /* defined(DEBUG) || defined(JS_JITSPEW) */
+
+// Latin-1 flavor: only LF and CR count as line terminators.
+static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const JS::Latin1Char c) {
+  switch (c) {
+    case '\n':
+    case '\r':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Two-byte flavor: LF, CR, U+2028 and U+2029 count as line terminators.
+static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const char16_t c) {
+  switch (c) {
+    case '\n':
+    case '\r':
+    case 0x2028:
+    case 0x2029:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Appends to |sb| the escape letter for the Latin-1 line terminator |c|
+// ('n' for LF, 'r' for CR); the caller supplies the backslash. Returns false
+// if the append fails and crashes on any other character.
+static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(
+    StringBuffer& sb, const JS::Latin1Char c) {
+  if (c == '\n') {
+    return sb.append('n');
+  }
+  if (c == '\r') {
+    return sb.append('r');
+  }
+  MOZ_CRASH("Bad LineTerminator");
+}
+
+// Appends to |sb| the escape text for the two-byte line terminator |c|
+// ("n", "r", "u2028" or "u2029"); the caller supplies the backslash. Returns
+// false if the append fails and crashes on any other character.
+static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(StringBuffer& sb,
+                                                          const char16_t c) {
+  if (c == '\n') {
+    return sb.append('n');
+  }
+  if (c == '\r') {
+    return sb.append('r');
+  }
+  if (c == 0x2028) {
+    return sb.append("u2028");
+  }
+  if (c == 0x2029) {
+    return sb.append("u2029");
+  }
+  MOZ_CRASH("Bad LineTerminator");
+}
+
+// Prepares |sb| for escaping output: switches it to two-byte storage when
+// CharT is char16_t, reserves room for |oldLen + 1| characters, and copies the
+// already-scanned prefix |oldChars[0..it)| into it. Returns false if either
+// the storage switch or the reservation fails.
+template <typename CharT>
+static MOZ_ALWAYS_INLINE bool SetupBuffer(StringBuffer& sb,
+                                          const CharT* oldChars, size_t oldLen,
+                                          const CharT* it) {
+  if constexpr (std::is_same_v<CharT, char16_t>) {
+    if (!sb.ensureTwoByteChars()) {
+      return false;
+    }
+  }
+
+  if (!sb.reserve(oldLen + 1)) {
+    return false;
+  }
+
+  // The reservation above covers the prefix, so this append cannot fail.
+  const size_t prefixLength = size_t(it - oldChars);
+  sb.infallibleAppend(oldChars, prefixLength);
+  return true;
+}
+
+// Escapes forward slashes that occur outside a character class, and every
+// line terminator, in |oldChars[0..oldLen)|, writing the result to |sb|.
+//
+// Note: |sb| is filled lazily. It stays empty if no escaping was needed, in
+// which case the caller should use the original characters. Returns false if
+// appending to |sb| fails.
+template <typename CharT>
+static bool EscapeRegExpPattern(StringBuffer& sb, const CharT* oldChars,
+                                size_t oldLen) {
+  bool insideClass = false;
+  bool afterBackslash = false;
+  const CharT* const end = oldChars + oldLen;
+
+  for (const CharT* cur = oldChars; cur < end; ++cur) {
+    const CharT ch = *cur;
+
+    // On the first character that needs escaping, seed |sb| with everything
+    // scanned so far.
+    auto ensureBufferStarted = [&]() {
+      return !sb.empty() || SetupBuffer(sb, oldChars, oldLen, cur);
+    };
+
+    if (!afterBackslash) {
+      if (insideClass) {
+        if (ch == ']') {
+          insideClass = false;
+        }
+      } else if (ch == '[') {
+        insideClass = true;
+      } else if (ch == '/') {
+        // An unescaped slash outside a class: emit the backslash here; the
+        // slash itself is appended below.
+        if (!ensureBufferStarted() || !sb.append('\\')) {
+          return false;
+        }
+      }
+    }
+
+    if (IsRegExpLineTerminator(ch)) {
+      if (!ensureBufferStarted()) {
+        return false;
+      }
+      // Reuse a preceding backslash instead of emitting a second one.
+      if (!afterBackslash && !sb.append('\\')) {
+        return false;
+      }
+      if (!AppendEscapedLineTerminator(sb, ch)) {
+        return false;
+      }
+    } else if (!sb.empty() && !sb.append(ch)) {
+      return false;
+    }
+
+    // A backslash escapes exactly the next character.
+    afterBackslash = !afterBackslash && ch == '\\';
+  }
+
+  return true;
+}
+
+// ES6 draft rev32 21.2.3.2.4.
+//
+// Returns the escaped form of |src|: the emptyRegExp_ name for an empty
+// source, |src| itself when nothing needed escaping, and a newly built string
+// otherwise. Returns nullptr on failure.
+JSLinearString* js::EscapeRegExpPattern(JSContext* cx, Handle<JSAtom*> src) {
+  // Step 2.
+  const size_t length = src->length();
+  if (length == 0) {
+    return cx->names().emptyRegExp_;
+  }
+
+  // |sb| is only written to once a character that needs escaping is found.
+  JSStringBuilder sb(cx);
+  bool escaped;
+  {
+    JS::AutoCheckCannotGC nogc;
+    escaped = src->hasLatin1Chars()
+                  ? ::EscapeRegExpPattern(sb, src->latin1Chars(nogc), length)
+                  : ::EscapeRegExpPattern(sb, src->twoByteChars(nogc), length);
+  }
+  if (!escaped) {
+    return nullptr;
+  }
+
+  // Step 3.
+  if (!sb.empty()) {
+    return sb.finishString();
+  }
+  return src;
+}
+
+// ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
+//
+// Builds "/" + escaped source + "/" + flag letters for |obj|. The flag letters
+// are emitted in the order d, g, i, m, s, u, v, y. Returns nullptr on failure.
+JSLinearString* RegExpObject::toString(JSContext* cx,
+                                       Handle<RegExpObject*> obj) {
+  // Steps 3-4.
+  Rooted<JSAtom*> src(cx, obj->getSource());
+  if (!src) {
+    return nullptr;
+  }
+  Rooted<JSLinearString*> escapedSrc(cx, EscapeRegExpPattern(cx, src));
+  if (!escapedSrc) {
+    // EscapeRegExpPattern returns nullptr when escaping fails; propagate the
+    // failure rather than dereferencing a null string below.
+    return nullptr;
+  }
+
+  // Step 7.
+  JSStringBuilder sb(cx);
+  size_t len = escapedSrc->length();
+  if (!sb.reserve(len + 2)) {
+    return nullptr;
+  }
+  sb.infallibleAppend('/');
+  if (!sb.append(escapedSrc)) {
+    return nullptr;
+  }
+  sb.infallibleAppend('/');
+
+  // Steps 5-7.
+  if (obj->hasIndices() && !sb.append('d')) {
+    return nullptr;
+  }
+  if (obj->global() && !sb.append('g')) {
+    return nullptr;
+  }
+  if (obj->ignoreCase() && !sb.append('i')) {
+    return nullptr;
+  }
+  if (obj->multiline() && !sb.append('m')) {
+    return nullptr;
+  }
+  if (obj->dotAll() && !sb.append('s')) {
+    return nullptr;
+  }
+  if (obj->unicode() && !sb.append('u')) {
+    return nullptr;
+  }
+  if (obj->unicodeSets() && !sb.append('v')) {
+    return nullptr;
+  }
+  if (obj->sticky() && !sb.append('y')) {
+    return nullptr;
+  }
+
+  return sb.finishString();
+}
+
+// Returns true if |ch| is a SyntaxCharacter
+// (ES 2016 draft Mar 25, 2016 21.2.1).
+template <typename CharT>
+static MOZ_ALWAYS_INLINE bool IsRegExpMetaChar(CharT ch) {
+  switch (ch) {
+    // Anchors, escape and wildcard.
+    case '^':
+    case '$':
+    case '\\':
+    case '.':
+    // Quantifiers.
+    case '*':
+    case '+':
+    case '?':
+    // Grouping, classes and braces.
+    case '(':
+    case ')':
+    case '[':
+    case ']':
+    case '{':
+    case '}':
+    // Alternation.
+    case '|':
+      return true;
+  }
+  return false;
+}
+
+// Returns true if any of |chars[0..length)| is a regexp SyntaxCharacter.
+template <typename CharT>
+bool js::HasRegExpMetaChars(const CharT* chars, size_t length) {
+  const CharT* const end = chars + length;
+  for (const CharT* cur = chars; cur != end; ++cur) {
+    if (IsRegExpMetaChar<CharT>(*cur)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Explicit instantiations for both string encodings.
+template bool js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars,
+                                                 size_t length);
+
+template bool js::HasRegExpMetaChars<char16_t>(const char16_t* chars,
+                                               size_t length);
+
+// Returns true if |str| contains a regexp SyntaxCharacter, dispatching on the
+// string's character encoding.
+bool js::StringHasRegExpMetaChars(JSLinearString* str) {
+  AutoCheckCannotGC nogc;
+  const size_t length = str->length();
+  return str->hasLatin1Chars()
+             ? HasRegExpMetaChars(str->latin1Chars(nogc), length)
+             : HasRegExpMetaChars(str->twoByteChars(nogc), length);
+}
+
+/* RegExpShared */
+
+// Records |source| in the cell header and |flags|; the pair count starts at
+// zero.
+RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
+    : CellWithTenuredGCPointer(source),
+      pairCount_(0),
+      flags(flags) {}
+
+// Traces the GC edges held by this RegExpShared: always the source in the cell
+// header, then either the pattern atom (Atom kind) or the jit code of every
+// compilation plus the groups template (all other kinds).
+void RegExpShared::traceChildren(JSTracer* trc) {
+  TraceNullableCellHeaderEdge(trc, this, "RegExpShared source");
+
+  if (kind() == RegExpShared::Kind::Atom) {
+    TraceNullableEdge(trc, &patternAtom_, "RegExpShared pattern atom");
+    return;
+  }
+
+  for (auto& compilation : compilationArray) {
+    TraceNullableEdge(trc, &compilation.jitCode, "RegExpShared code");
+  }
+  TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
+}
+
+// Drops the jit code reference of every compilation and frees the tables that
+// the JIT code used.
+void RegExpShared::discardJitCode() {
+  for (auto& compilation : compilationArray) {
+    compilation.jitCode = nullptr;
+  }
+
+  // With the code gone, its tables can be purged as well.
+  tables.clearAndFree();
+}
+
+// Releases the memory owned by this cell: each compilation's bytecode, the
+// named-capture index array, and the JIT code tables.
+void RegExpShared::finalize(JS::GCContext* gcx) {
+  for (auto& compilation : compilationArray) {
+    if (!compilation.byteCode) {
+      continue;
+    }
+    const size_t byteCodeSize = compilation.byteCodeLength();
+    gcx->free_(this, compilation.byteCode, byteCodeSize,
+               MemoryUse::RegExpSharedBytecode);
+  }
+
+  if (namedCaptureIndices_) {
+    const size_t indicesSize = numNamedCaptures() * sizeof(uint32_t);
+    gcx->free_(this, namedCaptureIndices_, indicesSize,
+               MemoryUse::RegExpSharedNamedCaptureData);
+  }
+
+  tables.~JitCodeTables();
+}
+
+/* static */
+// Makes sure |re| is compiled for |input|'s encoding in the requested
+// |codeKind|, compiling it if needed. CodeKind::Any lets this function choose
+// between bytecode and jitcode. Returns false if compilation fails.
+bool RegExpShared::compileIfNecessary(JSContext* cx,
+                                      MutableHandleRegExpShared re,
+                                      Handle<JSLinearString*> input,
+                                      RegExpShared::CodeKind codeKind) {
+  if (codeKind == CodeKind::Any) {
+    // Regexps start out interpreted and are compiled once they are hot
+    // enough. Very long inputs tier up eagerly.
+    const bool wantJit = re->markedForTierUp() || input->length() > 1000;
+    codeKind = wantJit ? CodeKind::Jitcode : CodeKind::Bytecode;
+  }
+
+  // Without native codegen, bytecode is the only option.
+  if (!IsNativeRegExpEnabled() && codeKind == CodeKind::Jitcode) {
+    codeKind = CodeKind::Bytecode;
+  }
+
+  // Unparsed regexps always need compiling; RegExp-kind ones only when the
+  // requested code is missing. Other kinds never do.
+  const Kind reKind = re->kind();
+  const bool needsCompile =
+      reKind == Kind::Unparsed ||
+      (reKind == Kind::RegExp &&
+       !re->isCompiled(input->hasLatin1Chars(), codeKind));
+  if (!needsCompile) {
+    return true;
+  }
+  return irregexp::CompilePattern(cx, re, input, codeKind);
+}
+
+/* static */
+// Runs |re| against |input| starting at |start|, storing the match pairs in
+// |matches|. Compiles the regexp on demand. Returns Success or
+// Success_NotFound, or Error on compilation failure, out-of-memory, a pending
+// or failed interrupt, or when execution keeps being interrupted.
+RegExpRunStatus RegExpShared::execute(JSContext* cx,
+                                      MutableHandleRegExpShared re,
+                                      Handle<JSLinearString*> input,
+                                      size_t start, VectorMatchPairs* matches) {
+  MOZ_ASSERT(matches);
+
+  // TODO: Add tracelogger support
+
+  /* Compile the code at point-of-use. */
+  if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
+    return RegExpRunStatus::Error;
+  }
+
+  /*
+   * Ensure sufficient memory for output vector.
+   * No need to initialize it. The RegExp engine fills them in on a match.
+   *
+   * This single allocation serves both the atom path and the irregexp path
+   * below; the pair count does not change between them.
+   */
+  if (!matches->allocOrExpandArray(re->pairCount())) {
+    ReportOutOfMemory(cx);
+    return RegExpRunStatus::Error;
+  }
+
+  if (re->kind() == RegExpShared::Kind::Atom) {
+    return RegExpShared::executeAtom(re, input, start, matches);
+  }
+
+  uint32_t interruptRetries = 0;
+  const uint32_t maxInterruptRetries = 4;
+  do {
+    DebugOnly<bool> alreadyThrowing = cx->isExceptionPending();
+    RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
+#ifdef DEBUG
+    // Check if we must simulate the interruption
+    if (js::irregexp::IsolateShouldSimulateInterrupt(cx->isolate)) {
+      js::irregexp::IsolateClearShouldSimulateInterrupt(cx->isolate);
+      cx->requestInterrupt(InterruptReason::CallbackUrgent);
+    }
+#endif
+    if (result == RegExpRunStatus::Error) {
+      /* Execute can return RegExpRunStatus::Error:
+       *
+       *  1. If the native stack overflowed
+       *  2. If the backtrack stack overflowed
+       *  3. If an interrupt was requested during execution.
+       *
+       * In the first two cases, we want to throw an error. In the
+       * third case, we want to handle the interrupt and try again.
+       * We cap the number of times we will retry.
+       */
+      if (cx->isExceptionPending()) {
+        // If this regexp is being executed by recovery instructions
+        // while bailing out to handle an exception, there may already
+        // be an exception pending. If so, just return that exception
+        // instead of reporting a new one.
+        MOZ_ASSERT(alreadyThrowing);
+        return RegExpRunStatus::Error;
+      }
+      if (cx->hasAnyPendingInterrupt()) {
+        if (!CheckForInterrupt(cx)) {
+          return RegExpRunStatus::Error;
+        }
+        if (interruptRetries++ < maxInterruptRetries) {
+          // The initial execution may have been interpreted, or the
+          // interrupt may have triggered a GC that discarded jitcode.
+          // To maximize the chance of succeeding before being
+          // interrupted again, we want to ensure we are compiled.
+          if (!compileIfNecessary(cx, re, input,
+                                  RegExpShared::CodeKind::Jitcode)) {
+            return RegExpRunStatus::Error;
+          }
+          continue;
+        }
+      }
+      // If we have run out of retries, this regexp takes too long to execute.
+      ReportOverRecursed(cx);
+      return RegExpRunStatus::Error;
+    }
+
+    MOZ_ASSERT(result == RegExpRunStatus::Success ||
+               result == RegExpRunStatus::Success_NotFound);
+
+    return result;
+  } while (true);
+
+  MOZ_CRASH("Unreachable");
+}
+
+// Switches a still-unparsed RegExpShared to the Atom kind, which matches
+// |pattern| and produces exactly one match pair.
+void RegExpShared::useAtomMatch(Handle<JSAtom*> pattern) {
+  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
+
+  kind_ = RegExpShared::Kind::Atom;
+  patternAtom_ = pattern;
+  pairCount_ = 1;
+}
+
+// Switches a still-unparsed RegExpShared to the RegExp kind with |pairCount|
+// match pairs, and arms the tier-up counter with the configured warm-up
+// threshold.
+void RegExpShared::useRegExpMatch(size_t pairCount) {
+  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
+
+  kind_ = RegExpShared::Kind::RegExp;
+  pairCount_ = pairCount;
+  ticks_ = jit::JitOptions.regexpWarmUpThreshold;
+}
+
+/* static */
+// Stores the named-capture data (count, groups template and index array) on
+// |re|, which must not have any yet, and accounts for the index array's
+// memory against the cell.
+void RegExpShared::InitializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
+                                           uint32_t numNamedCaptures,
+                                           Handle<PlainObject*> templateObject,
+                                           uint32_t* captureIndices) {
+  MOZ_ASSERT(!re->groupsTemplate_);
+  MOZ_ASSERT(!re->namedCaptureIndices_);
+
+  re->numNamedCaptures_ = numNamedCaptures;
+  re->groupsTemplate_ = templateObject;
+  re->namedCaptureIndices_ = captureIndices;
+
+  // finalize() frees the array under the same MemoryUse category.
+  uint32_t indicesSize = numNamedCaptures * sizeof(uint32_t);
+  js::AddCellMemory(re, indicesSize, MemoryUse::RegExpSharedNamedCaptureData);
+}
+
+// Counts one tick towards tier-up; the counter stops at zero. Only valid for
+// RegExp-kind shareds.
+void RegExpShared::tierUpTick() {
+  MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
+  if (ticks_ == 0) {
+    return;
+  }
+  ticks_--;
+}
+
+// Returns true when native regexp compilation is enabled, this is a
+// RegExp-kind shared, and its tick counter has reached zero.
+bool RegExpShared::markedForTierUp() const {
+  return IsNativeRegExpEnabled() && kind() == RegExpShared::Kind::RegExp &&
+         ticks_ == 0;
+}
+
+// When either unicode flag is set and |index| points to a trail surrogate,
+// step back to the corresponding lead surrogate.
+//
+// Returns |index - 1| if |index| lands between the two halves of a surrogate
+// pair in |input|, and |index| unchanged otherwise.
+static size_t StepBackToLeadSurrogate(const JSLinearString* input,
+                                      size_t index) {
+  // Only an interior position of a two-byte string can point at the trail
+  // surrogate of a surrogate pair.
+  const bool mayPointAtTrail =
+      index != 0 && index < input->length() && !input->hasLatin1Chars();
+  if (!mayPointAtTrail) {
+    return index;
+  }
+
+  /*
+   * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
+   * 21.2.2.2 step 2.
+   *   Let listIndex be the index into Input of the character that was obtained
+   *   from element index of str.
+   *
+   * In the spec, pattern match is performed with decoded Unicode code points,
+   * but our implementation performs it with UTF-16 encoded strings. In step 2,
+   * we should decrement lastIndex (index) if it points to a trail surrogate
+   * that has a corresponding lead surrogate.
+   *
+   *   var r = /\uD83D\uDC38/ug;
+   *   r.lastIndex = 1;
+   *   var str = "\uD83D\uDC38";
+   *   var result = r.exec(str); // pattern match starts from index 0
+   *   print(result.index);      // prints 0
+   *
+   * Note: This doesn't match the current spec text and results in different
+   * values for `result.index` under certain conditions. However, the spec will
+   * change to match our implementation's behavior.
+   * See https://github.com/tc39/ecma262/issues/128.
+   */
+  JS::AutoCheckCannotGC nogc;
+  const auto* units = input->twoByteChars(nogc);
+  const bool splitsPair = unicode::IsTrailSurrogate(units[index]) &&
+                          unicode::IsLeadSurrogate(units[index - 1]);
+  return splitsPair ? index - 1 : index;
+}
+
+// Matches the literal pattern atom of |re| against |input| from |start|.
+// Sticky regexps only match exactly at |start|; others search forward. On a
+// match, fills (*matches)[0] and returns Success; otherwise returns
+// Success_NotFound.
+static RegExpRunStatus ExecuteAtomImpl(RegExpShared* re, JSLinearString* input,
+                                       size_t start, MatchPairs* matches) {
+  MOZ_ASSERT(re->pairCount() == 1);
+  const size_t inputLength = input->length();
+  const size_t patternLength = re->patternAtom()->length();
+
+  if (re->unicode() || re->unicodeSets()) {
+    start = StepBackToLeadSurrogate(input, start);
+  }
+
+  // Records a match of the whole pattern beginning at |begin|.
+  auto reportMatchAt = [&](size_t begin) {
+    (*matches)[0].start = begin;
+    (*matches)[0].limit = begin + patternLength;
+    matches->checkAgainst(inputLength);
+    return RegExpRunStatus::Success;
+  };
+
+  if (re->sticky()) {
+    const size_t matchEnd = patternLength + start;
+    // |matchEnd < patternLength| means the addition wrapped around.
+    if (matchEnd < patternLength || matchEnd > inputLength) {
+      return RegExpRunStatus::Success_NotFound;
+    }
+    if (!HasSubstringAt(input, re->patternAtom(), start)) {
+      return RegExpRunStatus::Success_NotFound;
+    }
+    return reportMatchAt(start);
+  }
+
+  int foundAt = StringFindPattern(input, re->patternAtom(), start);
+  if (foundAt == -1) {
+    return RegExpRunStatus::Success_NotFound;
+  }
+  return reportMatchAt(foundAt);
+}
+
+// Raw-pointer entry point for atom matching: forwards to ExecuteAtomImpl
+// under an AutoUnsafeCallWithABI guard.
+RegExpRunStatus js::ExecuteRegExpAtomRaw(RegExpShared* re,
+                                         JSLinearString* input, size_t start,
+                                         MatchPairs* matchPairs) {
+  AutoUnsafeCallWithABI unsafe;
+  const RegExpRunStatus status =
+      ExecuteAtomImpl(re, input, start, matchPairs);
+  return status;
+}
+
+/* static */
+// Handle-based entry point for atom matching; forwards to ExecuteAtomImpl.
+RegExpRunStatus RegExpShared::executeAtom(MutableHandleRegExpShared re,
+                                          Handle<JSLinearString*> input,
+                                          size_t start,
+                                          VectorMatchPairs* matches) {
+  const RegExpRunStatus status = ExecuteAtomImpl(re, input, start, matches);
+  return status;
+}
+
+// Sums the malloc'ed memory reachable from this cell: the bytecode of each
+// compilation, the tables container, and each table it holds.
+size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
+  size_t total = 0;
+
+  for (const auto& compilation : compilationArray) {
+    if (!compilation.byteCode) {
+      continue;
+    }
+    total += mallocSizeOf(compilation.byteCode);
+  }
+
+  total += tables.sizeOfExcludingThis(mallocSizeOf);
+  const size_t tableCount = tables.length();
+  for (size_t index = 0; index < tableCount; index++) {
+    total += mallocSizeOf(tables[index].get());
+  }
+
+  return total;
+}
+
+/* RegExpRealm */
+
+// Starts with every cached shape unset.
+RegExpRealm::RegExpRealm()
+    : optimizableRegExpPrototypeShape_(nullptr),
+      optimizableRegExpInstanceShape_(nullptr) {
+  for (auto& cachedShape : matchResultShapes_) {
+    cachedShape = nullptr;
+  }
+}
+
+// Builds and caches the shape used for match result arrays of |kind| by
+// defining placeholder properties on a fresh empty array. |kind| must not be
+// cached yet. Returns nullptr if the template array cannot be created or a
+// property cannot be defined.
+SharedShape* RegExpRealm::createMatchResultShape(JSContext* cx,
+                                                 ResultShapeKind kind) {
+  MOZ_ASSERT(!matchResultShapes_[kind]);
+
+  /* Create template array object */
+  Rooted<ArrayObject*> templateObject(cx, NewDenseEmptyArray(cx));
+  if (!templateObject) {
+    return nullptr;
+  }
+
+  if (kind == ResultShapeKind::Indices) {
+    /* The |indices| array only has a |groups| property. */
+    if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
+                                  UndefinedHandleValue, JSPROP_ENUMERATE)) {
+      return nullptr;
+    }
+    MOZ_ASSERT(templateObject->getLastProperty().slot() == IndicesGroupsSlot);
+  } else {
+    /* Placeholder |index| property */
+    if (!NativeDefineDataProperty(cx, templateObject, cx->names().index,
+                                  UndefinedHandleValue, JSPROP_ENUMERATE)) {
+      return nullptr;
+    }
+    MOZ_ASSERT(templateObject->getLastProperty().slot() ==
+               MatchResultObjectIndexSlot);
+
+    /* Placeholder |input| property */
+    if (!NativeDefineDataProperty(cx, templateObject, cx->names().input,
+                                  UndefinedHandleValue, JSPROP_ENUMERATE)) {
+      return nullptr;
+    }
+    MOZ_ASSERT(templateObject->getLastProperty().slot() ==
+               MatchResultObjectInputSlot);
+
+    /* Placeholder |groups| property */
+    if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
+                                  UndefinedHandleValue, JSPROP_ENUMERATE)) {
+      return nullptr;
+    }
+    MOZ_ASSERT(templateObject->getLastProperty().slot() ==
+               MatchResultObjectGroupsSlot);
+
+    if (kind == ResultShapeKind::WithIndices) {
+      /* Placeholder |indices| property */
+      if (!NativeDefineDataProperty(cx, templateObject, cx->names().indices,
+                                    UndefinedHandleValue, JSPROP_ENUMERATE)) {
+        return nullptr;
+      }
+      MOZ_ASSERT(templateObject->getLastProperty().slot() ==
+                 MatchResultObjectIndicesSlot);
+    }
+
+#ifdef DEBUG
+    if (kind == ResultShapeKind::Normal) {
+      MOZ_ASSERT(templateObject->numFixedSlots() == 0);
+      MOZ_ASSERT(templateObject->numDynamicSlots() ==
+                 MatchResultObjectNumDynamicSlots);
+      MOZ_ASSERT(templateObject->slotSpan() == MatchResultObjectSlotSpan);
+    }
+#endif
+  }
+
+  // Both branches end the same way: cache the template's shape and return it.
+  matchResultShapes_[kind].set(templateObject->sharedShape());
+  return matchResultShapes_[kind];
+}
+
+// Traces the RegExpStatics (when present) and every cached shape.
+void RegExpRealm::trace(JSTracer* trc) {
+  if (regExpStatics) {
+    regExpStatics->trace(trc);
+  }
+
+  for (auto& cachedShape : matchResultShapes_) {
+    TraceNullableEdge(trc, &cachedShape, "RegExpRealm::matchResultShapes_");
+  }
+
+  TraceNullableEdge(trc, &optimizableRegExpPrototypeShape_,
+                    "RegExpRealm::optimizableRegExpPrototypeShape_");
+  TraceNullableEdge(trc, &optimizableRegExpInstanceShape_,
+                    "RegExpRealm::optimizableRegExpInstanceShape_");
+}
+
+// Returns the zone's RegExpShared for the (source, flags) key, allocating and
+// registering a new one if none exists yet. Returns nullptr if the allocation
+// or the insertion into the set fails.
+RegExpShared* RegExpZone::get(JSContext* cx, Handle<JSAtom*> source,
+                              RegExpFlags flags) {
+  DependentAddPtr<Set> entry(cx, set_, Key(source, flags));
+  if (entry) {
+    return *entry;
+  }
+
+  RegExpShared* created = cx->newCell<RegExpShared>(source, flags);
+  if (!created || !entry.add(cx, set_, Key(source, flags), created)) {
+    return nullptr;
+  }
+
+  return created;
+}
+
+// Reports the malloc'ed size of this object plus that of its set.
+size_t RegExpZone::sizeOfIncludingThis(
+    mozilla::MallocSizeOf mallocSizeOf) const {
+  const size_t selfSize = mallocSizeOf(this);
+  return selfSize + set_.sizeOfExcludingThis(mallocSizeOf);
+}
+
+// The set is constructed from |zone| (passed for both of its arguments).
+RegExpZone::RegExpZone(Zone* zone)
+    : set_(zone, zone) {}
+
+/* Functions */
+
+// Creates a new RegExpObject that reuses |regex|'s shape and RegExpShared,
+// with lastIndex zeroed. Returns nullptr if allocating the clone or obtaining
+// the RegExpShared fails.
+JSObject* js::CloneRegExpObject(JSContext* cx, Handle<RegExpObject*> regex) {
+  constexpr gc::AllocKind allocKind = RegExpObject::AllocKind;
+  static_assert(gc::GetGCKindSlots(allocKind) == RegExpObject::RESERVED_SLOTS);
+  MOZ_ASSERT(regex->asTenured().getAllocKind() == allocKind);
+
+  Rooted<SharedShape*> sourceShape(cx, regex->sharedShape());
+  Rooted<RegExpObject*> copy(
+      cx, NativeObject::create<RegExpObject>(cx, allocKind, gc::Heap::Default,
+                                             sourceShape));
+  if (!copy) {
+    return nullptr;
+  }
+
+  RegExpShared* sharedData = RegExpObject::getShared(cx, regex);
+  if (!sharedData) {
+    return nullptr;
+  }
+
+  // initAndZeroLastIndex clears any shared pointer, so attach it afterwards.
+  copy->initAndZeroLastIndex(sharedData->getSource(), sharedData->getFlags(),
+                             cx);
+  copy->setShared(sharedData);
+
+  return copy;
+}
+
+// Parses the flag letters in |chars[0..length)| into |*flagsOut|. Fails on an
+// unknown letter, on a repeated letter, or when both 'u' and 'v' appear; in
+// each case the offending character is stored in |*invalidFlag| and false is
+// returned.
+template <typename CharT>
+static bool ParseRegExpFlags(const CharT* chars, size_t length,
+                             RegExpFlags* flagsOut, char16_t* invalidFlag) {
+  *flagsOut = RegExpFlag::NoFlags;
+
+  // Records the offending character and signals failure.
+  auto reject = [invalidFlag](CharT badChar) {
+    *invalidFlag = badChar;
+    return false;
+  };
+
+  const CharT* const end = chars + length;
+  for (const CharT* cur = chars; cur != end; ++cur) {
+    const CharT ch = *cur;
+
+    uint8_t flag;
+    switch (ch) {
+      case 'd':
+        flag = RegExpFlag::HasIndices;
+        break;
+      case 'g':
+        flag = RegExpFlag::Global;
+        break;
+      case 'i':
+        flag = RegExpFlag::IgnoreCase;
+        break;
+      case 'm':
+        flag = RegExpFlag::Multiline;
+        break;
+      case 's':
+        flag = RegExpFlag::DotAll;
+        break;
+      case 'u':
+        flag = RegExpFlag::Unicode;
+        break;
+      case 'v':
+        flag = RegExpFlag::UnicodeSets;
+        break;
+      case 'y':
+        flag = RegExpFlag::Sticky;
+        break;
+      default:
+        return reject(ch);
+    }
+
+    // Each flag may appear at most once.
+    if (*flagsOut & flag) {
+      return reject(ch);
+    }
+
+    // /u and /v flags are mutually exclusive.
+    if ((*flagsOut & RegExpFlag::Unicode) &&
+        (flag & RegExpFlag::UnicodeSets)) {
+      return reject(ch);
+    }
+    if ((*flagsOut & RegExpFlag::UnicodeSets) &&
+        (flag & RegExpFlag::Unicode)) {
+      return reject(ch);
+    }
+
+    *flagsOut |= flag;
+  }
+
+  return true;
+}
+
+// Parses |flagStr| into |*flagsOut|. On an invalid flag character, reports
+// JSMSG_BAD_REGEXP_FLAG naming that character and returns false. Also returns
+// false if the string cannot be linearized or the character cannot be
+// converted to UTF-8 for the message.
+bool js::ParseRegExpFlags(JSContext* cx, JSString* flagStr,
+                          RegExpFlags* flagsOut) {
+  JSLinearString* linear = flagStr->ensureLinear(cx);
+  if (!linear) {
+    return false;
+  }
+
+  const size_t length = linear->length();
+
+  // Only written by the parser when it fails, and only read in that case.
+  char16_t invalidFlag;
+  bool parsed;
+  {
+    AutoCheckCannotGC nogc;
+    parsed = linear->hasLatin1Chars()
+                 ? ::ParseRegExpFlags(linear->latin1Chars(nogc), length,
+                                      flagsOut, &invalidFlag)
+                 : ::ParseRegExpFlags(linear->twoByteChars(nogc), length,
+                                      flagsOut, &invalidFlag);
+  }
+  if (parsed) {
+    return true;
+  }
+
+  JS::TwoByteChars range(&invalidFlag, 1);
+  UniqueChars utf8(JS::CharsToNewUTF8CharsZ(cx, range).c_str());
+  if (utf8) {
+    JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
+                             JSMSG_BAD_REGEXP_FLAG, utf8.get());
+  }
+  return false;
+}
+
+// Size reported for a RegExpShared node: the arena thing size for the
+// REGEXP_SHARED alloc kind plus whatever the referent's
+// sizeOfExcludingThis(mallocSizeOf) returns.
+JS::ubi::Node::Size JS::ubi::Concrete<RegExpShared>::size(
+    mozilla::MallocSizeOf mallocSizeOf) const {
+  const auto cellBytes =
+      js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED);
+  const auto ownedBytes = get().sizeOfExcludingThis(mallocSizeOf);
+  return cellBytes + ownedBytes;
+}
+
+/*
+ * Regular Expressions.
+ */
+// Creates a RegExp object from the |length| bytes at |bytes| and |flags|.
+// The bytes are first inflated to two-byte characters with InflateString;
+// returns nullptr if that inflation fails, otherwise whatever
+// RegExpObject::create returns.
+JS_PUBLIC_API JSObject* JS::NewRegExpObject(JSContext* cx, const char* bytes,
+                                            size_t length, RegExpFlags flags) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  UniqueTwoByteChars inflated(InflateString(cx, bytes, length));
+  return inflated ? RegExpObject::create(cx, inflated.get(), length, flags,
+                                         GenericObject)
+                  : nullptr;
+}
+
+// Two-byte counterpart of JS::NewRegExpObject: forwards |chars|, |length| and
+// |flags| straight to RegExpObject::create and returns its result.
+JS_PUBLIC_API JSObject* JS::NewUCRegExpObject(JSContext* cx,
+                                              const char16_t* chars,
+                                              size_t length,
+                                              RegExpFlags flags) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  JSObject* created =
+      RegExpObject::create(cx, chars, length, flags, GenericObject);
+  return created;
+}
+
+// Resets the RegExpStatics of the global |obj| with |input|.
+// |obj| is treated as a GlobalObject. Returns false when the statics cannot
+// be obtained, true once they have been reset.
+JS_PUBLIC_API bool JS::SetRegExpInput(JSContext* cx, HandleObject obj,
+                                      HandleString input) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+  cx->check(input);
+
+  Handle<GlobalObject*> global = obj.as<GlobalObject>();
+  if (RegExpStatics* statics = GlobalObject::getRegExpStatics(cx, global)) {
+    statics->reset(input);
+    return true;
+  }
+  return false;
+}
+
+// Clears the RegExpStatics of the global |obj|.
+// |obj| must be non-null (asserted) and is treated as a GlobalObject. Returns
+// false when the statics cannot be obtained, true once they have been cleared.
+JS_PUBLIC_API bool JS::ClearRegExpStatics(JSContext* cx, HandleObject obj) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+  MOZ_ASSERT(obj);
+
+  Handle<GlobalObject*> global = obj.as<GlobalObject>();
+  if (RegExpStatics* statics = GlobalObject::getRegExpStatics(cx, global)) {
+    statics->clear();
+    return true;
+  }
+  return false;
+}
+
+// Runs the regexp |reobj| against the |length| characters at |chars|, using
+// the RegExpStatics of the global |obj|.
+//
+// The characters are copied into a new string, which is then passed together
+// with |indexp|, |test| and |rval| to ExecuteRegExpLegacy. Returns false if
+// the statics or the string copy cannot be obtained; otherwise returns the
+// result of ExecuteRegExpLegacy.
+JS_PUBLIC_API bool JS::ExecuteRegExp(JSContext* cx, HandleObject obj,
+                                     HandleObject reobj, const char16_t* chars,
+                                     size_t length, size_t* indexp, bool test,
+                                     MutableHandleValue rval) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  // Look up the statics before allocating the subject string.
+  RegExpStatics* statics =
+      GlobalObject::getRegExpStatics(cx, obj.as<GlobalObject>());
+  if (!statics) {
+    return false;
+  }
+
+  Rooted<JSLinearString*> subject(cx,
+                                  NewStringCopyN<CanGC>(cx, chars, length));
+  if (!subject) {
+    return false;
+  }
+
+  Handle<RegExpObject*> regexp = reobj.as<RegExpObject>();
+  return ExecuteRegExpLegacy(cx, statics, regexp, subject, indexp, test, rval);
+}
+
+// Variant of JS::ExecuteRegExp that passes a null RegExpStatics pointer.
+//
+// |obj| is the regexp object itself. The |length| characters at |chars| are
+// copied into a new string and handed to ExecuteRegExpLegacy. Returns false if
+// the copy fails; otherwise returns the result of ExecuteRegExpLegacy.
+JS_PUBLIC_API bool JS::ExecuteRegExpNoStatics(JSContext* cx, HandleObject obj,
+                                              const char16_t* chars,
+                                              size_t length, size_t* indexp,
+                                              bool test,
+                                              MutableHandleValue rval) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  Rooted<JSLinearString*> subject(cx,
+                                  NewStringCopyN<CanGC>(cx, chars, length));
+  if (!subject) {
+    return false;
+  }
+
+  Handle<RegExpObject*> regexp = obj.as<RegExpObject>();
+  return ExecuteRegExpLegacy(cx, nullptr, regexp, subject, indexp, test, rval);
+}
+
+// Determines whether |obj|'s builtin class is ESClass::RegExp.
+//
+// Returns false, leaving |*isRegExp| untouched, if GetBuiltinClass fails.
+// Otherwise stores the answer in |*isRegExp| and returns true.
+JS_PUBLIC_API bool JS::ObjectIsRegExp(JSContext* cx, HandleObject obj,
+                                      bool* isRegExp) {
+  cx->check(obj);
+
+  ESClass builtinClass;
+  const bool classified = GetBuiltinClass(cx, obj, &builtinClass);
+  if (classified) {
+    *isRegExp = (builtinClass == ESClass::RegExp);
+  }
+  return classified;
+}
+
+// Returns the flags of the RegExpShared obtained from |obj| via
+// RegExpToShared, or RegExpFlag::NoFlags when no RegExpShared is obtained.
+JS_PUBLIC_API RegExpFlags JS::GetRegExpFlags(JSContext* cx, HandleObject obj) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  if (RegExpShared* sharedData = RegExpToShared(cx, obj)) {
+    return sharedData->getFlags();
+  }
+  return RegExpFlag::NoFlags;
+}
+
+// Returns the source of the RegExpShared obtained from |obj| via
+// RegExpToShared, or nullptr when no RegExpShared is obtained.
+JS_PUBLIC_API JSString* JS::GetRegExpSource(JSContext* cx, HandleObject obj) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  if (RegExpShared* sharedData = RegExpToShared(cx, obj)) {
+    return sharedData->getSource();
+  }
+  return nullptr;
+}
+
+// Checks the syntax of the pattern given by |chars|/|length| under |flags|.
+//
+// |error| is set to undefined after the check. If the check fails, the
+// frontend error is converted into a runtime error; if the context is then
+// throwing out-of-memory or over-recursion, the function fails, since those
+// can occur even for a valid pattern. Otherwise the pending exception is moved
+// into |error| and cleared from the context.
+//
+// Returns true when the check itself ran to a verdict (inspect |error| for the
+// outcome) and false when converting the error, an OOM/over-recursion
+// condition, or fetching the pending exception made that impossible.
+JS_PUBLIC_API bool JS::CheckRegExpSyntax(JSContext* cx, const char16_t* chars,
+                                         size_t length, RegExpFlags flags,
+                                         MutableHandleValue error) {
+  AssertHeapIsIdle();
+  CHECK_THREAD(cx);
+
+  // Scaffolding required by the pattern checker: a frontend context plus a
+  // dummy token stream built on dummy compile options.
+  AutoReportFrontendContext fc(cx);
+  CompileOptions dummyOptions(cx);
+  frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions);
+
+  LifoAllocScope allocScope(&cx->tempLifoAlloc());
+
+  mozilla::Range<const char16_t> pattern(chars, length);
+  const bool syntaxOk = irregexp::CheckPatternSyntax(
+      cx->tempLifoAlloc(), cx->stackLimitForCurrentPrincipal(),
+      dummyTokenStream, pattern, flags);
+
+  error.set(UndefinedValue());
+  if (syntaxOk) {
+    return true;
+  }
+
+  // We can fail because of OOM or over-recursion even if the syntax is valid.
+  if (!fc.convertToRuntimeErrorAndClear() || cx->isThrowingOutOfMemory() ||
+      cx->isThrowingOverRecursed()) {
+    return false;
+  }
+
+  // Hand the syntax error to the caller instead of leaving it pending.
+  if (!cx->getPendingException(error)) {
+    return false;
+  }
+  cx->clearPendingException();
+  return true;
+}