/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "vm/RegExpObject.h"

#include "mozilla/MemoryReporting.h"
#include "mozilla/PodOperations.h"

#include <type_traits>

#include "builtin/RegExp.h"
#include "builtin/SelfHostingDefines.h"  // REGEXP_*_FLAG
#include "frontend/FrontendContext.h"    // AutoReportFrontendContext
#include "frontend/TokenStream.h"
#include "gc/HashUtil.h"
#include "irregexp/RegExpAPI.h"
#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
#include "js/friend/StackLimits.h"    // js::ReportOverRecursed
#include "js/Object.h"                // JS::GetBuiltinClass
#include "js/Printer.h"               // js::GenericPrinter
#include "js/RegExp.h"
#include "js/RegExpFlags.h"  // JS::RegExpFlags
#include "util/StringBuffer.h"
#include "util/Unicode.h"
#include "vm/JSONPrinter.h"  // js::JSONPrinter
#include "vm/MatchPairs.h"
#include "vm/PlainObject.h"
#include "vm/RegExpStatics.h"
#include "vm/StringType.h"

#include "vm/JSContext-inl.h"
#include "vm/JSObject-inl.h"
#include "vm/NativeObject-inl.h"
#include "vm/Shape-inl.h"

using namespace js;

using JS::AutoStableStringChars;
using JS::CompileOptions;
using JS::RegExpFlag;
using JS::RegExpFlags;
using mozilla::DebugOnly;
using mozilla::PodCopy;

using JS::AutoCheckCannotGC;

// The flag bit values are duplicated in self-hosted JS; keep both in sync.
static_assert(RegExpFlag::HasIndices == REGEXP_HASINDICES_FLAG,
              "self-hosted JS and /d flag bits must agree");
static_assert(RegExpFlag::Global == REGEXP_GLOBAL_FLAG,
              "self-hosted JS and /g flag bits must agree");
static_assert(RegExpFlag::IgnoreCase == REGEXP_IGNORECASE_FLAG,
              "self-hosted JS and /i flag bits must agree");
static_assert(RegExpFlag::Multiline == REGEXP_MULTILINE_FLAG,
              "self-hosted JS and /m flag bits must agree");
static_assert(RegExpFlag::DotAll == REGEXP_DOTALL_FLAG,
              "self-hosted JS and /s flag bits must agree");
static_assert(RegExpFlag::Unicode == REGEXP_UNICODE_FLAG,
              "self-hosted JS and /u flag bits must agree");
static_assert(RegExpFlag::UnicodeSets == REGEXP_UNICODESETS_FLAG,
              "self-hosted JS and /v flag bits must agree");
static_assert(RegExpFlag::Sticky == REGEXP_STICKY_FLAG,
              "self-hosted JS and /y flag bits must agree");

// Allocate a RegExpObject with the given prototype and allocation kind and
// make sure it carries the initial custom shape. Returns nullptr on failure.
RegExpObject* js::RegExpAlloc(JSContext* cx, NewObjectKind newKind,
                              HandleObject proto /* = nullptr */) {
  Rooted<RegExpObject*> regexp(
      cx, NewObjectWithClassProtoAndKind<RegExpObject>(cx, proto, newKind));
  if (!regexp) {
    return nullptr;
  }

  if (!SharedShape::ensureInitialCustomShape<RegExpObject>(cx, regexp)) {
    return nullptr;
  }

  MOZ_ASSERT(regexp->lookupPure(cx->names().lastIndex)->slot() ==
             RegExpObject::lastIndexSlot());

  return regexp;
}

/* MatchPairs */

// Resize this vector to |copyFrom|'s pair count and copy its pairs over.
// Returns false if the resize fails.
bool VectorMatchPairs::initArrayFrom(VectorMatchPairs& copyFrom) {
  MOZ_ASSERT(copyFrom.pairCount() > 0);

  if (!allocOrExpandArray(copyFrom.pairCount())) {
    return false;
  }

  PodCopy(pairs_, copyFrom.pairs_, pairCount_);
  return true;
}

// Resize the backing vector to |pairCount| entries (left uninitialized) and
// repoint |pairs_| at its storage. Returns false if the resize fails.
bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) {
  if (!vec_.resizeUninitialized(pairCount)) {
    return false;
  }

  pairs_ = &vec_[0];
  pairCount_ = pairCount;
  return true;
}

/* RegExpObject */

// Return the RegExpShared already attached to |regexp|, or create and attach
// one via createShared(). Returns nullptr on failure.
/* static */
RegExpShared* RegExpObject::getShared(JSContext* cx,
                                      Handle<RegExpObject*> regexp) {
  if (regexp->hasShared()) {
    return regexp->getShared();
  }

  return createShared(cx, regexp);
}

// If |native| is one of the built-in flag getters, store the corresponding
// flag in |*mask| and return true; otherwise return false and leave |*mask|
// untouched.
/* static */
bool RegExpObject::isOriginalFlagGetter(JSNative native, RegExpFlags* mask) {
  if (native == regexp_hasIndices) {
    *mask = RegExpFlag::HasIndices;
    return true;
  }
  if (native == regexp_global) {
    *mask = RegExpFlag::Global;
    return true;
  }
  if (native == regexp_ignoreCase) {
    *mask = RegExpFlag::IgnoreCase;
    return true;
  }
  if (native == regexp_multiline) {
    *mask = RegExpFlag::Multiline;
    return true;
  }
  if (native == regexp_dotAll) {
    *mask = RegExpFlag::DotAll;
    return true;
  }
  if (native == regexp_sticky) {
    *mask = RegExpFlag::Sticky;
    return true;
  }
  if (native == regexp_unicode) {
    *mask = RegExpFlag::Unicode;
    return true;
  }
  if (native == regexp_unicodeSets) {
    *mask = RegExpFlag::UnicodeSets;
    return true;
  }

  return false;
}

// ClassSpec finish hook. Only performs debug-build sanity checks.
static bool FinishRegExpClassInit(JSContext* cx, JS::HandleObject ctor,
                                  JS::HandleObject proto) {
#ifdef DEBUG
  // Assert RegExp.prototype.exec is usually stored in a dynamic slot. The
  // optimization in InlinableNativeIRGenerator::tryAttachIntrinsicRegExpExec
  // depends on this.
  Handle<NativeObject*> nproto = proto.as<NativeObject>();
  auto prop = nproto->lookupPure(cx->names().exec);
  MOZ_ASSERT(prop->isDataProperty());
  MOZ_ASSERT(!nproto->isFixedSlot(prop->slot()));
#endif
  return true;
}

// NOTE(review): the template arguments of GenericCreateConstructor /
// GenericCreatePrototype were lost in this copy of the file; the values below
// are reconstructed -- confirm against the class declaration.
static const ClassSpec RegExpObjectClassSpec = {
    GenericCreateConstructor<js::regexp_construct, 2, gc::AllocKind::FUNCTION>,
    GenericCreatePrototype<RegExpObject>,
    nullptr,
    js::regexp_static_props,
    js::regexp_methods,
    js::regexp_properties,
    FinishRegExpClassInit};

const JSClass RegExpObject::class_ = {
    "RegExp",
    JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS) |
        JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
    JS_NULL_CLASS_OPS, &RegExpObjectClassSpec};

const JSClass RegExpObject::protoClass_ = {
    "RegExp.prototype", JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
    JS_NULL_CLASS_OPS, &RegExpObjectClassSpec};

// Atomize |chars| and create a RegExpObject from the resulting atom.
template <typename CharT>
RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
                                   size_t length, RegExpFlags flags,
                                   NewObjectKind newKind) {
  static_assert(std::is_same_v<CharT, char16_t>,
                "this code may need updating if/when CharT encodes UTF-8");

  Rooted<JSAtom*> source(cx, AtomizeChars(cx, chars, length));
  if (!source) {
    return nullptr;
  }

  return create(cx, source, flags, newKind);
}

template RegExpObject* RegExpObject::create(JSContext* cx,
                                            const char16_t* chars,
                                            size_t length, RegExpFlags flags,
                                            NewObjectKind newKind);

// Create a RegExpObject without running the pattern syntax check; the caller
// is expected to have validated |source| already.
RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
                                                Handle<JSAtom*> source,
                                                RegExpFlags flags,
                                                NewObjectKind newKind) {
  Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, newKind));
  if (!regexp) {
    return nullptr;
  }

  regexp->initAndZeroLastIndex(source, flags, cx);

  return regexp;
}

// Syntax-check |source| with |flags| and, if it is valid, create a
// RegExpObject for it. No RegExpShared is attached yet.
RegExpObject* RegExpObject::create(JSContext* cx, Handle<JSAtom*> source,
                                   RegExpFlags flags, NewObjectKind newKind) {
  Rooted<RegExpObject*> regexp(cx);
  {
    AutoReportFrontendContext fc(cx);
    CompileOptions dummyOptions(cx);
    frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions);

    LifoAllocScope allocScope(&cx->tempLifoAlloc());
    if (!irregexp::CheckPatternSyntax(cx, cx->stackLimitForCurrentPrincipal(),
                                      dummyTokenStream, source, flags)) {
      return nullptr;
    }

    regexp = RegExpAlloc(cx, newKind);
    if (!regexp) {
      return nullptr;
    }

    regexp->initAndZeroLastIndex(source, flags, cx);

    MOZ_ASSERT(!regexp->hasShared());
  }
  return regexp;
}

// Look up (or create) the zone-wide RegExpShared for |regexp|'s source and
// flags and attach it to |regexp|.
/* static */
RegExpShared* RegExpObject::createShared(JSContext* cx,
                                         Handle<RegExpObject*> regexp) {
  MOZ_ASSERT(!regexp->hasShared());
  Rooted<JSAtom*> source(cx, regexp->getSource());
  RegExpShared* shared =
      cx->zone()->regExps().get(cx, source, regexp->getFlags());
  if (!shared) {
    return nullptr;
  }

  regexp->setShared(shared);

  MOZ_ASSERT(regexp->hasShared());

  return shared;
}

// Give an empty RegExpObject its initial shape: a single |lastIndex| property
// stored in reserved slot LAST_INDEX_SLOT.
SharedShape* RegExpObject::assignInitialShape(JSContext* cx,
                                              Handle<RegExpObject*> self) {
  MOZ_ASSERT(self->empty());

  static_assert(LAST_INDEX_SLOT == 0);

  /* The lastIndex property alone is writable but non-configurable. */
  if (!NativeObject::addPropertyInReservedSlot(cx, self,
                                               cx->names().lastIndex,
                                               LAST_INDEX_SLOT,
                                               {PropertyFlag::Writable})) {
    return nullptr;
  }

  return self->sharedShape();
}

void RegExpObject::initIgnoringLastIndex(JSAtom* source, RegExpFlags flags) {
  // If this is a re-initialization with an existing RegExpShared, 'flags'
  // may not match getShared()->flags, so forget the RegExpShared.
  clearShared();

  setSource(source);
  setFlags(flags);
}

void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
                                        JSContext* cx) {
  initIgnoringLastIndex(source, flags);
  zeroLastIndex(cx);
}

// Walk the set bits of |flags| from least to most significant. For every bit
// that names a known flag call |known(name, flagChar)|; for any other set bit
// call |unknown(bit)|.
template <typename KnownF, typename UnknownF>
void ForEachRegExpFlag(JS::RegExpFlags flags, KnownF known, UnknownF unknown) {
  uint8_t raw = flags.value();

  // |i| visits each of the 8 bit positions; the loop ends when the shift
  // overflows uint8_t back to zero.
  for (uint8_t i = 1; i; i = i << 1) {
    if (!(raw & i)) {
      continue;
    }
    switch (raw & i) {
      case RegExpFlag::HasIndices:
        known("HasIndices", "d");
        break;
      case RegExpFlag::Global:
        known("Global", "g");
        break;
      case RegExpFlag::IgnoreCase:
        known("IgnoreCase", "i");
        break;
      case RegExpFlag::Multiline:
        known("Multiline", "m");
        break;
      case RegExpFlag::DotAll:
        known("DotAll", "s");
        break;
      case RegExpFlag::Unicode:
        known("Unicode", "u");
        break;
      case RegExpFlag::UnicodeSets:
        // Previously missing: /v was reported through |unknown|.
        known("UnicodeSets", "v");
        break;
      case RegExpFlag::Sticky:
        known("Sticky", "y");
        break;
      default:
        unknown(i);
        break;
    }
  }
}

// Stream the flag characters of |flags|; unrecognized bits print as '?'.
std::ostream& JS::operator<<(std::ostream& os, RegExpFlags flags) {
  ForEachRegExpFlag(
      flags, [&](const char* name, const char* c) { os << c; },
      [&](uint8_t value) { os << '?'; });
  return os;
}

#if defined(DEBUG) || defined(JS_JITSPEW)
void RegExpObject::dumpOwnFields(js::JSONPrinter& json) const {
  {
    js::GenericPrinter& out = json.beginStringProperty("source");
    getSource()->dumpPropertyName(out);
    json.endStringProperty();
  }

  json.beginInlineListProperty("flags");
  ForEachRegExpFlag(
      getFlags(),
      [&](const char* name, const char* c) { json.value("%s", name); },
      [&](uint8_t value) { json.value("Unknown(%02x)", value); });
  json.endInlineList();

  {
    js::GenericPrinter& out = json.beginStringProperty("lastIndex");
    getLastIndex().dumpStringContent(out);
    json.endStringProperty();
  }
}

void RegExpObject::dumpOwnStringContent(js::GenericPrinter& out) const {
  out.put("/");

  getSource()->dumpCharsNoQuote(out);

  out.put("/");

  ForEachRegExpFlag(
      getFlags(), [&](const char* name, const char* c) { out.put(c); },
      [&](uint8_t value) {});
}
#endif /* defined(DEBUG) || defined(JS_JITSPEW) */

static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const JS::Latin1Char c) {
  return c == '\n' || c == '\r';
}

static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const char16_t c) {
  return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
}

// Append the escape letter for line terminator |c| (the caller supplies the
// leading backslash). Crashes if |c| is not a line terminator.
static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(
    StringBuffer& sb, const JS::Latin1Char c) {
  switch (c) {
    case '\n':
      if (!sb.append('n')) {
        return false;
      }
      break;
    case '\r':
      if (!sb.append('r')) {
        return false;
      }
      break;
    default:
      MOZ_CRASH("Bad LineTerminator");
  }
  return true;
}

static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(StringBuffer& sb,
                                                          const char16_t c) {
  switch (c) {
    case '\n':
      if (!sb.append('n')) {
        return false;
      }
      break;
    case '\r':
      if (!sb.append('r')) {
        return false;
      }
      break;
    case 0x2028:
      if (!sb.append("u2028")) {
        return false;
      }
      break;
    case 0x2029:
      if (!sb.append("u2029")) {
        return false;
      }
      break;
    default:
      MOZ_CRASH("Bad LineTerminator");
  }
  return true;
}

// Prepare |sb| for escaped output: switch to two-byte storage when CharT is
// char16_t, reserve room, and copy the already-scanned prefix
// [oldChars, it) that needed no escaping.
template <typename CharT>
static MOZ_ALWAYS_INLINE bool SetupBuffer(StringBuffer& sb,
                                          const CharT* oldChars, size_t oldLen,
                                          const CharT* it) {
  if constexpr (std::is_same_v<CharT, char16_t>) {
    if (!sb.ensureTwoByteChars()) {
      return false;
    }
  }

  if (!sb.reserve(oldLen + 1)) {
    return false;
  }

  sb.infallibleAppend(oldChars, size_t(it - oldChars));
  return true;
}

// Note: leaves the string buffer empty if no escaping need be performed.
template <typename CharT>
static bool EscapeRegExpPattern(StringBuffer& sb, const CharT* oldChars,
                                size_t oldLen) {
  bool inBrackets = false;
  bool previousCharacterWasBackslash = false;

  for (const CharT* it = oldChars; it < oldChars + oldLen; ++it) {
    CharT ch = *it;
    if (!previousCharacterWasBackslash) {
      if (inBrackets) {
        if (ch == ']') {
          inBrackets = false;
        }
      } else if (ch == '/') {
        // There's a forward slash that needs escaping.
        if (sb.empty()) {
          // This is the first char we've seen that needs escaping,
          // copy everything up to this point.
          if (!SetupBuffer(sb, oldChars, oldLen, it)) {
            return false;
          }
        }
        if (!sb.append('\\')) {
          return false;
        }
      } else if (ch == '[') {
        inBrackets = true;
      }
    }

    if (IsRegExpLineTerminator(ch)) {
      // There's LineTerminator that needs escaping.
      if (sb.empty()) {
        // This is the first char we've seen that needs escaping,
        // copy everything up to this point.
        if (!SetupBuffer(sb, oldChars, oldLen, it)) {
          return false;
        }
      }
      if (!previousCharacterWasBackslash) {
        if (!sb.append('\\')) {
          return false;
        }
      }
      if (!AppendEscapedLineTerminator(sb, ch)) {
        return false;
      }
    } else if (!sb.empty()) {
      if (!sb.append(ch)) {
        return false;
      }
    }

    if (previousCharacterWasBackslash) {
      previousCharacterWasBackslash = false;
    } else if (ch == '\\') {
      previousCharacterWasBackslash = true;
    }
  }

  return true;
}

// ES6 draft rev32 21.2.3.2.4.
// Returns |src| itself when nothing needs escaping, and nullptr on failure.
JSLinearString* js::EscapeRegExpPattern(JSContext* cx, Handle<JSAtom*> src) {
  // Step 2.
  if (src->length() == 0) {
    return cx->names().emptyRegExp_;
  }

  // We may never need to use |sb|. Start using it lazily.
  JSStringBuilder sb(cx);
  bool escapeFailed = false;
  if (src->hasLatin1Chars()) {
    JS::AutoCheckCannotGC nogc;
    escapeFailed =
        !::EscapeRegExpPattern(sb, src->latin1Chars(nogc), src->length());
  } else {
    JS::AutoCheckCannotGC nogc;
    escapeFailed =
        !::EscapeRegExpPattern(sb, src->twoByteChars(nogc), src->length());
  }
  if (escapeFailed) {
    return nullptr;
  }

  // Step 3.
  if (sb.empty()) {
    return src;
  }
  return sb.finishString();
}

// ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
// Builds "/" + escaped source + "/" + flag characters. Returns nullptr on
// failure.
JSLinearString* RegExpObject::toString(JSContext* cx,
                                       Handle<RegExpObject*> obj) {
  // Steps 3-4.
  Rooted<JSAtom*> src(cx, obj->getSource());
  if (!src) {
    return nullptr;
  }
  Rooted<JSLinearString*> escapedSrc(cx, EscapeRegExpPattern(cx, src));
  // EscapeRegExpPattern returns nullptr when escaping or string creation
  // fails; bail out instead of dereferencing it below.
  if (!escapedSrc) {
    return nullptr;
  }

  // Step 7.
  JSStringBuilder sb(cx);
  size_t len = escapedSrc->length();
  if (!sb.reserve(len + 2)) {
    return nullptr;
  }
  sb.infallibleAppend('/');
  if (!sb.append(escapedSrc)) {
    return nullptr;
  }
  sb.infallibleAppend('/');

  // Steps 5-7.
  if (obj->hasIndices() && !sb.append('d')) {
    return nullptr;
  }
  if (obj->global() && !sb.append('g')) {
    return nullptr;
  }
  if (obj->ignoreCase() && !sb.append('i')) {
    return nullptr;
  }
  if (obj->multiline() && !sb.append('m')) {
    return nullptr;
  }
  if (obj->dotAll() && !sb.append('s')) {
    return nullptr;
  }
  if (obj->unicode() && !sb.append('u')) {
    return nullptr;
  }
  if (obj->unicodeSets() && !sb.append('v')) {
    return nullptr;
  }
  if (obj->sticky() && !sb.append('y')) {
    return nullptr;
  }

  return sb.finishString();
}

template <typename CharT>
static MOZ_ALWAYS_INLINE bool IsRegExpMetaChar(CharT ch) {
  switch (ch) {
    /* ES 2016 draft Mar 25, 2016 21.2.1 SyntaxCharacter. */
    case '^':
    case '$':
    case '\\':
    case '.':
    case '*':
    case '+':
    case '?':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case '|':
      return true;
    default:
      return false;
  }
}

// Return true if any of the first |length| characters is a SyntaxCharacter.
template <typename CharT>
bool js::HasRegExpMetaChars(const CharT* chars, size_t length) {
  for (size_t i = 0; i < length; ++i) {
    if (IsRegExpMetaChar<CharT>(chars[i])) {
      return true;
    }
  }
  return false;
}

template bool js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars,
                                                 size_t length);

template bool js::HasRegExpMetaChars<char16_t>(const char16_t* chars,
                                               size_t length);

bool js::StringHasRegExpMetaChars(JSLinearString* str) {
  AutoCheckCannotGC nogc;
  if (str->hasLatin1Chars()) {
    return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
  }

  return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
}

/* RegExpShared */

RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
    : CellWithTenuredGCPointer(source), pairCount_(0), flags(flags) {}

// Trace the GC edges held by this cell: the source atom in the header and,
// depending on kind(), either the pattern atom or the jit code and the
// groups template.
void RegExpShared::traceChildren(JSTracer* trc) {
  TraceNullableCellHeaderEdge(trc, this, "RegExpShared source");
  if (kind() == RegExpShared::Kind::Atom) {
    TraceNullableEdge(trc, &patternAtom_, "RegExpShared pattern atom");
  } else {
    for (auto& comp : compilationArray) {
      TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
    }
    TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
  }
}

void RegExpShared::discardJitCode() {
  for (auto& comp : compilationArray) {
    comp.jitCode = nullptr;
  }

  // We can also purge the tables used by JIT code.
  tables.clearAndFree();
}

// Release the malloc'ed data owned by this cell: bytecode, the named-capture
// index array, and the JIT code tables.
void RegExpShared::finalize(JS::GCContext* gcx) {
  for (auto& comp : compilationArray) {
    if (comp.byteCode) {
      size_t length = comp.byteCodeLength();
      gcx->free_(this, comp.byteCode, length, MemoryUse::RegExpSharedBytecode);
    }
  }
  if (namedCaptureIndices_) {
    size_t length = numNamedCaptures() * sizeof(uint32_t);
    gcx->free_(this, namedCaptureIndices_, length,
               MemoryUse::RegExpSharedNamedCaptureData);
  }
  tables.~JitCodeTables();
}

// Make sure |re| is compiled in a form suitable for matching |input| with
// the requested |codeKind|. Returns false on failure.
/* static */
bool RegExpShared::compileIfNecessary(JSContext* cx,
                                      MutableHandleRegExpShared re,
                                      Handle<JSLinearString*> input,
                                      RegExpShared::CodeKind codeKind) {
  if (codeKind == RegExpShared::CodeKind::Any) {
    // We start by interpreting regexps, then compile them once they are
    // sufficiently hot. For very long input strings, we tier up eagerly.
    codeKind = RegExpShared::CodeKind::Bytecode;
    if (re->markedForTierUp() || input->length() > 1000) {
      codeKind = RegExpShared::CodeKind::Jitcode;
    }
  }

  // Fall back to bytecode if native codegen is not available.
  if (!IsNativeRegExpEnabled() && codeKind == RegExpShared::CodeKind::Jitcode) {
    codeKind = RegExpShared::CodeKind::Bytecode;
  }

  bool needsCompile = false;
  if (re->kind() == RegExpShared::Kind::Unparsed) {
    needsCompile = true;
  }
  if (re->kind() == RegExpShared::Kind::RegExp) {
    if (!re->isCompiled(input->hasLatin1Chars(), codeKind)) {
      needsCompile = true;
    }
  }
  if (needsCompile) {
    return irregexp::CompilePattern(cx, re, input, codeKind);
  }
  return true;
}

// Run |re| against |input| starting at |start|, writing capture positions to
// |matches|. Returns Error with an exception/over-recursion reported, or
// Success / Success_NotFound.
/* static */
RegExpRunStatus RegExpShared::execute(JSContext* cx,
                                      MutableHandleRegExpShared re,
                                      Handle<JSLinearString*> input,
                                      size_t start, VectorMatchPairs* matches) {
  MOZ_ASSERT(matches);

  // TODO: Add tracelogger support

  /* Compile the code at point-of-use. */
  if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
    return RegExpRunStatus::Error;
  }

  /*
   * Ensure sufficient memory for output vector.
   * No need to initialize it. The RegExp engine fills them in on a match.
   *
   * This single allocation serves both the atom path and the general path
   * below (an identical second call used to follow the atom check).
   */
  if (!matches->allocOrExpandArray(re->pairCount())) {
    ReportOutOfMemory(cx);
    return RegExpRunStatus::Error;
  }

  if (re->kind() == RegExpShared::Kind::Atom) {
    return RegExpShared::executeAtom(re, input, start, matches);
  }

  uint32_t interruptRetries = 0;
  const uint32_t maxInterruptRetries = 4;
  do {
    DebugOnly<bool> alreadyThrowing = cx->isExceptionPending();
    RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
#ifdef DEBUG
    // Check if we must simulate the interruption
    if (js::irregexp::IsolateShouldSimulateInterrupt(cx->isolate)) {
      js::irregexp::IsolateClearShouldSimulateInterrupt(cx->isolate);
      cx->requestInterrupt(InterruptReason::CallbackUrgent);
    }
#endif
    if (result == RegExpRunStatus::Error) {
      /* Execute can return RegExpRunStatus::Error:
       *
       *  1. If the native stack overflowed
       *  2. If the backtrack stack overflowed
       *  3. If an interrupt was requested during execution.
       *
       * In the first two cases, we want to throw an error. In the
       * third case, we want to handle the interrupt and try again.
       * We cap the number of times we will retry.
       */
      if (cx->isExceptionPending()) {
        // If this regexp is being executed by recovery instructions
        // while bailing out to handle an exception, there may already
        // be an exception pending. If so, just return that exception
        // instead of reporting a new one.
        MOZ_ASSERT(alreadyThrowing);
        return RegExpRunStatus::Error;
      }
      if (cx->hasAnyPendingInterrupt()) {
        if (!CheckForInterrupt(cx)) {
          return RegExpRunStatus::Error;
        }
        if (interruptRetries++ < maxInterruptRetries) {
          // The initial execution may have been interpreted, or the
          // interrupt may have triggered a GC that discarded jitcode.
          // To maximize the chance of succeeding before being
          // interrupted again, we want to ensure we are compiled.
          if (!compileIfNecessary(cx, re, input,
                                  RegExpShared::CodeKind::Jitcode)) {
            return RegExpRunStatus::Error;
          }
          continue;
        }
      }
      // If we have run out of retries, this regexp takes too long to execute.
      ReportOverRecursed(cx);
      return RegExpRunStatus::Error;
    }

    MOZ_ASSERT(result == RegExpRunStatus::Success ||
               result == RegExpRunStatus::Success_NotFound);

    return result;
  } while (true);

  MOZ_CRASH("Unreachable");
}

// Switch an unparsed RegExpShared to atom matching on |pattern|.
void RegExpShared::useAtomMatch(Handle<JSAtom*> pattern) {
  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
  kind_ = RegExpShared::Kind::Atom;
  patternAtom_ = pattern;
  pairCount_ = 1;
}

// Switch an unparsed RegExpShared to full regexp matching with |pairCount|
// match pairs, and arm the tier-up counter.
void RegExpShared::useRegExpMatch(size_t pairCount) {
  MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
  kind_ = RegExpShared::Kind::RegExp;
  pairCount_ = pairCount;
  ticks_ = jit::JitOptions.regexpWarmUpThreshold;
}

// Store the named-capture metadata on |re| and account for the malloc'ed
// |captureIndices| array (freed in finalize()).
/* static */
void RegExpShared::InitializeNamedCaptures(JSContext* cx,
                                           HandleRegExpShared re,
                                           uint32_t numNamedCaptures,
                                           Handle<PlainObject*> templateObject,
                                           uint32_t* captureIndices) {
  MOZ_ASSERT(!re->groupsTemplate_);
  MOZ_ASSERT(!re->namedCaptureIndices_);

  re->numNamedCaptures_ = numNamedCaptures;
  re->groupsTemplate_ = templateObject;
  re->namedCaptureIndices_ = captureIndices;

  uint32_t arraySize = numNamedCaptures * sizeof(uint32_t);
  js::AddCellMemory(re, arraySize, MemoryUse::RegExpSharedNamedCaptureData);
}

// Count down toward tier-up; saturates at zero.
void RegExpShared::tierUpTick() {
  MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
  if (ticks_ > 0) {
    ticks_--;
  }
}

// True once the tick counter has reached zero for a Kind::RegExp pattern and
// native regexp code generation is enabled.
bool RegExpShared::markedForTierUp() const {
  if (!IsNativeRegExpEnabled()) {
    return false;
  }
  if (kind() != RegExpShared::Kind::RegExp) {
    return false;
  }
  return ticks_ == 0;
}

// When either unicode flag is set and if |index| points to a trail surrogate,
// step back to the corresponding lead surrogate.
static size_t StepBackToLeadSurrogate(const JSLinearString* input, size_t index) { // |index| must be a position within a two-byte string, otherwise it can't // point to the trail surrogate of a surrogate pair. if (index == 0 || index >= input->length() || input->hasLatin1Chars()) { return index; } /* * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad * 21.2.2.2 step 2. * Let listIndex be the index into Input of the character that was obtained * from element index of str. * * In the spec, pattern match is performed with decoded Unicode code points, * but our implementation performs it with UTF-16 encoded strings. In step 2, * we should decrement lastIndex (index) if it points to a trail surrogate * that has a corresponding lead surrogate. * * var r = /\uD83D\uDC38/ug; * r.lastIndex = 1; * var str = "\uD83D\uDC38"; * var result = r.exec(str); // pattern match starts from index 0 * print(result.index); // prints 0 * * Note: This doesn't match the current spec text and result in different * values for `result.index` under certain conditions. However, the spec will * change to match our implementation's behavior. * See https://github.com/tc39/ecma262/issues/128. */ JS::AutoCheckCannotGC nogc; const auto* chars = input->twoByteChars(nogc); if (unicode::IsTrailSurrogate(chars[index]) && unicode::IsLeadSurrogate(chars[index - 1])) { index--; } return index; } static RegExpRunStatus ExecuteAtomImpl(RegExpShared* re, JSLinearString* input, size_t start, MatchPairs* matches) { MOZ_ASSERT(re->pairCount() == 1); size_t length = input->length(); size_t searchLength = re->patternAtom()->length(); if (re->unicode() || re->unicodeSets()) { start = StepBackToLeadSurrogate(input, start); } if (re->sticky()) { // First part checks size_t overflow. 
if (searchLength + start < searchLength || searchLength + start > length) { return RegExpRunStatus::Success_NotFound; } if (!HasSubstringAt(input, re->patternAtom(), start)) { return RegExpRunStatus::Success_NotFound; } (*matches)[0].start = start; (*matches)[0].limit = start + searchLength; matches->checkAgainst(input->length()); return RegExpRunStatus::Success; } int res = StringFindPattern(input, re->patternAtom(), start); if (res == -1) { return RegExpRunStatus::Success_NotFound; } (*matches)[0].start = res; (*matches)[0].limit = res + searchLength; matches->checkAgainst(input->length()); return RegExpRunStatus::Success; } RegExpRunStatus js::ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input, size_t start, MatchPairs* matchPairs) { AutoUnsafeCallWithABI unsafe; return ExecuteAtomImpl(re, input, start, matchPairs); } /* static */ RegExpRunStatus RegExpShared::executeAtom(MutableHandleRegExpShared re, Handle input, size_t start, VectorMatchPairs* matches) { return ExecuteAtomImpl(re, input, start, matches); } size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) { size_t n = 0; for (const auto& compilation : compilationArray) { if (compilation.byteCode) { n += mallocSizeOf(compilation.byteCode); } } n += tables.sizeOfExcludingThis(mallocSizeOf); for (size_t i = 0; i < tables.length(); i++) { n += mallocSizeOf(tables[i].get()); } return n; } /* RegExpRealm */ RegExpRealm::RegExpRealm() : optimizableRegExpPrototypeShape_(nullptr), optimizableRegExpInstanceShape_(nullptr) { for (auto& shape : matchResultShapes_) { shape = nullptr; } } SharedShape* RegExpRealm::createMatchResultShape(JSContext* cx, ResultShapeKind kind) { MOZ_ASSERT(!matchResultShapes_[kind]); /* Create template array object */ Rooted templateObject(cx, NewDenseEmptyArray(cx)); if (!templateObject) { return nullptr; } if (kind == ResultShapeKind::Indices) { /* The |indices| array only has a |groups| property. 
*/ if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups, UndefinedHandleValue, JSPROP_ENUMERATE)) { return nullptr; } MOZ_ASSERT(templateObject->getLastProperty().slot() == IndicesGroupsSlot); matchResultShapes_[kind].set(templateObject->sharedShape()); return matchResultShapes_[kind]; } /* Set dummy index property */ if (!NativeDefineDataProperty(cx, templateObject, cx->names().index, UndefinedHandleValue, JSPROP_ENUMERATE)) { return nullptr; } MOZ_ASSERT(templateObject->getLastProperty().slot() == MatchResultObjectIndexSlot); /* Set dummy input property */ if (!NativeDefineDataProperty(cx, templateObject, cx->names().input, UndefinedHandleValue, JSPROP_ENUMERATE)) { return nullptr; } MOZ_ASSERT(templateObject->getLastProperty().slot() == MatchResultObjectInputSlot); /* Set dummy groups property */ if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups, UndefinedHandleValue, JSPROP_ENUMERATE)) { return nullptr; } MOZ_ASSERT(templateObject->getLastProperty().slot() == MatchResultObjectGroupsSlot); if (kind == ResultShapeKind::WithIndices) { /* Set dummy indices property */ if (!NativeDefineDataProperty(cx, templateObject, cx->names().indices, UndefinedHandleValue, JSPROP_ENUMERATE)) { return nullptr; } MOZ_ASSERT(templateObject->getLastProperty().slot() == MatchResultObjectIndicesSlot); } #ifdef DEBUG if (kind == ResultShapeKind::Normal) { MOZ_ASSERT(templateObject->numFixedSlots() == 0); MOZ_ASSERT(templateObject->numDynamicSlots() == MatchResultObjectNumDynamicSlots); MOZ_ASSERT(templateObject->slotSpan() == MatchResultObjectSlotSpan); } #endif matchResultShapes_[kind].set(templateObject->sharedShape()); return matchResultShapes_[kind]; } void RegExpRealm::trace(JSTracer* trc) { if (regExpStatics) { regExpStatics->trace(trc); } for (auto& shape : matchResultShapes_) { TraceNullableEdge(trc, &shape, "RegExpRealm::matchResultShapes_"); } TraceNullableEdge(trc, &optimizableRegExpPrototypeShape_, 
"RegExpRealm::optimizableRegExpPrototypeShape_"); TraceNullableEdge(trc, &optimizableRegExpInstanceShape_, "RegExpRealm::optimizableRegExpInstanceShape_"); } RegExpShared* RegExpZone::get(JSContext* cx, Handle source, RegExpFlags flags) { DependentAddPtr p(cx, set_, Key(source, flags)); if (p) { return *p; } auto* shared = cx->newCell(source, flags); if (!shared) { return nullptr; } if (!p.add(cx, set_, Key(source, flags), shared)) { return nullptr; } return shared; } size_t RegExpZone::sizeOfIncludingThis( mozilla::MallocSizeOf mallocSizeOf) const { return mallocSizeOf(this) + set_.sizeOfExcludingThis(mallocSizeOf); } RegExpZone::RegExpZone(Zone* zone) : set_(zone, zone) {} /* Functions */ JSObject* js::CloneRegExpObject(JSContext* cx, Handle regex) { constexpr gc::AllocKind allocKind = RegExpObject::AllocKind; static_assert(gc::GetGCKindSlots(allocKind) == RegExpObject::RESERVED_SLOTS); MOZ_ASSERT(regex->asTenured().getAllocKind() == allocKind); Rooted shape(cx, regex->sharedShape()); Rooted clone(cx, NativeObject::create( cx, allocKind, gc::Heap::Default, shape)); if (!clone) { return nullptr; } RegExpShared* shared = RegExpObject::getShared(cx, regex); if (!shared) { return nullptr; } clone->initAndZeroLastIndex(shared->getSource(), shared->getFlags(), cx); clone->setShared(shared); return clone; } template static bool ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlags* flagsOut, char16_t* invalidFlag) { *flagsOut = RegExpFlag::NoFlags; for (size_t i = 0; i < length; i++) { uint8_t flag; if (!JS::MaybeParseRegExpFlag(chars[i], &flag) || *flagsOut & flag) { *invalidFlag = chars[i]; return false; } // /u and /v flags are mutually exclusive. 
if (((*flagsOut & RegExpFlag::Unicode) && (flag & RegExpFlag::UnicodeSets)) || ((*flagsOut & RegExpFlag::UnicodeSets) && (flag & RegExpFlag::Unicode))) { *invalidFlag = chars[i]; return false; } *flagsOut |= flag; } return true; } bool js::ParseRegExpFlags(JSContext* cx, JSString* flagStr, RegExpFlags* flagsOut) { JSLinearString* linear = flagStr->ensureLinear(cx); if (!linear) { return false; } size_t len = linear->length(); bool ok; char16_t invalidFlag; if (linear->hasLatin1Chars()) { AutoCheckCannotGC nogc; ok = ::ParseRegExpFlags(linear->latin1Chars(nogc), len, flagsOut, &invalidFlag); } else { AutoCheckCannotGC nogc; ok = ::ParseRegExpFlags(linear->twoByteChars(nogc), len, flagsOut, &invalidFlag); } if (!ok) { JS::TwoByteChars range(&invalidFlag, 1); UniqueChars utf8(JS::CharsToNewUTF8CharsZ(cx, range).c_str()); if (!utf8) { return false; } JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr, JSMSG_BAD_REGEXP_FLAG, utf8.get()); return false; } return true; } JS::ubi::Node::Size JS::ubi::Concrete::size( mozilla::MallocSizeOf mallocSizeOf) const { return js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED) + get().sizeOfExcludingThis(mallocSizeOf); } /* * Regular Expressions. 
*/ JS_PUBLIC_API JSObject* JS::NewRegExpObject(JSContext* cx, const char* bytes, size_t length, RegExpFlags flags) { AssertHeapIsIdle(); CHECK_THREAD(cx); UniqueTwoByteChars chars(InflateString(cx, bytes, length)); if (!chars) { return nullptr; } return RegExpObject::create(cx, chars.get(), length, flags, GenericObject); } JS_PUBLIC_API JSObject* JS::NewUCRegExpObject(JSContext* cx, const char16_t* chars, size_t length, RegExpFlags flags) { AssertHeapIsIdle(); CHECK_THREAD(cx); return RegExpObject::create(cx, chars, length, flags, GenericObject); } JS_PUBLIC_API bool JS::SetRegExpInput(JSContext* cx, HandleObject obj, HandleString input) { AssertHeapIsIdle(); CHECK_THREAD(cx); cx->check(input); Handle global = obj.as(); RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global); if (!res) { return false; } res->reset(input); return true; } JS_PUBLIC_API bool JS::ClearRegExpStatics(JSContext* cx, HandleObject obj) { AssertHeapIsIdle(); CHECK_THREAD(cx); MOZ_ASSERT(obj); Handle global = obj.as(); RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global); if (!res) { return false; } res->clear(); return true; } JS_PUBLIC_API bool JS::ExecuteRegExp(JSContext* cx, HandleObject obj, HandleObject reobj, const char16_t* chars, size_t length, size_t* indexp, bool test, MutableHandleValue rval) { AssertHeapIsIdle(); CHECK_THREAD(cx); Handle global = obj.as(); RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global); if (!res) { return false; } Rooted input(cx, NewStringCopyN(cx, chars, length)); if (!input) { return false; } return ExecuteRegExpLegacy(cx, res, reobj.as(), input, indexp, test, rval); } JS_PUBLIC_API bool JS::ExecuteRegExpNoStatics(JSContext* cx, HandleObject obj, const char16_t* chars, size_t length, size_t* indexp, bool test, MutableHandleValue rval) { AssertHeapIsIdle(); CHECK_THREAD(cx); Rooted input(cx, NewStringCopyN(cx, chars, length)); if (!input) { return false; } return ExecuteRegExpLegacy(cx, nullptr, obj.as(), input, indexp, test, 
rval); } JS_PUBLIC_API bool JS::ObjectIsRegExp(JSContext* cx, HandleObject obj, bool* isRegExp) { cx->check(obj); ESClass cls; if (!GetBuiltinClass(cx, obj, &cls)) { return false; } *isRegExp = cls == ESClass::RegExp; return true; } JS_PUBLIC_API RegExpFlags JS::GetRegExpFlags(JSContext* cx, HandleObject obj) { AssertHeapIsIdle(); CHECK_THREAD(cx); RegExpShared* shared = RegExpToShared(cx, obj); if (!shared) { return RegExpFlag::NoFlags; } return shared->getFlags(); } JS_PUBLIC_API JSString* JS::GetRegExpSource(JSContext* cx, HandleObject obj) { AssertHeapIsIdle(); CHECK_THREAD(cx); RegExpShared* shared = RegExpToShared(cx, obj); if (!shared) { return nullptr; } return shared->getSource(); } JS_PUBLIC_API bool JS::CheckRegExpSyntax(JSContext* cx, const char16_t* chars, size_t length, RegExpFlags flags, MutableHandleValue error) { AssertHeapIsIdle(); CHECK_THREAD(cx); AutoReportFrontendContext fc(cx); CompileOptions dummyOptions(cx); frontend::DummyTokenStream dummyTokenStream(&fc, dummyOptions); LifoAllocScope allocScope(&cx->tempLifoAlloc()); mozilla::Range source(chars, length); bool success = irregexp::CheckPatternSyntax( cx->tempLifoAlloc(), cx->stackLimitForCurrentPrincipal(), dummyTokenStream, source, flags); error.set(UndefinedValue()); if (!success) { if (!fc.convertToRuntimeErrorAndClear()) { return false; } // We can fail because of OOM or over-recursion even if the syntax is valid. if (cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed()) { return false; } if (!cx->getPendingException(error)) { return false; } cx->clearPendingException(); } return true; }