diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/vm/RegExpShared.h | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/vm/RegExpShared.h')
-rw-r--r-- | js/src/vm/RegExpShared.h | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/js/src/vm/RegExpShared.h b/js/src/vm/RegExpShared.h new file mode 100644 index 0000000000..4ff68e9ee1 --- /dev/null +++ b/js/src/vm/RegExpShared.h @@ -0,0 +1,462 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/** + * The compiled representation of a RegExp, potentially shared among RegExp + * instances created during separate evaluations of a single RegExp literal in + * source code. + */ + +#ifndef vm_RegExpShared_h +#define vm_RegExpShared_h + +#include "mozilla/Assertions.h" +#include "mozilla/MemoryReporting.h" + +#include "gc/Barrier.h" +#include "gc/Policy.h" +#include "gc/SweepingAPI.h" +#include "gc/ZoneAllocator.h" +#include "irregexp/RegExpTypes.h" +#include "jit/JitCode.h" +#include "jit/JitOptions.h" +#include "js/AllocPolicy.h" +#include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags +#include "js/UbiNode.h" +#include "js/Vector.h" +#include "vm/ArrayObject.h" + +namespace js { + +class ArrayObject; +class PlainObject; +class RegExpRealm; +class RegExpShared; +class RegExpStatics; +class VectorMatchPairs; + +using RootedRegExpShared = JS::Rooted<RegExpShared*>; +using HandleRegExpShared = JS::Handle<RegExpShared*>; +using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>; + +enum class RegExpRunStatus : int32_t { + Error = -1, + Success = 1, + Success_NotFound = 0, +}; + +inline bool IsNativeRegExpEnabled() { + return jit::HasJitBackend() && jit::JitOptions.nativeRegExp; +} + +/* + * A RegExpShared is the compiled representation of a regexp. A RegExpShared is + * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may + * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a + * table so that they can be reused when compiling the same regex string. + * + * To save memory, a RegExpShared is not created for a RegExpObject until it is + * needed for execution. When a RegExpShared needs to be created, it is looked + * up in a per-compartment table to allow reuse between objects. + * + * During a GC, RegExpShared instances are marked and swept like GC things. + * Usually, RegExpObjects clear their pointers to their RegExpShareds rather + * than explicitly tracing them, so that the RegExpShared and any jitcode can + * be reclaimed quicker. However, the RegExpShareds are traced through by + * objects when we are preserving jitcode in their zone, to avoid the same + * recompilation inefficiencies as normal Ion and baseline compilation. + */ +class RegExpShared + : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> { + friend class js::gc::CellAllocator; + + public: + enum class Kind : uint32_t { Unparsed, Atom, RegExp }; + enum class CodeKind { Bytecode, Jitcode, Any }; + + using ByteCode = js::irregexp::ByteArrayData; + using JitCodeTable = js::irregexp::ByteArray; + using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>; + + private: + friend class RegExpStatics; + friend class RegExpZone; + + struct RegExpCompilation { + HeapPtr<jit::JitCode*> jitCode; + ByteCode* byteCode = nullptr; + + bool compiled(CodeKind kind = CodeKind::Any) const { + switch (kind) { + case CodeKind::Bytecode: + return !!byteCode; + case CodeKind::Jitcode: + return !!jitCode; + case CodeKind::Any: + return !!byteCode || !!jitCode; + } + MOZ_CRASH("Unreachable"); + } + + size_t byteCodeLength() const { + MOZ_ASSERT(byteCode); + return byteCode->length; + } + }; + + public: + /* Source to the RegExp, for lazy compilation. Stored in the cell header. */ + JSAtom* getSource() const { return headerPtr(); } + + private: + RegExpCompilation compilationArray[2]; + + uint32_t pairCount_; + JS::RegExpFlags flags; + + RegExpShared::Kind kind_ = Kind::Unparsed; + GCPtr<JSAtom*> patternAtom_; + uint32_t maxRegisters_ = 0; + uint32_t ticks_ = 0; + + uint32_t numNamedCaptures_ = {}; + uint32_t* namedCaptureIndices_ = {}; + GCPtr<PlainObject*> groupsTemplate_ = {}; + + static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; } + + // Tables referenced by JIT code. + JitCodeTables tables; + + /* Internal functions. */ + RegExpShared(JSAtom* source, JS::RegExpFlags flags); + + const RegExpCompilation& compilation(bool latin1) const { + return compilationArray[CompilationIndex(latin1)]; + } + + RegExpCompilation& compilation(bool latin1) { + return compilationArray[CompilationIndex(latin1)]; + } + + public: + ~RegExpShared() = delete; + + static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res, + Handle<JSLinearString*> input, CodeKind code); + + static RegExpRunStatus executeAtom(MutableHandleRegExpShared re, + Handle<JSLinearString*> input, + size_t start, VectorMatchPairs* matches); + + // Execute this RegExp on input starting from searchIndex, filling in matches. + static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res, + Handle<JSLinearString*> input, + size_t searchIndex, VectorMatchPairs* matches); + + // Register a table with this RegExpShared, and take ownership. + bool addTable(JitCodeTable table) { return tables.append(std::move(table)); } + + /* Accessors */ + + size_t pairCount() const { + MOZ_ASSERT(kind() != Kind::Unparsed); + return pairCount_; + } + + RegExpShared::Kind kind() const { return kind_; } + + // Use simple string matching for this regexp. + void useAtomMatch(Handle<JSAtom*> pattern); + + // Use the regular expression engine for this regexp. + void useRegExpMatch(size_t parenCount); + + static void InitializeNamedCaptures(JSContext* cx, HandleRegExpShared re, + uint32_t numNamedCaptures, + Handle<PlainObject*> templateObject, + uint32_t* captureIndices); + PlainObject* getGroupsTemplate() { return groupsTemplate_; } + + void tierUpTick(); + bool markedForTierUp() const; + + void setByteCode(ByteCode* code, bool latin1) { + compilation(latin1).byteCode = code; + } + ByteCode* getByteCode(bool latin1) const { + return compilation(latin1).byteCode; + } + void setJitCode(jit::JitCode* code, bool latin1) { + compilation(latin1).jitCode = code; + } + jit::JitCode* getJitCode(bool latin1) const { + return compilation(latin1).jitCode; + } + uint32_t getMaxRegisters() const { return maxRegisters_; } + void updateMaxRegisters(uint32_t numRegisters) { + maxRegisters_ = std::max(maxRegisters_, numRegisters); + } + + uint32_t numNamedCaptures() const { return numNamedCaptures_; } + int32_t getNamedCaptureIndex(uint32_t idx) const { + MOZ_ASSERT(idx < numNamedCaptures()); + MOZ_ASSERT(namedCaptureIndices_); + return namedCaptureIndices_[idx]; + } + + JSAtom* patternAtom() const { return patternAtom_; } + + JS::RegExpFlags getFlags() const { return flags; } + + bool hasIndices() const { return flags.hasIndices(); } + bool global() const { return flags.global(); } + bool ignoreCase() const { return flags.ignoreCase(); } + bool multiline() const { return flags.multiline(); } + bool dotAll() const { return flags.dotAll(); } + bool unicode() const { return flags.unicode(); } + bool unicodeSets() const { return flags.unicodeSets(); } + bool sticky() const { return flags.sticky(); } + + bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const { + return compilation(latin1).compiled(codeKind); + } + bool isCompiled() const { return isCompiled(true) || isCompiled(false); } + + void traceChildren(JSTracer* trc); + void discardJitCode(); + void finalize(JS::GCContext* gcx); + + static size_t offsetOfSource() { return offsetOfHeaderPtr(); } + + static size_t offsetOfPatternAtom() { + return offsetof(RegExpShared, patternAtom_); + } + + static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); } + + static size_t offsetOfPairCount() { + return offsetof(RegExpShared, pairCount_); + } + + static size_t offsetOfKind() { return offsetof(RegExpShared, kind_); } + + static size_t offsetOfJitCode(bool latin1) { + return offsetof(RegExpShared, compilationArray) + + (CompilationIndex(latin1) * sizeof(RegExpCompilation)) + + offsetof(RegExpCompilation, jitCode); + } + + static size_t offsetOfGroupsTemplate() { + return offsetof(RegExpShared, groupsTemplate_); + } + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); + + public: + static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared; +}; + +class RegExpZone { + struct Key { + JSAtom* atom = nullptr; + JS::RegExpFlags flags = JS::RegExpFlag::NoFlags; + + Key() = default; + Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {} + MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared) + : atom(shared.unbarrieredGet()->getSource()), + flags(shared.unbarrieredGet()->getFlags()) {} + + using Lookup = Key; + static HashNumber hash(const Lookup& l) { + HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom); + return mozilla::AddToHash(hash, l.flags.value()); + } + static bool match(Key l, Key r) { + return l.atom == r.atom && l.flags == r.flags; + } + }; + + /* + * The set of all RegExpShareds in the zone. On every GC, every RegExpShared + * that was not marked is deleted and removed from the set. + */ + using Set = WeakCache< + JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>; + Set set_; + + public: + explicit RegExpZone(Zone* zone); + + ~RegExpZone() { MOZ_ASSERT(set_.empty()); } + + bool empty() const { return set_.empty(); } + + RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const { + Set::Ptr p = set_.lookup(Key(source, flags)); + return p ? *p : nullptr; + } + + RegExpShared* get(JSContext* cx, Handle<JSAtom*> source, + JS::RegExpFlags flags); + +#ifdef DEBUG + void clear() { set_.clear(); } +#endif + + size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const; +}; + +class RegExpRealm { + public: + enum ResultShapeKind { Normal, WithIndices, Indices, NumKinds }; + + // Information about the last regular expression match. This is used by the + // static RegExp properties such as RegExp.lastParen. + UniquePtr<RegExpStatics> regExpStatics; + + private: + /* + * The shapes used for the result object of re.exec(), if there is a result. + * These are used in CreateRegExpMatchResult. There are three shapes, each of + * which is an ArrayObject shape with some additional properties. We decide + * which to use based on the |hasIndices| (/d) flag. + * + * Normal: Has |index|, |input|, and |groups| properties. + * Used for the result object if |hasIndices| is not set. + * + * WithIndices: Has |index|, |input|, |groups|, and |indices| properties. + * Used for the result object if |hasIndices| is set. + * + * Indices: Has a |groups| property. If |hasIndices| is set, used + * for the |.indices| property of the result object. + */ + HeapPtr<SharedShape*> matchResultShapes_[ResultShapeKind::NumKinds]; + + /* + * The shape of RegExp.prototype object that satisfies following: + * * RegExp.prototype.flags getter is not modified + * * RegExp.prototype.global getter is not modified + * * RegExp.prototype.ignoreCase getter is not modified + * * RegExp.prototype.multiline getter is not modified + * * RegExp.prototype.dotAll getter is not modified + * * RegExp.prototype.sticky getter is not modified + * * RegExp.prototype.unicode getter is not modified + * * RegExp.prototype.exec is an own data property + * * RegExp.prototype[@@match] is an own data property + * * RegExp.prototype[@@search] is an own data property + */ + HeapPtr<Shape*> optimizableRegExpPrototypeShape_; + + /* + * The shape of RegExp instance that satisfies following: + * * lastProperty is lastIndex + * * prototype is RegExp.prototype + */ + HeapPtr<Shape*> optimizableRegExpInstanceShape_; + + SharedShape* createMatchResultShape(JSContext* cx, ResultShapeKind kind); + + public: + explicit RegExpRealm(); + + void trace(JSTracer* trc); + + static const size_t MatchResultObjectIndexSlot = 0; + static const size_t MatchResultObjectInputSlot = 1; + static const size_t MatchResultObjectGroupsSlot = 2; + static const size_t MatchResultObjectIndicesSlot = 3; + + // Number of used and allocated dynamic slots for a Normal match result + // object. These values are checked in createMatchResultShape. + static const size_t MatchResultObjectSlotSpan = 3; + static const size_t MatchResultObjectNumDynamicSlots = 6; + + static const size_t IndicesGroupsSlot = 0; + + static size_t offsetOfMatchResultObjectIndexSlot() { + return sizeof(Value) * MatchResultObjectIndexSlot; + } + static size_t offsetOfMatchResultObjectInputSlot() { + return sizeof(Value) * MatchResultObjectInputSlot; + } + static size_t offsetOfMatchResultObjectGroupsSlot() { + return sizeof(Value) * MatchResultObjectGroupsSlot; + } + static size_t offsetOfMatchResultObjectIndicesSlot() { + return sizeof(Value) * MatchResultObjectIndicesSlot; + } + + /* Get or create the shape used for the result of .exec(). */ + SharedShape* getOrCreateMatchResultShape( + JSContext* cx, ResultShapeKind kind = ResultShapeKind::Normal) { + if (matchResultShapes_[kind]) { + return matchResultShapes_[kind]; + } + return createMatchResultShape(cx, kind); + } + + Shape* getOptimizableRegExpPrototypeShape() { + return optimizableRegExpPrototypeShape_; + } + void setOptimizableRegExpPrototypeShape(Shape* shape) { + optimizableRegExpPrototypeShape_ = shape; + } + Shape* getOptimizableRegExpInstanceShape() { + return optimizableRegExpInstanceShape_; + } + void setOptimizableRegExpInstanceShape(Shape* shape) { + optimizableRegExpInstanceShape_ = shape; + } + + static constexpr size_t offsetOfOptimizableRegExpPrototypeShape() { + return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_); + } + static constexpr size_t offsetOfOptimizableRegExpInstanceShape() { + return offsetof(RegExpRealm, optimizableRegExpInstanceShape_); + } + static constexpr size_t offsetOfRegExpStatics() { + return offsetof(RegExpRealm, regExpStatics); + } + static constexpr size_t offsetOfNormalMatchResultShape() { + static_assert(sizeof(HeapPtr<SharedShape*>) == sizeof(uintptr_t)); + return offsetof(RegExpRealm, matchResultShapes_) + + ResultShapeKind::Normal * sizeof(uintptr_t); + } +}; + +RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input, + size_t start, MatchPairs* matchPairs); + +} /* namespace js */ + +namespace JS { +namespace ubi { + +template <> +class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> { + protected: + explicit Concrete(js::RegExpShared* ptr) + : TracerConcrete<js::RegExpShared>(ptr) {} + + public: + static void construct(void* storage, js::RegExpShared* ptr) { + new (storage) Concrete(ptr); + } + + CoarseType coarseType() const final { return CoarseType::Other; } + + Size size(mozilla::MallocSizeOf mallocSizeOf) const override; + + const char16_t* typeName() const override { return concreteTypeName; } + static const char16_t concreteTypeName[]; +}; + +} // namespace ubi +} // namespace JS + +#endif /* vm_RegExpShared_h */ |