From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- js/src/vm/JSScript.h | 2297 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2297 insertions(+) create mode 100644 js/src/vm/JSScript.h (limited to 'js/src/vm/JSScript.h') diff --git a/js/src/vm/JSScript.h b/js/src/vm/JSScript.h new file mode 100644 index 0000000000..4e44d22304 --- /dev/null +++ b/js/src/vm/JSScript.h @@ -0,0 +1,2297 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* JS script descriptor. */ + +#ifndef vm_JSScript_h +#define vm_JSScript_h + +#include "mozilla/Atomics.h" +#include "mozilla/Maybe.h" +#include "mozilla/MaybeOneOf.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/RefPtr.h" +#include "mozilla/Span.h" + +#include "mozilla/UniquePtr.h" +#include "mozilla/Utf8.h" +#include "mozilla/Variant.h" + +#include // std::is_same +#include // std::move + +#include "jstypes.h" + +#include "frontend/ScriptIndex.h" // ScriptIndex +#include "gc/Barrier.h" +#include "js/ColumnNumber.h" // JS::LimitedColumnNumberOneOrigin, JS::LimitedColumnNumberOneOrigin +#include "js/CompileOptions.h" +#include "js/Transcoding.h" +#include "js/UbiNode.h" +#include "js/UniquePtr.h" +#include "js/Utility.h" +#include "util/TrailingArray.h" +#include "vm/BytecodeIterator.h" +#include "vm/BytecodeLocation.h" +#include "vm/BytecodeUtil.h" +#include "vm/MutexIDs.h" // mutexid +#include "vm/NativeObject.h" +#include "vm/SharedImmutableStringsCache.h" +#include "vm/SharedStencil.h" // js::GCThingIndex, js::SourceExtent, js::SharedImmutableScriptData, MemberInitializers 
+#include "vm/StencilEnums.h" // SourceRetrievable + +namespace JS { +struct ScriptSourceInfo; +template +class SourceText; +} // namespace JS + +namespace js { + +class FrontendContext; +class ScriptSource; + +class VarScope; +class LexicalScope; + +class JS_PUBLIC_API Sprinter; + +namespace coverage { +class LCovSource; +} // namespace coverage + +namespace gc { +class AllocSite; +} // namespace gc + +namespace jit { +class AutoKeepJitScripts; +class BaselineScript; +class IonScript; +struct IonScriptCounts; +class JitScript; +} // namespace jit + +class ModuleObject; +class RegExpObject; +class SourceCompressionTask; +class Shape; +class SrcNote; +class DebugScript; + +namespace frontend { +struct CompilationStencil; +struct ExtensibleCompilationStencil; +struct CompilationGCOutput; +struct CompilationStencilMerger; +class StencilXDR; +} // namespace frontend + +class ScriptCounts { + public: + typedef mozilla::Vector PCCountsVector; + + inline ScriptCounts(); + inline explicit ScriptCounts(PCCountsVector&& jumpTargets); + inline ScriptCounts(ScriptCounts&& src); + inline ~ScriptCounts(); + + inline ScriptCounts& operator=(ScriptCounts&& src); + + // Return the counter used to count the number of visits. Returns null if + // the element is not found. + PCCounts* maybeGetPCCounts(size_t offset); + const PCCounts* maybeGetPCCounts(size_t offset) const; + + // PCCounts are stored at jump-target offsets. This function looks for the + // previous PCCount which is in the same basic block as the current offset. + PCCounts* getImmediatePrecedingPCCounts(size_t offset); + + // Return the counter used to count the number of throws. Returns null if + // the element is not found. + const PCCounts* maybeGetThrowCounts(size_t offset) const; + + // Throw counts are stored at the location of each throwing + // instruction. This function looks for the previous throw count.
+ // + // Note: if the offset of the returned count is higher than the offset of + // the immediately preceding PCCount, then this throw happened in the same + // basic block. + const PCCounts* getImmediatePrecedingThrowCounts(size_t offset) const; + + // Return the counter used to count the number of throws. Allocate it if + // none exists yet. Returns null if the allocation failed. + PCCounts* getThrowCounts(size_t offset); + + size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf); + + bool traceWeak(JSTracer* trc) { return true; } + + private: + friend class ::JSScript; + friend struct ScriptAndCounts; + + // This sorted array is used to map an offset to the number of times a + // branch got visited. + PCCountsVector pcCounts_; + + // This sorted vector is used to map an offset to the number of times an + // instruction throws. + PCCountsVector throwCounts_; + + // Information about any Ion compilations for the script. + jit::IonScriptCounts* ionCounts_; +}; + +// The keys of these side-table hash maps are intentionally not traced GC +// references to JSScript. Instead, we use bare pointers and manually fix up +// when objects could have moved (see Zone::fixupScriptMapsAfterMovingGC) and +// remove when the realm is destroyed (see Zone::clearScriptCounts and +// Zone::clearScriptNames). They essentially behave as weak references, except +// that the references are not cleared early by the GC. They must be non-strong +// references because the tables are kept at the Zone level and otherwise the +// table keys would keep scripts alive, thus keeping Realms alive, beyond their +// expected lifetimes. However, we do not use actual weak references (e.g. as +// used by WeakMap tables provided in gc/WeakMap.h) because they would be +// collected before the calls to the JSScript::finalize function which are used +// to aggregate code coverage results on the realm.
+// +// Note carefully, however, that there is an exceptional case for which we *do* +// want the JSScripts to be strong references (and thus traced): when the +// --dump-bytecode command-line option or the PCCount JSFriend API is used, +// then the scripts for all counts must remain alive. See +// Zone::traceScriptTableRoots() for more details. +// +// TODO: Clean this up by either aggregating coverage results in some other +// way, or by tweaking sweep ordering. +using UniqueScriptCounts = js::UniquePtr; +using ScriptCountsMap = + GCRekeyableHashMap, UniqueScriptCounts, + DefaultHasher>, SystemAllocPolicy>; + +// The 'const char*' for the function name is a pointer within the LCovSource's +// LifoAlloc and will be discarded at the same time. +using ScriptLCovEntry = std::tuple; +using ScriptLCovMap = + GCRekeyableHashMap, ScriptLCovEntry, + DefaultHasher>, SystemAllocPolicy>; + +#ifdef MOZ_VTUNE +using ScriptVTuneIdMap = + GCRekeyableHashMap, uint32_t, + DefaultHasher>, SystemAllocPolicy>; +#endif +#ifdef JS_CACHEIR_SPEW +using ScriptFinalWarmUpCountEntry = std::tuple; +using ScriptFinalWarmUpCountMap = + GCRekeyableHashMap, ScriptFinalWarmUpCountEntry, + DefaultHasher>, SystemAllocPolicy>; +#endif + +// As we execute JS sources that used lazy parsing, we may generate additional +// bytecode that we would like to include in caches if they are being used. +// There is a dependency cycle between JSScript / ScriptSource / +// CompilationStencil for this scenario, so we introduce this smart-ptr wrapper to +// avoid needing the full details of the stencil-merger in this file.
+class StencilIncrementalEncoderPtr { + public: + frontend::CompilationStencilMerger* merger_ = nullptr; + + StencilIncrementalEncoderPtr() = default; + ~StencilIncrementalEncoderPtr() { reset(); } + + bool hasEncoder() const { return bool(merger_); } + + void reset(); + + bool setInitial(JSContext* cx, + UniquePtr&& initial); + + bool addDelazification(JSContext* cx, + const frontend::CompilationStencil& delazification); +}; + +struct ScriptSourceChunk { + ScriptSource* ss = nullptr; + uint32_t chunk = 0; + + ScriptSourceChunk() = default; + + ScriptSourceChunk(ScriptSource* ss, uint32_t chunk) : ss(ss), chunk(chunk) { + MOZ_ASSERT(valid()); + } + + bool valid() const { return ss != nullptr; } + + bool operator==(const ScriptSourceChunk& other) const { + return ss == other.ss && chunk == other.chunk; + } +}; + +struct ScriptSourceChunkHasher { + using Lookup = ScriptSourceChunk; + + static HashNumber hash(const ScriptSourceChunk& ssc) { + return mozilla::AddToHash(DefaultHasher::hash(ssc.ss), + ssc.chunk); + } + static bool match(const ScriptSourceChunk& c1, const ScriptSourceChunk& c2) { + return c1 == c2; + } +}; + +template +using EntryUnits = mozilla::UniquePtr; + +// The uncompressed source cache contains *either* UTF-8 source data *or* +// UTF-16 source data. ScriptSourceChunk implies a ScriptSource that +// contains either UTF-8 data or UTF-16 data, so the nature of the key to +// Map below indicates how each SourceData ought to be interpreted. +using SourceData = mozilla::UniquePtr; + +template +inline SourceData ToSourceData(EntryUnits chars) { + static_assert(std::is_same_v::DeleterType>, + "EntryUnits and SourceData must share the same deleter " + "type, that need not know the type of the data being freed, " + "for the upcast below to be safe"); + return SourceData(chars.release()); +} + +class UncompressedSourceCache { + using Map = HashMap; + + public: + // Hold an entry in the source data cache and prevent it from being purged on + // GC.
+ class AutoHoldEntry { + UncompressedSourceCache* cache_ = nullptr; + ScriptSourceChunk sourceChunk_ = {}; + SourceData data_ = nullptr; + + public: + explicit AutoHoldEntry() = default; + + ~AutoHoldEntry() { + if (cache_) { + MOZ_ASSERT(sourceChunk_.valid()); + cache_->releaseEntry(*this); + } + } + + template + void holdUnits(EntryUnits units) { + MOZ_ASSERT(!cache_); + MOZ_ASSERT(!sourceChunk_.valid()); + MOZ_ASSERT(!data_); + + data_ = ToSourceData(std::move(units)); + } + + private: + void holdEntry(UncompressedSourceCache* cache, + const ScriptSourceChunk& sourceChunk) { + // Initialise the holder for a specific cache and script source. + // This will hold on to the cached source chars in the event that + // the cache is purged. + MOZ_ASSERT(!cache_); + MOZ_ASSERT(!sourceChunk_.valid()); + MOZ_ASSERT(!data_); + + cache_ = cache; + sourceChunk_ = sourceChunk; + } + + void deferDelete(SourceData data) { + // Take ownership of source chars now that the cache is being purged. Remove + // our reference to the ScriptSource which might soon be destroyed.
+ MOZ_ASSERT(cache_); + MOZ_ASSERT(sourceChunk_.valid()); + MOZ_ASSERT(!data_); + + cache_ = nullptr; + sourceChunk_ = ScriptSourceChunk(); + + data_ = std::move(data); + } + + const ScriptSourceChunk& sourceChunk() const { return sourceChunk_; } + friend class UncompressedSourceCache; + }; + + private: + UniquePtr map_ = nullptr; + AutoHoldEntry* holder_ = nullptr; + + public: + UncompressedSourceCache() = default; + + template + const Unit* lookup(const ScriptSourceChunk& ssc, AutoHoldEntry& asp); + + bool put(const ScriptSourceChunk& ssc, SourceData data, AutoHoldEntry& asp); + + void purge(); + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); + + private: + void holdEntry(AutoHoldEntry& holder, const ScriptSourceChunk& ssc); + void releaseEntry(AutoHoldEntry& holder); +}; + +template +struct SourceTypeTraits; + +template <> +struct SourceTypeTraits { + using CharT = char; + using SharedImmutableString = js::SharedImmutableString; + + static const mozilla::Utf8Unit* units(const SharedImmutableString& string) { + // Casting |char| data to |Utf8Unit| is safe because |Utf8Unit| + // contains a |char|. See the long comment in |Utf8Unit|'s definition. + return reinterpret_cast(string.chars()); + } + + static char* toString(const mozilla::Utf8Unit* units) { + auto asUnsigned = + const_cast(mozilla::Utf8AsUnsignedChars(units)); + return reinterpret_cast(asUnsigned); + } + + static UniqueChars toCacheable(EntryUnits str) { + // The cache only stores strings of |char| or |char16_t|, and right now + // it seems best not to gunk up the cache with |Utf8Unit| too. So + // cache |Utf8Unit| strings by interpreting them as |char| strings.
+ char* chars = toString(str.release()); + return UniqueChars(chars); + } +}; + +template <> +struct SourceTypeTraits { + using CharT = char16_t; + using SharedImmutableString = js::SharedImmutableTwoByteString; + + static const char16_t* units(const SharedImmutableString& string) { + return string.chars(); + } + + static char16_t* toString(const char16_t* units) { + return const_cast(units); + } + + static UniqueTwoByteChars toCacheable(EntryUnits str) { + return UniqueTwoByteChars(std::move(str)); + } +}; + +// Synchronously compress the source of |script|, for testing purposes. +[[nodiscard]] extern bool SynchronouslyCompressSource( + JSContext* cx, JS::Handle script); + +// [SMDOC] ScriptSource +// +// This class abstracts over the source we used to compile from. The current +// representation may transition to different modes in order to save memory. +// Abstractly the source may be one of UTF-8 or UTF-16. The data itself may be +// unavailable, retrievable-using-source-hook, compressed, or uncompressed. If +// source is retrieved or decompressed for use, we may update the ScriptSource +// to hold the result. +class ScriptSource { + // NOTE: While ScriptSources may be compressed off thread, they are only + // modified by the main thread, and all members are always safe to access + // on the main thread. + + friend class SourceCompressionTask; + friend bool SynchronouslyCompressSource(JSContext* cx, + JS::Handle script); + + friend class frontend::StencilXDR; + + private: + // Common base class of the templated variants of PinnedUnits. + class PinnedUnitsBase { + protected: + ScriptSource* source_; + + explicit PinnedUnitsBase(ScriptSource* source) : source_(source) {} + + void addReader(); + + template + void removeReader(); + }; + + public: + // Any users that wish to manipulate the char buffer of the ScriptSource + // need to do so via PinnedUnits for GC safety. A GC may compress + // ScriptSources.
If the source were initially uncompressed, then any raw + // pointers to the char buffer would now point to the freed, uncompressed + // chars. This is analogous to Rooted. + template + class PinnedUnits : public PinnedUnitsBase { + const Unit* units_; + + public: + PinnedUnits(JSContext* cx, ScriptSource* source, + UncompressedSourceCache::AutoHoldEntry& holder, size_t begin, + size_t len); + + ~PinnedUnits(); + + const Unit* get() const { return units_; } + + const typename SourceTypeTraits::CharT* asChars() const { + return SourceTypeTraits::toString(get()); + } + }; + + template + class PinnedUnitsIfUncompressed : public PinnedUnitsBase { + const Unit* units_; + + public: + PinnedUnitsIfUncompressed(ScriptSource* source, size_t begin, size_t len); + + ~PinnedUnitsIfUncompressed(); + + const Unit* get() const { return units_; } + + const typename SourceTypeTraits::CharT* asChars() const { + return SourceTypeTraits::toString(get()); + } + }; + + private: + // Missing source text that isn't retrievable using the source hook. (All + // ScriptSources initially begin in this state. Users that are compiling + // source text will overwrite |data| to store a different state.) + struct Missing {}; + + // Source that can be retrieved using the registered source hook. |Unit| + // records the source type so that source-text coordinates in functions and + // scripts that depend on this |ScriptSource| are correct. + template + struct Retrievable { + // The source hook and script URL required to retrieve source are stored + // elsewhere, so nothing is needed here. It'd be better hygiene to store + // something source-hook-like in each |ScriptSource| that needs it, but that + // requires reimagining a source-hook API that currently depends on source + // hooks being uniquely owned pointers... + }; + + // Uncompressed source text. Templates distinguish if we are interconvertible + // to |Retrievable| or not.
+ template + class UncompressedData { + typename SourceTypeTraits::SharedImmutableString string_; + + public: + explicit UncompressedData( + typename SourceTypeTraits::SharedImmutableString str) + : string_(std::move(str)) {} + + const Unit* units() const { return SourceTypeTraits::units(string_); } + + size_t length() const { return string_.length(); } + }; + + template + class Uncompressed : public UncompressedData { + using Base = UncompressedData; + + public: + using Base::Base; + }; + + // Compressed source text. Templates distinguish if we are interconvertible + // to |Retrievable| or not. + template + struct CompressedData { + // Single-byte compressed text, regardless of whether the original text + // was single-byte or two-byte. + SharedImmutableString raw; + size_t uncompressedLength; + + CompressedData(SharedImmutableString raw, size_t uncompressedLength) + : raw(std::move(raw)), uncompressedLength(uncompressedLength) {} + }; + + template + struct Compressed : public CompressedData { + using Base = CompressedData; + + public: + using Base::Base; + }; + + // The set of currently allowed encoding modes. + using SourceType = + mozilla::Variant, + Uncompressed, + Compressed, + Uncompressed, + Compressed, + Uncompressed, + Compressed, + Uncompressed, + Retrievable, Retrievable, + Missing>; + + // + // Start of fields. + // + + mozilla::Atomic refs = {}; + + // An id for this source that is unique across the process. This can be used + // to refer to this source from places that don't want to hold a strong + // reference to the source itself. + // + // This is a 32-bit ID and could overflow, in which case the ID will not be + // unique anymore. + uint32_t id_ = 0; + + // Source data (as a mozilla::Variant). + SourceType data = SourceType(Missing()); + + // If the GC calls triggerConvertToCompressedSource with PinnedUnits present, + // the last PinnedUnits instance will install the compressed chars upon + // destruction.
+ // + // Retrievability isn't part of the type here because uncompressed->compressed + // transitions must preserve existing retrievability. + struct ReaderInstances { + size_t count = 0; + mozilla::MaybeOneOf, + CompressedData> + pendingCompressed; + }; + ExclusiveData readers_; + + // The UTF-8 encoded filename of this script. + SharedImmutableString filename_; + + // Hash of the script filename. + HashNumber filenameHash_ = 0; + + // If this ScriptSource was generated by a code-introduction mechanism such + // as |eval| or |new Function|, the debugger needs access to the "raw" + // filename of the top-level script that contains the eval-ing code. To + // keep track of this, we must preserve the original outermost filename (of + // the original introducer script), so that instead of a filename of + // "foo.js line 30 > eval line 10 > Function", we can obtain the original + // raw filename of "foo.js". + // + // In the case described above, this field will be set to the original raw + // UTF-8 encoded filename from above, otherwise it will be mozilla::Nothing. + SharedImmutableString introducerFilename_; + + SharedImmutableTwoByteString displayURL_; + SharedImmutableTwoByteString sourceMapURL_; + + // The bytecode cache encoder is used to encode only the content of functions + // which are delazified. If this value is not nullptr, then each delazified + // function should be recorded before its first execution. + StencilIncrementalEncoderPtr xdrEncoder_; + + // A string indicating how this source code was introduced into the system. + // This is a constant, statically allocated C string, so does not need memory + // management. + // + // TODO: Document the various additional introduction type constants. + const char* introductionType_ = nullptr; + + // Bytecode offset in caller script that generated this code. This is + // present for eval-ed code, as well as "new Function(...)"-introduced + // scripts.
+ mozilla::Maybe introductionOffset_; + + // If this source is for a Function constructor, the position of ")" after the + // parameter list in the source. This is used to get the function body. + // 0 for other cases. + uint32_t parameterListEnd_ = 0; + + // Line number within the file where this source starts (1-origin). + uint32_t startLine_ = 0; + // Column number within the file where this source starts, + // in UTF-16 code units. + JS::LimitedColumnNumberOneOrigin startColumn_; + + // See: CompileOptions::mutedErrors. + bool mutedErrors_ = false; + + // Carry the delazification mode per source. + JS::DelazificationOption delazificationMode_ = + JS::DelazificationOption::OnDemandOnly; + + // True if an associated SourceCompressionTask was ever created. + bool hadCompressionTask_ = false; + + // + // End of fields. + // + + // How many ids have been handed out to sources. + static mozilla::Atomic idCount_; + + template + const Unit* chunkUnits(JSContext* cx, + UncompressedSourceCache::AutoHoldEntry& holder, + size_t chunk); + + // Return a string containing the chars starting at |begin| and ending at + // |begin + len|. + // + // Warning: this is *not* GC-safe! Any chars to be handed out must use + // PinnedUnits. See comment below. + template + const Unit* units(JSContext* cx, UncompressedSourceCache::AutoHoldEntry& asp, + size_t begin, size_t len); + + template + const Unit* uncompressedUnits(size_t begin, size_t len); + + public: + // When creating a JSString* from TwoByte source characters, we don't try + // to deflate to Latin1 for longer strings, because this can be slow.
+ static const size_t SourceDeflateLimit = 100; + + explicit ScriptSource() + : id_(++idCount_), readers_(js::mutexid::SourceCompression) {} + ~ScriptSource() { MOZ_ASSERT(refs == 0); } + + void AddRef() { refs++; } + void Release() { + MOZ_ASSERT(refs != 0); + if (--refs == 0) { + js_delete(this); + } + } + [[nodiscard]] bool initFromOptions(FrontendContext* fc, + const JS::ReadOnlyCompileOptions& options); + + /** + * The minimum script length (in code units) necessary for a script to be + * eligible to be compressed. + */ + static constexpr size_t MinimumCompressibleLength = 256; + + SharedImmutableString getOrCreateStringZ(FrontendContext* fc, + UniqueChars&& str); + SharedImmutableTwoByteString getOrCreateStringZ(FrontendContext* fc, + UniqueTwoByteChars&& str); + + private: + class LoadSourceMatcher; + + public: + // Attempt to load usable source for |ss| -- source text on which substring + // operations and the like can be performed. On success return true and set + // |*loaded| to indicate whether usable source could be loaded; otherwise + // return false. + static bool loadSource(JSContext* cx, ScriptSource* ss, bool* loaded); + + // Assign source data from |srcBuf| to this recently created |ScriptSource|.
+ template + [[nodiscard]] bool assignSource(FrontendContext* fc, + const JS::ReadOnlyCompileOptions& options, + JS::SourceText& srcBuf); + + bool hasSourceText() const { + return hasUncompressedSource() || hasCompressedSource(); + } + + private: + /* Visitor passed to |data.match()|: yields the address of the held Uncompressed alternative; every other alternative hits MOZ_CRASH. */ + template + struct UncompressedDataMatcher { + template + const UncompressedData* operator()( + const Uncompressed& u) { + return &u; + } + + template + const UncompressedData* operator()(const T&) { + MOZ_CRASH( + "attempting to access uncompressed data in a ScriptSource not " + "containing it"); + return nullptr; + } + }; + + public: + template + const UncompressedData* uncompressedData() { + return data.match(UncompressedDataMatcher()); + } + + private: + /* Visitor passed to |data.match()|: yields the address of the held Compressed alternative; every other alternative hits MOZ_CRASH. */ + template + struct CompressedDataMatcher { + template + const CompressedData* operator()( + const Compressed& c) { + return &c; + } + + template + const CompressedData* operator()(const T&) { + MOZ_CRASH( + "attempting to access compressed data in a ScriptSource not " + "containing it"); + return nullptr; + } + }; + + public: + template + const CompressedData* compressedData() { + return data.match(CompressedDataMatcher()); + } + + private: + /* Visitor passed to |data.match()|: true only for an Uncompressed alternative; false for Compressed, Retrievable and Missing. */ + struct HasUncompressedSource { + template + bool operator()(const Uncompressed&) { + return true; + } + + template + bool operator()(const Compressed&) { + return false; + } + + template + bool operator()(const Retrievable&) { + return false; + } + + bool operator()(const Missing&) { return false; } + }; + + public: + bool hasUncompressedSource() const { + return data.match(HasUncompressedSource()); + } + + private: + /* Visitor passed to |data.match()|: true for the Uncompressed overload, false for any other alternative. NOTE(review): the template arguments are not visible in this copy; presumably this matcher is specific to one Unit type, unlike HasUncompressedSource -- confirm. */ + template + struct IsUncompressed { + template + bool operator()(const Uncompressed&) { + return true; + } + + template + bool operator()(const T&) { + return false; + } + }; + + public: + template + bool isUncompressed() const { + return data.match(IsUncompressed()); + } + + private: + /* Visitor passed to |data.match()|: true for a Compressed alternative, false for any other alternative. */ + struct HasCompressedSource { + template + bool operator()(const Compressed&) { + return true; + } + + template + bool
operator()(const T&) { + return false; + } + }; + + public: + bool hasCompressedSource() const { return data.match(HasCompressedSource()); } + + private: + /* Visitor passed to |data.match()|: true for the Compressed overload, false for any other alternative. NOTE(review): the template arguments are not visible in this copy; presumably this matcher is specific to one Unit type, unlike HasCompressedSource -- confirm. */ + template + struct IsCompressed { + template + bool operator()(const Compressed&) { + return true; + } + + template + bool operator()(const T&) { + return false; + } + }; + + public: + template + bool isCompressed() const { + return data.match(IsCompressed()); + } + + private: + template + struct SourceTypeMatcher { + template