/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* JS script descriptor. */ #ifndef vm_JSScript_h #define vm_JSScript_h #include "mozilla/Atomics.h" #include "mozilla/Maybe.h" #include "mozilla/MaybeOneOf.h" #include "mozilla/MemoryReporting.h" #include "mozilla/RefPtr.h" #include "mozilla/Span.h" #include "mozilla/UniquePtr.h" #include "mozilla/Utf8.h" #include "mozilla/Variant.h" #include // std::is_same #include // std::move #include "jstypes.h" #include "frontend/ScriptIndex.h" // ScriptIndex #include "gc/Barrier.h" #include "js/CompileOptions.h" #include "js/Transcoding.h" #include "js/UbiNode.h" #include "js/UniquePtr.h" #include "js/Utility.h" #include "util/TrailingArray.h" #include "vm/BytecodeIterator.h" #include "vm/BytecodeLocation.h" #include "vm/BytecodeUtil.h" #include "vm/MutexIDs.h" // mutexid #include "vm/NativeObject.h" #include "vm/SharedImmutableStringsCache.h" #include "vm/SharedStencil.h" // js::GCThingIndex, js::SourceExtent, js::SharedImmutableScriptData, MemberInitializers #include "vm/StencilEnums.h" // SourceRetrievable namespace JS { struct ScriptSourceInfo; template class SourceText; } // namespace JS namespace js { class FrontendContext; class ScriptSource; class VarScope; class LexicalScope; class JS_PUBLIC_API Sprinter; namespace coverage { class LCovSource; } // namespace coverage namespace gc { class AllocSite; } // namespace gc namespace jit { class AutoKeepJitScripts; class BaselineScript; class IonScript; struct IonScriptCounts; class JitScript; } // namespace jit class ModuleObject; class RegExpObject; class SourceCompressionTask; class Shape; class SrcNote; class DebugScript; namespace frontend { struct CompilationStencil; struct ExtensibleCompilationStencil; 
struct CompilationGCOutput;
struct CompilationStencilMerger;
class StencilXDR;
}  // namespace frontend

// Execution counters kept for a single script. As the private members at the
// bottom of the class show, an instance bundles per-offset visit counts,
// per-offset throw counts and a pointer to Ion compilation counts.
class ScriptCounts {
 public:
  typedef mozilla::Vector PCCountsVector;

  // The special members below are only declared here (all |inline|); their
  // definitions are not part of this section of the file. Only move
  // construction and move assignment are declared.
  inline ScriptCounts();
  inline explicit ScriptCounts(PCCountsVector&& jumpTargets);
  inline ScriptCounts(ScriptCounts&& src);
  inline ~ScriptCounts();

  inline ScriptCounts& operator=(ScriptCounts&& src);

  // Return the counter used to count the number of visits. Returns null if
  // the element is not found.
  PCCounts* maybeGetPCCounts(size_t offset);
  const PCCounts* maybeGetPCCounts(size_t offset) const;

  // PCCounts are stored at jump-target offsets. This function looks for the
  // previous PCCount which is in the same basic block as the current offset.
  PCCounts* getImmediatePrecedingPCCounts(size_t offset);

  // Return the counter used to count the number of throws. Returns null if
  // the element is not found.
  const PCCounts* maybeGetThrowCounts(size_t offset) const;

  // Throw counts are stored at the location of each throwing
  // instruction. This function looks for the previous throw count.
  //
  // Note: if the offset of the returned count is higher than the offset of
  // the immediate preceding PCCount, then this throw happened in the same
  // basic block.
  const PCCounts* getImmediatePrecedingThrowCounts(size_t offset) const;

  // Return the counter used to count the number of throws. Allocate it if
  // none exists yet. Returns null if the allocation failed.
  PCCounts* getThrowCounts(size_t offset);

  // Memory-reporting hook; defined outside this section of the file.
  size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf);

  // Weak-tracing hook. This implementation ignores |trc| and always returns
  // true.
  bool traceWeak(JSTracer* trc) { return true; }

 private:
  // These two types are granted access to the private members below.
  friend class ::JSScript;
  friend struct ScriptAndCounts;

  // This sorted array is used to map an offset to the number of times a
  // branch got visited.
  PCCountsVector pcCounts_;

  // This sorted vector is used to map an offset to the number of times an
  // instruction threw.
  PCCountsVector throwCounts_;

  // Information about any Ion compilations for the script.
  jit::IonScriptCounts* ionCounts_;
};

// The keys of these side-table hash maps are intentionally not traced GC
// references to JSScript. Instead, we use bare pointers and manually fix up
// when objects could have moved (see Zone::fixupScriptMapsAfterMovingGC) and
// remove when the realm is destroyed (see Zone::clearScriptCounts and
// Zone::clearScriptNames). They essentially behave as weak references, except
// that the references are not cleared early by the GC. They must be non-strong
// references because the tables are kept at the Zone level and otherwise the
// table keys would keep scripts alive, thus keeping Realms alive, beyond their
// expected lifetimes. However, we do not use actual weak references (e.g. as
// used by WeakMap tables provided in gc/WeakMap.h) because they would be
// collected before the calls to the JSScript::finalize function which are used
// to aggregate code coverage results on the realm.
//
// Note carefully, however, that there is an exceptional case for which we *do*
// want the JSScripts to be strong references (and thus traced): when the
// --dump-bytecode command line option or the PCCount JSFriend API is used,
// then the scripts for all counts must remain alive. See
// Zone::traceScriptTableRoots() for more details.
//
// TODO: Clean this up by either aggregating coverage results in some other
// way, or by tweaking sweep ordering.

// Owning pointer alias; used as the mapped value type of ScriptCountsMap.
using UniqueScriptCounts = js::UniquePtr;

using ScriptCountsMap =
    GCRekeyableHashMap, UniqueScriptCounts, DefaultHasher>, SystemAllocPolicy>;

// The 'const char*' for the function name is a pointer within the LCovSource's
// LifoAlloc and will be discarded at the same time.
using ScriptLCovEntry = std::tuple; using ScriptLCovMap = GCRekeyableHashMap, ScriptLCovEntry, DefaultHasher>, SystemAllocPolicy>; #ifdef MOZ_VTUNE using ScriptVTuneIdMap = GCRekeyableHashMap, uint32_t, DefaultHasher>, SystemAllocPolicy>; #endif #ifdef JS_CACHEIR_SPEW using ScriptFinalWarmUpCountEntry = std::tuple; using ScriptFinalWarmUpCountMap = GCRekeyableHashMap, ScriptFinalWarmUpCountEntry, DefaultHasher>, SystemAllocPolicy>; #endif // As we execute JS sources that used lazy parsing, we may generate additional // bytecode that we would like to include in caches if they are being used. // There is a dependency cycle between JSScript / ScriptSource / // CompilationStencil for this scenario so introduce this smart-ptr wrapper to // avoid needing the full details of the stencil-merger in this file. class StencilIncrementalEncoderPtr { public: frontend::CompilationStencilMerger* merger_ = nullptr; StencilIncrementalEncoderPtr() = default; ~StencilIncrementalEncoderPtr() { reset(); } bool hasEncoder() const { return bool(merger_); } void reset(); bool setInitial(JSContext* cx, UniquePtr&& initial); bool addDelazification(JSContext* cx, const frontend::CompilationStencil& delazification); }; struct ScriptSourceChunk { ScriptSource* ss = nullptr; uint32_t chunk = 0; ScriptSourceChunk() = default; ScriptSourceChunk(ScriptSource* ss, uint32_t chunk) : ss(ss), chunk(chunk) { MOZ_ASSERT(valid()); } bool valid() const { return ss != nullptr; } bool operator==(const ScriptSourceChunk& other) const { return ss == other.ss && chunk == other.chunk; } }; struct ScriptSourceChunkHasher { using Lookup = ScriptSourceChunk; static HashNumber hash(const ScriptSourceChunk& ssc) { return mozilla::AddToHash(DefaultHasher::hash(ssc.ss), ssc.chunk); } static bool match(const ScriptSourceChunk& c1, const ScriptSourceChunk& c2) { return c1 == c2; } }; template using EntryUnits = mozilla::UniquePtr; // The uncompressed source cache contains *either* UTF-8 source data *or* // UTF-16 source 
// data. ScriptSourceChunk implies a ScriptSource that
// contains either UTF-8 data or UTF-16 data, so the nature of the key to
// Map below indicates how each SourceData ought to be interpreted.
using SourceData = mozilla::UniquePtr;

// Re-wrap the buffer owned by |chars| as a SourceData: the raw pointer is
// released from |chars| and handed to a new SourceData. The static_assert
// checks at compile time that the deleter types are compatible.
template inline SourceData ToSourceData(EntryUnits chars) {
  static_assert(std::is_same_v::DeleterType>,
                "EntryUnits and SourceData must share the same deleter "
                "type, that need not know the type of the data being freed, "
                "for the upcast below to be safe");
  return SourceData(chars.release());
}

// Cache of uncompressed source data keyed by ScriptSourceChunk (see the
// comment above SourceData). Entries can be pinned through an AutoHoldEntry.
class UncompressedSourceCache {
  using Map = HashMap;

 public:
  // Hold an entry in the source data cache and prevent it from being purged on
  // GC.
  class AutoHoldEntry {
    // Cache this holder is registered with, or null.
    UncompressedSourceCache* cache_ = nullptr;
    // Chunk being held; invalid (null |ss|) when nothing is held via a cache.
    ScriptSourceChunk sourceChunk_ = {};
    // Data owned directly by the holder (set by holdUnits() / deferDelete()).
    SourceData data_ = nullptr;

   public:
    explicit AutoHoldEntry() = default;

    // If the holder is still registered with a cache, tell that cache to
    // release it.
    ~AutoHoldEntry() {
      if (cache_) {
        MOZ_ASSERT(sourceChunk_.valid());
        cache_->releaseEntry(*this);
      }
    }

    // Take ownership of |units| directly, without involving a cache. The
    // holder must be completely empty beforehand.
    template void holdUnits(EntryUnits units) {
      MOZ_ASSERT(!cache_);
      MOZ_ASSERT(!sourceChunk_.valid());
      MOZ_ASSERT(!data_);

      data_ = ToSourceData(std::move(units));
    }

   private:
    void holdEntry(UncompressedSourceCache* cache,
                   const ScriptSourceChunk& sourceChunk) {
      // Initialise the holder for a specific cache and script source.
      // This will hold on to the cached source chars in the event that
      // the cache is purged.
      MOZ_ASSERT(!cache_);
      MOZ_ASSERT(!sourceChunk_.valid());
      MOZ_ASSERT(!data_);

      cache_ = cache;
      sourceChunk_ = sourceChunk;
    }

    void deferDelete(SourceData data) {
      // Take ownership of source chars now the cache is being purged. Remove
      // our reference to the ScriptSource which might soon be destroyed.
      MOZ_ASSERT(cache_);
      MOZ_ASSERT(sourceChunk_.valid());
      MOZ_ASSERT(!data_);

      cache_ = nullptr;
      sourceChunk_ = ScriptSourceChunk();

      data_ = std::move(data);
    }

    const ScriptSourceChunk& sourceChunk() const { return sourceChunk_; }
    // The cache drives the private holdEntry()/deferDelete() transitions.
    friend class UncompressedSourceCache;
  };

 private:
  // The backing map; starts out null.
  UniquePtr map_ = nullptr;
  // Holder currently registered with this cache, or null.
  // NOTE(review): a single pointer suggests at most one holder at a time --
  // confirm against the out-of-line holdEntry()/releaseEntry() definitions.
  AutoHoldEntry* holder_ = nullptr;

 public:
  UncompressedSourceCache() = default;

  // The operations below are defined outside this section of the file.
  template const Unit* lookup(const ScriptSourceChunk& ssc, AutoHoldEntry& asp);
  bool put(const ScriptSourceChunk& ssc, SourceData data, AutoHoldEntry& asp);

  void purge();

  size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);

 private:
  void holdEntry(AutoHoldEntry& holder, const ScriptSourceChunk& ssc);
  void releaseEntry(AutoHoldEntry& holder);
};

// Traits describing how a source code-unit type maps onto the |char|-like
// type and SharedImmutable string type used to store it. Only declared here;
// the two specializations follow.
template struct SourceTypeTraits;

// Specialization whose members operate on mozilla::Utf8Unit data stored as
// |char|.
template <> struct SourceTypeTraits {
  using CharT = char;
  using SharedImmutableString = js::SharedImmutableString;

  static const mozilla::Utf8Unit* units(const SharedImmutableString& string) {
    // Casting |char| data to |Utf8Unit| is safe because |Utf8Unit|
    // contains a |char|. See the long comment in |Utf8Unit|'s definition.
    return reinterpret_cast(string.chars());
  }

  // View |units| as mutable |char| data (casts away constness).
  static char* toString(const mozilla::Utf8Unit* units) {
    auto asUnsigned = const_cast(mozilla::Utf8AsUnsignedChars(units));
    return reinterpret_cast(asUnsigned);
  }

  static UniqueChars toCacheable(EntryUnits str) {
    // The cache only stores strings of |char| or |char16_t|, and right now
    // it seems best not to gunk up the cache with |Utf8Unit| too. So
    // cache |Utf8Unit| strings by interpreting them as |char| strings.
    char* chars = toString(str.release());
    return UniqueChars(chars);
  }
};

// Specialization whose members operate on |char16_t| data; no
// reinterpretation of the character type is needed here.
template <> struct SourceTypeTraits {
  using CharT = char16_t;
  using SharedImmutableString = js::SharedImmutableTwoByteString;

  static const char16_t* units(const SharedImmutableString& string) {
    return string.chars();
  }

  // Same pointer with constness cast away.
  static char16_t* toString(const char16_t* units) {
    return const_cast(units);
  }

  // Moves |str| into a UniqueTwoByteChars.
  static UniqueTwoByteChars toCacheable(EntryUnits str) {
    return UniqueTwoByteChars(std::move(str));
  }
};

// Synchronously compress the source of |script|, for testing purposes.
[[nodiscard]] extern bool SynchronouslyCompressSource(
    JSContext* cx, JS::Handle script);

// [SMDOC] ScriptSource
//
// This class abstracts over the source we used to compile from. The current
// representation may transition to different modes in order to save memory.
// Abstractly the source may be one of UTF-8 or UTF-16. The data itself may be
// unavailable, retrievable-using-source-hook, compressed, or uncompressed. If
// source is retrieved or decompressed for use, we may update the ScriptSource
// to hold the result.
class ScriptSource {
  // NOTE: While ScriptSources may be compressed off thread, they are only
  // modified by the main thread, and all members are always safe to access
  // on the main thread.

  friend class SourceCompressionTask;
  friend bool SynchronouslyCompressSource(JSContext* cx, JS::Handle script);

  friend class frontend::StencilXDR;

 private:
  // Common base class of the templated variants of PinnedUnits.
  class PinnedUnitsBase {
   protected:
    ScriptSource* source_;

    explicit PinnedUnitsBase(ScriptSource* source) : source_(source) {}
  };

 public:
  // Any users that wish to manipulate the char buffer of the ScriptSource
  // needs to do so via PinnedUnits for GC safety. A GC may compress
  // ScriptSources. If the source were initially uncompressed, then any raw
  // pointers to the char buffer would now point to the freed, uncompressed
  // chars. This is analogous to Rooted.
template class PinnedUnits : public PinnedUnitsBase {
  // The pinned units; initialised by the out-of-line constructor.
  const Unit* units_;

 public:
  // Constructor and destructor are defined outside this section of the file.
  PinnedUnits(JSContext* cx, ScriptSource* source,
              UncompressedSourceCache::AutoHoldEntry& holder, size_t begin,
              size_t len);

  ~PinnedUnits();

  // Raw pointer to the pinned units.
  const Unit* get() const { return units_; }

  // The same pointer, converted with SourceTypeTraits::toString.
  const typename SourceTypeTraits::CharT* asChars() const {
    return SourceTypeTraits::toString(get());
  }
};

private:
// Missing source text that isn't retrievable using the source hook. (All
// ScriptSources initially begin in this state. Users that are compiling
// source text will overwrite |data| to store a different state.)
struct Missing {};

// Source that can be retrieved using the registered source hook. |Unit|
// records the source type so that source-text coordinates in functions and
// scripts that depend on this |ScriptSource| are correct.
template struct Retrievable {
  // The source hook and script URL required to retrieve source are stored
  // elsewhere, so nothing is needed here. It'd be better hygiene to store
  // something source-hook-like in each |ScriptSource| that needs it, but that
  // requires reimagining a source-hook API that currently depends on source
  // hooks being uniquely-owned pointers...
};

// Uncompressed source text. Templates distinguish if we are interconvertible
// to |Retrievable| or not.
template class UncompressedData {
  // The stored text.
  typename SourceTypeTraits::SharedImmutableString string_;

 public:
  // Takes ownership of |str| by moving it into |string_|.
  explicit UncompressedData(
      typename SourceTypeTraits::SharedImmutableString str)
      : string_(std::move(str)) {}

  // Pointer to the stored units, obtained through SourceTypeTraits::units.
  const Unit* units() const { return SourceTypeTraits::units(string_); }

  // Length as reported by the stored string.
  size_t length() const { return string_.length(); }
};

// Adds no members of its own; it only inherits the base class constructors.
template class Uncompressed : public UncompressedData {
  using Base = UncompressedData;

 public:
  using Base::Base;
};

// Compressed source text. Templates distinguish if we are interconvertible
// to |Retrievable| or not.
template struct CompressedData {
  // Single-byte compressed text, regardless of whether the original text
  // was single-byte or two-byte.
  SharedImmutableString raw;
  // Length recorded for the text before compression.
  size_t uncompressedLength;

  CompressedData(SharedImmutableString raw, size_t uncompressedLength)
      : raw(std::move(raw)), uncompressedLength(uncompressedLength) {}
};

// Adds no members of its own; it only inherits the base class constructors.
template struct Compressed : public CompressedData {
  using Base = CompressedData;

 public:
  using Base::Base;
};

// The set of currently allowed encoding modes.
using SourceType =
    mozilla::Variant, Uncompressed, Compressed, Uncompressed, Compressed,
                     Uncompressed, Compressed, Uncompressed, Retrievable,
                     Retrievable, Missing>;

//
// Start of fields.
//

// Reference count; see AddRef() and Release().
mozilla::Atomic refs = {};

// An id for this source that is unique across the process. This can be used
// to refer to this source from places that don't want to hold a strong
// reference on the source itself.
//
// This is a 32 bit ID and could overflow, in which case the ID will not be
// unique anymore.
uint32_t id_ = 0;

// Source data (as a mozilla::Variant).
SourceType data = SourceType(Missing());

// If the GC calls triggerConvertToCompressedSource with PinnedUnits present,
// the last PinnedUnits instance will install the compressed chars upon
// destruction.
//
// Retrievability isn't part of the type here because uncompressed->compressed
// transitions must preserve existing retrievability.
struct ReaderInstances {
  size_t count = 0;
  mozilla::MaybeOneOf, CompressedData> pendingCompressed;
};
ExclusiveData readers_;

// The UTF-8 encoded filename of this script.
SharedImmutableString filename_;

// Hash of the script filename.
HashNumber filenameHash_ = 0;

// If this ScriptSource was generated by a code-introduction mechanism such
// as |eval| or |new Function|, the debugger needs access to the "raw"
// filename of the top-level script that contains the eval-ing code. To
// keep track of this, we must preserve the original outermost filename (of
// the original introducer script), so that instead of a filename of
// "foo.js line 30 > eval line 10 > Function", we can obtain the original
// raw filename of "foo.js".
// // In the case described above, this field will be set to to the original raw // UTF-8 encoded filename from above, otherwise it will be mozilla::Nothing. SharedImmutableString introducerFilename_; SharedImmutableTwoByteString displayURL_; SharedImmutableTwoByteString sourceMapURL_; // The bytecode cache encoder is used to encode only the content of function // which are delazified. If this value is not nullptr, then each delazified // function should be recorded before their first execution. StencilIncrementalEncoderPtr xdrEncoder_; // A string indicating how this source code was introduced into the system. // This is a constant, statically allocated C string, so does not need memory // management. // // TODO: Document the various additional introduction type constants. const char* introductionType_ = nullptr; // Bytecode offset in caller script that generated this code. This is // present for eval-ed code, as well as "new Function(...)"-introduced // scripts. mozilla::Maybe introductionOffset_; // If this source is for Function constructor, the position of ")" after // parameter list in the source. This is used to get function body. // 0 for other cases. uint32_t parameterListEnd_ = 0; // Line number within the file where this source starts. uint32_t startLine_ = 0; // Column number within the file where this source starts. uint32_t startColumn_ = 0; // See: CompileOptions::mutedErrors. bool mutedErrors_ = false; // Carry the delazification mode per source. JS::DelazificationOption delazificationMode_ = JS::DelazificationOption::OnDemandOnly; // True if an associated SourceCompressionTask was ever created. bool hadCompressionTask_ = false; // // End of fields. // // How many ids have been handed out to sources. static mozilla::Atomic idCount_; template const Unit* chunkUnits(JSContext* cx, UncompressedSourceCache::AutoHoldEntry& holder, size_t chunk); // Return a string containing the chars starting at |begin| and ending at // |begin + len|. 
// // Warning: this is *not* GC-safe! Any chars to be handed out must use // PinnedUnits. See comment below. template const Unit* units(JSContext* cx, UncompressedSourceCache::AutoHoldEntry& asp, size_t begin, size_t len); public: // When creating a JSString* from TwoByte source characters, we don't try to // to deflate to Latin1 for longer strings, because this can be slow. static const size_t SourceDeflateLimit = 100; explicit ScriptSource() : id_(++idCount_), readers_(js::mutexid::SourceCompression) {} ~ScriptSource() { MOZ_ASSERT(refs == 0); } void AddRef() { refs++; } void Release() { MOZ_ASSERT(refs != 0); if (--refs == 0) { js_delete(this); } } [[nodiscard]] bool initFromOptions(FrontendContext* fc, const JS::ReadOnlyCompileOptions& options); /** * The minimum script length (in code units) necessary for a script to be * eligible to be compressed. */ static constexpr size_t MinimumCompressibleLength = 256; SharedImmutableString getOrCreateStringZ(FrontendContext* fc, UniqueChars&& str); SharedImmutableTwoByteString getOrCreateStringZ(FrontendContext* fc, UniqueTwoByteChars&& str); private: class LoadSourceMatcher; public: // Attempt to load usable source for |ss| -- source text on which substring // operations and the like can be performed. On success return true and set // |*loaded| to indicate whether usable source could be loaded; otherwise // return false. static bool loadSource(JSContext* cx, ScriptSource* ss, bool* loaded); // Assign source data from |srcBuf| to this recently-created |ScriptSource|. 
template [[nodiscard]] bool assignSource(
    FrontendContext* fc, const JS::ReadOnlyCompileOptions& options,
    JS::SourceText& srcBuf);

// True when |data| currently holds either uncompressed or compressed text.
bool hasSourceText() const {
  return hasUncompressedSource() || hasCompressedSource();
}

private:
// Matcher for |data|: yields a pointer to the stored uncompressed data when
// the first overload applies, and crashes (MOZ_CRASH) for every other
// alternative.
template struct UncompressedDataMatcher {
  template const UncompressedData* operator()(
      const Uncompressed& u) {
    return &u;
  }

  template const UncompressedData* operator()(const T&) {
    MOZ_CRASH(
        "attempting to access uncompressed data in a ScriptSource not "
        "containing it");
    return nullptr;
  }
};

public:
// Access the uncompressed data held in |data|. Crashes, via the matcher
// above, if |data| does not hold a matching uncompressed alternative.
template const UncompressedData* uncompressedData() {
  return data.match(UncompressedDataMatcher());
}

private:
// Matcher for |data|: yields a pointer to the stored compressed data when
// the first overload applies, and crashes (MOZ_CRASH) for every other
// alternative.
template struct CompressedDataMatcher {
  template const CompressedData* operator()(
      const Compressed& c) {
    return &c;
  }

  template const CompressedData* operator()(const T&) {
    MOZ_CRASH(
        "attempting to access compressed data in a ScriptSource not "
        "containing it");
    return nullptr;
  }
};

public:
// Access the compressed data held in |data|. Crashes, via the matcher above,
// if |data| does not hold a matching compressed alternative.
template const CompressedData* compressedData() {
  return data.match(CompressedDataMatcher());
}

private:
// Matcher answering "is this an uncompressed alternative?": true for
// Uncompressed, false for Compressed, Retrievable and Missing.
struct HasUncompressedSource {
  template bool operator()(const Uncompressed&) {
    return true;
  }

  template bool operator()(const Compressed&) {
    return false;
  }

  template bool operator()(const Retrievable&) {
    return false;
  }

  bool operator()(const Missing&) { return false; }
};

public:
bool hasUncompressedSource() const {
  return data.match(HasUncompressedSource());
}

private:
// Matcher that is true for the Uncompressed overload and false for any other
// alternative.
template struct IsUncompressed {
  template bool operator()(const Uncompressed&) {
    return true;
  }

  template bool operator()(const T&) {
    return false;
  }
};

public:
template bool isUncompressed() const {
  return data.match(IsUncompressed());
}

private:
// Matcher answering "is this a compressed alternative?": true for Compressed,
// false for everything else.
struct HasCompressedSource {
  template bool operator()(const Compressed&) {
    return true;
  }

  template bool operator()(const T&) {
    return false;
  }
};

public:
bool hasCompressedSource() const {
  return data.match(HasCompressedSource());
}

private:
// Matcher that is true for the Compressed overload and false for any other
// alternative.
template struct IsCompressed {
  template bool operator()(const Compressed&) {
    return true;
  }

  template bool operator()(const T&) {
    return false;
  }
};

public:
template bool isCompressed() const {
  return data.match(IsCompressed());
}

private:
template struct SourceTypeMatcher {
  template