/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef vm_StringType_h #define vm_StringType_h #include "mozilla/Maybe.h" #include "mozilla/MemoryReporting.h" #include "mozilla/Range.h" #include "mozilla/Span.h" #include "mozilla/TextUtils.h" #include // std::basic_string_view #include "jstypes.h" // js::Bit #include "gc/Allocator.h" #include "gc/Cell.h" #include "gc/MaybeRooted.h" #include "gc/Nursery.h" #include "gc/RelocationOverlay.h" #include "gc/StoreBuffer.h" #include "js/CharacterEncoding.h" #include "js/RootingAPI.h" #include "js/shadow/String.h" // JS::shadow::String #include "js/String.h" // JS::MaxStringLength #include "js/UniquePtr.h" #include "util/Text.h" class JSDependentString; class JSExtensibleString; class JSExternalString; class JSInlineString; class JSRope; namespace JS { class JS_PUBLIC_API AutoStableStringChars; } // namespace JS namespace js { class ArrayObject; class GenericPrinter; class PropertyName; class StringBuffer; namespace frontend { class ParserAtomsTable; class TaggedParserAtomIndex; class WellKnownParserAtoms; struct CompilationAtomCache; } // namespace frontend namespace jit { class MacroAssembler; } // namespace jit /* The buffer length required to contain any unsigned 32-bit integer. */ static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1; // Maximum array index. This value is defined in the spec (ES2021 draft, 6.1.7): // // An array index is an integer index whose numeric value i is in the range // +0𝔽 ≤ i < 𝔽(2^32 - 1). 
const uint32_t MAX_ARRAY_INDEX = 4294967294u; // 2^32-2 (= UINT32_MAX-1) // Returns true if the characters of `s` store an unsigned 32-bit integer value // less than or equal to MAX_ARRAY_INDEX, initializing `*indexp` to that value // if so. Leading '0' isn't allowed except 0 itself. template bool CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp); } /* namespace js */ // clang-format off /* * [SMDOC] JavaScript Strings * * Conceptually, a JS string is just an array of chars and a length. This array * of chars may or may not be null-terminated and, if it is, the null character * is not included in the length. * * To improve performance of common operations, the following optimizations are * made which affect the engine's representation of strings: * * - The plain vanilla representation is a "linear" string which consists of a * string header in the GC heap and a malloc'd char array. * * - To avoid copying a substring of an existing "base" string , a "dependent" * string (JSDependentString) can be created which points into the base * string's char array. * * - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created * to represent a delayed string concatenation. Concatenation (called * flattening) is performed if and when a linear char array is requested. In * general, ropes form a binary dag whose internal nodes are JSRope string * headers with no associated char array and whose leaf nodes are linear * strings. * * - To avoid copying the leftmost string when flattening, we may produce an * "extensible" string, which tracks not only its actual length but also its * buffer's overall size. If such an "extensible" string appears as the * leftmost string in a subsequent flatten, and its buffer has enough unused * space, we can simply flatten the rest of the ropes into its buffer, * leaving its text in place. 
We then transfer ownership of its buffer to the
 *    flattened rope, and mutate the donor extensible string into a dependent
 *    string referencing its original buffer.
 *
 *    (The term "extensible" does not imply that we ever 'realloc' the buffer.
 *    Extensible strings may have dependent strings pointing into them, and the
 *    JSAPI hands out pointers to linear strings' buffers, so resizing with
 *    'realloc' is generally not possible.)
 *
 *  - To avoid allocating small char arrays, short strings can be stored inline
 *    in the string header (JSInlineString). These come in two flavours:
 *    JSThinInlineString, which is the same size as JSString; and
 *    JSFatInlineString, which has a larger header and so can fit more chars.
 *
 *  - To avoid O(n) string equality comparisons, strings can be
 *    canonicalized to "atoms" (JSAtom) such that there is a single atom with a
 *    given (length,chars).
 *
 *  - To avoid copying all strings created through the JSAPI, an "external"
 *    string (JSExternalString) can be created whose chars are managed by the
 *    JSAPI client.
 *
 *  - To avoid using two bytes per character for every string, string
 *    characters are stored as Latin1 instead of TwoByte if all characters are
 *    representable in Latin1.
 *
 *  - To avoid slow conversions from strings to integer indexes, we cache 16 bit
 *    unsigned indexes on strings representing such numbers.
*
 * Although all strings share the same basic memory layout, we can conceptually
 * arrange them into a hierarchy of operations/invariants and represent this
 * hierarchy in C++ with classes:
 *
 * C++ type                       operations+fields / invariants+properties
 * ==========================     =========================================
 * JSString (abstract)            get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
 *  | \
 *  | JSRope                      leftChild, rightChild / -
 *  |
 * JSLinearString (abstract)      latin1Chars, twoByteChars / -
 *  |
 *  +-- JSDependentString         base / -
 *  |
 *  +-- JSExternalString          - / char array memory managed by embedding
 *  |
 *  +-- JSExtensibleString        tracks total buffer capacity (including current text)
 *  |
 *  +-- JSInlineString (abstract) - / chars stored in header
 *  |   |
 *  |   +-- JSThinInlineString    - / header is normal
 *  |   |
 *  |   +-- JSFatInlineString     - / header is fat
 *  |
 * JSAtom (abstract)              - / string equality === pointer equality
 *  |  |
 *  |  +-- js::NormalAtom         - JSLinearString + atom hash code
 *  |  |
 *  |  +-- js::FatInlineAtom      - JSFatInlineString + atom hash code
 *  |
 * js::PropertyName               - / chars don't contain an index (uint32_t)
 *
 * Classes marked with (abstract) above are not literally C++ Abstract Base
 * Classes (since there are no virtual functions, pure or not, in this
 * hierarchy), but have the same meaning: there are no strings with this type as
 * its most-derived type.
 *
 * Atoms can additionally be permanent, i.e. unable to be collected, and can
 * be combined with other string types to create additional most-derived types
 * that satisfy the invariants of more than one of the abovementioned
 * most-derived types. Furthermore, each atom stores a hash number (based on its
 * chars). This hash number is used as key in the atoms table and when the atom
 * is used as key in a JS Map/Set.
 *
 * Derived string types can be queried from ancestor types via isX() and
 * retrieved with asX() debug-only-checked casts.
* * The ensureX() operations mutate 'this' in place to effectively the type to be * at least X (e.g., ensureLinear will change a JSRope to be a JSLinearString). */ // clang-format on class JSString : public js::gc::CellWithLengthAndFlags { protected: static const size_t NUM_INLINE_CHARS_LATIN1 = 2 * sizeof(void*) / sizeof(JS::Latin1Char); static const size_t NUM_INLINE_CHARS_TWO_BYTE = 2 * sizeof(void*) / sizeof(char16_t); public: // String length and flags are stored in the cell header. MOZ_ALWAYS_INLINE size_t length() const { return headerLengthField(); } MOZ_ALWAYS_INLINE uint32_t flags() const { return headerFlagsField(); } protected: /* Fields only apply to string types commented on the right. */ struct Data { // Note: 32-bit length and flags fields are inherited from // CellWithLengthAndFlags. union { union { /* JS(Fat)InlineString */ JS::Latin1Char inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1]; char16_t inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE]; }; struct { union { const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except JS(Fat)InlineString */ const char16_t* nonInlineCharsTwoByte; /* JSLinearString, except JS(Fat)InlineString */ JSString* left; /* JSRope */ JSRope* parent; /* Used in flattening */ } u2; union { JSLinearString* base; /* JSDependentString */ JSString* right; /* JSRope */ size_t capacity; /* JSLinearString (extensible) */ const JSExternalStringCallbacks* externalCallbacks; /* JSExternalString */ } u3; } s; }; } d; public: /* Flags exposed only for jits */ /* * Flag Encoding * * The first word of a JSString stores flags, index, and (on some * platforms) the length. The flags store both the string's type and its * character encoding. * * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1 * instead of TwoByte. This flag can also be set for ropes, if both the * left and right nodes are Latin1. Flattening will result in a Latin1 * string in this case. * * The other flags store the string's type. 
Instead of using a dense index
   * to represent the most-derived type, string types are encoded to allow
   * single-op tests for hot queries (isRope, isDependent, isAtom) which, in
   * view of subtyping, would require slower (isX() || isY() || isZ()).
   *
   * The string type encoding can be summarized as follows. The "instance
   * encoding" entry for a type specifies the flag bits used to create a
   * string instance of that type. Abstract types have no instances and thus
   * have no such entry. The "subtype predicate" entry for a type specifies
   * the predicate used to query whether a JSString instance is a subtype
   * (reflexively) of that type.
   *
   *   String        Instance        Subtype
   *   type          encoding        predicate
   *   -----------------------------------------
   *   Rope          000000 000      xxxx0x xxx
   *   Linear        -               xxxx1x xxx
   *   Dependent     000110 000      xxx1xx xxx
   *   External      100010 000      100010 xxx
   *   Extensible    010010 000      010010 xxx
   *   Inline        001010 000      xx1xxx xxx
   *   FatInline     011010 000      x11xxx xxx
   *   NormalAtom    000011 000      xxxxx1 xxx
   *   PermanentAtom 100011 000      1xxxx1 xxx
   *   InlineAtom    -               xx1xx1 xxx
   *   FatInlineAtom -               x11xx1 xxx
   *
   * Bits 0..2 are reserved for use by the GC (see
   * gc::CellFlagBitsReservedForGC). In particular, bit 0 is currently used for
   * FORWARD_BIT for forwarded nursery cells. The other 2 bits are currently
   * unused.
   *
   * Note that the first 4 flag bits 3..6 (from right to left in the previous
   * table) have the following meaning and can be used for some hot queries:
   *
   *   Bit 3: IsAtom (Atom, PermanentAtom)
   *   Bit 4: IsLinear
   *   Bit 5: IsDependent
   *   Bit 6: IsInline (Inline, FatInline)
   *
   * If INDEX_VALUE_BIT is set, bits 16 and up will also hold an integer index.
   */

  // The low bits of the flag word are reserved by GC.
static_assert(js::gc::CellFlagBitsReservedForGC <= 3,
                "JSString::flags must reserve enough bits for Cell");

  // Single-bit type flags (bits 3..6), usable for one-op type tests.
  static const uint32_t ATOM_BIT = js::Bit(3);
  static const uint32_t LINEAR_BIT = js::Bit(4);
  static const uint32_t DEPENDENT_BIT = js::Bit(5);
  static const uint32_t INLINE_CHARS_BIT = js::Bit(6);

  // Exact type encodings compared against (flags() & TYPE_FLAGS_MASK).
  static const uint32_t EXTENSIBLE_FLAGS = LINEAR_BIT | js::Bit(7);
  static const uint32_t EXTERNAL_FLAGS = LINEAR_BIT | js::Bit(8);

  // A string is fat-inline when both bits of this mask are set; note that
  // js::Bit(7) is shared with EXTENSIBLE_FLAGS.
  static const uint32_t FAT_INLINE_MASK = INLINE_CHARS_BIT | js::Bit(7);

  /* Initial flags for various types of strings. */
  static const uint32_t INIT_THIN_INLINE_FLAGS = LINEAR_BIT | INLINE_CHARS_BIT;
  static const uint32_t INIT_FAT_INLINE_FLAGS = LINEAR_BIT | FAT_INLINE_MASK;
  static const uint32_t INIT_ROPE_FLAGS = 0;
  static const uint32_t INIT_LINEAR_FLAGS = LINEAR_BIT;
  static const uint32_t INIT_DEPENDENT_FLAGS = LINEAR_BIT | DEPENDENT_BIT;

  // Mask of the type bits. NOTE(review): presumably selects bits 3..8, i.e.
  // js::BitMask(n) yields the low n bits — confirm against jstypes.h.
  static const uint32_t TYPE_FLAGS_MASK = js::BitMask(9) - js::BitMask(3);
  static_assert((TYPE_FLAGS_MASK & js::gc::HeaderWord::RESERVED_MASK) == 0,
                "GC reserved bits must not be used for Strings");

  static const uint32_t LATIN1_CHARS_BIT = js::Bit(9);

  // Whether this atom's characters store an uint32 index value less than or
  // equal to MAX_ARRAY_INDEX. Not used for non-atomized strings.
  // See JSLinearString::isIndex.
  static const uint32_t ATOM_IS_INDEX_BIT = js::Bit(10);

  // When INDEX_VALUE_BIT is set, the cached index is stored in the flag word
  // starting at bit INDEX_VALUE_SHIFT.
  static const uint32_t INDEX_VALUE_BIT = js::Bit(11);
  static const uint32_t INDEX_VALUE_SHIFT = 16;

  // NON_DEDUP_BIT is used in string deduplication during tenuring.
  static const uint32_t NON_DEDUP_BIT = js::Bit(12);

  // If IN_STRING_TO_ATOM_CACHE is set, this string had an entry in the
  // StringToAtomCache at some point. Note that GC can purge the cache without
  // clearing this bit.
  static const uint32_t IN_STRING_TO_ATOM_CACHE = js::Bit(13);

  // Flags used during rope flattening that indicate what action to perform when
  // returning to the rope's parent rope.
static const uint32_t FLATTEN_VISIT_RIGHT = js::Bit(14);
  static const uint32_t FLATTEN_FINISH_NODE = js::Bit(15);
  static const uint32_t FLATTEN_MASK =
      FLATTEN_VISIT_RIGHT | FLATTEN_FINISH_NODE;

  // PINNED_ATOM_BIT reuses js::Bit(15), the same bit as FLATTEN_FINISH_NODE.
  // NOTE(review): presumably safe because the flatten flags only apply to
  // ropes and ropes are never atoms — confirm.
  static const uint32_t PINNED_ATOM_BIT = js::Bit(15);
  // A permanent atom has all three of these bits set (see isPermanentAtom).
  static const uint32_t PERMANENT_ATOM_MASK =
      ATOM_BIT | PINNED_ATOM_BIT | js::Bit(8);

  static const uint32_t MAX_LENGTH = JS::MaxStringLength;

  static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;

  /*
   * Helper function to validate that a string of a given length is
   * representable by a JSString. An allocation overflow is reported if false
   * is returned.
   */
  static inline bool validateLength(JSContext* maybecx, size_t length);

  // NOTE(review): the template parameter list appears to have been stripped
  // from this declaration in this copy; reproduced verbatim — restore it from
  // the canonical header.
  template static inline bool validateLengthInternal(JSContext* maybecx,
                                                      size_t length);

  // Byte offsets of the flags and length fields within the cell header.
  static constexpr size_t offsetOfFlags() { return offsetOfHeaderFlags(); }
  static constexpr size_t offsetOfLength() { return offsetOfHeaderLength(); }

  // True when |other| has exactly the same length and flag word as this string.
  bool sameLengthAndFlags(const JSString& other) const {
    return length() == other.length() && flags() == other.flags();
  }

  // Compile-time checks only: verifies the inline-storage sizing and that
  // JS::shadow::String mirrors JSString's layout and flag constants.
  static void staticAsserts() {
    static_assert(JSString::MAX_LENGTH < UINT32_MAX,
                  "Length must fit in 32 bits");
    static_assert(
        sizeof(JSString) ==
            (offsetof(JSString, d.inlineStorageLatin1) +
             NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
        "Inline Latin1 chars must fit in a JSString");
    static_assert(
        sizeof(JSString) ==
            (offsetof(JSString, d.inlineStorageTwoByte) +
             NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
        "Inline char16_t chars must fit in a JSString");

    /* Ensure js::shadow::String has the same layout.
*/ using JS::shadow::String; static_assert( JSString::offsetOfRawHeaderFlagsField() == offsetof(String, flags_), "shadow::String flags offset must match JSString"); #if JS_BITS_PER_WORD == 32 static_assert(JSString::offsetOfLength() == offsetof(String, length_), "shadow::String length offset must match JSString"); #endif static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) == offsetof(String, nonInlineCharsLatin1), "shadow::String nonInlineChars offset must match JSString"); static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) == offsetof(String, nonInlineCharsTwoByte), "shadow::String nonInlineChars offset must match JSString"); static_assert( offsetof(JSString, d.s.u3.externalCallbacks) == offsetof(String, externalCallbacks), "shadow::String externalCallbacks offset must match JSString"); static_assert(offsetof(JSString, d.inlineStorageLatin1) == offsetof(String, inlineStorageLatin1), "shadow::String inlineStorage offset must match JSString"); static_assert(offsetof(JSString, d.inlineStorageTwoByte) == offsetof(String, inlineStorageTwoByte), "shadow::String inlineStorage offset must match JSString"); static_assert(ATOM_BIT == String::ATOM_BIT, "shadow::String::ATOM_BIT must match JSString::ATOM_BIT"); static_assert(LINEAR_BIT == String::LINEAR_BIT, "shadow::String::LINEAR_BIT must match JSString::LINEAR_BIT"); static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT, "shadow::String::INLINE_CHARS_BIT must match " "JSString::INLINE_CHARS_BIT"); static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT, "shadow::String::LATIN1_CHARS_BIT must match " "JSString::LATIN1_CHARS_BIT"); static_assert( TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK, "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK"); static_assert( EXTERNAL_FLAGS == String::EXTERNAL_FLAGS, "shadow::String::EXTERNAL_FLAGS must match JSString::EXTERNAL_FLAGS"); } /* Avoid silly compile errors in JSRope::flatten */ friend class JSRope; friend class 
js::gc::RelocationOverlay; protected: template MOZ_ALWAYS_INLINE void setNonInlineChars(const CharT* chars); template static MOZ_ALWAYS_INLINE void checkStringCharsArena(const CharT* chars) { #ifdef MOZ_DEBUG js::AssertJSStringBufferInCorrectArena(chars); #endif } // Get correct non-inline chars enum arm for given type template MOZ_ALWAYS_INLINE const CharT* nonInlineCharsRaw() const; public: MOZ_ALWAYS_INLINE bool empty() const { return length() == 0; } inline bool getChar(JSContext* cx, size_t index, char16_t* code); /* Strings have either Latin1 or TwoByte chars. */ bool hasLatin1Chars() const { return flags() & LATIN1_CHARS_BIT; } bool hasTwoByteChars() const { return !(flags() & LATIN1_CHARS_BIT); } /* Strings might contain cached indexes. */ bool hasIndexValue() const { return flags() & INDEX_VALUE_BIT; } uint32_t getIndexValue() const { MOZ_ASSERT(hasIndexValue()); MOZ_ASSERT(isLinear()); return flags() >> INDEX_VALUE_SHIFT; } inline size_t allocSize() const; /* Fallible conversions to more-derived string types. 
*/
  inline JSLinearString* ensureLinear(JSContext* cx);

  /* Type query and debug-checked casts */

  // Each isX() tests the flag word; each asX() asserts the matching isX() and
  // then reinterprets |this| as the derived type. The casts are C-style
  // because the derived classes are only forward-declared at this point.

  // A rope is any string without LINEAR_BIT.
  MOZ_ALWAYS_INLINE
  bool isRope() const { return !(flags() & LINEAR_BIT); }

  MOZ_ALWAYS_INLINE
  JSRope& asRope() const {
    MOZ_ASSERT(isRope());
    return *(JSRope*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isLinear() const { return flags() & LINEAR_BIT; }

  MOZ_ALWAYS_INLINE
  JSLinearString& asLinear() const {
    // Qualified call: JSLinearString deletes its own isLinear().
    MOZ_ASSERT(JSString::isLinear());
    return *(JSLinearString*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isDependent() const { return flags() & DEPENDENT_BIT; }

  MOZ_ALWAYS_INLINE
  JSDependentString& asDependent() const {
    MOZ_ASSERT(isDependent());
    return *(JSDependentString*)this;
  }

  // Extensible is an exact match on all the type bits, not a single-bit test.
  MOZ_ALWAYS_INLINE
  bool isExtensible() const {
    return (flags() & TYPE_FLAGS_MASK) == EXTENSIBLE_FLAGS;
  }

  MOZ_ALWAYS_INLINE
  JSExtensibleString& asExtensible() const {
    MOZ_ASSERT(isExtensible());
    return *(JSExtensibleString*)this;
  }

  MOZ_ALWAYS_INLINE
  bool isInline() const { return flags() & INLINE_CHARS_BIT; }

  MOZ_ALWAYS_INLINE
  JSInlineString& asInline() const {
    MOZ_ASSERT(isInline());
    return *(JSInlineString*)this;
  }

  // Fat-inline requires every bit of FAT_INLINE_MASK to be set.
  MOZ_ALWAYS_INLINE
  bool isFatInline() const {
    return (flags() & FAT_INLINE_MASK) == FAT_INLINE_MASK;
  }

  /* For hot code, prefer other type queries.
*/ bool isExternal() const { return (flags() & TYPE_FLAGS_MASK) == EXTERNAL_FLAGS; } MOZ_ALWAYS_INLINE JSExternalString& asExternal() const { MOZ_ASSERT(isExternal()); return *(JSExternalString*)this; } MOZ_ALWAYS_INLINE bool isAtom() const { return flags() & ATOM_BIT; } MOZ_ALWAYS_INLINE bool isPermanentAtom() const { return (flags() & PERMANENT_ATOM_MASK) == PERMANENT_ATOM_MASK; } MOZ_ALWAYS_INLINE JSAtom& asAtom() const { MOZ_ASSERT(isAtom()); return *(JSAtom*)this; } MOZ_ALWAYS_INLINE void setNonDeduplicatable() { setFlagBit(NON_DEDUP_BIT); } MOZ_ALWAYS_INLINE void clearNonDeduplicatable() { clearFlagBit(NON_DEDUP_BIT); } MOZ_ALWAYS_INLINE bool isDeduplicatable() { return !(flags() & NON_DEDUP_BIT); } void setInStringToAtomCache() { MOZ_ASSERT(!isAtom()); setFlagBit(IN_STRING_TO_ATOM_CACHE); } bool inStringToAtomCache() const { return flags() & IN_STRING_TO_ATOM_CACHE; } // Fills |array| with various strings that represent the different string // kinds and character encodings. static bool fillWithRepresentatives(JSContext* cx, JS::Handle array); /* Only called by the GC for dependent strings. */ inline bool hasBase() const { return isDependent(); } inline JSLinearString* base() const; // The base may be forwarded and becomes a relocation overlay. // The return value can be a relocation overlay when the base is forwarded, // or the return value can be the actual base when it is not forwarded. inline JSLinearString* nurseryBaseOrRelocOverlay() const; inline bool canOwnDependentChars() const; inline void setBase(JSLinearString* newBase); void traceBase(JSTracer* trc); /* Only called by the GC for strings with the AllocKind::STRING kind. */ inline void finalize(JS::GCContext* gcx); /* Gets the number of bytes that the chars take on the heap. 
*/ size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); bool ownsMallocedChars() const { return isLinear() && !isInline() && !isDependent() && !isExternal(); } /* Encode as many scalar values of the string as UTF-8 as can fit * into the caller-provided buffer replacing unpaired surrogates * with the REPLACEMENT CHARACTER. * * Returns the number of code units read and the number of code units * written. * * The semantics of this method match the semantics of * TextEncoder.encodeInto(). * * This function doesn't modify the representation -- rope, linear, * flat, atom, etc. -- of this string. If this string is a rope, * it also doesn't modify the representation of left or right halves * of this string, or of those halves, and so on. * * Returns mozilla::Nothing on OOM. */ mozilla::Maybe> encodeUTF8Partial( const JS::AutoRequireNoGC& nogc, mozilla::Span buffer) const; private: // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler // to call the method below. friend class js::jit::MacroAssembler; static size_t offsetOfNonInlineChars() { static_assert( offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) == offsetof(JSString, d.s.u2.nonInlineCharsLatin1), "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset"); return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte); } public: static const JS::TraceKind TraceKind = JS::TraceKind::String; JS::Zone* zone() const { if (isTenured()) { // Allow permanent atoms to be accessed across zones and runtimes. 
if (isPermanentAtom()) { return zoneFromAnyThread(); } return asTenured().zone(); } return nurseryZone(); } void setLengthAndFlags(uint32_t len, uint32_t flags) { setHeaderLengthAndFlags(len, flags); } void setFlagBit(uint32_t flag) { setHeaderFlagBit(flag); } void clearFlagBit(uint32_t flag) { clearHeaderFlagBit(flag); } void fixupAfterMovingGC() {} js::gc::AllocKind getAllocKind() const { using js::gc::AllocKind; AllocKind kind; if (isAtom()) { if (isFatInline()) { kind = AllocKind::FAT_INLINE_ATOM; } else { kind = AllocKind::ATOM; } } else if (isFatInline()) { kind = AllocKind::FAT_INLINE_STRING; } else if (isExternal()) { kind = AllocKind::EXTERNAL_STRING; } else { kind = AllocKind::STRING; } MOZ_ASSERT_IF(isTenured(), kind == asTenured().getAllocKind()); return kind; } #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void dump(); // Debugger-friendly stderr dump. void dump(js::GenericPrinter& out); void dumpNoNewline(js::GenericPrinter& out); void dumpCharsNoNewline(js::GenericPrinter& out); void dumpRepresentation(js::GenericPrinter& out, int indent) const; void dumpRepresentationHeader(js::GenericPrinter& out, const char* subclass) const; void dumpCharsNoQuote(js::GenericPrinter& out); template static void dumpChars(const CharT* s, size_t len, js::GenericPrinter& out); template static void dumpCharsNoQuote(const CharT* s, size_t len, js::GenericPrinter& out); bool equals(const char* s); #endif void traceChildren(JSTracer* trc); // Override base class implementation to tell GC about permanent atoms. 
bool isPermanentAndMayBeShared() const { return isPermanentAtom(); } static void addCellAddressToStoreBuffer(js::gc::StoreBuffer* buffer, js::gc::Cell** cellp) { buffer->putCell(reinterpret_cast(cellp)); } static void removeCellAddressFromStoreBuffer(js::gc::StoreBuffer* buffer, js::gc::Cell** cellp) { buffer->unputCell(reinterpret_cast(cellp)); } private: JSString(const JSString& other) = delete; void operator=(const JSString& other) = delete; protected: JSString() = default; }; class JSRope : public JSString { friend class js::gc::CellAllocator; template js::UniquePtr copyCharsInternal( JSContext* cx, arena_id_t destArenaId) const; enum UsingBarrier : bool { NoBarrier = false, WithIncrementalBarrier = true }; friend class JSString; JSLinearString* flatten(JSContext* maybecx); JSLinearString* flattenInternal(); template JSLinearString* flattenInternal(); template static JSLinearString* flattenInternal(JSRope* root); template static void ropeBarrierDuringFlattening(JSRope* rope); JSRope(JSString* left, JSString* right, size_t length); public: template static inline JSRope* new_( JSContext* cx, typename js::MaybeRooted::HandleType left, typename js::MaybeRooted::HandleType right, size_t length, js::gc::Heap = js::gc::Heap::Default); js::UniquePtr copyLatin1Chars( JSContext* maybecx, arena_id_t destArenaId) const; JS::UniqueTwoByteChars copyTwoByteChars(JSContext* maybecx, arena_id_t destArenaId) const; template js::UniquePtr copyChars( JSContext* maybecx, arena_id_t destArenaId) const; // Hash function specific for ropes that avoids allocating a temporary // string. There are still allocations internally so it's technically // fallible. // // Returns the same value as if this were a linear string being hashed. [[nodiscard]] bool hash(uint32_t* outhHash) const; // The process of flattening a rope temporarily overwrites the left pointer of // interior nodes in the rope DAG with the parent pointer. 
bool isBeingFlattened() const { return flags() & FLATTEN_MASK; } JSString* leftChild() const { MOZ_ASSERT(isRope()); MOZ_ASSERT(!isBeingFlattened()); // Flattening overwrites this field. return d.s.u2.left; } JSString* rightChild() const { MOZ_ASSERT(isRope()); return d.s.u3.right; } void traceChildren(JSTracer* trc); #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void dumpRepresentation(js::GenericPrinter& out, int indent) const; #endif private: // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler // to call the methods below. friend class js::jit::MacroAssembler; static size_t offsetOfLeft() { return offsetof(JSRope, d.s.u2.left); } static size_t offsetOfRight() { return offsetof(JSRope, d.s.u3.right); } }; static_assert(sizeof(JSRope) == sizeof(JSString), "string subclasses must be binary-compatible with JSString"); /* * There are optimized entry points for some string allocation functions. * * The meaning of suffix: * * "MaybeDeflate": for char16_t variant, characters can fit Latin1 * * "DontDeflate": for char16_t variant, characters don't fit Latin1 * * "NonStatic": characters don't match StaticStrings * * "ValidLength": length fits JSString::MAX_LENGTH */ class JSLinearString : public JSString { friend class JSString; friend class JS::AutoStableStringChars; friend class js::gc::TenuringTracer; friend class js::gc::CellAllocator; /* Vacuous and therefore unimplemented. */ JSLinearString* ensureLinear(JSContext* cx) = delete; bool isLinear() const = delete; JSLinearString& asLinear() const = delete; JSLinearString(const char16_t* chars, size_t length); JSLinearString(const JS::Latin1Char* chars, size_t length); protected: // Used to construct subclasses that do a full initialization themselves. JSLinearString() = default; /* Returns void pointer to latin1/twoByte chars, for finalizers. 
*/ MOZ_ALWAYS_INLINE void* nonInlineCharsRaw() const { MOZ_ASSERT(!isInline()); static_assert( offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) == offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1), "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset"); return (void*)d.s.u2.nonInlineCharsTwoByte; } MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const; MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const; public: template static inline JSLinearString* new_( JSContext* cx, js::UniquePtr chars, size_t length, js::gc::Heap heap); template static inline JSLinearString* newValidLength( JSContext* cx, js::UniquePtr chars, size_t length, js::gc::Heap heap); // Convert a plain linear string to an extensible string. For testing. The // caller must ensure that it is a plain or extensible string already, and // that `capacity` is adequate. JSExtensibleString& makeExtensible(size_t capacity); template MOZ_ALWAYS_INLINE const CharT* nonInlineChars( const JS::AutoRequireNoGC& nogc) const; MOZ_ALWAYS_INLINE const JS::Latin1Char* nonInlineLatin1Chars( const JS::AutoRequireNoGC& nogc) const { MOZ_ASSERT(!isInline()); MOZ_ASSERT(hasLatin1Chars()); return d.s.u2.nonInlineCharsLatin1; } MOZ_ALWAYS_INLINE const char16_t* nonInlineTwoByteChars(const JS::AutoRequireNoGC& nogc) const { MOZ_ASSERT(!isInline()); MOZ_ASSERT(hasTwoByteChars()); return d.s.u2.nonInlineCharsTwoByte; } template MOZ_ALWAYS_INLINE const CharT* chars(const JS::AutoRequireNoGC& nogc) const; MOZ_ALWAYS_INLINE const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const { return rawLatin1Chars(); } MOZ_ALWAYS_INLINE const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const { return rawTwoByteChars(); } mozilla::Range latin1Range( const JS::AutoRequireNoGC& nogc) const { MOZ_ASSERT(JSString::isLinear()); return mozilla::Range(latin1Chars(nogc), length()); } mozilla::Range twoByteRange( const JS::AutoRequireNoGC& nogc) const { 
MOZ_ASSERT(JSString::isLinear()); return mozilla::Range(twoByteChars(nogc), length()); } MOZ_ALWAYS_INLINE char16_t latin1OrTwoByteChar(size_t index) const { MOZ_ASSERT(JSString::isLinear()); MOZ_ASSERT(index < length()); JS::AutoCheckCannotGC nogc; return hasLatin1Chars() ? latin1Chars(nogc)[index] : twoByteChars(nogc)[index]; } bool isIndexSlow(uint32_t* indexp) const { MOZ_ASSERT(JSString::isLinear()); size_t len = length(); if (len == 0 || len > js::UINT32_CHAR_BUFFER_LENGTH) { return false; } JS::AutoCheckCannotGC nogc; if (hasLatin1Chars()) { const JS::Latin1Char* s = latin1Chars(nogc); return mozilla::IsAsciiDigit(*s) && js::CheckStringIsIndex(s, len, indexp); } const char16_t* s = twoByteChars(nogc); return mozilla::IsAsciiDigit(*s) && js::CheckStringIsIndex(s, len, indexp); } // Returns true if this string's characters store an unsigned 32-bit integer // value less than or equal to MAX_ARRAY_INDEX, initializing *indexp to that // value if so. Leading '0' isn't allowed except 0 itself. // (Thus if calling isIndex returns true, js::IndexToString(cx, *indexp) will // be a string equal to this string.) inline bool isIndex(uint32_t* indexp) const; void maybeInitializeIndexValue(uint32_t index, bool allowAtom = false) { MOZ_ASSERT(JSString::isLinear()); MOZ_ASSERT_IF(hasIndexValue(), getIndexValue() == index); MOZ_ASSERT_IF(!allowAtom, !isAtom()); if (hasIndexValue() || index > UINT16_MAX) { return; } mozilla::DebugOnly containedIndex; MOZ_ASSERT(isIndexSlow(&containedIndex)); MOZ_ASSERT(index == containedIndex); setFlagBit((index << INDEX_VALUE_SHIFT) | INDEX_VALUE_BIT); MOZ_ASSERT(getIndexValue() == index); } /* * Returns a property name represented by this string, or null on failure. * You must verify that this is not an index per isIndex before calling * this method. 
*/ inline js::PropertyName* toPropertyName(JSContext* cx); inline void finalize(JS::GCContext* gcx); inline size_t allocSize() const; #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void dumpRepresentationChars(js::GenericPrinter& out, int indent) const; void dumpRepresentation(js::GenericPrinter& out, int indent) const; #endif // Make a partially-initialized string safe for finalization. inline void disownCharsBecauseError(); }; static_assert(sizeof(JSLinearString) == sizeof(JSString), "string subclasses must be binary-compatible with JSString"); class JSDependentString : public JSLinearString { friend class JSString; friend class js::gc::CellAllocator; JSDependentString(JSLinearString* base, size_t start, size_t length); // For JIT string allocation. JSDependentString() = default; /* Vacuous and therefore unimplemented. */ bool isDependent() const = delete; JSDependentString& asDependent() const = delete; /* The offset of this string's chars in base->chars(). */ MOZ_ALWAYS_INLINE size_t baseOffset() const { MOZ_ASSERT(JSString::isDependent()); JS::AutoCheckCannotGC nogc; size_t offset; if (hasTwoByteChars()) { offset = twoByteChars(nogc) - base()->twoByteChars(nogc); } else { offset = latin1Chars(nogc) - base()->latin1Chars(nogc); } MOZ_ASSERT(offset < base()->length()); return offset; } public: // This may return an inline string if the chars fit rather than a dependent // string. static inline JSLinearString* new_(JSContext* cx, JSLinearString* base, size_t start, size_t length, js::gc::Heap heap); template void relocateNonInlineChars(T chars, size_t offset) { setNonInlineChars(chars + offset); } #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void dumpRepresentation(js::GenericPrinter& out, int indent) const; #endif private: // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler // to call the method below. 
friend class js::jit::MacroAssembler;

  // Byte offset of the base-string pointer within the string header.
  inline static size_t offsetOfBase() {
    return offsetof(JSDependentString, d.s.u3.base);
  }
};

static_assert(sizeof(JSDependentString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

// A linear string that additionally records a capacity for its character
// buffer (see capacity()).
class JSExtensibleString : public JSLinearString {
  /* Vacuous and therefore unimplemented. */
  bool isExtensible() const = delete;
  JSExtensibleString& asExtensible() const = delete;

 public:
  // Returns the capacity stored in the string header.
  MOZ_ALWAYS_INLINE
  size_t capacity() const {
    MOZ_ASSERT(JSString::isExtensible());
    return d.s.u3.capacity;
  }

#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
  void dumpRepresentation(js::GenericPrinter& out, int indent) const;
#endif
};

static_assert(sizeof(JSExtensibleString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

// A linear string whose characters are stored inline in the string cell
// itself (d.inlineStorageLatin1 / d.inlineStorageTwoByte).
class JSInlineString : public JSLinearString {
 public:
  // Returns the inline Latin1 storage. The string must be inline and hold
  // Latin1 chars (both asserted).
  MOZ_ALWAYS_INLINE
  const JS::Latin1Char* latin1Chars(const JS::AutoRequireNoGC& nogc) const {
    MOZ_ASSERT(JSString::isInline());
    MOZ_ASSERT(hasLatin1Chars());
    return d.inlineStorageLatin1;
  }

  // Returns the inline two-byte storage. The string must be inline and hold
  // two-byte chars (both asserted).
  MOZ_ALWAYS_INLINE
  const char16_t* twoByteChars(const JS::AutoRequireNoGC& nogc) const {
    MOZ_ASSERT(JSString::isInline());
    MOZ_ASSERT(hasTwoByteChars());
    return d.inlineStorageTwoByte;
  }

  // NOTE(review): the template parameter list appears to be missing in this
  // copy of the file -- confirm against the upstream header.
  template
  static bool lengthFits(size_t length);

#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
  void dumpRepresentation(js::GenericPrinter& out, int indent) const;
#endif

 private:
  // To help avoid writing Spectre-unsafe code, we only allow MacroAssembler
  // to call the method below.
  friend class js::jit::MacroAssembler;
  // Computed from the two-byte member; presumably the Latin1 and two-byte
  // inline storage start at the same offset -- TODO confirm.
  static size_t offsetOfInlineStorage() {
    return offsetof(JSInlineString, d.inlineStorageTwoByte);
  }
};

static_assert(sizeof(JSInlineString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

/*
 * On 32-bit platforms, JSThinInlineString can store 8 Latin1 characters or 4
 * TwoByte characters inline. On 64-bit platforms, these numbers are 16 and 8,
 * respectively.
*/
class JSThinInlineString : public JSInlineString {
  friend class js::gc::CellAllocator;

  // The constructors hand back a mutable pointer to the data through the
  // |chars| out-parameter, because the first thing any creator will do is
  // copy in the string value. This also conveniently allows doing overload
  // resolution on CharT.
  explicit JSThinInlineString(size_t length, JS::Latin1Char** chars);
  explicit JSThinInlineString(size_t length, char16_t** chars);

  // For JIT string allocation.
  JSThinInlineString() = default;

 public:
  // A thin inline string holds exactly as many characters as the base
  // string's inline storage provides.
  static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1;
  static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE;

  // NOTE(review): the template parameter lists on the two declarations below
  // appear to be missing in this copy of the file -- confirm upstream.
  template
  static inline JSThinInlineString* new_(JSContext* cx, js::gc::Heap heap);

  template
  static bool lengthFits(size_t length);
};

static_assert(sizeof(JSThinInlineString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

/*
 * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 12 and
 * MAX_LENGTH_LATIN1 is 24. This is deliberate, in order to minimize potential
 * performance differences between 32-bit and 64-bit platforms.
 *
 * There are still some differences due to NUM_INLINE_CHARS_* being different.
 * E.g. TwoByte strings of length 5--8 will be JSFatInlineStrings on 32-bit
 * platforms and JSThinInlineStrings on 64-bit platforms. But the more
 * significant transition from inline strings to non-inline strings occurs at
 * length 12 (for TwoByte strings) and 24 (Latin1 strings) on both 32-bit and
 * 64-bit platforms.
 */
class JSFatInlineString : public JSInlineString {
  friend class js::gc::CellAllocator;

  // Number of extra characters stored beyond the base inline storage, chosen
  // so that the totals are 24 (Latin1) and 12 (TwoByte).
  static const size_t INLINE_EXTENSION_CHARS_LATIN1 =
      24 - NUM_INLINE_CHARS_LATIN1;
  static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE =
      12 - NUM_INLINE_CHARS_TWO_BYTE;

  // The constructors hand back a mutable pointer to the data through the
  // |chars| out-parameter, because the first thing any creator will do is
  // copy in the string value. This also
  // conveniently allows doing overload resolution on CharT.
explicit JSFatInlineString(size_t length, JS::Latin1Char** chars);
  explicit JSFatInlineString(size_t length, char16_t** chars);

  // For JIT string allocation.
  JSFatInlineString() = default;

 protected: /* to fool clang into not warning this is unused */
  // Extra character storage appended to the string header; its size is what
  // makes a fat inline string larger than a plain JSString.
  union {
    char inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1];
    char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE];
  };

 public:
  // NOTE(review): the template parameter list appears to be missing in this
  // copy of the file -- confirm against the upstream header.
  template
  static inline JSFatInlineString* new_(JSContext* cx, js::gc::Heap heap);

  // Total capacity: the base inline storage plus the extension above.
  static const size_t MAX_LENGTH_LATIN1 =
      JSString::NUM_INLINE_CHARS_LATIN1 + INLINE_EXTENSION_CHARS_LATIN1;

  static const size_t MAX_LENGTH_TWO_BYTE =
      JSString::NUM_INLINE_CHARS_TWO_BYTE + INLINE_EXTENSION_CHARS_TWO_BYTE;

  template
  static bool lengthFits(size_t length);

  // Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING
  // kind.
  MOZ_ALWAYS_INLINE void finalize(JS::GCContext* gcx);
};

static_assert(sizeof(JSFatInlineString) % js::gc::CellAlignBytes == 0,
              "fat inline strings shouldn't waste space up to the next cell "
              "boundary");

// A linear two-byte string whose header also stores a
// JSExternalStringCallbacks pointer (see callbacks()).
class JSExternalString : public JSLinearString {
  friend class js::gc::CellAllocator;

  JSExternalString(const char16_t* chars, size_t length,
                   const JSExternalStringCallbacks* callbacks);

  /* Vacuous and therefore unimplemented. */
  bool isExternal() const = delete;
  JSExternalString& asExternal() const = delete;

 public:
  static inline JSExternalString* new_(
      JSContext* cx, const char16_t* chars, size_t length,
      const JSExternalStringCallbacks* callbacks);

  // Returns the callbacks pointer stored in the string header.
  const JSExternalStringCallbacks* callbacks() const {
    MOZ_ASSERT(JSString::isExternal());
    return d.s.u3.externalCallbacks;
  }

  // External chars are never allocated inline or in the nursery, so we can
  // safely expose this without requiring an AutoCheckCannotGC argument.
  const char16_t* twoByteChars() const { return rawTwoByteChars(); }

  // Only called by the GC for strings with the AllocKind::EXTERNAL_STRING
  // kind.
inline void finalize(JS::GCContext* gcx);

#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
  void dumpRepresentation(js::GenericPrinter& out, int indent) const;
#endif
};

static_assert(sizeof(JSExternalString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

// Atom view of a linear string: adds the permanent, pinned and is-index flag
// accessors and access to the atom's hash.
class JSAtom : public JSLinearString {
  /* Vacuous and therefore unimplemented. */
  bool isAtom() const = delete;
  JSAtom& asAtom() const = delete;

 public:
  // NOTE(review): the template parameter list and the UniquePtr template
  // arguments appear to be missing in this copy of the file -- confirm
  // against the upstream header.
  template
  static inline JSAtom* newValidLength(
      JSContext* cx, js::UniquePtr chars, size_t length, js::HashNumber hash);

  /* Returns the PropertyName for this.  isIndex() must be false. */
  inline js::PropertyName* asPropertyName();

  MOZ_ALWAYS_INLINE
  bool isPermanent() const { return JSString::isPermanentAtom(); }

  // Sets the PERMANENT_ATOM_MASK flag bits on this atom.
  MOZ_ALWAYS_INLINE
  void makePermanent() {
    MOZ_ASSERT(JSString::isAtom());
    setFlagBit(PERMANENT_ATOM_MASK);
  }

  // Returns whether ATOM_IS_INDEX_BIT is set. The assertion cross-checks the
  // flag against a fresh parse of the characters via isIndexSlow.
  MOZ_ALWAYS_INLINE bool isIndex() const {
    MOZ_ASSERT(JSString::isAtom());
    mozilla::DebugOnly index;
    MOZ_ASSERT(!!(flags() & ATOM_IS_INDEX_BIT) == isIndexSlow(&index));
    return flags() & ATOM_IS_INDEX_BIT;
  }
  // As above, and on success stores the index in |*index|: the cached index
  // value when there is one, otherwise the result of getIndexSlow().
  MOZ_ALWAYS_INLINE bool isIndex(uint32_t* index) const {
    MOZ_ASSERT(JSString::isAtom());
    if (!isIndex()) {
      return false;
    }
    *index = hasIndexValue() ?
getIndexValue() : getIndexSlow();
    return true;
  }

  uint32_t getIndexSlow() const;

  // Marks this atom as an index by setting ATOM_IS_INDEX_BIT, then lets
  // maybeInitializeIndexValue cache |index| (atoms explicitly allowed).
  void setIsIndex(uint32_t index) {
    MOZ_ASSERT(JSString::isAtom());
    setFlagBit(ATOM_IS_INDEX_BIT);
    maybeInitializeIndexValue(index, /* allowAtom = */ true);
  }

  MOZ_ALWAYS_INLINE bool isPinned() const { return flags() & PINNED_ATOM_BIT; }

  // Sets PINNED_ATOM_BIT; the atom must not already be pinned (asserted).
  void setPinned() {
    MOZ_ASSERT(!isPinned());
    setFlagBit(PINNED_ATOM_BIT);
  }

  // Both dispatch to FatInlineAtom or NormalAtom depending on isFatInline();
  // see the inline definitions later in this header.
  inline js::HashNumber hash() const;
  inline void initHash(js::HashNumber hash);

#if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
  void dump(js::GenericPrinter& out);
  void dump();
#endif
};

static_assert(sizeof(JSAtom) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

namespace js {

// An atom that stores its hash in a field placed directly after the JSString
// header.
class NormalAtom : public JSAtom {
  friend class gc::CellAllocator;

 protected:
  HashNumber hash_;

  // Inline atoms, mimicking JSThinInlineString constructors.
  explicit NormalAtom(size_t length, JS::Latin1Char** chars,
                      js::HashNumber hash);
  explicit NormalAtom(size_t length, char16_t** chars, js::HashNumber hash);

  // Out of line atoms, mimicking JSLinearString constructors.
  NormalAtom(const char16_t* chars, size_t length, js::HashNumber hash);
  NormalAtom(const JS::Latin1Char* chars, size_t length, js::HashNumber hash);

 public:
  HashNumber hash() const { return hash_; }
  void initHash(HashNumber hash) { hash_ = hash; }

  static constexpr size_t offsetOfHash() { return offsetof(NormalAtom, hash_); }
};

// The size is compared against sizeof(uint64_t) rather than
// sizeof(HashNumber): as the message says, the object is padded out to
// gc::CellAlignBytes.
static_assert(sizeof(NormalAtom) == sizeof(JSString) + sizeof(uint64_t),
              "NormalAtom must have size of a string + HashNumber, "
              "aligned to gc::CellAlignBytes");

// An atom with room for a fat inline string's extended character storage,
// followed by the hash.
class FatInlineAtom : public JSAtom {
  friend class gc::CellAllocator;

 protected:  // Silence Clang unused-field warning.
  // Stands in for the storage JSFatInlineString adds beyond JSString, so that
  // hash_ is placed after it.
  char inlineStorage_[sizeof(JSFatInlineString) - sizeof(JSString)];
  HashNumber hash_;

  // Mimicking JSFatInlineString constructors.
explicit FatInlineAtom(size_t length, JS::Latin1Char** chars,
                         js::HashNumber hash);
  explicit FatInlineAtom(size_t length, char16_t** chars, js::HashNumber hash);

 public:
  HashNumber hash() const { return hash_; }
  void initHash(HashNumber hash) { hash_ = hash; }

  inline void finalize(JS::GCContext* gcx);

  static constexpr size_t offsetOfHash() {
    return offsetof(FatInlineAtom, hash_);
  }
};

static_assert(
    sizeof(FatInlineAtom) == sizeof(JSFatInlineString) + sizeof(uint64_t),
    "FatInlineAtom must have size of a fat inline string + HashNumber, "
    "aligned to gc::CellAlignBytes");

// When an algorithm does not need a string represented as a single linear
// array of characters, this range utility may be used to traverse the string
// as a sequence of linear arrays of characters. This avoids flattening ropes.
//
// Usage: call init(), then loop while !empty(), reading front() and calling
// popFront(); init() and popFront() return false on failure.
//
// NOTE(review): the template parameter list of this class and the template
// arguments of GCVector/Rooted below appear to be missing in this copy of the
// file -- confirm against the upstream header.
template
class StringSegmentRange {
  // If malloc() shows up in any profiles from this vector, we can add a new
  // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
  using StackVector = JS::GCVector;
  // Right children of the ropes passed on the way down, still to be visited.
  Rooted stack;
  // The current linear segment, or null once the range is exhausted.
  Rooted cur;

  // Walks down the left children of |str|, pushing each rope's right child on
  // the stack, until a linear string is reached; that string becomes |cur|.
  // Returns false if appending to the stack fails.
  bool settle(JSString* str) {
    while (str->isRope()) {
      JSRope& rope = str->asRope();
      if (!stack.append(rope.rightChild())) {
        return false;
      }
      str = rope.leftChild();
    }
    cur = &str->asLinear();
    return true;
  }

 public:
  explicit StringSegmentRange(JSContext* cx)
      : stack(cx, StackVector(cx)), cur(cx) {}

  // Positions the range on the first linear segment of |str|.
  [[nodiscard]] bool init(JSString* str) {
    MOZ_ASSERT(stack.empty());
    return settle(str);
  }

  bool empty() const { return cur == nullptr; }

  JSLinearString* front() const {
    MOZ_ASSERT(!cur->isRope());
    return cur;
  }

  // Advances to the next linear segment; the range becomes empty once no
  // pending right children remain.
  [[nodiscard]] bool popFront() {
    MOZ_ASSERT(!empty());
    if (stack.empty()) {
      cur = nullptr;
      return true;
    }
    return settle(stack.popCopy());
  }
};

}  // namespace js

// Reads the hash from the atom subclass selected by isFatInline().
// NOTE(review): the static_cast target types appear to be missing in this
// copy of the file -- confirm against the upstream header.
inline js::HashNumber JSAtom::hash() const {
  if (isFatInline()) {
    return static_cast(this)->hash();
  }
  return static_cast(this)->hash();
}

// Stores the hash into the atom subclass selected by isFatInline().
inline void JSAtom::initHash(js::HashNumber hash) {
  if (isFatInline()) {
    return static_cast(this)->initHash(hash);
  }
  return
static_cast(this)->initHash(hash);
}

namespace js {

/*
 * Represents an atomized string which does not contain an index (that is, an
 * unsigned 32-bit value).  Thus for any PropertyName propname,
 * ToString(ToUint32(propname)) never equals propname.
 *
 * To more concretely illustrate the utility of PropertyName, consider that it
 * is used to partition, in a type-safe manner, the ways to refer to a
 * property, as follows:
 *
 *   - uint32_t indexes,
 *   - PropertyName strings which don't encode uint32_t indexes, and
 *   - jsspecial special properties (non-ES5 properties like object-valued
 *     jsids, JSID_EMPTY, JSID_VOID, and maybe in the future Harmony-proposed
 *     private names).
 */
class PropertyName : public JSAtom {
 private:
  /* Vacuous and therefore unimplemented. */
  PropertyName* asPropertyName() = delete;
};

static_assert(sizeof(PropertyName) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");

// Wraps |name| in a jsid via JS::PropertyKey::NonIntAtom.
static MOZ_ALWAYS_INLINE jsid NameToId(PropertyName* name) {
  return JS::PropertyKey::NonIntAtom(name);
}

// NOTE(review): the GCVector template arguments and the template parameter
// lists of the declarations below appear to be missing in this copy of the
// file -- confirm against the upstream header.
using PropertyNameVector = JS::GCVector;

template
void CopyChars(CharT* dest, const JSLinearString& str);

// Linearizes |str| and returns the result of JS::CharsToNewUTF8CharsZ on its
// characters, wrapped in a UniqueChars. Returns nullptr if linearization
// fails.
static inline UniqueChars StringToNewUTF8CharsZ(JSContext* cx, JSString& str) {
  JS::AutoCheckCannotGC nogc;

  JSLinearString* linear = str.ensureLinear(cx);
  if (!linear) {
    return nullptr;
  }

  // Pick the character range matching the string's character width.
  return UniqueChars(
      linear->hasLatin1Chars()
          ? JS::CharsToNewUTF8CharsZ(cx, linear->latin1Range(nogc)).c_str()
          : JS::CharsToNewUTF8CharsZ(cx, linear->twoByteRange(nogc)).c_str());
}

/**
 * Allocate a string with the given contents.  If |allowGC == CanGC|, this may
 * trigger a GC.
 */
template
extern JSLinearString* NewString(JSContext* cx, UniquePtr chars, size_t length,
                                 js::gc::Heap heap = js::gc::Heap::Default);

/* Like NewString, but doesn't try to deflate to Latin1.
*/
// NOTE(review): throughout this group the template parameter lists, and the
// template arguments of UniquePtr, Span, basic_string_view and
// reinterpret_cast, appear to be missing in this copy of the file -- confirm
// against the upstream header.
template
extern JSLinearString* NewStringDontDeflate(
    JSContext* cx, UniquePtr chars, size_t length,
    js::gc::Heap heap = js::gc::Heap::Default);

extern JSLinearString* NewDependentString(
    JSContext* cx, JSString* base, size_t start, size_t length,
    js::gc::Heap heap = js::gc::Heap::Default);

/* Take ownership of an array of Latin1Chars. */
extern JSLinearString* NewLatin1StringZ(
    JSContext* cx, UniqueChars chars,
    js::gc::Heap heap = js::gc::Heap::Default);

/* Copy a counted string and GC-allocate a descriptor for it. */
template
extern JSLinearString* NewStringCopyN(
    JSContext* cx, const CharT* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default);

// Overload for plain |char| input: reinterprets the pointer and forwards to
// the overload above.
template
inline JSLinearString* NewStringCopyN(
    JSContext* cx, const char* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN(cx, reinterpret_cast(s), n, heap);
}

template
extern JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx,
                                                   const CharT* s, size_t n,
                                                   js::HashNumber hash);

template
extern JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx,
                                                  const CharT* s, size_t n,
                                                  js::HashNumber hash);

/* Copy a counted string and GC-allocate a descriptor for it. */
// Span convenience wrapper around NewStringCopyN.
template
inline JSLinearString* NewStringCopy(
    JSContext* cx, mozilla::Span s,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN(cx, s.data(), s.size(), heap);
}

/* Copy a counted string and GC-allocate a descriptor for it. */
// std::basic_string_view convenience wrapper around NewStringCopyN.
template
inline JSLinearString* NewStringCopy(
    JSContext* cx, std::basic_string_view s,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN(cx, s.data(), s.size(), heap);
}

/* Like NewStringCopyN, but doesn't try to deflate to Latin1.
*/
// NOTE(review): the template parameter lists in this group, and the
// MaybeRooted template arguments in ConcatStrings, appear to be missing in
// this copy of the file -- confirm against the upstream header.
template
extern JSLinearString* NewStringCopyNDontDeflate(
    JSContext* cx, const CharT* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default);

template
extern JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(
    JSContext* cx, const CharT* s, size_t n,
    js::gc::Heap heap = js::gc::Heap::Default);

/* Copy a C string and GC-allocate a descriptor for it. */
// The length is measured with js_strlen and forwarded to NewStringCopyN.
template
inline JSLinearString* NewStringCopyZ(
    JSContext* cx, const char16_t* s,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN(cx, s, js_strlen(s), heap);
}

// As above for |char| strings, measured with strlen.
template
inline JSLinearString* NewStringCopyZ(
    JSContext* cx, const char* s, js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyN(cx, s, strlen(s), heap);
}

extern JSLinearString* NewStringCopyUTF8N(
    JSContext* cx, const JS::UTF8Chars utf8,
    js::gc::Heap heap = js::gc::Heap::Default);

// Wrapper for UTF-8 input given as a C string: measures it with strlen and
// forwards to NewStringCopyUTF8N.
inline JSLinearString* NewStringCopyUTF8Z(
    JSContext* cx, const JS::ConstUTF8CharsZ utf8,
    js::gc::Heap heap = js::gc::Heap::Default) {
  return NewStringCopyUTF8N(
      cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())), heap);
}

// NOTE(review): presumably |*allocatedExternal| reports whether the returned
// string is a JSExternalString -- verify against the definition.
JSString* NewMaybeExternalString(JSContext* cx, const char16_t* s, size_t n,
                                 const JSExternalStringCallbacks* callbacks,
                                 bool* allocatedExternal,
                                 js::gc::Heap heap = js::gc::Heap::Default);

static_assert(sizeof(HashNumber) == 4);

template
extern JSString* ConcatStrings(
    JSContext* cx, typename MaybeRooted::HandleType left,
    typename MaybeRooted::HandleType right,
    js::gc::Heap heap = js::gc::Heap::Default);

/*
 * Test if strings are equal. The caller can call the function even if str1
 * or str2 are not GC-allocated things.
 */
extern bool EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
                         bool* result);

/* Use the infallible method instead! */
extern bool EqualStrings(JSContext* cx, JSLinearString* str1,
                         JSLinearString* str2, bool* result) = delete;

/* EqualStrings is infallible on linear strings.
*/
extern bool EqualStrings(const JSLinearString* str1,
                         const JSLinearString* str2);

/**
 * Compare two strings that are known to be the same length.
 * Exposed for the JITs; for ordinary uses, EqualStrings() is more sensible.
 *
 * Precondition: str1->length() == str2->length().
 */
extern bool EqualChars(const JSLinearString* str1, const JSLinearString* str2);

/*
 * Return less than, equal to, or greater than zero depending on whether
 * `s1[0..len1]` is less than, equal to, or greater than `s2`.
 */
extern int32_t CompareChars(const char16_t* s1, size_t len1,
                            JSLinearString* s2);

/*
 * Compare two strings, like CompareChars, but store the result in `*result`.
 * This flattens the strings and therefore can fail.
 */
extern bool CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
                           int32_t* result);

/*
 * Compare two strings, like CompareChars.
 */
extern int32_t CompareStrings(const JSLinearString* str1,
                              const JSLinearString* str2);

/**
 * Return true if the string contains only ASCII characters.
 */
extern bool StringIsAscii(JSLinearString* str);

/*
 * Return true if the string matches the given sequence of ASCII bytes.
 */
extern bool StringEqualsAscii(JSLinearString* str, const char* asciiBytes);
/*
 * Return true if the string matches the given sequence of ASCII
 * bytes.  The sequence of ASCII bytes must have length "length".  The
 * length should not include the trailing null, if any.
 */
extern bool StringEqualsAscii(JSLinearString* str, const char* asciiBytes,
                              size_t length);

/*
 * Return true if the string matches the given string literal. The array must
 * end in a null character (asserted); the comparison covers the N - 1
 * characters before it.
 */
// NOTE(review): the template parameter list (presumably declaring N) appears
// to be missing in this copy of the file -- confirm upstream.
template
bool StringEqualsLiteral(JSLinearString* str, const char (&asciiBytes)[N]) {
  MOZ_ASSERT(asciiBytes[N - 1] == '\0');
  return StringEqualsAscii(str, asciiBytes, N - 1);
}

// NOTE(review): the return-value convention (presumably the match position,
// or a negative value when |pat| is not found at or after |start|) is not
// visible here -- verify against the definition.
extern int StringFindPattern(JSLinearString* text, JSLinearString* pat,
                             size_t start);

/**
 * Return true if the string contains a pattern at |start|.
 *
 * Precondition: `text` is long enough that this might be true;
 * that is, it has at least `start + pat->length()` characters.
*/
extern bool HasSubstringAt(JSLinearString* text, JSLinearString* pat,
                           size_t start);

/*
 * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
 * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
 * and constitute API misuse.
 */
JSString* SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
                          int32_t lengthInt);

// Hashes the characters of |str| with mozilla::HashString, using the Latin1
// or two-byte character array according to the string's character width.
inline js::HashNumber HashStringChars(JSLinearString* str) {
  JS::AutoCheckCannotGC nogc;
  size_t len = str->length();
  return str->hasLatin1Chars()
             ? mozilla::HashString(str->latin1Chars(nogc), len)
             : mozilla::HashString(str->twoByteChars(nogc), len);
}

/*** Conversions ************************************************************/

/*
 * Convert a string to a printable C string.
 *
 * Asserts if the input contains any non-ASCII characters.
 */
UniqueChars EncodeAscii(JSContext* cx, JSString* str);

/*
 * Convert a string to a printable C string.
 */
UniqueChars EncodeLatin1(JSContext* cx, JSString* str);

// Selects which printable representation IdToPrintableUTF8 produces.
enum class IdToPrintableBehavior : bool {
  /*
   * Request the printable representation of an identifier.
   */
  IdIsIdentifier,

  /*
   * Request the printable representation of a property key.
   */
  IdIsPropertyKey
};

/*
 * Convert a jsid to a printable C string encoded in UTF-8.
 */
extern UniqueChars IdToPrintableUTF8(JSContext* cx, HandleId id,
                                     IdToPrintableBehavior behavior);

/*
 * Convert a non-string value to a string, returning null after reporting an
 * error, otherwise returning a new string reference.
 */
// NOTE(review): the template parameter list and the MaybeRooted template
// arguments appear to be missing in this copy of the file -- confirm
// against the upstream header.
template
extern JSString* ToStringSlow(
    JSContext* cx, typename MaybeRooted::HandleType arg);

/*
 * Convert the given value to a string.  This method includes an inline
 * fast-path for the case where the value is already a string; if the value is
 * known not to be a string, use ToStringSlow instead.
*/
// NOTE(review): the template parameter list appears to be missing in this
// copy of the file -- confirm against the upstream header.
template
static MOZ_ALWAYS_INLINE JSString* ToString(JSContext* cx, JS::HandleValue v) {
  if (v.isString()) {
    return v.toString();
  }
  return ToStringSlow(cx, v);
}

/*
 * This function implements E-262-3 section 9.8, toString. Convert the given
 * value to a string of characters appended to the given buffer. On error, the
 * passed buffer may have partial results appended.
 */
inline bool ValueToStringBuffer(JSContext* cx, const Value& v,
                                StringBuffer& sb);

} /* namespace js */

// Stores the character at |index| in |*code|. |index| must be less than
// length() (asserted). Returns false if the string holding that character
// could not be linearized.
MOZ_ALWAYS_INLINE bool JSString::getChar(JSContext* cx, size_t index,
                                         char16_t* code) {
  MOZ_ASSERT(index < length());

  /*
   * Optimization for one level deep ropes.
   * This is common for the following pattern:
   *
   * while() {
   *   text = text.substr(0, x) + "bla" + text.substr(x)
   *   text.charCodeAt(x + 1)
   * }
   *
   * Note: keep this in sync with MacroAssembler::loadStringChar and
   * CanAttachStringChar.
   */
  JSString* str;
  if (isRope()) {
    JSRope* rope = &asRope();
    // Only linearize the child that contains |index|; for the right child
    // the index is rebased past the left child's characters.
    if (uint32_t(index) < rope->leftChild()->length()) {
      str = rope->leftChild();
    } else {
      str = rope->rightChild();
      index -= rope->leftChild()->length();
    }
  } else {
    str = this;
  }

  if (!str->ensureLinear(cx)) {
    return false;
  }

  *code = str->asLinear().latin1OrTwoByteChar(index);
  return true;
}

// Returns this string as a linear string, flattening it first when it is a
// rope. The result is whatever JSRope::flatten returns in that case.
MOZ_ALWAYS_INLINE JSLinearString* JSString::ensureLinear(JSContext* cx) {
  return isLinear() ? &asLinear() : asRope().flatten(cx);
}

// Returns the base string. The string must have a base, and that base must
// not be inline (both asserted).
inline JSLinearString* JSString::base() const {
  MOZ_ASSERT(hasBase());
  MOZ_ASSERT(!d.s.u3.base->isInline());
  return d.s.u3.base;
}

// Reads the same field as base(), but without asserting anything about what
// it points to.
inline JSLinearString* JSString::nurseryBaseOrRelocOverlay() const {
  MOZ_ASSERT(hasBase());
  return d.s.u3.base;
}

inline bool JSString::canOwnDependentChars() const {
  // A string that could own the malloced chars used by another (dependent)
  // string. It will not have a base and must be linear and non-inline.
return isLinear() && !isInline() && !hasBase(); } inline void JSString::setBase(JSLinearString* newBase) { MOZ_ASSERT(hasBase()); MOZ_ASSERT(!newBase->isInline()); d.s.u3.base = newBase; } template <> MOZ_ALWAYS_INLINE const char16_t* JSLinearString::nonInlineChars( const JS::AutoRequireNoGC& nogc) const { return nonInlineTwoByteChars(nogc); } template <> MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::nonInlineChars( const JS::AutoRequireNoGC& nogc) const { return nonInlineLatin1Chars(nogc); } template <> MOZ_ALWAYS_INLINE const char16_t* JSLinearString::chars( const JS::AutoRequireNoGC& nogc) const { return rawTwoByteChars(); } template <> MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::chars( const JS::AutoRequireNoGC& nogc) const { return rawLatin1Chars(); } template <> MOZ_ALWAYS_INLINE js::UniquePtr JSRope::copyChars(JSContext* maybecx, arena_id_t destArenaId) const { return copyLatin1Chars(maybecx, destArenaId); } template <> MOZ_ALWAYS_INLINE JS::UniqueTwoByteChars JSRope::copyChars( JSContext* maybecx, arena_id_t destArenaId) const { return copyTwoByteChars(maybecx, destArenaId); } template <> MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits( size_t length) { return length <= MAX_LENGTH_LATIN1; } template <> MOZ_ALWAYS_INLINE bool JSThinInlineString::lengthFits(size_t length) { return length <= MAX_LENGTH_TWO_BYTE; } template <> MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits( size_t length) { static_assert( (INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellAlignBytes == 0, "fat inline strings' Latin1 characters don't exactly " "fill subsequent cells and thus are wasteful"); static_assert(MAX_LENGTH_LATIN1 == (sizeof(JSFatInlineString) - offsetof(JSFatInlineString, d.inlineStorageLatin1)) / sizeof(char), "MAX_LENGTH_LATIN1 must be one less than inline Latin1 " "storage count"); return length <= MAX_LENGTH_LATIN1; } template <> MOZ_ALWAYS_INLINE bool JSFatInlineString::lengthFits(size_t length) { 
static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) % js::gc::CellAlignBytes == 0, "fat inline strings' char16_t characters don't exactly " "fill subsequent cells and thus are wasteful"); static_assert(MAX_LENGTH_TWO_BYTE == (sizeof(JSFatInlineString) - offsetof(JSFatInlineString, d.inlineStorageTwoByte)) / sizeof(char16_t), "MAX_LENGTH_TWO_BYTE must be one less than inline " "char16_t storage count"); return length <= MAX_LENGTH_TWO_BYTE; } template <> MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits( size_t length) { // If it fits in a fat inline string, it fits in any inline string. return JSFatInlineString::lengthFits(length); } template <> MOZ_ALWAYS_INLINE bool JSInlineString::lengthFits(size_t length) { // If it fits in a fat inline string, it fits in any inline string. return JSFatInlineString::lengthFits(length); } template <> MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const char16_t* chars) { // Check that the new buffer is located in the StringBufferArena checkStringCharsArena(chars); d.s.u2.nonInlineCharsTwoByte = chars; } template <> MOZ_ALWAYS_INLINE void JSString::setNonInlineChars( const JS::Latin1Char* chars) { // Check that the new buffer is located in the StringBufferArena checkStringCharsArena(chars); d.s.u2.nonInlineCharsLatin1 = chars; } MOZ_ALWAYS_INLINE const JS::Latin1Char* JSLinearString::rawLatin1Chars() const { MOZ_ASSERT(JSString::isLinear()); MOZ_ASSERT(hasLatin1Chars()); return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1; } MOZ_ALWAYS_INLINE const char16_t* JSLinearString::rawTwoByteChars() const { MOZ_ASSERT(JSString::isLinear()); MOZ_ASSERT(hasTwoByteChars()); return isInline() ? 
d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
}

// Reinterprets this atom as a PropertyName. The atom must not be an index
// (asserted).
// NOTE(review): the static_cast target type appears to be missing in this
// copy of the file -- confirm against the upstream header.
inline js::PropertyName* JSAtom::asPropertyName() {
  MOZ_ASSERT(!isIndex());
  return static_cast(this);
}

// Tries three sources in order: the atom's is-index flag, a cached index
// value, and finally a parse of the characters.
inline bool JSLinearString::isIndex(uint32_t* indexp) const {
  MOZ_ASSERT(JSString::isLinear());

  if (isAtom()) {
    return asAtom().isIndex(indexp);
  }

  if (JSString::hasIndexValue()) {
    *indexp = getIndexValue();
    return true;
  }

  return isIndexSlow(indexp);
}

// Size in bytes of the malloc'd character buffer owned by this string: the
// character count (the capacity for extensible strings, otherwise the length)
// times the character size. The string must own malloc'd chars (asserted).
inline size_t JSLinearString::allocSize() const {
  MOZ_ASSERT(ownsMallocedChars());

  size_t charSize =
      hasLatin1Chars() ? sizeof(JS::Latin1Char) : sizeof(char16_t);
  size_t count = isExtensible() ? asExtensible().capacity() : length();
  return count * charSize;
}

// As above, but returns 0 for strings that do not own malloc'd chars.
inline size_t JSString::allocSize() const {
  return ownsMallocedChars() ? asLinear().allocSize() : 0;
}

namespace js {
namespace gc {
// NOTE(review): the template arguments of as/is and the reinterpret_cast
// target types below appear to be missing in this copy of the file --
// confirm against the upstream header.
template <>
inline JSString* Cell::as() {
  MOZ_ASSERT(is());
  return reinterpret_cast(this);
}

template <>
inline JSString* TenuredCell::as() {
  MOZ_ASSERT(is());
  return reinterpret_cast(this);
}

// StringRelocationOverlay assists with updating the string chars
// pointers of dependent strings when their base strings are
// deduplicated. It stores:
//  - nursery chars of a root base (root base is a non-dependent base), or
//  - nursery base of a dependent string
// StringRelocationOverlay exploits the fact that the 3rd word of a JSString's
// RelocationOverlay is not utilized and can be used to store extra information.
class StringRelocationOverlay : public RelocationOverlay {
  union {
    // nursery chars of a root base
    const JS::Latin1Char* nurseryCharsLatin1;
    const char16_t* nurseryCharsTwoByte;

    // The nursery base can be forwarded, which becomes a string relocation
    // overlay, or it is not yet forwarded and is simply the base.
JSLinearString* nurseryBaseOrRelocOverlay;
  };

 public:
  // The overlay is constructed in place over a string, so it must not be
  // larger than one; the static_assert enforces that.
  explicit StringRelocationOverlay(Cell* dst) : RelocationOverlay(dst) {
    static_assert(sizeof(JSString) >= sizeof(StringRelocationOverlay));
  }

  // Views |cell| as a StringRelocationOverlay.
  // NOTE(review): the static_cast target types appear to be missing in this
  // copy of the file -- confirm against the upstream header.
  static const StringRelocationOverlay* fromCell(const Cell* cell) {
    return static_cast(cell);
  }

  static StringRelocationOverlay* fromCell(Cell* cell) {
    return static_cast(cell);
  }

  // Link to another overlay through the inherited next_ field; both accessors
  // require this overlay to be forwarded (asserted).
  void setNext(StringRelocationOverlay* next) {
    MOZ_ASSERT(isForwarded());
    next_ = next;
  }

  StringRelocationOverlay* next() const {
    MOZ_ASSERT(isForwarded());
    return (StringRelocationOverlay*)next_;
  }

  // Returns the saved nursery chars for the given character type; see the
  // specializations after the class.
  // NOTE(review): the template parameter list (presumably declaring CharT)
  // appears to be missing in this copy of the file -- confirm upstream.
  template
  MOZ_ALWAYS_INLINE const CharT* savedNurseryChars() const;

  const MOZ_ALWAYS_INLINE JS::Latin1Char* savedNurseryCharsLatin1() const {
    return nurseryCharsLatin1;
  }

  const MOZ_ALWAYS_INLINE char16_t* savedNurseryCharsTwoByte() const {
    return nurseryCharsTwoByte;
  }

  JSLinearString* savedNurseryBaseOrRelocOverlay() const {
    return nurseryBaseOrRelocOverlay;
  }

  // Transform a nursery string to a StringRelocationOverlay that is forwarded
  // to a tenured string.
  inline static StringRelocationOverlay* forwardCell(JSString* src, Cell* dst) {
    MOZ_ASSERT(!src->isForwarded());
    MOZ_ASSERT(!dst->isForwarded());

    JS::AutoCheckCannotGC nogc;
    StringRelocationOverlay* overlay;

    // Initialize the overlay, and remember the nursery base string if there is
    // one, or nursery non-inlined chars if it can be the root base of other
    // strings.
    //
    // The non-inlined chars of a tenured dependent string should point to the
    // tenured root base's one with an offset. For example, a dependent string
    // may start from the 3rd char of its root base. During tenuring, offsets
    // of dependent strings can be computed from the nursery non-inlined chars
    // remembered in overlays.
if (src->hasBase()) {
      // In every branch the value to remember is read from |src| before the
      // overlay is constructed in place over |src|.
      auto nurseryBaseOrRelocOverlay = src->nurseryBaseOrRelocOverlay();
      overlay = new (src) StringRelocationOverlay(dst);
      overlay->nurseryBaseOrRelocOverlay = nurseryBaseOrRelocOverlay;
    } else if (src->canOwnDependentChars()) {
      if (src->hasTwoByteChars()) {
        auto nurseryCharsTwoByte = src->asLinear().twoByteChars(nogc);
        overlay = new (src) StringRelocationOverlay(dst);
        overlay->nurseryCharsTwoByte = nurseryCharsTwoByte;
      } else {
        auto nurseryCharsLatin1 = src->asLinear().latin1Chars(nogc);
        overlay = new (src) StringRelocationOverlay(dst);
        overlay->nurseryCharsLatin1 = nurseryCharsLatin1;
      }
    } else {
      // Nothing extra to remember: the union is left unset.
      overlay = new (src) StringRelocationOverlay(dst);
    }

    return overlay;
  }
};

// Character-type dispatch for savedNurseryChars(): each specialization
// forwards to the accessor for its character width.
template <>
MOZ_ALWAYS_INLINE const JS::Latin1Char*
StringRelocationOverlay::savedNurseryChars() const {
  return savedNurseryCharsLatin1();
}

template <>
MOZ_ALWAYS_INLINE const char16_t* StringRelocationOverlay::savedNurseryChars()
    const {
  return savedNurseryCharsTwoByte();
}

}  // namespace gc
}  // namespace js

#endif /* vm_StringType_h */