/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "vm/StringType-inl.h" #include "mozilla/DebugOnly.h" #include "mozilla/HashFunctions.h" #include "mozilla/Latin1.h" #include "mozilla/MathAlgorithms.h" #include "mozilla/MemoryReporting.h" #include "mozilla/PodOperations.h" #include "mozilla/RangedPtr.h" #include "mozilla/TextUtils.h" #include "mozilla/Utf8.h" #include "mozilla/Vector.h" #include // std::{all_of,copy_n,enable_if,is_const,move} #include // std::size #include // std::is_same, std::is_unsigned #include "jsfriendapi.h" #include "jsnum.h" #include "builtin/Boolean.h" #ifdef ENABLE_RECORD_TUPLE # include "builtin/RecordObject.h" #endif #include "frontend/BytecodeCompiler.h" #include "gc/AllocKind.h" #include "gc/MaybeRooted.h" #include "gc/Nursery.h" #include "js/CharacterEncoding.h" #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_* #include "js/PropertyAndElement.h" // JS_DefineElement #include "js/StableStringChars.h" #include "js/UbiNode.h" #include "util/Unicode.h" #include "vm/GeckoProfiler.h" #include "vm/StaticStrings.h" #include "vm/ToSource.h" // js::ValueToSource #include "gc/Marking-inl.h" #include "vm/GeckoProfiler-inl.h" #ifdef ENABLE_RECORD_TUPLE # include "vm/RecordType.h" # include "vm/TupleType.h" #endif using namespace js; using mozilla::AsWritableChars; using mozilla::ConvertLatin1toUtf16; using mozilla::IsAsciiDigit; using mozilla::IsUtf16Latin1; using mozilla::LossyConvertUtf16toLatin1; using mozilla::PodCopy; using mozilla::RangedPtr; using mozilla::RoundUpPow2; using mozilla::Span; using JS::AutoCheckCannotGC; using JS::AutoStableStringChars; using UniqueLatin1Chars = UniquePtr; size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) { // JSRope: do nothing, we'll count all children chars when we hit the leaf // strings. if (isRope()) { return 0; } MOZ_ASSERT(isLinear()); // JSDependentString: do nothing, we'll count the chars when we hit the base // string. if (isDependent()) { return 0; } // JSExternalString: Ask the embedding to tell us what's going on. if (isExternal()) { // Our callback isn't supposed to cause GC. JS::AutoSuppressGCAnalysis nogc; return asExternal().callbacks()->sizeOfBuffer(asExternal().twoByteChars(), mallocSizeOf); } // JSExtensibleString: count the full capacity, not just the used space. if (isExtensible()) { JSExtensibleString& extensible = asExtensible(); return extensible.hasLatin1Chars() ? mallocSizeOf(extensible.rawLatin1Chars()) : mallocSizeOf(extensible.rawTwoByteChars()); } // JSInlineString, JSFatInlineString [JSInlineAtom, JSFatInlineAtom]: the // chars are inline. if (isInline()) { return 0; } // Everything else: measure the space for the chars. JSLinearString& linear = asLinear(); MOZ_ASSERT(linear.ownsMallocedChars()); return linear.hasLatin1Chars() ? mallocSizeOf(linear.rawLatin1Chars()) : mallocSizeOf(linear.rawTwoByteChars()); } JS::ubi::Node::Size JS::ubi::Concrete::size( mozilla::MallocSizeOf mallocSizeOf) const { JSString& str = get(); size_t size; if (str.isAtom()) { size = str.isFatInline() ? sizeof(js::FatInlineAtom) : sizeof(js::NormalAtom); } else { size = str.isFatInline() ? sizeof(JSFatInlineString) : sizeof(JSString); } if (IsInsideNursery(&str)) { size += Nursery::nurseryCellHeaderSize(); } size += str.sizeOfExcludingThis(mallocSizeOf); return size; } const char16_t JS::ubi::Concrete::concreteTypeName[] = u"JSString"; mozilla::Maybe > JSString::encodeUTF8Partial( const JS::AutoRequireNoGC& nogc, mozilla::Span buffer) const { mozilla::Vector stack; const JSString* current = this; char16_t pendingLeadSurrogate = 0; // U+0000 means no pending lead surrogate size_t totalRead = 0; size_t totalWritten = 0; for (;;) { if (current->isRope()) { JSRope& rope = current->asRope(); if (!stack.append(rope.rightChild())) { // OOM return mozilla::Nothing(); } current = rope.leftChild(); continue; } JSLinearString& linear = current->asLinear(); if (MOZ_LIKELY(linear.hasLatin1Chars())) { if (MOZ_UNLIKELY(pendingLeadSurrogate)) { if (buffer.Length() < 3) { return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } buffer[0] = '\xEF'; buffer[1] = '\xBF'; buffer[2] = '\xBD'; buffer = buffer.From(3); totalRead += 1; // pendingLeadSurrogate totalWritten += 3; pendingLeadSurrogate = 0; } auto src = mozilla::AsChars( mozilla::Span(linear.latin1Chars(nogc), linear.length())); size_t read; size_t written; std::tie(read, written) = mozilla::ConvertLatin1toUtf8Partial(src, buffer); buffer = buffer.From(written); totalRead += read; totalWritten += written; if (read < src.Length()) { return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } } else { auto src = mozilla::Span(linear.twoByteChars(nogc), linear.length()); if (MOZ_UNLIKELY(pendingLeadSurrogate)) { char16_t first = 0; if (!src.IsEmpty()) { first = src[0]; } if (unicode::IsTrailSurrogate(first)) { // Got a surrogate pair if (buffer.Length() < 4) { return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } uint32_t astral = unicode::UTF16Decode(pendingLeadSurrogate, first); buffer[0] = char(0b1111'0000 | (astral >> 18)); buffer[1] = char(0b1000'0000 | ((astral >> 12) & 0b11'1111)); buffer[2] = char(0b1000'0000 | ((astral >> 6) & 0b11'1111)); buffer[3] = char(0b1000'0000 | (astral & 0b11'1111)); src = src.From(1); buffer = buffer.From(4); totalRead += 2; // both pendingLeadSurrogate and first! totalWritten += 4; } else { // unpaired surrogate if (buffer.Length() < 3) { return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } buffer[0] = '\xEF'; buffer[1] = '\xBF'; buffer[2] = '\xBD'; buffer = buffer.From(3); totalRead += 1; // pendingLeadSurrogate totalWritten += 3; } pendingLeadSurrogate = 0; } if (!src.IsEmpty()) { char16_t last = src[src.Length() - 1]; if (unicode::IsLeadSurrogate(last)) { src = src.To(src.Length() - 1); pendingLeadSurrogate = last; } else { MOZ_ASSERT(!pendingLeadSurrogate); } size_t read; size_t written; std::tie(read, written) = mozilla::ConvertUtf16toUtf8Partial(src, buffer); buffer = buffer.From(written); totalRead += read; totalWritten += written; if (read < src.Length()) { return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } } } if (stack.empty()) { break; } current = stack.popCopy(); } if (MOZ_UNLIKELY(pendingLeadSurrogate)) { if (buffer.Length() < 3) { return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } buffer[0] = '\xEF'; buffer[1] = '\xBF'; buffer[2] = '\xBD'; // No need to update buffer and pendingLeadSurrogate anymore totalRead += 1; totalWritten += 3; } return mozilla::Some(std::make_tuple(totalRead, totalWritten)); } #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) template /*static */ void JSString::dumpChars(const CharT* s, size_t n, js::GenericPrinter& out) { if (n == SIZE_MAX) { n = 0; while (s[n]) { n++; } } out.put("\""); dumpCharsNoQuote(s, n, out); out.putChar('"'); } template void JSString::dumpChars(const Latin1Char* s, size_t n, js::GenericPrinter& out); template void JSString::dumpChars(const char16_t* s, size_t n, js::GenericPrinter& out); template /*static */ void JSString::dumpCharsNoQuote(const CharT* s, size_t n, js::GenericPrinter& out) { for (size_t i = 0; i < n; i++) { char16_t c = s[i]; if (c == '\n') { out.put("\\n"); } else if (c == '\t') { out.put("\\t"); } else if (c >= 32 && c < 127) { out.putChar((char)s[i]); } else if (c <= 255) { out.printf("\\x%02x", unsigned(c)); } else { out.printf("\\u%04x", unsigned(c)); } } } template void JSString::dumpCharsNoQuote(const Latin1Char* s, size_t n, js::GenericPrinter& out); template void JSString::dumpCharsNoQuote(const char16_t* s, size_t n, js::GenericPrinter& out); void JSString::dumpCharsNoNewline(js::GenericPrinter& out) { if (JSLinearString* linear = ensureLinear(nullptr)) { AutoCheckCannotGC nogc; if (hasLatin1Chars()) { out.put("[Latin 1]"); dumpChars(linear->latin1Chars(nogc), length(), out); } else { out.put("[2 byte]"); dumpChars(linear->twoByteChars(nogc), length(), out); } } else { out.put("(oom in JSString::dumpCharsNoNewline)"); } } void JSString::dumpCharsNoQuote(js::GenericPrinter& out) { if (JSLinearString* linear = ensureLinear(nullptr)) { AutoCheckCannotGC nogc; if (hasLatin1Chars()) { dumpCharsNoQuote(linear->latin1Chars(nogc), length(), out); } else { dumpCharsNoQuote(linear->twoByteChars(nogc), length(), out); } } else { out.put("(oom in JSString::dumpCharsNoNewline)"); } } void JSString::dump() { js::Fprinter out(stderr); dump(out); } void JSString::dump(js::GenericPrinter& out) { dumpNoNewline(out); out.putChar('\n'); } void JSString::dumpNoNewline(js::GenericPrinter& out) { if (JSLinearString* linear = ensureLinear(nullptr)) { AutoCheckCannotGC nogc; if (hasLatin1Chars()) { const Latin1Char* chars = linear->latin1Chars(nogc); out.printf("JSString* (%p) = Latin1Char * (%p) = ", (void*)this, (void*)chars); dumpChars(chars, length(), out); } else { const char16_t* chars = linear->twoByteChars(nogc); out.printf("JSString* (%p) = char16_t * (%p) = ", (void*)this, (void*)chars); dumpChars(chars, length(), out); } } else { out.put("(oom in JSString::dump)"); } } void JSString::dumpRepresentation(js::GenericPrinter& out, int indent) const { if (isRope()) { asRope().dumpRepresentation(out, indent); } else if (isDependent()) { asDependent().dumpRepresentation(out, indent); } else if (isExternal()) { asExternal().dumpRepresentation(out, indent); } else if (isExtensible()) { asExtensible().dumpRepresentation(out, indent); } else if (isInline()) { asInline().dumpRepresentation(out, indent); } else if (isLinear()) { asLinear().dumpRepresentation(out, indent); } else { MOZ_CRASH("Unexpected JSString representation"); } } void JSString::dumpRepresentationHeader(js::GenericPrinter& out, const char* subclass) const { uint32_t flags = JSString::flags(); // Print the string's address as an actual C++ expression, to facilitate // copy-and-paste into a debugger. out.printf("((%s*) %p) length: %zu flags: 0x%x", subclass, this, length(), flags); if (flags & LINEAR_BIT) out.put(" LINEAR"); if (flags & DEPENDENT_BIT) out.put(" DEPENDENT"); if (flags & INLINE_CHARS_BIT) out.put(" INLINE_CHARS"); if (flags & ATOM_BIT) out.put(" ATOM"); else out.put(" (NON ATOM)"); if (isPermanentAtom()) out.put(" PERMANENT"); if (flags & LATIN1_CHARS_BIT) out.put(" LATIN1"); if (flags & INDEX_VALUE_BIT) out.printf(" INDEX_VALUE(%u)", getIndexValue()); if (!isTenured()) out.put(" NURSERY"); out.putChar('\n'); } void JSLinearString::dumpRepresentationChars(js::GenericPrinter& out, int indent) const { if (hasLatin1Chars()) { out.printf("%*schars: ((Latin1Char*) %p) ", indent, "", rawLatin1Chars()); dumpChars(rawLatin1Chars(), length(), out); } else { out.printf("%*schars: ((char16_t*) %p) ", indent, "", rawTwoByteChars()); dumpChars(rawTwoByteChars(), length(), out); } out.putChar('\n'); } bool JSString::equals(const char* s) { JSLinearString* linear = ensureLinear(nullptr); if (!linear) { // This is DEBUG-only code. fprintf(stderr, "OOM in JSString::equals!\n"); return false; } return StringEqualsAscii(linear, s); } #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */ JSExtensibleString& JSLinearString::makeExtensible(size_t capacity) { MOZ_ASSERT(!isDependent()); MOZ_ASSERT(!isInline()); MOZ_ASSERT(!isAtom()); MOZ_ASSERT(!isExternal()); MOZ_ASSERT(capacity >= length()); js::RemoveCellMemory(this, allocSize(), js::MemoryUse::StringContents); setLengthAndFlags(length(), flags() | EXTENSIBLE_FLAGS); d.s.u3.capacity = capacity; js::AddCellMemory(this, allocSize(), js::MemoryUse::StringContents); return asExtensible(); } template static MOZ_ALWAYS_INLINE bool AllocChars(JSString* str, size_t length, CharT** chars, size_t* capacity) { /* * Grow by 12.5% if the buffer is very large. Otherwise, round up to the * next power of 2. This is similar to what we do with arrays; see * JSObject::ensureDenseArrayElements. */ static const size_t DOUBLING_MAX = 1024 * 1024; *capacity = length > DOUBLING_MAX ? length + (length / 8) : RoundUpPow2(length); static_assert(JSString::MAX_LENGTH * sizeof(CharT) <= UINT32_MAX); *chars = str->zone()->pod_arena_malloc(js::StringBufferArena, *capacity); return *chars != nullptr; } UniqueLatin1Chars JSRope::copyLatin1Chars(JSContext* maybecx, arena_id_t destArenaId) const { return copyCharsInternal(maybecx, destArenaId); } UniqueTwoByteChars JSRope::copyTwoByteChars(JSContext* maybecx, arena_id_t destArenaId) const { return copyCharsInternal(maybecx, destArenaId); } template UniquePtr JSRope::copyCharsInternal( JSContext* maybecx, arena_id_t destArenaId) const { // Left-leaning ropes are far more common than right-leaning ropes, so // perform a non-destructive traversal of the rope, right node first, // splatting each node's characters into a contiguous buffer. size_t n = length(); UniquePtr out; if (maybecx) { out.reset(maybecx->pod_arena_malloc(destArenaId, n)); } else { out.reset(js_pod_arena_malloc(destArenaId, n)); } if (!out) { return nullptr; } Vector nodeStack; const JSString* str = this; CharT* end = out.get() + str->length(); while (true) { if (str->isRope()) { if (!nodeStack.append(str->asRope().leftChild())) { if (maybecx) { ReportOutOfMemory(maybecx); } return nullptr; } str = str->asRope().rightChild(); } else { end -= str->length(); CopyChars(end, str->asLinear()); if (nodeStack.empty()) { break; } str = nodeStack.popCopy(); } } MOZ_ASSERT(end == out.get()); return out; } template void AddStringToHash(uint32_t* hash, const CharT* chars, size_t len) { // It's tempting to use |HashString| instead of this loop, but that's // slightly different than our existing implementation for non-ropes. We // want to pretend we have a contiguous set of chars so we need to // accumulate char by char rather than generate a new hash for substring // and then accumulate that. for (size_t i = 0; i < len; i++) { *hash = mozilla::AddToHash(*hash, chars[i]); } } void AddStringToHash(uint32_t* hash, const JSString* str) { AutoCheckCannotGC nogc; const auto& s = str->asLinear(); if (s.hasLatin1Chars()) { AddStringToHash(hash, s.latin1Chars(nogc), s.length()); } else { AddStringToHash(hash, s.twoByteChars(nogc), s.length()); } } bool JSRope::hash(uint32_t* outHash) const { Vector nodeStack; const JSString* str = this; *outHash = 0; while (true) { if (str->isRope()) { if (!nodeStack.append(str->asRope().rightChild())) { return false; } str = str->asRope().leftChild(); } else { AddStringToHash(outHash, str); if (nodeStack.empty()) { break; } str = nodeStack.popCopy(); } } return true; } #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void JSRope::dumpRepresentation(js::GenericPrinter& out, int indent) const { dumpRepresentationHeader(out, "JSRope"); indent += 2; out.printf("%*sleft: ", indent, ""); leftChild()->dumpRepresentation(out, indent); out.printf("%*sright: ", indent, ""); rightChild()->dumpRepresentation(out, indent); } #endif namespace js { template <> void CopyChars(char16_t* dest, const JSLinearString& str) { AutoCheckCannotGC nogc; if (str.hasTwoByteChars()) { PodCopy(dest, str.twoByteChars(nogc), str.length()); } else { CopyAndInflateChars(dest, str.latin1Chars(nogc), str.length()); } } template <> void CopyChars(Latin1Char* dest, const JSLinearString& str) { AutoCheckCannotGC nogc; if (str.hasLatin1Chars()) { PodCopy(dest, str.latin1Chars(nogc), str.length()); } else { /* * When we flatten a TwoByte rope, we turn child ropes (including Latin1 * ropes) into TwoByte dependent strings. If one of these strings is * also part of another Latin1 rope tree, we can have a Latin1 rope with * a TwoByte descendent and we end up here when we flatten it. Although * the chars are stored as TwoByte, we know they must be in the Latin1 * range, so we can safely deflate here. */ size_t len = str.length(); const char16_t* chars = str.twoByteChars(nogc); auto src = Span(chars, len); MOZ_ASSERT(IsUtf16Latin1(src)); LossyConvertUtf16toLatin1(src, AsWritableChars(Span(dest, len))); } } } /* namespace js */ template static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) { if constexpr (std::is_same_v) { return baseFlags; } return baseFlags | JSString::LATIN1_CHARS_BIT; } static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery, JSString* from, JSString* to, void* buffer, size_t size) { // Update the list of buffers associated with nursery cells when |buffer| is // moved from string |from| to string |to|, depending on whether those strings // are in the nursery or not. if (from->isTenured() && !to->isTenured()) { // Tenured leftmost child is giving its chars buffer to the // nursery-allocated root node. if (!nursery.registerMallocedBuffer(buffer, size)) { return false; } } else if (!from->isTenured() && to->isTenured()) { // Leftmost child is giving its nursery-held chars buffer to a // tenured string. nursery.removeMallocedBuffer(buffer, size); } return true; } static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength, bool hasTwoByteChars) { if (!leftmostChild->isExtensible()) { return false; } JSExtensibleString& str = leftmostChild->asExtensible(); return str.capacity() >= wholeLength && str.hasTwoByteChars() == hasTwoByteChars; } JSLinearString* JSRope::flatten(JSContext* maybecx) { MOZ_ASSERT_IF(maybecx, maybecx->isMainThreadContext()); mozilla::Maybe entry; if (maybecx) { entry.emplace(maybecx, "JSRope::flatten"); } JSLinearString* str = flattenInternal(); if (!str && maybecx) { ReportOutOfMemory(maybecx); } return str; } JSLinearString* JSRope::flattenInternal() { if (zone()->needsIncrementalBarrier()) { return flattenInternal(); } return flattenInternal(); } template JSLinearString* JSRope::flattenInternal() { if (hasTwoByteChars()) { return flattenInternal(this); } return flattenInternal(this); } template /* static */ JSLinearString* JSRope::flattenInternal(JSRope* root) { /* * Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as * its leaves. Mutate the root JSRope into a JSExtensibleString containing * the full flattened text that the root represents, and mutate all other * JSRopes in the interior of the DAG into JSDependentStrings that refer to * this new JSExtensibleString. * * If the leftmost leaf of our DAG is a JSExtensibleString, consider * stealing its buffer for use in our new root, and transforming it into a * JSDependentString too. Do not mutate any of the other leaves. * * Perform a depth-first dag traversal, splatting each node's characters * into a contiguous buffer. Visit each rope node three times: * 1. record position in the buffer and recurse into left child; * 2. recurse into the right child; * 3. transform the node into a dependent string. * To avoid maintaining a stack, tree nodes are mutated to indicate how many * times they have been visited. Since ropes can be dags, a node may be * encountered multiple times during traversal. However, step 3 above leaves * a valid dependent string, so everything works out. * * While ropes avoid all sorts of quadratic cases with string concatenation, * they can't help when ropes are immediately flattened. One idiomatic case * that we'd like to keep linear (and has traditionally been linear in SM * and other JS engines) is: * * while (...) { * s += ... * s.flatten * } * * Two behaviors accomplish this: * * - When the leftmost non-rope in the DAG we're flattening is a * JSExtensibleString with sufficient capacity to hold the entire * flattened string, we just flatten the DAG into its buffer. Then, when * we transform the root of the DAG from a JSRope into a * JSExtensibleString, we steal that buffer, and change the victim from a * JSExtensibleString to a JSDependentString. In this case, the left-hand * side of the string never needs to be copied. * * - Otherwise, we round up the total flattened size and create a fresh * JSExtensibleString with that much capacity. If this in turn becomes the * leftmost leaf of a subsequent flatten, we will hopefully be able to * fill it, as in the case above. * * Note that, even though the code for creating JSDependentStrings avoids * creating dependents of dependents, we can create that situation here: the * JSExtensibleStrings we transform into JSDependentStrings might have * JSDependentStrings pointing to them already. Stealing the buffer doesn't * change its address, only its owning JSExtensibleString, so all chars() * pointers in the JSDependentStrings are still valid. */ const size_t wholeLength = root->length(); size_t wholeCapacity; CharT* wholeChars; AutoCheckCannotGC nogc; Nursery& nursery = root->runtimeFromMainThread()->gc.nursery(); /* Find the left most string, containing the first string. */ JSRope* leftmostRope = root; while (leftmostRope->leftChild()->isRope()) { leftmostRope = &leftmostRope->leftChild()->asRope(); } JSString* leftmostChild = leftmostRope->leftChild(); bool reuseLeftmostBuffer = CanReuseLeftmostBuffer( leftmostChild, wholeLength, std::is_same_v); if (reuseLeftmostBuffer) { JSExtensibleString& left = leftmostChild->asExtensible(); wholeCapacity = left.capacity(); wholeChars = const_cast(left.nonInlineChars(nogc)); // Nursery::registerMallocedBuffer is fallible, so attempt it first before // doing anything irreversible. if (!UpdateNurseryBuffersOnTransfer(nursery, &left, root, wholeChars, wholeCapacity * sizeof(CharT))) { return nullptr; } } else { // If we can't reuse the leftmost child's buffer, allocate a new one. if (!AllocChars(root, wholeLength, &wholeChars, &wholeCapacity)) { return nullptr; } if (!root->isTenured()) { if (!nursery.registerMallocedBuffer(wholeChars, wholeCapacity * sizeof(CharT))) { js_free(wholeChars); return nullptr; } } } JSRope* str = root; CharT* pos = wholeChars; JSRope* parent = nullptr; uint32_t parentFlag = 0; first_visit_node : { MOZ_ASSERT_IF(str != root, parent && parentFlag); MOZ_ASSERT(!str->asRope().isBeingFlattened()); ropeBarrierDuringFlattening(str); JSString& left = *str->d.s.u2.left; str->d.s.u2.parent = parent; str->setFlagBit(parentFlag); parent = nullptr; parentFlag = 0; if (left.isRope()) { /* Return to this node when 'left' done, then goto visit_right_child. */ parent = str; parentFlag = FLATTEN_VISIT_RIGHT; str = &left.asRope(); goto first_visit_node; } if (!(reuseLeftmostBuffer && pos == wholeChars)) { CopyChars(pos, left.asLinear()); } pos += left.length(); } visit_right_child : { JSString& right = *str->d.s.u3.right; if (right.isRope()) { /* Return to this node when 'right' done, then goto finish_node. */ parent = str; parentFlag = FLATTEN_FINISH_NODE; str = &right.asRope(); goto first_visit_node; } CopyChars(pos, right.asLinear()); pos += right.length(); } finish_node : { if (str == root) { goto finish_root; } MOZ_ASSERT(pos >= wholeChars); CharT* chars = pos - str->length(); JSRope* strParent = str->d.s.u2.parent; str->setNonInlineChars(chars); MOZ_ASSERT(str->asRope().isBeingFlattened()); mozilla::DebugOnly visitRight = str->flags() & FLATTEN_VISIT_RIGHT; bool finishNode = str->flags() & FLATTEN_FINISH_NODE; MOZ_ASSERT(visitRight != finishNode); // This also clears the flags related to flattening. str->setLengthAndFlags(str->length(), StringFlagsForCharType(INIT_DEPENDENT_FLAGS)); str->d.s.u3.base = reinterpret_cast(root); /* will be true on exit */ // Every interior (rope) node in the rope's tree will be visited during // the traversal and post-barriered here, so earlier additions of // dependent.base -> root pointers are handled by this barrier as well. // // The only time post-barriers need do anything is when the root is in // the nursery. Note that the root was a rope but will be an extensible // string when we return, so it will not point to any strings and need // not be barriered. if (str->isTenured() && !root->isTenured()) { root->storeBuffer()->putWholeCell(str); } str = strParent; if (finishNode) { goto finish_node; } MOZ_ASSERT(visitRight); goto visit_right_child; } finish_root: // We traversed all the way back up to the root so we're finished. MOZ_ASSERT(str == root); MOZ_ASSERT(pos == wholeChars + wholeLength); root->setLengthAndFlags(wholeLength, StringFlagsForCharType(EXTENSIBLE_FLAGS)); root->setNonInlineChars(wholeChars); root->d.s.u3.capacity = wholeCapacity; AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents); if (reuseLeftmostBuffer) { // Remove memory association for left node we're about to make into a // dependent string. JSString& left = *leftmostChild; RemoveCellMemory(&left, left.allocSize(), MemoryUse::StringContents); uint32_t flags = INIT_DEPENDENT_FLAGS; if (left.inStringToAtomCache()) { flags |= IN_STRING_TO_ATOM_CACHE; } left.setLengthAndFlags(left.length(), StringFlagsForCharType(flags)); left.d.s.u3.base = &root->asLinear(); if (left.isTenured() && !root->isTenured()) { // leftmost child -> root is a tenured -> nursery edge. root->storeBuffer()->putWholeCell(&left); } } return &root->asLinear(); } template /* static */ inline void JSRope::ropeBarrierDuringFlattening(JSRope* rope) { MOZ_ASSERT(!rope->isBeingFlattened()); if constexpr (usingBarrier) { gc::PreWriteBarrierDuringFlattening(rope->leftChild()); gc::PreWriteBarrierDuringFlattening(rope->rightChild()); } } template static JSLinearString* EnsureLinear( JSContext* cx, typename MaybeRooted::HandleType string) { JSLinearString* linear = string->ensureLinear(cx); // Don't report an exception if GC is not allowed, just return nullptr. if (!linear && !allowGC) { cx->recoverFromOutOfMemory(); } return linear; } template JSString* js::ConcatStrings( JSContext* cx, typename MaybeRooted::HandleType left, typename MaybeRooted::HandleType right, gc::Heap heap) { MOZ_ASSERT_IF(!left->isAtom(), cx->isInsideCurrentZone(left)); MOZ_ASSERT_IF(!right->isAtom(), cx->isInsideCurrentZone(right)); size_t leftLen = left->length(); if (leftLen == 0) { return right; } size_t rightLen = right->length(); if (rightLen == 0) { return left; } size_t wholeLength = leftLen + rightLen; if (MOZ_UNLIKELY(wholeLength > JSString::MAX_LENGTH)) { // Don't report an exception if GC is not allowed, just return nullptr. if (allowGC) { js::ReportOversizedAllocation(cx, JSMSG_ALLOC_OVERFLOW); } return nullptr; } bool isLatin1 = left->hasLatin1Chars() && right->hasLatin1Chars(); bool canUseInline = isLatin1 ? JSInlineString::lengthFits(wholeLength) : JSInlineString::lengthFits(wholeLength); if (canUseInline) { Latin1Char* latin1Buf = nullptr; // initialize to silence GCC warning char16_t* twoByteBuf = nullptr; // initialize to silence GCC warning JSInlineString* str = isLatin1 ? AllocateInlineString(cx, wholeLength, &latin1Buf, heap) : AllocateInlineString(cx, wholeLength, &twoByteBuf, heap); if (!str) { return nullptr; } AutoCheckCannotGC nogc; JSLinearString* leftLinear = EnsureLinear(cx, left); if (!leftLinear) { return nullptr; } JSLinearString* rightLinear = EnsureLinear(cx, right); if (!rightLinear) { return nullptr; } if (isLatin1) { PodCopy(latin1Buf, leftLinear->latin1Chars(nogc), leftLen); PodCopy(latin1Buf + leftLen, rightLinear->latin1Chars(nogc), rightLen); } else { if (leftLinear->hasTwoByteChars()) { PodCopy(twoByteBuf, leftLinear->twoByteChars(nogc), leftLen); } else { CopyAndInflateChars(twoByteBuf, leftLinear->latin1Chars(nogc), leftLen); } if (rightLinear->hasTwoByteChars()) { PodCopy(twoByteBuf + leftLen, rightLinear->twoByteChars(nogc), rightLen); } else { CopyAndInflateChars(twoByteBuf + leftLen, rightLinear->latin1Chars(nogc), rightLen); } } return str; } return JSRope::new_(cx, left, right, wholeLength, heap); } template JSString* js::ConcatStrings(JSContext* cx, HandleString left, HandleString right, gc::Heap heap); template JSString* js::ConcatStrings(JSContext* cx, JSString* const& left, JSString* const& right, gc::Heap heap); /** * Copy |src[0..length]| to |dest[0..length]| when copying doesn't narrow and * therefore can't lose information. */ static inline void FillChars(char16_t* dest, const unsigned char* src, size_t length) { ConvertLatin1toUtf16(AsChars(Span(src, length)), Span(dest, length)); } static inline void FillChars(char16_t* dest, const char16_t* src, size_t length) { PodCopy(dest, src, length); } static inline void FillChars(unsigned char* dest, const unsigned char* src, size_t length) { PodCopy(dest, src, length); } /** * Copy |src[0..length]| to |dest[0..length]| when copying *does* narrow, but * the user guarantees every runtime |src[i]| value can be stored without change * of value in |dest[i]|. */ static inline void FillFromCompatible(unsigned char* dest, const char16_t* src, size_t length) { LossyConvertUtf16toLatin1(Span(src, length), AsWritableChars(Span(dest, length))); } #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void JSDependentString::dumpRepresentation(js::GenericPrinter& out, int indent) const { dumpRepresentationHeader(out, "JSDependentString"); indent += 2; out.printf("%*soffset: %zu\n", indent, "", baseOffset()); out.printf("%*sbase: ", indent, ""); base()->dumpRepresentation(out, indent); } #endif bool js::EqualChars(const JSLinearString* str1, const JSLinearString* str2) { MOZ_ASSERT(str1->length() == str2->length()); size_t len = str1->length(); AutoCheckCannotGC nogc; if (str1->hasTwoByteChars()) { if (str2->hasTwoByteChars()) { return EqualChars(str1->twoByteChars(nogc), str2->twoByteChars(nogc), len); } return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len); } if (str2->hasLatin1Chars()) { return EqualChars(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len); } return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len); } bool js::HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start) { MOZ_ASSERT(start + pat->length() <= text->length()); size_t patLen = pat->length(); AutoCheckCannotGC nogc; if (text->hasLatin1Chars()) { const Latin1Char* textChars = text->latin1Chars(nogc) + start; if (pat->hasLatin1Chars()) { return EqualChars(textChars, pat->latin1Chars(nogc), patLen); } return EqualChars(textChars, pat->twoByteChars(nogc), patLen); } const char16_t* textChars = text->twoByteChars(nogc) + start; if (pat->hasTwoByteChars()) { return EqualChars(textChars, pat->twoByteChars(nogc), patLen); } return EqualChars(pat->latin1Chars(nogc), textChars, patLen); } bool js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result) { if (str1 == str2) { *result = true; return true; } size_t length1 = str1->length(); if (length1 != str2->length()) { *result = false; return true; } JSLinearString* linear1 = str1->ensureLinear(cx); if (!linear1) { return false; } JSLinearString* linear2 = str2->ensureLinear(cx); if (!linear2) { return false; } *result = EqualChars(linear1, linear2); return true; } bool js::EqualStrings(const JSLinearString* str1, const JSLinearString* str2) { if (str1 == str2) { return true; } size_t length1 = str1->length(); if (length1 != str2->length()) { return false; } return EqualChars(str1, str2); } int32_t js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2) { AutoCheckCannotGC nogc; return s2->hasLatin1Chars() ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length()) : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length()); } static int32_t CompareStringsImpl(const JSLinearString* str1, const JSLinearString* str2) { size_t len1 = str1->length(); size_t len2 = str2->length(); AutoCheckCannotGC nogc; if (str1->hasLatin1Chars()) { const Latin1Char* chars1 = str1->latin1Chars(nogc); return str2->hasLatin1Chars() ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2) : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2); } const char16_t* chars1 = str1->twoByteChars(nogc); return str2->hasLatin1Chars() ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2) : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2); } bool js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result) { MOZ_ASSERT(str1); MOZ_ASSERT(str2); if (str1 == str2) { *result = 0; return true; } JSLinearString* linear1 = str1->ensureLinear(cx); if (!linear1) { return false; } JSLinearString* linear2 = str2->ensureLinear(cx); if (!linear2) { return false; } *result = CompareStringsImpl(linear1, linear2); return true; } int32_t js::CompareStrings(const JSLinearString* str1, const JSLinearString* str2) { MOZ_ASSERT(str1); MOZ_ASSERT(str2); if (str1 == str2) { return 0; } return CompareStringsImpl(str1, str2); } bool js::StringIsAscii(JSLinearString* str) { JS::AutoCheckCannotGC nogc; if (str->hasLatin1Chars()) { return mozilla::IsAscii( AsChars(Span(str->latin1Chars(nogc), str->length()))); } return mozilla::IsAscii(Span(str->twoByteChars(nogc), str->length())); } bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) { return StringEqualsAscii(str, asciiBytes, strlen(asciiBytes)); } bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes, size_t length) { MOZ_ASSERT(JS::StringIsASCII(Span(asciiBytes, length))); if (length != str->length()) { return false; } const Latin1Char* latin1 = reinterpret_cast(asciiBytes); AutoCheckCannotGC nogc; return str->hasLatin1Chars() ? EqualChars(latin1, str->latin1Chars(nogc), length) : EqualChars(latin1, str->twoByteChars(nogc), length); } template bool js::CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp) { MOZ_ASSERT(length > 0); MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH); MOZ_ASSERT(IsAsciiDigit(*s), "caller's fast path must have checked first char"); RangedPtr cp(s, length); const RangedPtr end(s + length, s, length); uint32_t index = AsciiDigitToNumber(*cp++); uint32_t oldIndex = 0; uint32_t c = 0; if (index != 0) { // Consume remaining characters only if the first character isn't '0'. while (cp < end && IsAsciiDigit(*cp)) { oldIndex = index; c = AsciiDigitToNumber(*cp); index = 10 * index + c; cp++; } } // It's not an integer index if there are characters after the number. if (cp != end) { return false; } // Look out for "4294967295" and larger-number strings that fit in // UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers less than or equal // to MAX_ARRAY_INDEX shall pass. if (oldIndex < MAX_ARRAY_INDEX / 10 || (oldIndex == MAX_ARRAY_INDEX / 10 && c <= (MAX_ARRAY_INDEX % 10))) { MOZ_ASSERT(index <= MAX_ARRAY_INDEX); *indexp = index; return true; } return false; } template bool js::CheckStringIsIndex(const Latin1Char* s, size_t length, uint32_t* indexp); template bool js::CheckStringIsIndex(const char16_t* s, size_t length, uint32_t* indexp); template static uint32_t AtomCharsToIndex(const CharT* s, size_t length) { // Chars are known to be a valid index value (as determined by // CheckStringIsIndex) that didn't fit in the "index value" bits in the // header. MOZ_ASSERT(length > 0); MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH); RangedPtr cp(s, length); const RangedPtr end(s + length, s, length); MOZ_ASSERT(IsAsciiDigit(*cp)); uint32_t index = AsciiDigitToNumber(*cp++); MOZ_ASSERT(index != 0); while (cp < end) { MOZ_ASSERT(IsAsciiDigit(*cp)); index = 10 * index + AsciiDigitToNumber(*cp); cp++; } MOZ_ASSERT(index <= MAX_ARRAY_INDEX); return index; } uint32_t JSAtom::getIndexSlow() const { MOZ_ASSERT(isIndex()); MOZ_ASSERT(!hasIndexValue()); size_t len = length(); AutoCheckCannotGC nogc; return hasLatin1Chars() ? AtomCharsToIndex(latin1Chars(nogc), len) : AtomCharsToIndex(twoByteChars(nogc), len); } static void MarkStringAndBasesNonDeduplicatable(JSLinearString* s) { while (true) { if (!s->isTenured()) { s->setNonDeduplicatable(); } if (!s->hasBase()) { break; } s = s->base(); } } bool AutoStableStringChars::init(JSContext* cx, JSString* s) { Rooted linearString(cx, s->ensureLinear(cx)); if (!linearString) { return false; } MOZ_ASSERT(state_ == Uninitialized); // If the chars are inline then we need to copy them since they may be moved // by a compacting GC. if (baseIsInline(linearString)) { return linearString->hasTwoByteChars() ? copyTwoByteChars(cx, linearString) : copyLatin1Chars(cx, linearString); } if (linearString->hasLatin1Chars()) { state_ = Latin1; latin1Chars_ = linearString->rawLatin1Chars(); } else { state_ = TwoByte; twoByteChars_ = linearString->rawTwoByteChars(); } MarkStringAndBasesNonDeduplicatable(linearString); s_ = linearString; return true; } bool AutoStableStringChars::initTwoByte(JSContext* cx, JSString* s) { Rooted linearString(cx, s->ensureLinear(cx)); if (!linearString) { return false; } MOZ_ASSERT(state_ == Uninitialized); if (linearString->hasLatin1Chars()) { return copyAndInflateLatin1Chars(cx, linearString); } // If the chars are inline then we need to copy them since they may be moved // by a compacting GC. if (baseIsInline(linearString)) { return copyTwoByteChars(cx, linearString); } state_ = TwoByte; twoByteChars_ = linearString->rawTwoByteChars(); MarkStringAndBasesNonDeduplicatable(linearString); s_ = linearString; return true; } bool AutoStableStringChars::baseIsInline(Handle linearString) { JSString* base = linearString; while (base->isDependent()) { base = base->asDependent().base(); } return base->isInline(); } template T* AutoStableStringChars::allocOwnChars(JSContext* cx, size_t count) { static_assert( InlineCapacity >= sizeof(JS::Latin1Char) * JSFatInlineString::MAX_LENGTH_LATIN1 && InlineCapacity >= sizeof(char16_t) * JSFatInlineString::MAX_LENGTH_TWO_BYTE, "InlineCapacity too small to hold fat inline strings"); static_assert((JSString::MAX_LENGTH & mozilla::tl::MulOverflowMask::value) == 0, "Size calculation can overflow"); MOZ_ASSERT(count <= JSString::MAX_LENGTH); size_t size = sizeof(T) * count; ownChars_.emplace(cx); if (!ownChars_->resize(size)) { ownChars_.reset(); return nullptr; } return reinterpret_cast(ownChars_->begin()); } bool AutoStableStringChars::copyAndInflateLatin1Chars( JSContext* cx, Handle linearString) { char16_t* chars = allocOwnChars(cx, linearString->length()); if (!chars) { return false; } FillChars(chars, linearString->rawLatin1Chars(), linearString->length()); state_ = TwoByte; twoByteChars_ = chars; s_ = linearString; return true; } bool AutoStableStringChars::copyLatin1Chars( JSContext* cx, Handle linearString) { size_t length = linearString->length(); JS::Latin1Char* chars = allocOwnChars(cx, length); if (!chars) { return false; } FillChars(chars, linearString->rawLatin1Chars(), length); state_ = Latin1; latin1Chars_ = chars; s_ = linearString; return true; } bool AutoStableStringChars::copyTwoByteChars( JSContext* cx, Handle linearString) { size_t length = linearString->length(); char16_t* chars = allocOwnChars(cx, length); if (!chars) { return false; } FillChars(chars, linearString->rawTwoByteChars(), length); state_ = TwoByte; twoByteChars_ = chars; s_ = linearString; return true; } template <> bool JS::SourceText::initMaybeBorrowed( JSContext* cx, JS::AutoStableStringChars& linearChars) { MOZ_ASSERT(linearChars.isTwoByte(), "AutoStableStringChars must be initialized with char16_t"); const char16_t* chars = linearChars.twoByteChars(); size_t length = linearChars.length(); JS::SourceOwnership ownership = linearChars.maybeGiveOwnershipToCaller() ? JS::SourceOwnership::TakeOwnership : JS::SourceOwnership::Borrowed; return initImpl(cx, chars, length, ownership); } template <> bool JS::SourceText::initMaybeBorrowed( JS::FrontendContext* fc, JS::AutoStableStringChars& linearChars) { MOZ_ASSERT(linearChars.isTwoByte(), "AutoStableStringChars must be initialized with char16_t"); const char16_t* chars = linearChars.twoByteChars(); size_t length = linearChars.length(); JS::SourceOwnership ownership = linearChars.maybeGiveOwnershipToCaller() ? JS::SourceOwnership::TakeOwnership : JS::SourceOwnership::Borrowed; return initImpl(fc, chars, length, ownership); } #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void JSAtom::dump(js::GenericPrinter& out) { out.printf("JSAtom* (%p) = ", (void*)this); this->JSString::dump(out); } void JSAtom::dump() { Fprinter out(stderr); dump(out); } void JSExternalString::dumpRepresentation(js::GenericPrinter& out, int indent) const { dumpRepresentationHeader(out, "JSExternalString"); indent += 2; out.printf("%*sfinalizer: ((JSExternalStringCallbacks*) %p)\n", indent, "", callbacks()); dumpRepresentationChars(out, indent); } #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */ JSLinearString* js::NewDependentString(JSContext* cx, JSString* baseArg, size_t start, size_t length, gc::Heap heap) { if (length == 0) { return cx->emptyString(); } JSLinearString* base = baseArg->ensureLinear(cx); if (!base) { return nullptr; } if (start == 0 && length == base->length()) { return base; } if (base->hasTwoByteChars()) { AutoCheckCannotGC nogc; const char16_t* chars = base->twoByteChars(nogc) + start; if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) { return staticStr; } } else { AutoCheckCannotGC nogc; const Latin1Char* chars = base->latin1Chars(nogc) + start; if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) { return staticStr; } } return JSDependentString::new_(cx, base, start, length, heap); } static inline bool CanStoreCharsAsLatin1(const char16_t* s, size_t length) { return IsUtf16Latin1(Span(s, length)); } template static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringDeflated( JSContext* cx, const mozilla::Range& chars, gc::Heap heap = gc::Heap::Default) { size_t len = chars.length(); Latin1Char* storage; JSInlineString* str = AllocateInlineString(cx, len, &storage, heap); if (!str) { return nullptr; } MOZ_ASSERT(CanStoreCharsAsLatin1(chars.begin().get(), len)); FillFromCompatible(storage, chars.begin().get(), len); return str; } template static JSLinearString* NewStringDeflated(JSContext* cx, const char16_t* s, size_t n, gc::Heap heap) { if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) { return str; } if (JSInlineString::lengthFits(n)) { return NewInlineStringDeflated( cx, mozilla::Range(s, n), heap); } auto news = cx->make_pod_arena_array(js::StringBufferArena, n); if (!news) { if (!allowGC) { cx->recoverFromOutOfMemory(); } return nullptr; } MOZ_ASSERT(CanStoreCharsAsLatin1(s, n)); FillFromCompatible(news.get(), s, n); return JSLinearString::new_(cx, std::move(news), n, heap); } static MOZ_ALWAYS_INLINE JSAtom* NewInlineAtomDeflated(JSContext* cx, const char16_t* chars, size_t length, js::HashNumber hash) { Latin1Char* storage; JSAtom* str = AllocateInlineAtom(cx, length, &storage, hash); if (!str) { return nullptr; } MOZ_ASSERT(CanStoreCharsAsLatin1(chars, length)); FillFromCompatible(storage, chars, length); return str; } static JSAtom* NewAtomDeflatedValidLength(JSContext* cx, const char16_t* s, size_t n, js::HashNumber hash) { if (JSInlineString::lengthFits(n)) { return NewInlineAtomDeflated(cx, s, n, hash); } auto news = cx->make_pod_arena_array(js::StringBufferArena, n); if (!news) { cx->recoverFromOutOfMemory(); return nullptr; } MOZ_ASSERT(CanStoreCharsAsLatin1(s, n)); FillFromCompatible(news.get(), s, n); return JSAtom::newValidLength(cx, std::move(news), n, hash); } template JSLinearString* js::NewStringDontDeflate( JSContext* cx, UniquePtr chars, size_t length, gc::Heap heap) { if (JSLinearString* str = TryEmptyOrStaticString(cx, chars.get(), length)) { return str; } if (JSInlineString::lengthFits(length)) { // |chars.get()| is safe because 1) |NewInlineString| necessarily *copies*, // and 2) |chars| frees its contents only when this function returns. return NewInlineString( cx, mozilla::Range(chars.get(), length), heap); } return JSLinearString::new_(cx, std::move(chars), length, heap); } template JSLinearString* js::NewStringDontDeflate( JSContext* cx, UniqueTwoByteChars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewStringDontDeflate( JSContext* cx, UniqueTwoByteChars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewStringDontDeflate( JSContext* cx, UniqueLatin1Chars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewStringDontDeflate(JSContext* cx, UniqueLatin1Chars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewString(JSContext* cx, UniquePtr chars, size_t length, gc::Heap heap) { if constexpr (std::is_same_v) { if (CanStoreCharsAsLatin1(chars.get(), length)) { // Deflating copies from |chars.get()| and lets |chars| be freed on // return. return NewStringDeflated(cx, chars.get(), length, heap); } } return NewStringDontDeflate(cx, std::move(chars), length, heap); } template JSLinearString* js::NewString(JSContext* cx, UniqueTwoByteChars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewString(JSContext* cx, UniqueTwoByteChars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewString(JSContext* cx, UniqueLatin1Chars chars, size_t length, gc::Heap heap); template JSLinearString* js::NewString(JSContext* cx, UniqueLatin1Chars chars, size_t length, gc::Heap heap); namespace js { template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(JSContext* cx, const CharT* s, size_t n, gc::Heap heap) { if (JSInlineString::lengthFits(n)) { return NewInlineString(cx, mozilla::Range(s, n), heap); } auto news = cx->make_pod_arena_array(js::StringBufferArena, n); if (!news) { if (!allowGC) { cx->recoverFromOutOfMemory(); } return nullptr; } FillChars(news.get(), s, n); return JSLinearString::newValidLength(cx, std::move(news), n, heap); } template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength( JSContext* cx, const char16_t* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength( JSContext* cx, const Latin1Char* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const CharT* s, size_t n, gc::Heap heap) { if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) { return str; } if (MOZ_UNLIKELY(!JSLinearString::validateLength(cx, n))) { return nullptr; } return NewStringCopyNDontDeflateNonStaticValidLength(cx, s, n, heap); } template JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const char16_t* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const char16_t* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const Latin1Char* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const Latin1Char* s, size_t n, gc::Heap heap); JSLinearString* NewLatin1StringZ(JSContext* cx, UniqueChars chars, gc::Heap heap) { size_t length = strlen(chars.get()); UniqueLatin1Chars latin1(reinterpret_cast(chars.release())); return NewString(cx, std::move(latin1), length, heap); } template JSLinearString* NewStringCopyN(JSContext* cx, const CharT* s, size_t n, gc::Heap heap) { if constexpr (std::is_same_v) { if (CanStoreCharsAsLatin1(s, n)) { return NewStringDeflated(cx, s, n, heap); } } return NewStringCopyNDontDeflate(cx, s, n, heap); } template JSLinearString* NewStringCopyN(JSContext* cx, const char16_t* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyN(JSContext* cx, const char16_t* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyN(JSContext* cx, const Latin1Char* s, size_t n, gc::Heap heap); template JSLinearString* NewStringCopyN(JSContext* cx, const Latin1Char* s, size_t n, gc::Heap heap); template JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const CharT* s, size_t n, js::HashNumber hash) { if constexpr (std::is_same_v) { MOZ_ASSERT(!CanStoreCharsAsLatin1(s, n)); } if (JSInlineString::lengthFits(n)) { return NewInlineAtom(cx, s, n, hash); } auto news = cx->make_pod_arena_array(js::StringBufferArena, n); if (!news) { cx->recoverFromOutOfMemory(); return nullptr; } FillChars(news.get(), s, n); return JSAtom::newValidLength(cx, std::move(news), n, hash); } template JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const char16_t* s, size_t n, js::HashNumber hash); template JSAtom* NewAtomCopyNDontDeflateValidLength(JSContext* cx, const Latin1Char* s, size_t n, js::HashNumber hash); template JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx, const CharT* s, size_t n, js::HashNumber hash) { if constexpr (std::is_same_v) { if (CanStoreCharsAsLatin1(s, n)) { return NewAtomDeflatedValidLength(cx, s, n, hash); } } return NewAtomCopyNDontDeflateValidLength(cx, s, n, hash); } template JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx, const char16_t* s, size_t n, js::HashNumber hash); template JSAtom* NewAtomCopyNMaybeDeflateValidLength(JSContext* cx, const Latin1Char* s, size_t n, js::HashNumber hash); JSLinearString* NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8, gc::Heap heap) { JS::SmallestEncoding encoding = JS::FindSmallestEncoding(utf8); if (encoding == JS::SmallestEncoding::ASCII) { return NewStringCopyN(cx, utf8.begin().get(), utf8.length(), heap); } size_t length; if (encoding == JS::SmallestEncoding::Latin1) { UniqueLatin1Chars latin1( UTF8CharsToNewLatin1CharsZ(cx, utf8, &length, js::StringBufferArena) .get()); if (!latin1) { return nullptr; } return NewString(cx, std::move(latin1), length, heap); } MOZ_ASSERT(encoding == JS::SmallestEncoding::UTF16); UniqueTwoByteChars utf16( UTF8CharsToNewTwoByteCharsZ(cx, utf8, &length, js::StringBufferArena) .get()); if (!utf16) { return nullptr; } return NewString(cx, std::move(utf16), length, heap); } MOZ_ALWAYS_INLINE JSString* ExternalStringCache::lookup(const char16_t* chars, size_t len) const { AutoCheckCannotGC nogc; for (size_t i = 0; i < NumEntries; i++) { JSString* str = entries_[i]; if (!str || str->length() != len) { continue; } const char16_t* strChars = str->asLinear().nonInlineTwoByteChars(nogc); if (chars == strChars) { // Note that we don't need an incremental barrier here or below. // The cache is purged on GC so any string we get from the cache // must have been allocated after the GC started. return str; } // Compare the chars. Don't do this for long strings as it will be // faster to allocate a new external string. static const size_t MaxLengthForCharComparison = 100; if (len <= MaxLengthForCharComparison && EqualChars(chars, strChars, len)) { return str; } } return nullptr; } MOZ_ALWAYS_INLINE void ExternalStringCache::put(JSString* str) { MOZ_ASSERT(str->isExternal()); for (size_t i = NumEntries - 1; i > 0; i--) { entries_[i] = entries_[i - 1]; } entries_[0] = str; } JSString* NewMaybeExternalString(JSContext* cx, const char16_t* s, size_t n, const JSExternalStringCallbacks* callbacks, bool* allocatedExternal, gc::Heap heap) { if (JSString* str = TryEmptyOrStaticString(cx, s, n)) { *allocatedExternal = false; return str; } if (JSThinInlineString::lengthFits(n) && CanStoreCharsAsLatin1(s, n)) { *allocatedExternal = false; return NewInlineStringDeflated( cx, mozilla::Range(s, n), heap); } ExternalStringCache& cache = cx->zone()->externalStringCache(); if (JSString* str = cache.lookup(s, n)) { *allocatedExternal = false; return str; } JSString* str = JSExternalString::new_(cx, s, n, callbacks); if (!str) { return nullptr; } *allocatedExternal = true; cache.put(str); return str; } } /* namespace js */ #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) void JSExtensibleString::dumpRepresentation(js::GenericPrinter& out, int indent) const { dumpRepresentationHeader(out, "JSExtensibleString"); indent += 2; out.printf("%*scapacity: %zu\n", indent, "", capacity()); dumpRepresentationChars(out, indent); } void JSInlineString::dumpRepresentation(js::GenericPrinter& out, int indent) const { dumpRepresentationHeader( out, isFatInline() ? "JSFatInlineString" : "JSThinInlineString"); indent += 2; dumpRepresentationChars(out, indent); } void JSLinearString::dumpRepresentation(js::GenericPrinter& out, int indent) const { dumpRepresentationHeader(out, "JSLinearString"); indent += 2; dumpRepresentationChars(out, indent); } #endif struct RepresentativeExternalString : public JSExternalStringCallbacks { void finalize(char16_t* chars) const override { // Constant chars, nothing to do. } size_t sizeOfBuffer(const char16_t* chars, mozilla::MallocSizeOf mallocSizeOf) const override { // This string's buffer is not heap-allocated, so its malloc size is 0. return 0; } }; static const RepresentativeExternalString RepresentativeExternalStringCallbacks; template static bool FillWithRepresentatives(JSContext* cx, Handle array, uint32_t* index, const CharT* chars, size_t len, size_t fatInlineMaxLength, const CheckString& check, gc::Heap heap) { auto AppendString = [&check](JSContext* cx, Handle array, uint32_t* index, HandleString s) { MOZ_ASSERT(check(s)); (void)check; // silence clang -Wunused-lambda-capture in opt builds RootedValue val(cx, StringValue(s)); return JS_DefineElement(cx, array, (*index)++, val, 0); }; MOZ_ASSERT(len > fatInlineMaxLength); // Normal atom. RootedString atom1(cx, AtomizeChars(cx, chars, len)); if (!atom1 || !AppendString(cx, array, index, atom1)) { return false; } MOZ_ASSERT(atom1->isAtom()); // Inline atom. RootedString atom2(cx, AtomizeChars(cx, chars, 2)); if (!atom2 || !AppendString(cx, array, index, atom2)) { return false; } MOZ_ASSERT(atom2->isAtom()); MOZ_ASSERT(atom2->isInline()); // Fat inline atom. RootedString atom3(cx, AtomizeChars(cx, chars, fatInlineMaxLength)); if (!atom3 || !AppendString(cx, array, index, atom3)) { return false; } MOZ_ASSERT(atom3->isAtom()); MOZ_ASSERT(atom3->isFatInline()); // Normal linear string; maybe nursery. RootedString linear1(cx, NewStringCopyN(cx, chars, len, heap)); if (!linear1 || !AppendString(cx, array, index, linear1)) { return false; } MOZ_ASSERT(linear1->isLinear()); // Inline string; maybe nursery. RootedString linear2(cx, NewStringCopyN(cx, chars, 3, heap)); if (!linear2 || !AppendString(cx, array, index, linear2)) { return false; } MOZ_ASSERT(linear2->isLinear()); MOZ_ASSERT(linear2->isInline()); // Fat inline string; maybe nursery. RootedString linear3( cx, NewStringCopyN(cx, chars, fatInlineMaxLength, heap)); if (!linear3 || !AppendString(cx, array, index, linear3)) { return false; } MOZ_ASSERT(linear3->isLinear()); MOZ_ASSERT(linear3->isFatInline()); // Rope; maybe nursery. RootedString rope(cx, ConcatStrings(cx, atom1, atom3, heap)); if (!rope || !AppendString(cx, array, index, rope)) { return false; } MOZ_ASSERT(rope->isRope()); // Dependent; maybe nursery. RootedString dep(cx, NewDependentString(cx, atom1, 0, len - 2, heap)); if (!dep || !AppendString(cx, array, index, dep)) { return false; } MOZ_ASSERT(dep->isDependent()); // Extensible; maybe nursery. RootedString temp1(cx, NewStringCopyN(cx, chars, len, heap)); if (!temp1) { return false; } RootedString extensible(cx, ConcatStrings(cx, temp1, atom3, heap)); if (!extensible || !extensible->ensureLinear(cx)) { return false; } if (!AppendString(cx, array, index, extensible)) { return false; } MOZ_ASSERT(extensible->isExtensible()); // External. Note that we currently only support TwoByte external strings. RootedString external1(cx), external2(cx); if constexpr (std::is_same_v) { external1 = JS_NewExternalString(cx, (const char16_t*)chars, len, &RepresentativeExternalStringCallbacks); if (!external1 || !AppendString(cx, array, index, external1)) { return false; } MOZ_ASSERT(external1->isExternal()); external2 = JS_NewExternalString(cx, (const char16_t*)chars, 2, &RepresentativeExternalStringCallbacks); if (!external2 || !AppendString(cx, array, index, external2)) { return false; } MOZ_ASSERT(external2->isExternal()); } // Assert the strings still have the types we expect after creating the // other strings. MOZ_ASSERT(atom1->isAtom()); MOZ_ASSERT(atom2->isAtom()); MOZ_ASSERT(atom3->isAtom()); MOZ_ASSERT(atom2->isInline()); MOZ_ASSERT(atom3->isFatInline()); MOZ_ASSERT(linear1->isLinear()); MOZ_ASSERT(linear2->isLinear()); MOZ_ASSERT(linear3->isLinear()); MOZ_ASSERT(linear2->isInline()); MOZ_ASSERT(linear3->isFatInline()); MOZ_ASSERT(rope->isRope()); MOZ_ASSERT(dep->isDependent()); MOZ_ASSERT(extensible->isExtensible()); MOZ_ASSERT_IF(external1, external1->isExternal()); MOZ_ASSERT_IF(external2, external2->isExternal()); return true; } /* static */ bool JSString::fillWithRepresentatives(JSContext* cx, Handle array) { uint32_t index = 0; auto CheckTwoByte = [](JSString* str) { return str->hasTwoByteChars(); }; auto CheckLatin1 = [](JSString* str) { return str->hasLatin1Chars(); }; static const char16_t twoByteChars[] = u"\u1234abc\0def\u5678ghijklmasdfa\0xyz0123456789"; static const Latin1Char latin1Chars[] = "abc\0defghijklmasdfa\0xyz0123456789"; // Create strings using both the default heap and forcing the tenured heap. If // nursery strings are available, this is a best effort at creating them in // the default heap case. Since nursery strings may be disabled or a GC may // occur during this process, there may be duplicate representatives in the // final list. if (!FillWithRepresentatives(cx, array, &index, twoByteChars, std::size(twoByteChars) - 1, JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte, gc::Heap::Tenured)) { return false; } if (!FillWithRepresentatives(cx, array, &index, latin1Chars, std::size(latin1Chars) - 1, JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1, gc::Heap::Tenured)) { return false; } if (!FillWithRepresentatives(cx, array, &index, twoByteChars, std::size(twoByteChars) - 1, JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte, gc::Heap::Default)) { return false; } if (!FillWithRepresentatives(cx, array, &index, latin1Chars, std::size(latin1Chars) - 1, JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1, gc::Heap::Default)) { return false; } MOZ_ASSERT(index == 40); return true; } /*** Conversions ************************************************************/ UniqueChars js::EncodeLatin1(JSContext* cx, JSString* str) { JSLinearString* linear = str->ensureLinear(cx); if (!linear) { return nullptr; } JS::AutoCheckCannotGC nogc; if (linear->hasTwoByteChars()) { JS::Latin1CharsZ chars = JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, linear->twoByteRange(nogc)); return UniqueChars(chars.c_str()); } size_t len = str->length(); Latin1Char* buf = cx->pod_malloc(len + 1); if (!buf) { return nullptr; } FillChars(buf, linear->latin1Chars(nogc), len); buf[len] = '\0'; return UniqueChars(reinterpret_cast(buf)); } UniqueChars js::EncodeAscii(JSContext* cx, JSString* str) { JSLinearString* linear = str->ensureLinear(cx); if (!linear) { return nullptr; } MOZ_ASSERT(StringIsAscii(linear)); return EncodeLatin1(cx, linear); } UniqueChars js::IdToPrintableUTF8(JSContext* cx, HandleId id, IdToPrintableBehavior behavior) { // ToString() throws a TypeError, therefore require that callers // request source representation when |id| is a property key. MOZ_ASSERT_IF( behavior == IdToPrintableBehavior::IdIsIdentifier, id.isAtom() && frontend::IsIdentifierNameOrPrivateName(id.toAtom())); RootedValue v(cx, IdToValue(id)); JSString* str; if (behavior == IdToPrintableBehavior::IdIsPropertyKey) { str = ValueToSource(cx, v); } else { str = ToString(cx, v); } if (!str) { return nullptr; } return StringToNewUTF8CharsZ(cx, *str); } template JSString* js::ToStringSlow( JSContext* cx, typename MaybeRooted::HandleType arg) { /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */ MOZ_ASSERT(!arg.isString()); Value v = arg; if (!v.isPrimitive()) { MOZ_ASSERT(!cx->isHelperThreadContext()); if (!allowGC) { return nullptr; } RootedValue v2(cx, v); if (!ToPrimitive(cx, JSTYPE_STRING, &v2)) { return nullptr; } v = v2; } JSString* str; if (v.isString()) { str = v.toString(); } else if (v.isInt32()) { str = Int32ToString(cx, v.toInt32()); } else if (v.isDouble()) { str = NumberToString(cx, v.toDouble()); } else if (v.isBoolean()) { str = BooleanToString(cx, v.toBoolean()); } else if (v.isNull()) { str = cx->names().null; } else if (v.isSymbol()) { MOZ_ASSERT(!cx->isHelperThreadContext()); if (allowGC) { JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_SYMBOL_TO_STRING); } return nullptr; } else if (v.isBigInt()) { if (!allowGC) { return nullptr; } RootedBigInt i(cx, v.toBigInt()); str = BigInt::toString(cx, i, 10); } #ifdef ENABLE_RECORD_TUPLE else if (v.isExtendedPrimitive()) { if (!allowGC) { return nullptr; } if (IsTuple(v)) { Rooted tup(cx, &TupleType::thisTupleValue(v)); return TupleToSource(cx, tup); } Rooted rec(cx); MOZ_ALWAYS_TRUE(RecordObject::maybeUnbox(&v.getObjectPayload(), &rec)); return RecordToSource(cx, rec); } #endif else { MOZ_ASSERT(v.isUndefined()); str = cx->names().undefined; } return str; } template JSString* js::ToStringSlow(JSContext* cx, HandleValue arg); template JSString* js::ToStringSlow(JSContext* cx, const Value& arg); JS_PUBLIC_API JSString* js::ToStringSlow(JSContext* cx, HandleValue v) { return ToStringSlow(cx, v); }