diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/util/StringBuffer.h | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/util/StringBuffer.h')
-rw-r--r-- | js/src/util/StringBuffer.h | 507 |
1 files changed, 507 insertions, 0 deletions
diff --git a/js/src/util/StringBuffer.h b/js/src/util/StringBuffer.h new file mode 100644 index 0000000000..23c1ff61ae --- /dev/null +++ b/js/src/util/StringBuffer.h @@ -0,0 +1,507 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef util_StringBuffer_h +#define util_StringBuffer_h + +#include "mozilla/CheckedInt.h" +#include "mozilla/MaybeOneOf.h" +#include "mozilla/Utf8.h" + +#include "frontend/FrontendContext.h" +#include "js/Vector.h" +#include "vm/StringType.h" + +namespace js { + +class FrontendContext; + +namespace frontend { +class ParserAtomsTable; +class TaggedParserAtomIndex; +} // namespace frontend + +namespace detail { + +// GrowEltsAggressively will multiply the space by a factor of 8 on overflow, to +// avoid very expensive memcpys for large strings (eg giant toJSON output for +// sessionstore.js). Drop back to the normal expansion policy once the buffer +// hits 128MB. +static constexpr size_t AggressiveLimit = 128 << 20; + +template <size_t EltSize> +inline size_t GrowEltsAggressively(size_t aOldElts, size_t aIncr) { + mozilla::CheckedInt<size_t> required = + mozilla::CheckedInt<size_t>(aOldElts) + aIncr; + if (!(required * 2).isValid()) { + return 0; + } + required = mozilla::RoundUpPow2(required.value()); + required *= 8; + if (!(required * EltSize).isValid() || required.value() > AggressiveLimit) { + // Fall back to doubling behavior if the aggressive growth fails or gets too + // big. + return mozilla::detail::GrowEltsByDoubling<EltSize>(aOldElts, aIncr); + } + return required.value(); +}; + +} // namespace detail + +class StringBufferAllocPolicy { + TempAllocPolicy impl_; + const arena_id_t& arenaId_; + + public: + StringBufferAllocPolicy(FrontendContext* fc, const arena_id_t& arenaId) + : impl_(fc), arenaId_(arenaId) {} + + StringBufferAllocPolicy(JSContext* cx, const arena_id_t& arenaId) + : impl_(cx), arenaId_(arenaId) {} + + template <typename T> + T* maybe_pod_malloc(size_t numElems) { + return impl_.maybe_pod_arena_malloc<T>(arenaId_, numElems); + } + template <typename T> + T* maybe_pod_calloc(size_t numElems) { + return impl_.maybe_pod_arena_calloc<T>(arenaId_, numElems); + } + template <typename T> + T* maybe_pod_realloc(T* p, size_t oldSize, size_t newSize) { + return impl_.maybe_pod_arena_realloc<T>(arenaId_, p, oldSize, newSize); + } + template <typename T> + T* pod_malloc(size_t numElems) { + return impl_.pod_arena_malloc<T>(arenaId_, numElems); + } + template <typename T> + T* pod_calloc(size_t numElems) { + return impl_.pod_arena_calloc<T>(arenaId_, numElems); + } + template <typename T> + T* pod_realloc(T* p, size_t oldSize, size_t newSize) { + return impl_.pod_arena_realloc<T>(arenaId_, p, oldSize, newSize); + } + template <typename T> + void free_(T* p, size_t numElems = 0) { + impl_.free_(p, numElems); + } + void reportAllocOverflow() const { impl_.reportAllocOverflow(); } + bool checkSimulatedOOM() const { return impl_.checkSimulatedOOM(); } + + // See ComputeGrowth in mfbt/Vector.h. + template <size_t EltSize> + static size_t computeGrowth(size_t aOldElts, size_t aIncr) { + return detail::GrowEltsAggressively<EltSize>(aOldElts, aIncr); + } +}; + +/* + * String builder that eagerly checks for over-allocation past the maximum + * string length. + * + * Any operation which would exceed the maximum string length causes an + * exception report on the context and results in a failed return value. + * + * Well-sized extractions (which waste no more than 1/4 of their char + * buffer space) are guaranteed for strings built by this interface. + * See |extractWellSized|. + */ +class StringBuffer { + protected: + template <typename CharT> + using BufferType = Vector<CharT, 64 / sizeof(CharT), StringBufferAllocPolicy>; + + /* + * The Vector's buffer may be either stolen or copied, so we need to use + * TempAllocPolicy and account for the memory manually when stealing. + */ + using Latin1CharBuffer = BufferType<Latin1Char>; + using TwoByteCharBuffer = BufferType<char16_t>; + + JSContext* maybeCx_ = nullptr; + + /* + * If Latin1 strings are enabled, cb starts out as a Latin1CharBuffer. When + * a TwoByte char is appended, inflateChars() constructs a TwoByteCharBuffer + * and copies the Latin1 chars. + */ + mozilla::MaybeOneOf<Latin1CharBuffer, TwoByteCharBuffer> cb; + + /* Number of reserve()'d chars, see inflateChars. */ + size_t reserved_ = 0; + + StringBuffer(const StringBuffer& other) = delete; + void operator=(const StringBuffer& other) = delete; + + template <typename CharT> + MOZ_ALWAYS_INLINE bool isCharType() const { + return cb.constructed<BufferType<CharT>>(); + } + + MOZ_ALWAYS_INLINE bool isLatin1() const { return isCharType<Latin1Char>(); } + + MOZ_ALWAYS_INLINE bool isTwoByte() const { return isCharType<char16_t>(); } + + template <typename CharT> + MOZ_ALWAYS_INLINE BufferType<CharT>& chars() { + MOZ_ASSERT(isCharType<CharT>()); + return cb.ref<BufferType<CharT>>(); + } + + template <typename CharT> + MOZ_ALWAYS_INLINE const BufferType<CharT>& chars() const { + MOZ_ASSERT(isCharType<CharT>()); + return cb.ref<BufferType<CharT>>(); + } + + MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() { + return chars<char16_t>(); + } + + MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const { + return chars<char16_t>(); + } + + MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { + return chars<Latin1Char>(); + } + + MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const { + return chars<Latin1Char>(); + } + + [[nodiscard]] bool inflateChars(); + + template <typename CharT> + JSLinearString* finishStringInternal(JSContext* cx, gc::Heap heap); + + public: + explicit StringBuffer(JSContext* cx, + const arena_id_t& arenaId = js::MallocArena) + : maybeCx_(cx) { + MOZ_ASSERT(cx); + cb.construct<Latin1CharBuffer>(StringBufferAllocPolicy{cx, arenaId}); + } + + // This constructor should only be used if the methods related to the + // following are not used, because they require a JSContext: + // * JSString + // * JSAtom + // * mozilla::Utf8Unit + explicit StringBuffer(FrontendContext* fc, + const arena_id_t& arenaId = js::MallocArena) { + MOZ_ASSERT(fc); + cb.construct<Latin1CharBuffer>(StringBufferAllocPolicy{fc, arenaId}); + } + + void clear() { + if (isLatin1()) { + latin1Chars().clear(); + } else { + twoByteChars().clear(); + } + } + [[nodiscard]] bool reserve(size_t len) { + if (len > reserved_) { + reserved_ = len; + } + return isLatin1() ? latin1Chars().reserve(len) + : twoByteChars().reserve(len); + } + [[nodiscard]] bool resize(size_t len) { + return isLatin1() ? latin1Chars().resize(len) : twoByteChars().resize(len); + } + [[nodiscard]] bool growByUninitialized(size_t incr) { + return isLatin1() ? latin1Chars().growByUninitialized(incr) + : twoByteChars().growByUninitialized(incr); + } + void shrinkTo(size_t newLength) { + return isLatin1() ? latin1Chars().shrinkTo(newLength) + : twoByteChars().shrinkTo(newLength); + } + bool empty() const { + return isLatin1() ? latin1Chars().empty() : twoByteChars().empty(); + } + size_t length() const { + return isLatin1() ? latin1Chars().length() : twoByteChars().length(); + } + char16_t getChar(size_t idx) const { + return isLatin1() ? latin1Chars()[idx] : twoByteChars()[idx]; + } + + [[nodiscard]] bool ensureTwoByteChars() { + return isTwoByte() || inflateChars(); + } + + [[nodiscard]] bool append(const char16_t c) { + if (isLatin1()) { + if (c <= JSString::MAX_LATIN1_CHAR) { + return latin1Chars().append(Latin1Char(c)); + } + if (!inflateChars()) { + return false; + } + } + return twoByteChars().append(c); + } + [[nodiscard]] bool append(Latin1Char c) { + return isLatin1() ? latin1Chars().append(c) : twoByteChars().append(c); + } + [[nodiscard]] bool append(char c) { return append(Latin1Char(c)); } + + [[nodiscard]] inline bool append(const char16_t* begin, const char16_t* end); + + [[nodiscard]] bool append(const char16_t* chars, size_t len) { + return append(chars, chars + len); + } + + [[nodiscard]] bool append(const Latin1Char* begin, const Latin1Char* end) { + return isLatin1() ? latin1Chars().append(begin, end) + : twoByteChars().append(begin, end); + } + [[nodiscard]] bool append(const Latin1Char* chars, size_t len) { + return append(chars, chars + len); + } + + /** + * Interpret the provided count of UTF-8 code units as UTF-8, and append + * the represented code points to this. If the code units contain invalid + * UTF-8, leave the internal buffer in a consistent but unspecified state, + * report an error, and return false. + */ + [[nodiscard]] bool append(const mozilla::Utf8Unit* units, size_t len); + + [[nodiscard]] bool append(const JS::ConstCharPtr chars, size_t len) { + return append(chars.get(), chars.get() + len); + } + [[nodiscard]] bool appendN(Latin1Char c, size_t n) { + return isLatin1() ? latin1Chars().appendN(c, n) + : twoByteChars().appendN(c, n); + } + + [[nodiscard]] inline bool append(JSString* str); + [[nodiscard]] inline bool append(JSLinearString* str); + [[nodiscard]] inline bool appendSubstring(JSString* base, size_t off, + size_t len); + [[nodiscard]] inline bool appendSubstring(JSLinearString* base, size_t off, + size_t len); + [[nodiscard]] bool append(const frontend::ParserAtomsTable& parserAtoms, + frontend::TaggedParserAtomIndex atom); + + [[nodiscard]] bool append(const char* chars, size_t len) { + return append(reinterpret_cast<const Latin1Char*>(chars), len); + } + + template <size_t ArrayLength> + [[nodiscard]] bool append(const char (&array)[ArrayLength]) { + return append(array, ArrayLength - 1); /* No trailing '\0'. */ + } + + /* Infallible variants usable when the corresponding space is reserved. */ + void infallibleAppend(Latin1Char c) { + if (isLatin1()) { + latin1Chars().infallibleAppend(c); + } else { + twoByteChars().infallibleAppend(c); + } + } + void infallibleAppend(char c) { infallibleAppend(Latin1Char(c)); } + void infallibleAppend(const Latin1Char* chars, size_t len) { + if (isLatin1()) { + latin1Chars().infallibleAppend(chars, len); + } else { + twoByteChars().infallibleAppend(chars, len); + } + } + void infallibleAppend(const char* chars, size_t len) { + infallibleAppend(reinterpret_cast<const Latin1Char*>(chars), len); + } + + void infallibleAppendSubstring(JSLinearString* base, size_t off, size_t len); + + /* + * Because inflation is fallible, these methods should only be used after + * calling ensureTwoByteChars(). + */ + void infallibleAppend(const char16_t* chars, size_t len) { + twoByteChars().infallibleAppend(chars, len); + } + void infallibleAppend(char16_t c) { twoByteChars().infallibleAppend(c); } + + bool isUnderlyingBufferLatin1() const { return isLatin1(); } + + template <typename CharT> + CharT* begin() { + return chars<CharT>().begin(); + } + + template <typename CharT> + CharT* end() { + return chars<CharT>().end(); + } + + template <typename CharT> + const CharT* begin() const { + return chars<CharT>().begin(); + } + + template <typename CharT> + const CharT* end() const { + return chars<CharT>().end(); + } + + char16_t* rawTwoByteBegin() { return begin<char16_t>(); } + char16_t* rawTwoByteEnd() { return end<char16_t>(); } + const char16_t* rawTwoByteBegin() const { return begin<char16_t>(); } + const char16_t* rawTwoByteEnd() const { return end<char16_t>(); } + + Latin1Char* rawLatin1Begin() { return begin<Latin1Char>(); } + Latin1Char* rawLatin1End() { return end<Latin1Char>(); } + const Latin1Char* rawLatin1Begin() const { return begin<Latin1Char>(); } + const Latin1Char* rawLatin1End() const { return end<Latin1Char>(); } + + /* Identical to finishString() except that an atom is created. */ + JSAtom* finishAtom(); + frontend::TaggedParserAtomIndex finishParserAtom( + frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc); + + /* + * Creates a raw string from the characters in this buffer. The string is + * exactly the characters in this buffer (inflated to TwoByte), it is *not* + * null-terminated unless the last appended character was '\0'. + */ + char16_t* stealChars(); +}; + +// Like StringBuffer, but uses StringBufferArena for the characters. +class JSStringBuilder : public StringBuffer { + public: + explicit JSStringBuilder(JSContext* cx) + : StringBuffer(cx, js::StringBufferArena) {} + + /* + * Creates a string from the characters in this buffer, then (regardless + * whether string creation succeeded or failed) empties the buffer. + * + * Returns nullptr if string creation failed. + */ + JSLinearString* finishString(gc::Heap heap = gc::Heap::Default); +}; + +inline bool StringBuffer::append(const char16_t* begin, const char16_t* end) { + MOZ_ASSERT(begin <= end); + if (isLatin1()) { + while (true) { + if (begin >= end) { + return true; + } + if (*begin > JSString::MAX_LATIN1_CHAR) { + break; + } + if (!latin1Chars().append(*begin)) { + return false; + } + ++begin; + } + if (!inflateChars()) { + return false; + } + } + return twoByteChars().append(begin, end); +} + +inline bool StringBuffer::append(JSLinearString* str) { + JS::AutoCheckCannotGC nogc; + if (isLatin1()) { + if (str->hasLatin1Chars()) { + return latin1Chars().append(str->latin1Chars(nogc), str->length()); + } + if (!inflateChars()) { + return false; + } + } + return str->hasLatin1Chars() + ? twoByteChars().append(str->latin1Chars(nogc), str->length()) + : twoByteChars().append(str->twoByteChars(nogc), str->length()); +} + +inline void StringBuffer::infallibleAppendSubstring(JSLinearString* base, + size_t off, size_t len) { + MOZ_ASSERT(off + len <= base->length()); + MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte()); + + JS::AutoCheckCannotGC nogc; + if (base->hasLatin1Chars()) { + infallibleAppend(base->latin1Chars(nogc) + off, len); + } else { + infallibleAppend(base->twoByteChars(nogc) + off, len); + } +} + +inline bool StringBuffer::appendSubstring(JSLinearString* base, size_t off, + size_t len) { + MOZ_ASSERT(off + len <= base->length()); + + JS::AutoCheckCannotGC nogc; + if (isLatin1()) { + if (base->hasLatin1Chars()) { + return latin1Chars().append(base->latin1Chars(nogc) + off, len); + } + if (!inflateChars()) { + return false; + } + } + return base->hasLatin1Chars() + ? twoByteChars().append(base->latin1Chars(nogc) + off, len) + : twoByteChars().append(base->twoByteChars(nogc) + off, len); +} + +inline bool StringBuffer::appendSubstring(JSString* base, size_t off, + size_t len) { + MOZ_ASSERT(maybeCx_); + + JSLinearString* linear = base->ensureLinear(maybeCx_); + if (!linear) { + return false; + } + + return appendSubstring(linear, off, len); +} + +inline bool StringBuffer::append(JSString* str) { + MOZ_ASSERT(maybeCx_); + + JSLinearString* linear = str->ensureLinear(maybeCx_); + if (!linear) { + return false; + } + + return append(linear); +} + +/* ES5 9.8 ToString, appending the result to the string buffer. */ +extern bool ValueToStringBufferSlow(JSContext* cx, const Value& v, + StringBuffer& sb); + +inline bool ValueToStringBuffer(JSContext* cx, const Value& v, + StringBuffer& sb) { + if (v.isString()) { + return sb.append(v.toString()); + } + + return ValueToStringBufferSlow(cx, v, sb); +} + +/* ES5 9.8 ToString for booleans, appending the result to the string buffer. */ +inline bool BooleanToStringBuffer(bool b, StringBuffer& sb) { + return b ? sb.append("true") : sb.append("false"); +} + +} /* namespace js */ + +#endif /* util_StringBuffer_h */ |