diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /js/src/vm/SharedImmutableStringsCache.h | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/vm/SharedImmutableStringsCache.h')
-rw-r--r-- | js/src/vm/SharedImmutableStringsCache.h | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/js/src/vm/SharedImmutableStringsCache.h b/js/src/vm/SharedImmutableStringsCache.h new file mode 100644 index 0000000000..a10ff416a4 --- /dev/null +++ b/js/src/vm/SharedImmutableStringsCache.h @@ -0,0 +1,425 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef vm_SharedImmutableStringsCache_h +#define vm_SharedImmutableStringsCache_h + +#include "mozilla/Maybe.h" +#include "mozilla/UniquePtr.h" + +#include <cstring> +#include <new> // for placement new +#include <utility> // std::move + +#include "js/AllocPolicy.h" +#include "js/HashTable.h" +#include "js/UniquePtr.h" +#include "js/Utility.h" + +#include "threading/ExclusiveData.h" + +namespace js { + +class SharedImmutableString; +class SharedImmutableTwoByteString; + +/** + * The `SharedImmutableStringsCache` allows safely sharing and deduplicating + * immutable strings (either `const char*` [any encoding, not restricted to + * only Latin-1 or only UTF-8] or `const char16_t*`) between threads. + * + * The locking mechanism is dead-simple and coarse grained: a single lock guards + * all of the internal table itself, the table's entries, and the entries' + * reference counts. It is only safe to perform any mutation on the cache or any + * data stored within the cache when this lock is acquired. + */ +class SharedImmutableStringsCache { + static SharedImmutableStringsCache singleton_; + + friend class SharedImmutableString; + friend class SharedImmutableTwoByteString; + struct Hasher; + + public: + using OwnedChars = JS::UniqueChars; + using OwnedTwoByteChars = JS::UniqueTwoByteChars; + + /** + * Get the canonical, shared, and de-duplicated version of the given `const + * char*` string. If such a string does not exist, call `intoOwnedChars` and + * add the string it returns to the cache. + * + * `intoOwnedChars` must create an owned version of the given string, and + * must have one of the following types: + * + * JS::UniqueChars intoOwnedChars(); + * JS::UniqueChars&& intoOwnedChars(); + * + * It can be used by callers to elide a copy of the string when it is safe + * to give up ownership of the lookup string to the cache. It must return a + * `nullptr` on failure. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + template <typename IntoOwnedChars> + [[nodiscard]] SharedImmutableString getOrCreate( + const char* chars, size_t length, IntoOwnedChars intoOwnedChars); + + /** + * Take ownership of the given `chars` and return the canonical, shared and + * de-duplicated version. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars, + size_t length); + + /** + * Do not take ownership of the given `chars`. Return the canonical, shared + * and de-duplicated version. If there is no extant shared version of + * `chars`, make a copy and insert it into the cache. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + [[nodiscard]] SharedImmutableString getOrCreate(const char* chars, + size_t length); + + /** + * Get the canonical, shared, and de-duplicated version of the given `const + * char16_t*` string. If such a string does not exist, call `intoOwnedChars` + * and add the string it returns to the cache. + * + * `intoOwnedTwoByteChars` must create an owned version of the given string, + * and must have one of the following types: + * + * JS::UniqueTwoByteChars intoOwnedTwoByteChars(); + * JS::UniqueTwoByteChars&& intoOwnedTwoByteChars(); + * + * It can be used by callers to elide a copy of the string when it is safe + * to give up ownership of the lookup string to the cache. It must return a + * `nullptr` on failure. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + template <typename IntoOwnedTwoByteChars> + [[nodiscard]] SharedImmutableTwoByteString getOrCreate( + const char16_t* chars, size_t length, + IntoOwnedTwoByteChars intoOwnedTwoByteChars); + + /** + * Take ownership of the given `chars` and return the canonical, shared and + * de-duplicated version. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + [[nodiscard]] SharedImmutableTwoByteString getOrCreate( + OwnedTwoByteChars&& chars, size_t length); + + /** + * Do not take ownership of the given `chars`. Return the canonical, shared + * and de-duplicated version. If there is no extant shared version of + * `chars`, then make a copy and insert it into the cache. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars, + size_t length); + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const { + MOZ_ASSERT(inner_); + size_t n = mallocSizeOf(inner_); + + auto locked = inner_->lock(); + + // Size of the table. + n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf); + + // Sizes of the strings and their boxes. + for (auto r = locked->set.all(); !r.empty(); r.popFront()) { + n += mallocSizeOf(r.front().get()); + if (const char* chars = r.front()->chars()) { + n += mallocSizeOf(chars); + } + } + + return n; + } + + private: + bool init(); + void free(); + + public: + static bool initSingleton(); + static void freeSingleton(); + + static SharedImmutableStringsCache& getSingleton() { + MOZ_ASSERT(singleton_.inner_); + return singleton_; + } + + private: + SharedImmutableStringsCache() = default; + ~SharedImmutableStringsCache() = default; + + public: + SharedImmutableStringsCache(const SharedImmutableStringsCache& rhs) = delete; + SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) = delete; + + SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) = + delete; + + SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = + delete; + + /** + * Purge the cache of all refcount == 0 entries. + */ + void purge() { + auto locked = inner_->lock(); + + for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) { + if (e.front()->refcount == 0) { + // The chars should be eagerly freed when refcount reaches zero. + MOZ_ASSERT(!e.front()->chars()); + e.removeFront(); + } else { + // The chars should exist as long as the refcount is non-zero. + MOZ_ASSERT(e.front()->chars()); + } + } + } + + private: + struct Inner; + class StringBox { + friend class SharedImmutableString; + + OwnedChars chars_; + size_t length_; + const ExclusiveData<Inner>* cache_; + + public: + mutable size_t refcount; + + using Ptr = js::UniquePtr<StringBox>; + + StringBox(OwnedChars&& chars, size_t length, + const ExclusiveData<Inner>* cache) + : chars_(std::move(chars)), + length_(length), + cache_(cache), + refcount(0) { + MOZ_ASSERT(chars_); + } + + static Ptr Create(OwnedChars&& chars, size_t length, + const ExclusiveData<Inner>* cache) { + return js::MakeUnique<StringBox>(std::move(chars), length, cache); + } + + StringBox(const StringBox&) = delete; + StringBox& operator=(const StringBox&) = delete; + + ~StringBox() { + MOZ_RELEASE_ASSERT( + refcount == 0, + "There are `SharedImmutable[TwoByte]String`s outliving their " + "associated cache! This always leads to use-after-free in the " + "`~SharedImmutableString` destructor!"); + } + + const char* chars() const { return chars_.get(); } + size_t length() const { return length_; } + }; + + struct Hasher { + /** + * A structure used when querying for a `const char*` string in the cache. + */ + class Lookup { + friend struct Hasher; + + HashNumber hash_; + const char* chars_; + size_t length_; + + public: + Lookup(HashNumber hash, const char* chars, size_t length) + : hash_(hash), chars_(chars), length_(length) { + MOZ_ASSERT(chars_); + MOZ_ASSERT(hash == Hasher::hashLongString(chars, length)); + } + + Lookup(HashNumber hash, const char16_t* chars, size_t length) + : Lookup(hash, reinterpret_cast<const char*>(chars), + length * sizeof(char16_t)) {} + }; + + static const size_t SHORT_STRING_MAX_LENGTH = 8192; + static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2; + + // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the + // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the + // string. This increases the risk of collisions, but in practice it + // should be rare, and it yields a large speedup for hashing long + // strings. + static HashNumber hashLongString(const char* chars, size_t length) { + MOZ_ASSERT(chars); + return length <= SHORT_STRING_MAX_LENGTH + ? mozilla::HashString(chars, length) + : mozilla::AddToHash( + mozilla::HashString(chars, HASH_CHUNK_LENGTH), + mozilla::HashString(chars + length - HASH_CHUNK_LENGTH, + HASH_CHUNK_LENGTH)); + } + + static HashNumber hash(const Lookup& lookup) { return lookup.hash_; } + + static bool match(const StringBox::Ptr& key, const Lookup& lookup) { + MOZ_ASSERT(lookup.chars_); + + if (!key->chars() || key->length() != lookup.length_) { + return false; + } + + if (key->chars() == lookup.chars_) { + return true; + } + + return memcmp(key->chars(), lookup.chars_, key->length()) == 0; + } + }; + + // The `Inner` struct contains the actual cached contents and shared between + // the `SharedImmutableStringsCache` singleton and all + // `SharedImmutable[TwoByte]String` holders. + struct Inner { + using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>; + + Set set; + + Inner() = default; + + Inner(const Inner&) = delete; + Inner& operator=(const Inner&) = delete; + }; + + const ExclusiveData<Inner>* inner_ = nullptr; +}; + +/** + * The `SharedImmutableString` class holds a reference to a `const char*` string + * from the `SharedImmutableStringsCache` and releases the reference upon + * destruction. + */ +class SharedImmutableString { + friend class SharedImmutableStringsCache; + friend class SharedImmutableTwoByteString; + + mutable SharedImmutableStringsCache::StringBox* box_; + + explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box); + + public: + /** + * `SharedImmutableString`s are move-able. It is an error to use a + * `SharedImmutableString` after it has been moved. + */ + SharedImmutableString(SharedImmutableString&& rhs); + SharedImmutableString& operator=(SharedImmutableString&& rhs); + SharedImmutableString() { box_ = nullptr; } + + /** + * Create another shared reference to the underlying string. + */ + SharedImmutableString clone() const; + + // If you want a copy, take one explicitly with `clone`! + SharedImmutableString& operator=(const SharedImmutableString&) = delete; + explicit operator bool() const { return box_ != nullptr; } + + ~SharedImmutableString(); + + /** + * Get a raw pointer to the underlying string. It is only safe to use the + * resulting pointer while this `SharedImmutableString` exists. + */ + const char* chars() const { + MOZ_ASSERT(box_); + MOZ_ASSERT(box_->refcount > 0); + MOZ_ASSERT(box_->chars()); + return box_->chars(); + } + + /** + * Get the length of the underlying string. + */ + size_t length() const { + MOZ_ASSERT(box_); + MOZ_ASSERT(box_->refcount > 0); + MOZ_ASSERT(box_->chars()); + return box_->length(); + } +}; + +/** + * The `SharedImmutableTwoByteString` class holds a reference to a `const + * char16_t*` string from the `SharedImmutableStringsCache` and releases the + * reference upon destruction. + */ +class SharedImmutableTwoByteString { + friend class SharedImmutableStringsCache; + + // If a `char*` string and `char16_t*` string happen to have the same bytes, + // the bytes will be shared but handed out as different types. + SharedImmutableString string_; + + explicit SharedImmutableTwoByteString(SharedImmutableString&& string); + explicit SharedImmutableTwoByteString( + SharedImmutableStringsCache::StringBox* box); + + public: + /** + * `SharedImmutableTwoByteString`s are move-able. It is an error to use a + * `SharedImmutableTwoByteString` after it has been moved. + */ + SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs); + SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs); + SharedImmutableTwoByteString() { string_.box_ = nullptr; } + + /** + * Create another shared reference to the underlying string. + */ + SharedImmutableTwoByteString clone() const; + + // If you want a copy, take one explicitly with `clone`! + SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = + delete; + explicit operator bool() const { return string_.box_ != nullptr; } + /** + * Get a raw pointer to the underlying string. It is only safe to use the + * resulting pointer while this `SharedImmutableTwoByteString` exists. + */ + const char16_t* chars() const { + return reinterpret_cast<const char16_t*>(string_.chars()); + } + + /** + * Get the length of the underlying string. + */ + size_t length() const { return string_.length() / sizeof(char16_t); } +}; + +} // namespace js + +#endif // vm_SharedImmutableStringsCache_h |