diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /js/src/vm/SharedImmutableStringsCache.h | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/vm/SharedImmutableStringsCache.h')
-rw-r--r-- | js/src/vm/SharedImmutableStringsCache.h | 458 |
1 files changed, 458 insertions, 0 deletions
diff --git a/js/src/vm/SharedImmutableStringsCache.h b/js/src/vm/SharedImmutableStringsCache.h new file mode 100644 index 0000000000..5cc536971c --- /dev/null +++ b/js/src/vm/SharedImmutableStringsCache.h @@ -0,0 +1,458 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef vm_SharedImmutableStringsCache_h +#define vm_SharedImmutableStringsCache_h + +#include "mozilla/Maybe.h" +#include "mozilla/UniquePtr.h" + +#include <cstring> +#include <new> // for placement new +#include <utility> // std::move + +#include "js/AllocPolicy.h" +#include "js/HashTable.h" +#include "js/UniquePtr.h" +#include "js/Utility.h" + +#include "threading/ExclusiveData.h" + +#include "vm/MutexIDs.h" + +namespace js { + +class SharedImmutableString; +class SharedImmutableTwoByteString; + +/** + * The `SharedImmutableStringsCache` allows safely sharing and deduplicating + * immutable strings (either `const char*` [any encoding, not restricted to + * only Latin-1 or only UTF-8] or `const char16_t*`) between threads. + * + * The locking mechanism is dead-simple and coarse grained: a single lock guards + * all of the internal table itself, the table's entries, and the entries' + * reference counts. It is only safe to perform any mutation on the cache or any + * data stored within the cache when this lock is acquired. + */ +class SharedImmutableStringsCache { + friend class SharedImmutableString; + friend class SharedImmutableTwoByteString; + struct Hasher; + + public: + using OwnedChars = JS::UniqueChars; + using OwnedTwoByteChars = JS::UniqueTwoByteChars; + + /** + * Get the canonical, shared, and de-duplicated version of the given `const + * char*` string. If such a string does not exist, call `intoOwnedChars` and + * add the string it returns to the cache. + * + * `intoOwnedChars` must create an owned version of the given string, and + * must have one of the following types: + * + * JS::UniqueChars intoOwnedChars(); + * JS::UniqueChars&& intoOwnedChars(); + * + * It can be used by callers to elide a copy of the string when it is safe + * to give up ownership of the lookup string to the cache. It must return a + * `nullptr` on failure. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + template <typename IntoOwnedChars> + MOZ_MUST_USE mozilla::Maybe<SharedImmutableString> getOrCreate( + const char* chars, size_t length, IntoOwnedChars intoOwnedChars); + + /** + * Take ownership of the given `chars` and return the canonical, shared and + * de-duplicated version. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + MOZ_MUST_USE mozilla::Maybe<SharedImmutableString> getOrCreate( + OwnedChars&& chars, size_t length); + + /** + * Do not take ownership of the given `chars`. Return the canonical, shared + * and de-duplicated version. If there is no extant shared version of + * `chars`, make a copy and insert it into the cache. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + MOZ_MUST_USE mozilla::Maybe<SharedImmutableString> getOrCreate( + const char* chars, size_t length); + + /** + * Get the canonical, shared, and de-duplicated version of the given `const + * char16_t*` string. If such a string does not exist, call `intoOwnedChars` + * and add the string it returns to the cache. + * + * `intoOwnedTwoByteChars` must create an owned version of the given string, + * and must have one of the following types: + * + * JS::UniqueTwoByteChars intoOwnedTwoByteChars(); + * JS::UniqueTwoByteChars&& intoOwnedTwoByteChars(); + * + * It can be used by callers to elide a copy of the string when it is safe + * to give up ownership of the lookup string to the cache. It must return a + * `nullptr` on failure. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + template <typename IntoOwnedTwoByteChars> + MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString> getOrCreate( + const char16_t* chars, size_t length, + IntoOwnedTwoByteChars intoOwnedTwoByteChars); + + /** + * Take ownership of the given `chars` and return the canonical, shared and + * de-duplicated version. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString> getOrCreate( + OwnedTwoByteChars&& chars, size_t length); + + /** + * Do not take ownership of the given `chars`. Return the canonical, shared + * and de-duplicated version. If there is no extant shared version of + * `chars`, then make a copy and insert it into the cache. + * + * On success, `Some` is returned. In the case of OOM failure, `Nothing` is + * returned. + */ + MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString> getOrCreate( + const char16_t* chars, size_t length); + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const { + MOZ_ASSERT(inner_); + size_t n = mallocSizeOf(inner_); + + auto locked = inner_->lock(); + + // Size of the table. + n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf); + + // Sizes of the strings and their boxes. + for (auto r = locked->set.all(); !r.empty(); r.popFront()) { + n += mallocSizeOf(r.front().get()); + if (const char* chars = r.front()->chars()) { + n += mallocSizeOf(chars); + } + } + + return n; + } + + /** + * Construct a new cache of shared, immutable strings. Returns + * `mozilla::Nothing` on out of memory failure. + */ + static mozilla::Maybe<SharedImmutableStringsCache> Create() { + auto inner = + js_new<ExclusiveData<Inner>>(mutexid::SharedImmutableStringsCache); + if (!inner) { + return mozilla::Nothing(); + } + + auto locked = inner->lock(); + return mozilla::Some(SharedImmutableStringsCache(locked)); + } + + SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) + : inner_(rhs.inner_) { + MOZ_ASSERT(inner_); + rhs.inner_ = nullptr; + } + + SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) { + MOZ_ASSERT(this != &rhs, "self move not allowed"); + new (this) SharedImmutableStringsCache(std::move(rhs)); + return *this; + } + + SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = + delete; + + SharedImmutableStringsCache clone() { + MOZ_ASSERT(inner_); + auto locked = inner_->lock(); + return SharedImmutableStringsCache(locked); + } + + ~SharedImmutableStringsCache() { + if (!inner_) { + return; + } + + bool shouldDestroy = false; + { + // ~ExclusiveData takes the lock, so be sure to drop the lock before + // attempting to destroy the inner. + auto locked = inner_->lock(); + MOZ_ASSERT(locked->refcount > 0); + locked->refcount--; + if (locked->refcount == 0) { + shouldDestroy = true; + } + } + if (shouldDestroy) { + js_delete(inner_); + } + } + + /** + * Purge the cache of all refcount == 0 entries. + */ + void purge() { + auto locked = inner_->lock(); + MOZ_ASSERT(locked->refcount > 0); + + for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) { + if (e.front()->refcount == 0) { + // The chars should be eagerly freed when refcount reaches zero. + MOZ_ASSERT(!e.front()->chars()); + e.removeFront(); + } else { + // The chars should exist as long as the refcount is non-zero. + MOZ_ASSERT(e.front()->chars()); + } + } + } + + private: + class StringBox { + friend class SharedImmutableString; + + OwnedChars chars_; + size_t length_; + + public: + mutable size_t refcount; + + using Ptr = js::UniquePtr<StringBox>; + + StringBox(OwnedChars&& chars, size_t length) + : chars_(std::move(chars)), length_(length), refcount(0) { + MOZ_ASSERT(chars_); + } + + static Ptr Create(OwnedChars&& chars, size_t length) { + return js::MakeUnique<StringBox>(std::move(chars), length); + } + + StringBox(const StringBox&) = delete; + StringBox& operator=(const StringBox&) = delete; + + ~StringBox() { + MOZ_RELEASE_ASSERT( + refcount == 0, + "There are `SharedImmutable[TwoByte]String`s outliving their " + "associated cache! This always leads to use-after-free in the " + "`~SharedImmutableString` destructor!"); + } + + const char* chars() const { return chars_.get(); } + size_t length() const { return length_; } + }; + + struct Hasher { + /** + * A structure used when querying for a `const char*` string in the cache. + */ + class Lookup { + friend struct Hasher; + + HashNumber hash_; + const char* chars_; + size_t length_; + + public: + Lookup(HashNumber hash, const char* chars, size_t length) + : hash_(hash), chars_(chars), length_(length) { + MOZ_ASSERT(chars_); + MOZ_ASSERT(hash == Hasher::hashLongString(chars, length)); + } + + Lookup(HashNumber hash, const char16_t* chars, size_t length) + : Lookup(hash, reinterpret_cast<const char*>(chars), + length * sizeof(char16_t)) {} + }; + + static const size_t SHORT_STRING_MAX_LENGTH = 8192; + static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2; + + // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the + // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the + // string. This increases the risk of collisions, but in practice it + // should be rare, and it yields a large speedup for hashing long + // strings. + static HashNumber hashLongString(const char* chars, size_t length) { + MOZ_ASSERT(chars); + return length <= SHORT_STRING_MAX_LENGTH + ? mozilla::HashString(chars, length) + : mozilla::AddToHash( + mozilla::HashString(chars, HASH_CHUNK_LENGTH), + mozilla::HashString(chars + length - HASH_CHUNK_LENGTH, + HASH_CHUNK_LENGTH)); + } + + static HashNumber hash(const Lookup& lookup) { return lookup.hash_; } + + static bool match(const StringBox::Ptr& key, const Lookup& lookup) { + MOZ_ASSERT(lookup.chars_); + + if (!key->chars() || key->length() != lookup.length_) { + return false; + } + + if (key->chars() == lookup.chars_) { + return true; + } + + return memcmp(key->chars(), lookup.chars_, key->length()) == 0; + } + }; + + // The `Inner` struct contains the actual cached contents, and is reference + // counted and shared between all `SharedImmutableStringsCache` and + // `SharedImmutable[TwoByte]String` holders. + struct Inner { + using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>; + + size_t refcount; + Set set; + + Inner() : refcount(0), set() {} + + Inner(const Inner&) = delete; + Inner& operator=(const Inner&) = delete; + + ~Inner() { MOZ_ASSERT(refcount == 0); } + }; + + const ExclusiveData<Inner>* inner_; + + explicit SharedImmutableStringsCache(ExclusiveData<Inner>::Guard& locked) + : inner_(locked.parent()) { + locked->refcount++; + } +}; + +/** + * The `SharedImmutableString` class holds a reference to a `const char*` string + * from the `SharedImmutableStringsCache` and releases the reference upon + * destruction. + */ +class SharedImmutableString { + friend class SharedImmutableStringsCache; + friend class SharedImmutableTwoByteString; + + mutable SharedImmutableStringsCache cache_; + mutable SharedImmutableStringsCache::StringBox* box_; + + SharedImmutableString( + ExclusiveData<SharedImmutableStringsCache::Inner>::Guard& locked, + SharedImmutableStringsCache::StringBox* box); + + public: + /** + * `SharedImmutableString`s are move-able. It is an error to use a + * `SharedImmutableString` after it has been moved. + */ + SharedImmutableString(SharedImmutableString&& rhs); + SharedImmutableString& operator=(SharedImmutableString&& rhs); + + /** + * Create another shared reference to the underlying string. + */ + SharedImmutableString clone() const; + + // If you want a copy, take one explicitly with `clone`! + SharedImmutableString& operator=(const SharedImmutableString&) = delete; + + ~SharedImmutableString(); + + /** + * Get a raw pointer to the underlying string. It is only safe to use the + * resulting pointer while this `SharedImmutableString` exists. + */ + const char* chars() const { + MOZ_ASSERT(box_); + MOZ_ASSERT(box_->refcount > 0); + MOZ_ASSERT(box_->chars()); + return box_->chars(); + } + + /** + * Get the length of the underlying string. + */ + size_t length() const { + MOZ_ASSERT(box_); + MOZ_ASSERT(box_->refcount > 0); + MOZ_ASSERT(box_->chars()); + return box_->length(); + } +}; + +/** + * The `SharedImmutableTwoByteString` class holds a reference to a `const + * char16_t*` string from the `SharedImmutableStringsCache` and releases the + * reference upon destruction. + */ +class SharedImmutableTwoByteString { + friend class SharedImmutableStringsCache; + + // If a `char*` string and `char16_t*` string happen to have the same bytes, + // the bytes will be shared but handed out as different types. + SharedImmutableString string_; + + explicit SharedImmutableTwoByteString(SharedImmutableString&& string); + SharedImmutableTwoByteString( + ExclusiveData<SharedImmutableStringsCache::Inner>::Guard& locked, + SharedImmutableStringsCache::StringBox* box); + + public: + /** + * `SharedImmutableTwoByteString`s are move-able. It is an error to use a + * `SharedImmutableTwoByteString` after it has been moved. + */ + SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs); + SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs); + + /** + * Create another shared reference to the underlying string. + */ + SharedImmutableTwoByteString clone() const; + + // If you want a copy, take one explicitly with `clone`! + SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = + delete; + + /** + * Get a raw pointer to the underlying string. It is only safe to use the + * resulting pointer while this `SharedImmutableTwoByteString` exists. + */ + const char16_t* chars() const { + return reinterpret_cast<const char16_t*>(string_.chars()); + } + + /** + * Get the length of the underlying string. + */ + size_t length() const { return string_.length() / sizeof(char16_t); } +}; + +} // namespace js + +#endif // vm_SharedImmutableStringsCache_h |