/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef vm_SharedImmutableStringsCache_h #define vm_SharedImmutableStringsCache_h #include "mozilla/Maybe.h" #include "mozilla/UniquePtr.h" #include #include // for placement new #include // std::move #include "js/AllocPolicy.h" #include "js/HashTable.h" #include "js/UniquePtr.h" #include "js/Utility.h" #include "threading/ExclusiveData.h" namespace js { class SharedImmutableString; class SharedImmutableTwoByteString; /** * The `SharedImmutableStringsCache` allows safely sharing and deduplicating * immutable strings (either `const char*` [any encoding, not restricted to * only Latin-1 or only UTF-8] or `const char16_t*`) between threads. * * The locking mechanism is dead-simple and coarse grained: a single lock guards * all of the internal table itself, the table's entries, and the entries' * reference counts. It is only safe to perform any mutation on the cache or any * data stored within the cache when this lock is acquired. */ class SharedImmutableStringsCache { static SharedImmutableStringsCache singleton_; friend class SharedImmutableString; friend class SharedImmutableTwoByteString; struct Hasher; public: using OwnedChars = JS::UniqueChars; using OwnedTwoByteChars = JS::UniqueTwoByteChars; /** * Get the canonical, shared, and de-duplicated version of the given `const * char*` string. If such a string does not exist, call `intoOwnedChars` and * add the string it returns to the cache. * * `intoOwnedChars` must create an owned version of the given string, and * must have one of the following types: * * JS::UniqueChars intoOwnedChars(); * JS::UniqueChars&& intoOwnedChars(); * * It can be used by callers to elide a copy of the string when it is safe * to give up ownership of the lookup string to the cache. It must return a * `nullptr` on failure. * * On success, `Some` is returned. In the case of OOM failure, `Nothing` is * returned. */ template [[nodiscard]] SharedImmutableString getOrCreate( const char* chars, size_t length, IntoOwnedChars intoOwnedChars); /** * Take ownership of the given `chars` and return the canonical, shared and * de-duplicated version. * * On success, `Some` is returned. In the case of OOM failure, `Nothing` is * returned. */ [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars, size_t length); /** * Do not take ownership of the given `chars`. Return the canonical, shared * and de-duplicated version. If there is no extant shared version of * `chars`, make a copy and insert it into the cache. * * On success, `Some` is returned. In the case of OOM failure, `Nothing` is * returned. */ [[nodiscard]] SharedImmutableString getOrCreate(const char* chars, size_t length); /** * Get the canonical, shared, and de-duplicated version of the given `const * char16_t*` string. If such a string does not exist, call `intoOwnedChars` * and add the string it returns to the cache. * * `intoOwnedTwoByteChars` must create an owned version of the given string, * and must have one of the following types: * * JS::UniqueTwoByteChars intoOwnedTwoByteChars(); * JS::UniqueTwoByteChars&& intoOwnedTwoByteChars(); * * It can be used by callers to elide a copy of the string when it is safe * to give up ownership of the lookup string to the cache. It must return a * `nullptr` on failure. * * On success, `Some` is returned. In the case of OOM failure, `Nothing` is * returned. */ template [[nodiscard]] SharedImmutableTwoByteString getOrCreate( const char16_t* chars, size_t length, IntoOwnedTwoByteChars intoOwnedTwoByteChars); /** * Take ownership of the given `chars` and return the canonical, shared and * de-duplicated version. * * On success, `Some` is returned. In the case of OOM failure, `Nothing` is * returned. */ [[nodiscard]] SharedImmutableTwoByteString getOrCreate( OwnedTwoByteChars&& chars, size_t length); /** * Do not take ownership of the given `chars`. Return the canonical, shared * and de-duplicated version. If there is no extant shared version of * `chars`, then make a copy and insert it into the cache. * * On success, `Some` is returned. In the case of OOM failure, `Nothing` is * returned. */ [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars, size_t length); size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const { MOZ_ASSERT(inner_); size_t n = mallocSizeOf(inner_); auto locked = inner_->lock(); // Size of the table. n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf); // Sizes of the strings and their boxes. for (auto r = locked->set.all(); !r.empty(); r.popFront()) { n += mallocSizeOf(r.front().get()); if (const char* chars = r.front()->chars()) { n += mallocSizeOf(chars); } } return n; } private: bool init(); void free(); public: static bool initSingleton(); static void freeSingleton(); static SharedImmutableStringsCache& getSingleton() { MOZ_ASSERT(singleton_.inner_); return singleton_; } private: SharedImmutableStringsCache() = default; ~SharedImmutableStringsCache() = default; public: SharedImmutableStringsCache(const SharedImmutableStringsCache& rhs) = delete; SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) = delete; SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) = delete; SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = delete; /** * Purge the cache of all refcount == 0 entries. */ void purge() { auto locked = inner_->lock(); for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) { if (e.front()->refcount == 0) { // The chars should be eagerly freed when refcount reaches zero. MOZ_ASSERT(!e.front()->chars()); e.removeFront(); } else { // The chars should exist as long as the refcount is non-zero. MOZ_ASSERT(e.front()->chars()); } } } private: struct Inner; class StringBox { friend class SharedImmutableString; OwnedChars chars_; size_t length_; const ExclusiveData* cache_; public: mutable size_t refcount; using Ptr = js::UniquePtr; StringBox(OwnedChars&& chars, size_t length, const ExclusiveData* cache) : chars_(std::move(chars)), length_(length), cache_(cache), refcount(0) { MOZ_ASSERT(chars_); } static Ptr Create(OwnedChars&& chars, size_t length, const ExclusiveData* cache) { return js::MakeUnique(std::move(chars), length, cache); } StringBox(const StringBox&) = delete; StringBox& operator=(const StringBox&) = delete; ~StringBox() { MOZ_RELEASE_ASSERT( refcount == 0, "There are `SharedImmutable[TwoByte]String`s outliving their " "associated cache! This always leads to use-after-free in the " "`~SharedImmutableString` destructor!"); } const char* chars() const { return chars_.get(); } size_t length() const { return length_; } }; struct Hasher { /** * A structure used when querying for a `const char*` string in the cache. */ class Lookup { friend struct Hasher; HashNumber hash_; const char* chars_; size_t length_; public: Lookup(HashNumber hash, const char* chars, size_t length) : hash_(hash), chars_(chars), length_(length) { MOZ_ASSERT(chars_); MOZ_ASSERT(hash == Hasher::hashLongString(chars, length)); } Lookup(HashNumber hash, const char16_t* chars, size_t length) : Lookup(hash, reinterpret_cast(chars), length * sizeof(char16_t)) {} }; static const size_t SHORT_STRING_MAX_LENGTH = 8192; static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2; // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the // string. This increases the risk of collisions, but in practice it // should be rare, and it yields a large speedup for hashing long // strings. static HashNumber hashLongString(const char* chars, size_t length) { MOZ_ASSERT(chars); return length <= SHORT_STRING_MAX_LENGTH ? mozilla::HashString(chars, length) : mozilla::AddToHash( mozilla::HashString(chars, HASH_CHUNK_LENGTH), mozilla::HashString(chars + length - HASH_CHUNK_LENGTH, HASH_CHUNK_LENGTH)); } static HashNumber hash(const Lookup& lookup) { return lookup.hash_; } static bool match(const StringBox::Ptr& key, const Lookup& lookup) { MOZ_ASSERT(lookup.chars_); if (!key->chars() || key->length() != lookup.length_) { return false; } if (key->chars() == lookup.chars_) { return true; } return memcmp(key->chars(), lookup.chars_, key->length()) == 0; } }; // The `Inner` struct contains the actual cached contents and shared between // the `SharedImmutableStringsCache` singleton and all // `SharedImmutable[TwoByte]String` holders. struct Inner { using Set = HashSet; Set set; Inner() : set() {} Inner(const Inner&) = delete; Inner& operator=(const Inner&) = delete; }; const ExclusiveData* inner_ = nullptr; }; /** * The `SharedImmutableString` class holds a reference to a `const char*` string * from the `SharedImmutableStringsCache` and releases the reference upon * destruction. */ class SharedImmutableString { friend class SharedImmutableStringsCache; friend class SharedImmutableTwoByteString; mutable SharedImmutableStringsCache::StringBox* box_; explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box); public: /** * `SharedImmutableString`s are move-able. It is an error to use a * `SharedImmutableString` after it has been moved. */ SharedImmutableString(SharedImmutableString&& rhs); SharedImmutableString& operator=(SharedImmutableString&& rhs); SharedImmutableString() { box_ = nullptr; } /** * Create another shared reference to the underlying string. */ SharedImmutableString clone() const; // If you want a copy, take one explicitly with `clone`! SharedImmutableString& operator=(const SharedImmutableString&) = delete; explicit operator bool() const { return box_ != nullptr; } ~SharedImmutableString(); /** * Get a raw pointer to the underlying string. It is only safe to use the * resulting pointer while this `SharedImmutableString` exists. */ const char* chars() const { MOZ_ASSERT(box_); MOZ_ASSERT(box_->refcount > 0); MOZ_ASSERT(box_->chars()); return box_->chars(); } /** * Get the length of the underlying string. */ size_t length() const { MOZ_ASSERT(box_); MOZ_ASSERT(box_->refcount > 0); MOZ_ASSERT(box_->chars()); return box_->length(); } }; /** * The `SharedImmutableTwoByteString` class holds a reference to a `const * char16_t*` string from the `SharedImmutableStringsCache` and releases the * reference upon destruction. */ class SharedImmutableTwoByteString { friend class SharedImmutableStringsCache; // If a `char*` string and `char16_t*` string happen to have the same bytes, // the bytes will be shared but handed out as different types. SharedImmutableString string_; explicit SharedImmutableTwoByteString(SharedImmutableString&& string); explicit SharedImmutableTwoByteString( SharedImmutableStringsCache::StringBox* box); public: /** * `SharedImmutableTwoByteString`s are move-able. It is an error to use a * `SharedImmutableTwoByteString` after it has been moved. */ SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs); SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs); SharedImmutableTwoByteString() { string_.box_ = nullptr; } /** * Create another shared reference to the underlying string. */ SharedImmutableTwoByteString clone() const; // If you want a copy, take one explicitly with `clone`! SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = delete; explicit operator bool() const { return string_.box_ != nullptr; } /** * Get a raw pointer to the underlying string. It is only safe to use the * resulting pointer while this `SharedImmutableTwoByteString` exists. */ const char16_t* chars() const { return reinterpret_cast(string_.chars()); } /** * Get the length of the underlying string. */ size_t length() const { return string_.length() / sizeof(char16_t); } }; } // namespace js #endif // vm_SharedImmutableStringsCache_h