summaryrefslogtreecommitdiffstats
path: root/js/src/vm/SharedImmutableStringsCache.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /js/src/vm/SharedImmutableStringsCache.h
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/vm/SharedImmutableStringsCache.h')
-rw-r--r--js/src/vm/SharedImmutableStringsCache.h425
1 files changed, 425 insertions, 0 deletions
diff --git a/js/src/vm/SharedImmutableStringsCache.h b/js/src/vm/SharedImmutableStringsCache.h
new file mode 100644
index 0000000000..a10ff416a4
--- /dev/null
+++ b/js/src/vm/SharedImmutableStringsCache.h
@@ -0,0 +1,425 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef vm_SharedImmutableStringsCache_h
+#define vm_SharedImmutableStringsCache_h
+
+#include "mozilla/Maybe.h"
+#include "mozilla/UniquePtr.h"
+
+#include <cstring>
+#include <new> // for placement new
+#include <utility> // std::move
+
+#include "js/AllocPolicy.h"
+#include "js/HashTable.h"
+#include "js/UniquePtr.h"
+#include "js/Utility.h"
+
+#include "threading/ExclusiveData.h"
+
+namespace js {
+
+class SharedImmutableString;
+class SharedImmutableTwoByteString;
+
+/**
+ * The `SharedImmutableStringsCache` allows safely sharing and deduplicating
+ * immutable strings (either `const char*` [any encoding, not restricted to
+ * only Latin-1 or only UTF-8] or `const char16_t*`) between threads.
+ *
+ * The locking mechanism is dead-simple and coarse grained: a single lock guards
+ * all of the internal table itself, the table's entries, and the entries'
+ * reference counts. It is only safe to perform any mutation on the cache or any
+ * data stored within the cache when this lock is acquired.
+ */
+class SharedImmutableStringsCache {
+ static SharedImmutableStringsCache singleton_;
+
+ friend class SharedImmutableString;
+ friend class SharedImmutableTwoByteString;
+ struct Hasher;
+
+ public:
+ using OwnedChars = JS::UniqueChars;
+ using OwnedTwoByteChars = JS::UniqueTwoByteChars;
+
+ /**
+ * Get the canonical, shared, and de-duplicated version of the given `const
+ * char*` string. If such a string does not exist, call `intoOwnedChars` and
+ * add the string it returns to the cache.
+ *
+ * `intoOwnedChars` must create an owned version of the given string, and
+ * must have one of the following types:
+ *
+ * JS::UniqueChars intoOwnedChars();
+ * JS::UniqueChars&& intoOwnedChars();
+ *
+ * It can be used by callers to elide a copy of the string when it is safe
+ * to give up ownership of the lookup string to the cache. It must return a
+ * `nullptr` on failure.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ template <typename IntoOwnedChars>
+ [[nodiscard]] SharedImmutableString getOrCreate(
+ const char* chars, size_t length, IntoOwnedChars intoOwnedChars);
+
+ /**
+ * Take ownership of the given `chars` and return the canonical, shared and
+ * de-duplicated version.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ [[nodiscard]] SharedImmutableString getOrCreate(OwnedChars&& chars,
+ size_t length);
+
+ /**
+ * Do not take ownership of the given `chars`. Return the canonical, shared
+ * and de-duplicated version. If there is no extant shared version of
+ * `chars`, make a copy and insert it into the cache.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ [[nodiscard]] SharedImmutableString getOrCreate(const char* chars,
+ size_t length);
+
+ /**
+ * Get the canonical, shared, and de-duplicated version of the given `const
+ * char16_t*` string. If such a string does not exist, call `intoOwnedChars`
+ * and add the string it returns to the cache.
+ *
+ * `intoOwnedTwoByteChars` must create an owned version of the given string,
+ * and must have one of the following types:
+ *
+ * JS::UniqueTwoByteChars intoOwnedTwoByteChars();
+ * JS::UniqueTwoByteChars&& intoOwnedTwoByteChars();
+ *
+ * It can be used by callers to elide a copy of the string when it is safe
+ * to give up ownership of the lookup string to the cache. It must return a
+ * `nullptr` on failure.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ template <typename IntoOwnedTwoByteChars>
+ [[nodiscard]] SharedImmutableTwoByteString getOrCreate(
+ const char16_t* chars, size_t length,
+ IntoOwnedTwoByteChars intoOwnedTwoByteChars);
+
+ /**
+ * Take ownership of the given `chars` and return the canonical, shared and
+ * de-duplicated version.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ [[nodiscard]] SharedImmutableTwoByteString getOrCreate(
+ OwnedTwoByteChars&& chars, size_t length);
+
+ /**
+ * Do not take ownership of the given `chars`. Return the canonical, shared
+ * and de-duplicated version. If there is no extant shared version of
+ * `chars`, then make a copy and insert it into the cache.
+ *
+ * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
+ * returned.
+ */
+ [[nodiscard]] SharedImmutableTwoByteString getOrCreate(const char16_t* chars,
+ size_t length);
+
+ size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
+ MOZ_ASSERT(inner_);
+ size_t n = mallocSizeOf(inner_);
+
+ auto locked = inner_->lock();
+
+ // Size of the table.
+ n += locked->set.shallowSizeOfExcludingThis(mallocSizeOf);
+
+ // Sizes of the strings and their boxes.
+ for (auto r = locked->set.all(); !r.empty(); r.popFront()) {
+ n += mallocSizeOf(r.front().get());
+ if (const char* chars = r.front()->chars()) {
+ n += mallocSizeOf(chars);
+ }
+ }
+
+ return n;
+ }
+
+ private:
+ bool init();
+ void free();
+
+ public:
+ static bool initSingleton();
+ static void freeSingleton();
+
+ static SharedImmutableStringsCache& getSingleton() {
+ MOZ_ASSERT(singleton_.inner_);
+ return singleton_;
+ }
+
+ private:
+ SharedImmutableStringsCache() = default;
+ ~SharedImmutableStringsCache() = default;
+
+ public:
+ SharedImmutableStringsCache(const SharedImmutableStringsCache& rhs) = delete;
+ SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs) = delete;
+
+ SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) =
+ delete;
+
+ SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) =
+ delete;
+
+ /**
+ * Purge the cache of all refcount == 0 entries.
+ */
+ void purge() {
+ auto locked = inner_->lock();
+
+ for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) {
+ if (e.front()->refcount == 0) {
+ // The chars should be eagerly freed when refcount reaches zero.
+ MOZ_ASSERT(!e.front()->chars());
+ e.removeFront();
+ } else {
+ // The chars should exist as long as the refcount is non-zero.
+ MOZ_ASSERT(e.front()->chars());
+ }
+ }
+ }
+
+ private:
+ struct Inner;
+ class StringBox {
+ friend class SharedImmutableString;
+
+ OwnedChars chars_;
+ size_t length_;
+ const ExclusiveData<Inner>* cache_;
+
+ public:
+ mutable size_t refcount;
+
+ using Ptr = js::UniquePtr<StringBox>;
+
+ StringBox(OwnedChars&& chars, size_t length,
+ const ExclusiveData<Inner>* cache)
+ : chars_(std::move(chars)),
+ length_(length),
+ cache_(cache),
+ refcount(0) {
+ MOZ_ASSERT(chars_);
+ }
+
+ static Ptr Create(OwnedChars&& chars, size_t length,
+ const ExclusiveData<Inner>* cache) {
+ return js::MakeUnique<StringBox>(std::move(chars), length, cache);
+ }
+
+ StringBox(const StringBox&) = delete;
+ StringBox& operator=(const StringBox&) = delete;
+
+ ~StringBox() {
+ MOZ_RELEASE_ASSERT(
+ refcount == 0,
+ "There are `SharedImmutable[TwoByte]String`s outliving their "
+ "associated cache! This always leads to use-after-free in the "
+ "`~SharedImmutableString` destructor!");
+ }
+
+ const char* chars() const { return chars_.get(); }
+ size_t length() const { return length_; }
+ };
+
+ struct Hasher {
+ /**
+ * A structure used when querying for a `const char*` string in the cache.
+ */
+ class Lookup {
+ friend struct Hasher;
+
+ HashNumber hash_;
+ const char* chars_;
+ size_t length_;
+
+ public:
+ Lookup(HashNumber hash, const char* chars, size_t length)
+ : hash_(hash), chars_(chars), length_(length) {
+ MOZ_ASSERT(chars_);
+ MOZ_ASSERT(hash == Hasher::hashLongString(chars, length));
+ }
+
+ Lookup(HashNumber hash, const char16_t* chars, size_t length)
+ : Lookup(hash, reinterpret_cast<const char*>(chars),
+ length * sizeof(char16_t)) {}
+ };
+
+ static const size_t SHORT_STRING_MAX_LENGTH = 8192;
+ static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2;
+
+ // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the
+ // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the
+ // string. This increases the risk of collisions, but in practice it
+ // should be rare, and it yields a large speedup for hashing long
+ // strings.
+ static HashNumber hashLongString(const char* chars, size_t length) {
+ MOZ_ASSERT(chars);
+ return length <= SHORT_STRING_MAX_LENGTH
+ ? mozilla::HashString(chars, length)
+ : mozilla::AddToHash(
+ mozilla::HashString(chars, HASH_CHUNK_LENGTH),
+ mozilla::HashString(chars + length - HASH_CHUNK_LENGTH,
+ HASH_CHUNK_LENGTH));
+ }
+
+ static HashNumber hash(const Lookup& lookup) { return lookup.hash_; }
+
+ static bool match(const StringBox::Ptr& key, const Lookup& lookup) {
+ MOZ_ASSERT(lookup.chars_);
+
+ if (!key->chars() || key->length() != lookup.length_) {
+ return false;
+ }
+
+ if (key->chars() == lookup.chars_) {
+ return true;
+ }
+
+ return memcmp(key->chars(), lookup.chars_, key->length()) == 0;
+ }
+ };
+
+ // The `Inner` struct contains the actual cached contents and shared between
+ // the `SharedImmutableStringsCache` singleton and all
+ // `SharedImmutable[TwoByte]String` holders.
+ struct Inner {
+ using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>;
+
+ Set set;
+
+ Inner() = default;
+
+ Inner(const Inner&) = delete;
+ Inner& operator=(const Inner&) = delete;
+ };
+
+ const ExclusiveData<Inner>* inner_ = nullptr;
+};
+
+/**
+ * The `SharedImmutableString` class holds a reference to a `const char*` string
+ * from the `SharedImmutableStringsCache` and releases the reference upon
+ * destruction.
+ */
+class SharedImmutableString {
+ friend class SharedImmutableStringsCache;
+ friend class SharedImmutableTwoByteString;
+
+ mutable SharedImmutableStringsCache::StringBox* box_;
+
+ explicit SharedImmutableString(SharedImmutableStringsCache::StringBox* box);
+
+ public:
+ /**
+ * `SharedImmutableString`s are move-able. It is an error to use a
+ * `SharedImmutableString` after it has been moved.
+ */
+ SharedImmutableString(SharedImmutableString&& rhs);
+ SharedImmutableString& operator=(SharedImmutableString&& rhs);
+ SharedImmutableString() { box_ = nullptr; }
+
+ /**
+ * Create another shared reference to the underlying string.
+ */
+ SharedImmutableString clone() const;
+
+ // If you want a copy, take one explicitly with `clone`!
+ SharedImmutableString& operator=(const SharedImmutableString&) = delete;
+ explicit operator bool() const { return box_ != nullptr; }
+
+ ~SharedImmutableString();
+
+ /**
+ * Get a raw pointer to the underlying string. It is only safe to use the
+ * resulting pointer while this `SharedImmutableString` exists.
+ */
+ const char* chars() const {
+ MOZ_ASSERT(box_);
+ MOZ_ASSERT(box_->refcount > 0);
+ MOZ_ASSERT(box_->chars());
+ return box_->chars();
+ }
+
+ /**
+ * Get the length of the underlying string.
+ */
+ size_t length() const {
+ MOZ_ASSERT(box_);
+ MOZ_ASSERT(box_->refcount > 0);
+ MOZ_ASSERT(box_->chars());
+ return box_->length();
+ }
+};
+
+/**
+ * The `SharedImmutableTwoByteString` class holds a reference to a `const
+ * char16_t*` string from the `SharedImmutableStringsCache` and releases the
+ * reference upon destruction.
+ */
+class SharedImmutableTwoByteString {
+ friend class SharedImmutableStringsCache;
+
+ // If a `char*` string and `char16_t*` string happen to have the same bytes,
+ // the bytes will be shared but handed out as different types.
+ SharedImmutableString string_;
+
+ explicit SharedImmutableTwoByteString(SharedImmutableString&& string);
+ explicit SharedImmutableTwoByteString(
+ SharedImmutableStringsCache::StringBox* box);
+
+ public:
+ /**
+ * `SharedImmutableTwoByteString`s are move-able. It is an error to use a
+ * `SharedImmutableTwoByteString` after it has been moved.
+ */
+ SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs);
+ SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs);
+ SharedImmutableTwoByteString() { string_.box_ = nullptr; }
+
+ /**
+ * Create another shared reference to the underlying string.
+ */
+ SharedImmutableTwoByteString clone() const;
+
+ // If you want a copy, take one explicitly with `clone`!
+ SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) =
+ delete;
+ explicit operator bool() const { return string_.box_ != nullptr; }
+ /**
+ * Get a raw pointer to the underlying string. It is only safe to use the
+ * resulting pointer while this `SharedImmutableTwoByteString` exists.
+ */
+ const char16_t* chars() const {
+ return reinterpret_cast<const char16_t*>(string_.chars());
+ }
+
+ /**
+ * Get the length of the underlying string.
+ */
+ size_t length() const { return string_.length() / sizeof(char16_t); }
+};
+
+} // namespace js
+
+#endif // vm_SharedImmutableStringsCache_h