author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
commit    | e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree      | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/cache/lru_cache.h
parent    | Initial commit. (diff)
download  | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/cache/lru_cache.h')
-rw-r--r-- | src/rocksdb/cache/lru_cache.h | 546
1 file changed, 546 insertions, 0 deletions
diff --git a/src/rocksdb/cache/lru_cache.h b/src/rocksdb/cache/lru_cache.h
new file mode 100644
index 000000000..99b2f2b20
--- /dev/null
+++ b/src/rocksdb/cache/lru_cache.h
@@ -0,0 +1,546 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "cache/sharded_cache.h"
+#include "port/lang.h"
+#include "port/malloc.h"
+#include "port/port.h"
+#include "rocksdb/secondary_cache.h"
+#include "util/autovector.h"
+#include "util/distributed_mutex.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace lru_cache {
+
+// LRU cache implementation. This class is not thread-safe.
+
+// An entry is a variable-length heap-allocated structure.
+// Entries are referenced by the cache and/or by external entities.
+// The cache keeps all its entries in a hash table. Some elements
+// are also stored on the LRU list.
+//
+// LRUHandle can be in these states:
+// 1. Referenced externally AND in hash table.
+//    In that case the entry is *not* in the LRU list
+//    (refs >= 1 && in_cache == true)
+// 2. Not referenced externally AND in hash table.
+//    In that case the entry is in the LRU list and can be freed.
+//    (refs == 0 && in_cache == true)
+// 3. Referenced externally AND not in hash table.
+//    In that case the entry is not in the LRU list and not in hash table.
+//    The entry must be freed if refs becomes 0 in this state.
+//    (refs >= 1 && in_cache == false)
+// If you call LRUCacheShard::Release enough times on an entry in state 1, it
+// will go into state 2. To move from state 1 to state 3, either call
+// LRUCacheShard::Erase or LRUCacheShard::Insert with the same key (but
+// possibly a different value). To move from state 2 to state 1, use
+// LRUCacheShard::Lookup.
+// While refs > 0, public properties like value and deleter must not change.
+
+struct LRUHandle {
+  void* value;
+  union Info {
+    Info() {}
+    ~Info() {}
+    Cache::DeleterFn deleter;
+    const Cache::CacheItemHelper* helper;
+  } info_;
+  // An entry is not added to the LRUHandleTable until the secondary cache
+  // lookup is complete, so it's safe to have this union.
+  union {
+    LRUHandle* next_hash;
+    SecondaryCacheResultHandle* sec_handle;
+  };
+  LRUHandle* next;
+  LRUHandle* prev;
+  size_t total_charge;  // TODO(opt): Only allow uint32_t?
+  size_t key_length;
+  // The hash of key(). Used for fast sharding and comparisons.
+  uint32_t hash;
+  // The number of external refs to this entry. The cache itself is not
+  // counted.
+  uint32_t refs;
+
+  // Mutable flags - access controlled by mutex.
+  // The m_ and M_ prefixes (and im_ and IM_ later) are meant to help avoid
+  // checking an M_ flag on im_flags or an IM_ flag on m_flags.
+  uint8_t m_flags;
+  enum MFlags : uint8_t {
+    // Whether this entry is referenced by the hash table.
+    M_IN_CACHE = (1 << 0),
+    // Whether this entry has had any lookups (hits).
+    M_HAS_HIT = (1 << 1),
+    // Whether this entry is in the high-pri pool.
+    M_IN_HIGH_PRI_POOL = (1 << 2),
+    // Whether this entry is in the low-pri pool.
+    M_IN_LOW_PRI_POOL = (1 << 3),
+  };
+
+  // "Immutable" flags - only set in a single-threaded context, and can then
+  // be accessed without the mutex.
+  uint8_t im_flags;
+  enum ImFlags : uint8_t {
+    // Whether this entry is a high-priority entry.
+    IM_IS_HIGH_PRI = (1 << 0),
+    // Whether this entry is a low-priority entry.
+    IM_IS_LOW_PRI = (1 << 1),
+    // Whether this entry can be inserted into the secondary cache.
+    IM_IS_SECONDARY_CACHE_COMPATIBLE = (1 << 2),
+    // Whether the handle is still being read from a lower tier.
+    IM_IS_PENDING = (1 << 3),
+    // Whether this handle is still in a lower tier.
+    IM_IS_IN_SECONDARY_CACHE = (1 << 4),
+    // Marks result handles that should not be inserted into the cache.
+    IM_IS_STANDALONE = (1 << 5),
+  };
+
+  // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
+  char key_data[1];
+
+  Slice key() const { return Slice(key_data, key_length); }
+
+  // For HandleImpl concept
+  uint32_t GetHash() const { return hash; }
+
+  // Increase the reference count by 1.
+  void Ref() { refs++; }
+
+  // Reduce the reference count by 1. Return true if it was the last
+  // reference.
+  bool Unref() {
+    assert(refs > 0);
+    refs--;
+    return refs == 0;
+  }
+
+  // Return true if there are external refs, false otherwise.
+  bool HasRefs() const { return refs > 0; }
+
+  bool InCache() const { return m_flags & M_IN_CACHE; }
+  bool IsHighPri() const { return im_flags & IM_IS_HIGH_PRI; }
+  bool InHighPriPool() const { return m_flags & M_IN_HIGH_PRI_POOL; }
+  bool IsLowPri() const { return im_flags & IM_IS_LOW_PRI; }
+  bool InLowPriPool() const { return m_flags & M_IN_LOW_PRI_POOL; }
+  bool HasHit() const { return m_flags & M_HAS_HIT; }
+  bool IsSecondaryCacheCompatible() const {
+    return im_flags & IM_IS_SECONDARY_CACHE_COMPATIBLE;
+  }
+  bool IsPending() const { return im_flags & IM_IS_PENDING; }
+  bool IsInSecondaryCache() const {
+    return im_flags & IM_IS_IN_SECONDARY_CACHE;
+  }
+  bool IsStandalone() const { return im_flags & IM_IS_STANDALONE; }
+
+  void SetInCache(bool in_cache) {
+    if (in_cache) {
+      m_flags |= M_IN_CACHE;
+    } else {
+      m_flags &= ~M_IN_CACHE;
+    }
+  }
+
+  void SetPriority(Cache::Priority priority) {
+    if (priority == Cache::Priority::HIGH) {
+      im_flags |= IM_IS_HIGH_PRI;
+      im_flags &= ~IM_IS_LOW_PRI;
+    } else if (priority == Cache::Priority::LOW) {
+      im_flags &= ~IM_IS_HIGH_PRI;
+      im_flags |= IM_IS_LOW_PRI;
+    } else {
+      im_flags &= ~IM_IS_HIGH_PRI;
+      im_flags &= ~IM_IS_LOW_PRI;
+    }
+  }
+
+  void SetInHighPriPool(bool in_high_pri_pool) {
+    if (in_high_pri_pool) {
+      m_flags |= M_IN_HIGH_PRI_POOL;
+    } else {
+      m_flags &= ~M_IN_HIGH_PRI_POOL;
+    }
+  }
+
+  void SetInLowPriPool(bool in_low_pri_pool) {
+    if (in_low_pri_pool) {
+      m_flags |= M_IN_LOW_PRI_POOL;
+    } else {
+      m_flags &= ~M_IN_LOW_PRI_POOL;
+    }
+  }
+
+  void SetHit() { m_flags |= M_HAS_HIT; }
+
+  void SetSecondaryCacheCompatible(bool compat) {
+    if (compat) {
+      im_flags |= IM_IS_SECONDARY_CACHE_COMPATIBLE;
+    } else {
+      im_flags &= ~IM_IS_SECONDARY_CACHE_COMPATIBLE;
+    }
+  }
+
+  void SetIsPending(bool pending) {
+    if (pending) {
+      im_flags |= IM_IS_PENDING;
+    } else {
+      im_flags &= ~IM_IS_PENDING;
+    }
+  }
+
+  void SetIsInSecondaryCache(bool is_in_secondary_cache) {
+    if (is_in_secondary_cache) {
+      im_flags |= IM_IS_IN_SECONDARY_CACHE;
+    } else {
+      im_flags &= ~IM_IS_IN_SECONDARY_CACHE;
+    }
+  }
+
+  void SetIsStandalone(bool is_standalone) {
+    if (is_standalone) {
+      im_flags |= IM_IS_STANDALONE;
+    } else {
+      im_flags &= ~IM_IS_STANDALONE;
+    }
+  }
+
+  void Free() {
+    assert(refs == 0);
+
+    if (!IsSecondaryCacheCompatible() && info_.deleter) {
+      (*info_.deleter)(key(), value);
+    } else if (IsSecondaryCacheCompatible()) {
+      if (IsPending()) {
+        assert(sec_handle != nullptr);
+        SecondaryCacheResultHandle* tmp_sec_handle = sec_handle;
+        tmp_sec_handle->Wait();
+        value = tmp_sec_handle->Value();
+        delete tmp_sec_handle;
+      }
+      if (value) {
+        (*info_.helper->del_cb)(key(), value);
+      }
+    }
+
+    free(this);
+  }
+
+  inline size_t CalcuMetaCharge(
+      CacheMetadataChargePolicy metadata_charge_policy) const {
+    if (metadata_charge_policy != kFullChargeCacheMetadata) {
+      return 0;
+    } else {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      return malloc_usable_size(
+          const_cast<void*>(static_cast<const void*>(this)));
+#else
+      // This is the size that is used when a new handle is created.
+      return sizeof(LRUHandle) - 1 + key_length;
+#endif
+    }
+  }
+
+  // Calculate the total charge, including the memory usage of metadata.
+  inline void CalcTotalCharge(
+      size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
+    total_charge = charge + CalcuMetaCharge(metadata_charge_policy);
+  }
+
+  inline size_t GetCharge(
+      CacheMetadataChargePolicy metadata_charge_policy) const {
+    size_t meta_charge = CalcuMetaCharge(metadata_charge_policy);
+    assert(total_charge >= meta_charge);
+    return total_charge - meta_charge;
+  }
+};
+
+// We provide our own simple hash table since it removes a whole bunch
+// of porting hacks and is also faster than some of the built-in hash
+// table implementations in some of the compiler/runtime combinations
+// we have tested. E.g., readrandom speeds up by ~5% over g++ 4.4.3's
+// built-in hashtable.
+class LRUHandleTable {
+ public:
+  explicit LRUHandleTable(int max_upper_hash_bits);
+  ~LRUHandleTable();
+
+  LRUHandle* Lookup(const Slice& key, uint32_t hash);
+  LRUHandle* Insert(LRUHandle* h);
+  LRUHandle* Remove(const Slice& key, uint32_t hash);
+
+  template <typename T>
+  void ApplyToEntriesRange(T func, size_t index_begin, size_t index_end) {
+    for (size_t i = index_begin; i < index_end; i++) {
+      LRUHandle* h = list_[i];
+      while (h != nullptr) {
+        auto n = h->next_hash;
+        assert(h->InCache());
+        func(h);
+        h = n;
+      }
+    }
+  }
+
+  int GetLengthBits() const { return length_bits_; }
+
+  size_t GetOccupancyCount() const { return elems_; }
+
+ private:
+  // Return a pointer to the slot that points to a cache entry that
+  // matches key/hash. If there is no such cache entry, return a
+  // pointer to the trailing slot in the corresponding linked list.
+  LRUHandle** FindPointer(const Slice& key, uint32_t hash);
+
+  void Resize();
+
+  // Number of hash bits (upper because lower bits used for sharding)
+  // used for table index. Length == 1 << length_bits_
+  int length_bits_;
+
+  // The table consists of an array of buckets where each bucket is
+  // a linked list of cache entries that hash into the bucket.
+  std::unique_ptr<LRUHandle*[]> list_;
+
+  // Number of elements currently in the table.
+  uint32_t elems_;
+
+  // Set from max_upper_hash_bits (see constructor).
+  const int max_length_bits_;
+};
+
+// A single shard of sharded cache.
+class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShardBase {
+ public:
+  LRUCacheShard(size_t capacity, bool strict_capacity_limit,
+                double high_pri_pool_ratio, double low_pri_pool_ratio,
+                bool use_adaptive_mutex,
+                CacheMetadataChargePolicy metadata_charge_policy,
+                int max_upper_hash_bits, SecondaryCache* secondary_cache);
+
+ public:  // Type definitions expected as parameter to ShardedCache
+  using HandleImpl = LRUHandle;
+  using HashVal = uint32_t;
+  using HashCref = uint32_t;
+
+ public:  // Function definitions expected as parameter to ShardedCache
+  static inline HashVal ComputeHash(const Slice& key) {
+    return Lower32of64(GetSliceNPHash64(key));
+  }
+
+  // Separate from constructor so caller can easily make an array of LRUCache.
+  // If current usage is more than the new capacity, the function will attempt
+  // to free the needed space.
+  void SetCapacity(size_t capacity);
+
+  // Set the flag to reject insertion if the cache is full.
+  void SetStrictCapacityLimit(bool strict_capacity_limit);
+
+  // Set percentage of capacity reserved for high-pri cache entries.
+  void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
+
+  // Set percentage of capacity reserved for low-pri cache entries.
+  void SetLowPriorityPoolRatio(double low_pri_pool_ratio);
+
+  // Like Cache methods, but with an extra "hash" parameter.
+  inline Status Insert(const Slice& key, uint32_t hash, void* value,
+                       size_t charge, Cache::DeleterFn deleter,
+                       LRUHandle** handle, Cache::Priority priority) {
+    return Insert(key, hash, value, charge, deleter, nullptr, handle,
+                  priority);
+  }
+  inline Status Insert(const Slice& key, uint32_t hash, void* value,
+                       const Cache::CacheItemHelper* helper, size_t charge,
+                       LRUHandle** handle, Cache::Priority priority) {
+    assert(helper);
+    return Insert(key, hash, value, charge, nullptr, helper, handle, priority);
+  }
+  // If helper_cb is null, the values of the following arguments don't matter.
+  LRUHandle* Lookup(const Slice& key, uint32_t hash,
+                    const Cache::CacheItemHelper* helper,
+                    const Cache::CreateCallback& create_cb,
+                    Cache::Priority priority, bool wait, Statistics* stats);
+  inline LRUHandle* Lookup(const Slice& key, uint32_t hash) {
+    return Lookup(key, hash, nullptr, nullptr, Cache::Priority::LOW, true,
+                  nullptr);
+  }
+  bool Release(LRUHandle* handle, bool useful, bool erase_if_last_ref);
+  bool IsReady(LRUHandle* /*handle*/);
+  void Wait(LRUHandle* /*handle*/) {}
+  bool Ref(LRUHandle* handle);
+  void Erase(const Slice& key, uint32_t hash);
+
+  // Although on some platforms the update of size_t is atomic, to make sure
+  // GetUsage() and GetPinnedUsage() work correctly under any platform, we
+  // protect them with mutex_.
+
+  size_t GetUsage() const;
+  size_t GetPinnedUsage() const;
+  size_t GetOccupancyCount() const;
+  size_t GetTableAddressCount() const;
+
+  void ApplyToSomeEntries(
+      const std::function<void(const Slice& key, void* value, size_t charge,
+                               DeleterFn deleter)>& callback,
+      size_t average_entries_per_lock, size_t* state);
+
+  void EraseUnRefEntries();
+
+ public:  // other function definitions
+  void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri,
+                       LRUHandle** lru_bottom_pri);
+
+  // Retrieves number of elements in LRU, for unit test purposes only.
+  // Not thread-safe.
+  size_t TEST_GetLRUSize();
+
+  // Retrieves high-pri pool ratio.
+  double GetHighPriPoolRatio();
+
+  // Retrieves low-pri pool ratio.
+  double GetLowPriPoolRatio();
+
+  void AppendPrintableOptions(std::string& /*str*/) const;
+
+ private:
+  friend class LRUCache;
+  // Insert an item into the hash table and, if handle is null, insert into
+  // the LRU list. Older items are evicted as necessary. If the cache is full
+  // and free_handle_on_fail is true, the item is deleted and handle is set to
+  // nullptr.
+  Status InsertItem(LRUHandle* item, LRUHandle** handle,
+                    bool free_handle_on_fail);
+  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
+                DeleterFn deleter, const Cache::CacheItemHelper* helper,
+                LRUHandle** handle, Cache::Priority priority);
+  // Promote an item looked up from the secondary cache to the LRU cache.
+  // The item may still be in the secondary cache.
+  // It is only inserted into the hash table and not the LRU list, and only
+  // if the cache is not at full capacity, as is the case during Insert. The
+  // caller should hold a reference on the LRUHandle. When the caller releases
+  // the last reference, the item is added to the LRU list.
+  // The item is promoted to the high-pri or low-pri pool as specified by the
+  // caller in Lookup.
+  void Promote(LRUHandle* e);
+  void LRU_Remove(LRUHandle* e);
+  void LRU_Insert(LRUHandle* e);
+
+  // Overflow the last entry in high-pri pool to low-pri pool until the size
+  // of the high-pri pool is no larger than the size specified by
+  // high_pri_pool_pct.
+  void MaintainPoolSize();
+
+  // Free some space following strict LRU policy until enough space
+  // to hold (usage_ + charge) is freed or the LRU list is empty.
+  // This function is not thread-safe - it needs to be executed while
+  // holding the mutex_.
+  void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
+
+  // Try to insert the evicted handles into the secondary cache.
+  void TryInsertIntoSecondaryCache(autovector<LRUHandle*> evicted_handles);
+
+  // Initialized before use.
+  size_t capacity_;
+
+  // Memory size for entries in high-pri pool.
+  size_t high_pri_pool_usage_;
+
+  // Memory size for entries in low-pri pool.
+  size_t low_pri_pool_usage_;
+
+  // Whether to reject insertion if cache reaches its full capacity.
+  bool strict_capacity_limit_;
+
+  // Ratio of capacity reserved for high priority cache entries.
+  double high_pri_pool_ratio_;
+
+  // High-pri pool size, equal to capacity * high_pri_pool_ratio.
+  // Remember the value to avoid recomputing each time.
+  double high_pri_pool_capacity_;
+
+  // Ratio of capacity reserved for low priority cache entries.
+  double low_pri_pool_ratio_;
+
+  // Low-pri pool size, equal to capacity * low_pri_pool_ratio.
+  // Remember the value to avoid recomputing each time.
+  double low_pri_pool_capacity_;
+
+  // Dummy head of LRU list.
+  // lru.prev is the newest entry, lru.next is the oldest entry.
+  // LRU contains items which can be evicted, i.e., referenced only by the
+  // cache.
+  LRUHandle lru_;
+
+  // Pointer to head of low-pri pool in LRU list.
+  LRUHandle* lru_low_pri_;
+
+  // Pointer to head of bottom-pri pool in LRU list.
+  LRUHandle* lru_bottom_pri_;
+
+  // ------------^^^^^^^^^^^^^-----------
+  // Not frequently modified data members
+  // ------------------------------------
+  //
+  // We separate data members that are updated frequently from the ones that
+  // are not frequently updated so that they don't share the same cache line,
+  // which would lead to false sharing.
+  //
+  // ------------------------------------
+  // Frequently modified data members
+  // ------------vvvvvvvvvvvvv-----------
+  LRUHandleTable table_;
+
+  // Memory size for entries residing in the cache.
+  size_t usage_;
+
+  // Memory size for entries residing only in the LRU list.
+  size_t lru_usage_;
+
+  // mutex_ protects the following state.
+  // We don't count mutex_ as the cache's internal state so semantically we
+  // don't mind mutex_ invoking the non-const actions.
+  mutable DMutex mutex_;
+
+  // Owned by LRUCache.
+  SecondaryCache* secondary_cache_;
+};
+
+class LRUCache
+#ifdef NDEBUG
+    final
+#endif
+    : public ShardedCache<LRUCacheShard> {
+ public:
+  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+           double high_pri_pool_ratio, double low_pri_pool_ratio,
+           std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
+           bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
+           CacheMetadataChargePolicy metadata_charge_policy =
+               kDontChargeCacheMetadata,
+           std::shared_ptr<SecondaryCache> secondary_cache = nullptr);
+  const char* Name() const override { return "LRUCache"; }
+  void* Value(Handle* handle) override;
+  size_t GetCharge(Handle* handle) const override;
+  DeleterFn GetDeleter(Handle* handle) const override;
+  void WaitAll(std::vector<Handle*>& handles) override;
+
+  // Retrieves number of elements in LRU, for unit test purposes only.
+  size_t TEST_GetLRUSize();
+  // Retrieves high-pri pool ratio.
+  double GetHighPriPoolRatio();
+
+  void AppendPrintableOptions(std::string& str) const override;
+
+ private:
+  std::shared_ptr<SecondaryCache> secondary_cache_;
+};
+
+}  // namespace lru_cache
+
+using LRUCache = lru_cache::LRUCache;
+using LRUHandle = lru_cache::LRUHandle;
+using LRUCacheShard = lru_cache::LRUCacheShard;
+
+}  // namespace ROCKSDB_NAMESPACE
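
For context, the three-state LRUHandle lifecycle documented at the top of the header is easiest to see through RocksDB's public Cache interface, which wraps LRUCacheShard via ShardedCache. Below is a minimal sketch, not part of this commit, assuming the pre-8.0 DeleterFn-based Cache API and the NewLRUCache() factory that this vendored RocksDB version exposes in rocksdb/cache.h (not in this header):

#include <cassert>
#include <memory>
#include <string>

#include "rocksdb/cache.h"

int main() {
  // 1 MiB cache, 4 shards (num_shard_bits = 2), no strict capacity limit,
  // 50% of capacity reserved for high-priority entries.
  std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(
      1 << 20 /*capacity*/, 2 /*num_shard_bits*/,
      false /*strict_capacity_limit*/, 0.5 /*high_pri_pool_ratio*/);

  auto* value = new std::string("hello");
  rocksdb::Cache::DeleterFn deleter = [](const rocksdb::Slice& /*key*/,
                                         void* v) {
    delete static_cast<std::string*>(v);
  };

  // Insert with a returned handle: the entry enters state 1
  // (externally referenced AND in the hash table, not on the LRU list).
  rocksdb::Cache::Handle* handle = nullptr;
  rocksdb::Status s = cache->Insert("key", value, value->size(), deleter,
                                    &handle, rocksdb::Cache::Priority::HIGH);
  assert(s.ok());

  // Release drops the external ref: state 2 (on the LRU list, evictable).
  cache->Release(handle);

  // Lookup moves the entry back to state 1.
  handle = cache->Lookup("key");
  if (handle != nullptr) {
    assert(*static_cast<std::string*>(cache->Value(handle)) == "hello");
    // Erasing while still referenced moves the entry to state 3 (out of the
    // hash table); it is freed when the last reference is released.
    cache->Erase("key");
    cache->Release(handle);
  }
  return 0;
}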
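
LRUHandleTable::FindPointer, declared above, returns the address of the pointer that either points at the matching entry or at the null tail of the bucket's chain, so Insert and Remove can splice nodes with a single traversal. Here is a minimal standalone sketch of that technique; the names (Node, Table) are illustrative, not RocksDB's:

#include <cstdint>
#include <string>

struct Node {
  std::string key;
  uint32_t hash;
  Node* next_hash = nullptr;
};

struct Table {
  static constexpr size_t kBuckets = 16;
  Node* list[kBuckets] = {};

  // Pointer-to-slot: *slot is either the matching node or nullptr (tail).
  Node** FindPointer(const std::string& key, uint32_t hash) {
    Node** ptr = &list[hash % kBuckets];
    while (*ptr != nullptr && ((*ptr)->hash != hash || (*ptr)->key != key)) {
      ptr = &(*ptr)->next_hash;
    }
    return ptr;
  }

  // Insert replaces an existing node with the same key, or appends at the
  // tail, either way in one chain traversal.
  Node* Insert(Node* h) {
    Node** ptr = FindPointer(h->key, h->hash);
    Node* old = *ptr;
    h->next_hash = (old == nullptr) ? nullptr : old->next_hash;
    *ptr = h;
    return old;  // caller frees/recycles the displaced node, as in RocksDB
  }

  Node* Remove(const std::string& key, uint32_t hash) {
    Node** ptr = FindPointer(key, hash);
    Node* result = *ptr;
    if (result != nullptr) {
      *ptr = result->next_hash;  // unlink without re-walking the chain
    }
    return result;
  }
};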
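
The "frequently modified data members" comment in LRUCacheShard refers to keeping hot counters and cold configuration on separate cache lines so that concurrent readers of the cold fields do not false-share with writers of the hot ones. A minimal sketch of the layout idea, assuming a 64-byte cache line (RocksDB derives CACHE_LINE_SIZE per platform in port/port.h):

#include <cstddef>
#include <mutex>

struct alignas(64) Shard {
  // Cold: set at construction / reconfiguration, read-mostly.
  size_t capacity = 0;
  double high_pri_pool_ratio = 0.0;

  // Hot: bumped on every insert/lookup; isolated on its own cache line.
  alignas(64) size_t usage = 0;
  size_t lru_usage = 0;
  std::mutex mu;  // protects the hot fields, like mutex_ in LRUCacheShard
};

static_assert(alignof(Shard) == 64, "shard is cache-line aligned");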