diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/rocksdb/cache/lru_cache.h | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/cache/lru_cache.h')
-rw-r--r-- | src/rocksdb/cache/lru_cache.h | 339 |
1 files changed, 339 insertions, 0 deletions
diff --git a/src/rocksdb/cache/lru_cache.h b/src/rocksdb/cache/lru_cache.h new file mode 100644 index 000000000..827e0bece --- /dev/null +++ b/src/rocksdb/cache/lru_cache.h @@ -0,0 +1,339 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +#pragma once + +#include <string> + +#include "cache/sharded_cache.h" + +#include "port/malloc.h" +#include "port/port.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +// LRU cache implementation. This class is not thread-safe. + +// An entry is a variable length heap-allocated structure. +// Entries are referenced by cache and/or by any external entity. +// The cache keeps all its entries in a hash table. Some elements +// are also stored on LRU list. +// +// LRUHandle can be in these states: +// 1. Referenced externally AND in hash table. +// In that case the entry is *not* in the LRU list +// (refs >= 1 && in_cache == true) +// 2. Not referenced externally AND in hash table. +// In that case the entry is in the LRU list and can be freed. +// (refs == 0 && in_cache == true) +// 3. Referenced externally AND not in hash table. +// In that case the entry is not in the LRU list and not in hash table. +// The entry can be freed when refs becomes 0. +// (refs >= 1 && in_cache == false) +// +// All newly created LRUHandles are in state 1. If you call +// LRUCacheShard::Release on entry in state 1, it will go into state 2. +// To move from state 1 to state 3, either call LRUCacheShard::Erase or +// LRUCacheShard::Insert with the same key (but possibly different value). +// To move from state 2 to state 1, use LRUCacheShard::Lookup. +// Before destruction, make sure that no handles are in state 1. This means +// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a +// matching LRUCache::Release (to move into state 2) or LRUCacheShard::Erase +// (to move into state 3). + +struct LRUHandle { + void* value; + void (*deleter)(const Slice&, void* value); + LRUHandle* next_hash; + LRUHandle* next; + LRUHandle* prev; + size_t charge; // TODO(opt): Only allow uint32_t? + size_t key_length; + // The hash of key(). Used for fast sharding and comparisons. + uint32_t hash; + // The number of external refs to this entry. The cache itself is not counted. + uint32_t refs; + + enum Flags : uint8_t { + // Whether this entry is referenced by the hash table. + IN_CACHE = (1 << 0), + // Whether this entry is high priority entry. + IS_HIGH_PRI = (1 << 1), + // Whether this entry is in high-pri pool. + IN_HIGH_PRI_POOL = (1 << 2), + // Wwhether this entry has had any lookups (hits). + HAS_HIT = (1 << 3), + }; + + uint8_t flags; + + // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!) + char key_data[1]; + + Slice key() const { return Slice(key_data, key_length); } + + // Increase the reference count by 1. + void Ref() { refs++; } + + // Just reduce the reference count by 1. Return true if it was last reference. + bool Unref() { + assert(refs > 0); + refs--; + return refs == 0; + } + + // Return true if there are external refs, false otherwise. + bool HasRefs() const { return refs > 0; } + + bool InCache() const { return flags & IN_CACHE; } + bool IsHighPri() const { return flags & IS_HIGH_PRI; } + bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; } + bool HasHit() const { return flags & HAS_HIT; } + + void SetInCache(bool in_cache) { + if (in_cache) { + flags |= IN_CACHE; + } else { + flags &= ~IN_CACHE; + } + } + + void SetPriority(Cache::Priority priority) { + if (priority == Cache::Priority::HIGH) { + flags |= IS_HIGH_PRI; + } else { + flags &= ~IS_HIGH_PRI; + } + } + + void SetInHighPriPool(bool in_high_pri_pool) { + if (in_high_pri_pool) { + flags |= IN_HIGH_PRI_POOL; + } else { + flags &= ~IN_HIGH_PRI_POOL; + } + } + + void SetHit() { flags |= HAS_HIT; } + + void Free() { + assert(refs == 0); + if (deleter) { + (*deleter)(key(), value); + } + delete[] reinterpret_cast<char*>(this); + } + + // Caclculate the memory usage by metadata + inline size_t CalcTotalCharge( + CacheMetadataChargePolicy metadata_charge_policy) { + size_t meta_charge = 0; + if (metadata_charge_policy == kFullChargeCacheMetadata) { +#ifdef ROCKSDB_MALLOC_USABLE_SIZE + meta_charge += malloc_usable_size(static_cast<void*>(this)); +#else + // This is the size that is used when a new handle is created + meta_charge += sizeof(LRUHandle) - 1 + key_length; +#endif + } + return charge + meta_charge; + } +}; + +// We provide our own simple hash table since it removes a whole bunch +// of porting hacks and is also faster than some of the built-in hash +// table implementations in some of the compiler/runtime combinations +// we have tested. E.g., readrandom speeds up by ~5% over the g++ +// 4.4.3's builtin hashtable. +class LRUHandleTable { + public: + LRUHandleTable(); + ~LRUHandleTable(); + + LRUHandle* Lookup(const Slice& key, uint32_t hash); + LRUHandle* Insert(LRUHandle* h); + LRUHandle* Remove(const Slice& key, uint32_t hash); + + template <typename T> + void ApplyToAllCacheEntries(T func) { + for (uint32_t i = 0; i < length_; i++) { + LRUHandle* h = list_[i]; + while (h != nullptr) { + auto n = h->next_hash; + assert(h->InCache()); + func(h); + h = n; + } + } + } + + private: + // Return a pointer to slot that points to a cache entry that + // matches key/hash. If there is no such cache entry, return a + // pointer to the trailing slot in the corresponding linked list. + LRUHandle** FindPointer(const Slice& key, uint32_t hash); + + void Resize(); + + // The table consists of an array of buckets where each bucket is + // a linked list of cache entries that hash into the bucket. + LRUHandle** list_; + uint32_t length_; + uint32_t elems_; +}; + +// A single shard of sharded cache. +class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard { + public: + LRUCacheShard(size_t capacity, bool strict_capacity_limit, + double high_pri_pool_ratio, bool use_adaptive_mutex, + CacheMetadataChargePolicy metadata_charge_policy); + virtual ~LRUCacheShard() override = default; + + // Separate from constructor so caller can easily make an array of LRUCache + // if current usage is more than new capacity, the function will attempt to + // free the needed space + virtual void SetCapacity(size_t capacity) override; + + // Set the flag to reject insertion if cache if full. + virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override; + + // Set percentage of capacity reserved for high-pri cache entries. + void SetHighPriorityPoolRatio(double high_pri_pool_ratio); + + // Like Cache methods, but with an extra "hash" parameter. + virtual Status Insert(const Slice& key, uint32_t hash, void* value, + size_t charge, + void (*deleter)(const Slice& key, void* value), + Cache::Handle** handle, + Cache::Priority priority) override; + virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; + virtual bool Ref(Cache::Handle* handle) override; + virtual bool Release(Cache::Handle* handle, + bool force_erase = false) override; + virtual void Erase(const Slice& key, uint32_t hash) override; + + // Although in some platforms the update of size_t is atomic, to make sure + // GetUsage() and GetPinnedUsage() work correctly under any platform, we'll + // protect them with mutex_. + + virtual size_t GetUsage() const override; + virtual size_t GetPinnedUsage() const override; + + virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), + bool thread_safe) override; + + virtual void EraseUnRefEntries() override; + + virtual std::string GetPrintableOptions() const override; + + void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri); + + // Retrieves number of elements in LRU, for unit test purpose only + // not threadsafe + size_t TEST_GetLRUSize(); + + // Retrives high pri pool ratio + double GetHighPriPoolRatio(); + + private: + void LRU_Remove(LRUHandle* e); + void LRU_Insert(LRUHandle* e); + + // Overflow the last entry in high-pri pool to low-pri pool until size of + // high-pri pool is no larger than the size specify by high_pri_pool_pct. + void MaintainPoolSize(); + + // Free some space following strict LRU policy until enough space + // to hold (usage_ + charge) is freed or the lru list is empty + // This function is not thread safe - it needs to be executed while + // holding the mutex_ + void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted); + + // Initialized before use. + size_t capacity_; + + // Memory size for entries in high-pri pool. + size_t high_pri_pool_usage_; + + // Whether to reject insertion if cache reaches its full capacity. + bool strict_capacity_limit_; + + // Ratio of capacity reserved for high priority cache entries. + double high_pri_pool_ratio_; + + // High-pri pool size, equals to capacity * high_pri_pool_ratio. + // Remember the value to avoid recomputing each time. + double high_pri_pool_capacity_; + + // Dummy head of LRU list. + // lru.prev is newest entry, lru.next is oldest entry. + // LRU contains items which can be evicted, ie reference only by cache + LRUHandle lru_; + + // Pointer to head of low-pri pool in LRU list. + LRUHandle* lru_low_pri_; + + // ------------^^^^^^^^^^^^^----------- + // Not frequently modified data members + // ------------------------------------ + // + // We separate data members that are updated frequently from the ones that + // are not frequently updated so that they don't share the same cache line + // which will lead into false cache sharing + // + // ------------------------------------ + // Frequently modified data members + // ------------vvvvvvvvvvvvv----------- + LRUHandleTable table_; + + // Memory size for entries residing in the cache + size_t usage_; + + // Memory size for entries residing only in the LRU list + size_t lru_usage_; + + // mutex_ protects the following state. + // We don't count mutex_ as the cache's internal state so semantically we + // don't mind mutex_ invoking the non-const actions. + mutable port::Mutex mutex_; +}; + +class LRUCache +#ifdef NDEBUG + final +#endif + : public ShardedCache { + public: + LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, + double high_pri_pool_ratio, + std::shared_ptr<MemoryAllocator> memory_allocator = nullptr, + bool use_adaptive_mutex = kDefaultToAdaptiveMutex, + CacheMetadataChargePolicy metadata_charge_policy = + kDontChargeCacheMetadata); + virtual ~LRUCache(); + virtual const char* Name() const override { return "LRUCache"; } + virtual CacheShard* GetShard(int shard) override; + virtual const CacheShard* GetShard(int shard) const override; + virtual void* Value(Handle* handle) override; + virtual size_t GetCharge(Handle* handle) const override; + virtual uint32_t GetHash(Handle* handle) const override; + virtual void DisownData() override; + + // Retrieves number of elements in LRU, for unit test purpose only + size_t TEST_GetLRUSize(); + // Retrives high pri pool ratio + double GetHighPriPoolRatio(); + + private: + LRUCacheShard* shards_ = nullptr; + int num_shards_ = 0; +}; + +} // namespace ROCKSDB_NAMESPACE |