path: root/src/include/mempool.h
diff options
Diffstat (limited to 'src/include/mempool.h')
1 files changed, 548 insertions, 0 deletions
diff --git a/src/include/mempool.h b/src/include/mempool.h
new file mode 100644
index 000000000..fe84f3b8f
--- /dev/null
+++ b/src/include/mempool.h
@@ -0,0 +1,548 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Allen Samuels <>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include <cstddef>
+#include <map>
+#include <unordered_map>
+#include <set>
+#include <vector>
+#include <list>
+#include <mutex>
+#include <typeinfo>
+#include <boost/container/flat_set.hpp>
+#include <boost/container/flat_map.hpp>
+#include "common/Formatter.h"
+#include "common/ceph_atomic.h"
+#include "include/ceph_assert.h"
+#include "include/compact_map.h"
+#include "include/compact_set.h"
+#include "include/compat.h"
+Memory Pools
+A memory pool is a method for accounting the consumption of memory of
+a set of containers.
+Memory pools are statically declared (see pool_index_t).
+Each memory pool tracks the number of bytes and items it contains.
+Allocators can be declared and associated with a type so that they are
+tracked independently of the pool total. This additional accounting
+is optional and only incurs an overhead if debug mode is enabled at
+runtime. This allows developers to see what types are consuming the
+pool resources.
+Using memory pools is very easy.
+To create a new memory pool, simply add a new name into the list of
+memory pools that's defined in "DEFINE_MEMORY_POOLS_HELPER". That's
+it. :)
+For each memory pool that's created a C++ namespace is also
+automatically created (name is same as in DEFINE_MEMORY_POOLS_HELPER).
+That namespace contains a set of common STL containers that are predefined
+with the appropriate allocators.
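+Thus for mempool "osd" we have automatically available to us:
+ mempool::osd::string
+ mempool::osd::map
+ mempool::osd::multimap
+ mempool::osd::compact_map
+ mempool::osd::compact_multimap
+ mempool::osd::set
+ mempool::osd::compact_set
+ mempool::osd::flat_set
+ mempool::osd::flat_map
+ mempool::osd::list
+ mempool::osd::vector
+ mempool::osd::unordered_map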
+Putting objects in a mempool
+In order to use a memory pool with a particular type, a few additional
+declarations are needed.
+For a class:
+ struct Foo {
+ ...
+ };
+Then, in an appropriate .cc file,
+ MEMPOOL_DEFINE_OBJECT_FACTORY(Foo, foo, osd);
+The second argument can generally be identical to the first, except
+when the type contains a nested scope. For example, for
+BlueStore::Onode, we need to do
+ MEMPOOL_DEFINE_OBJECT_FACTORY(BlueStore::Onode, bluestore_onode,
+ bluestore_meta);
+(This is just because we need to name some static variables and we
+can't use :: in a variable name.)
+XXX Note: the new operator hard-codes the allocation size to the size of the
+object given in MEMPOOL_DEFINE_OBJECT_FACTORY. For this reason, you cannot
+incorporate mempools into a base class without also defining a helper/factory
+for the child class as well (as the base class is usually smaller than the
+child class).
+In order to use the STL containers, simply use the namespaced variant
+of the container type. For example,
+ mempool::osd::vector<int> myvec;
+The simplest way to interrogate the process is with
+ Formatter *f = ...
+ mempool::dump(f);
+This will dump information about *all* memory pools. When debug mode
+is enabled, the runtime complexity of dump is O(num_shards *
+num_types). When debug mode is disabled it is O(num_shards).
+You can also interrogate a specific pool programmatically with
+ size_t bytes = mempool::unittest_2::allocated_bytes();
+ size_t items = mempool::unittest_2::allocated_items();
+The runtime complexity is O(num_shards).
+Note that you cannot easily query per-type, primarily because debug
+mode is optional and you should not rely on that information being
+available.
+namespace mempool {
+// --------------------------------------------------------------
+// define memory pools
+//
+// X-macro listing every memory pool.  The argument `f` is itself a
+// function-like macro and is invoked once per pool name, in the order
+// written here.  Adding a pool means adding one f(name) line.
+#define DEFINE_MEMORY_POOLS_HELPER(f) \
+  f(bloom_filter) \
+  f(bluestore_alloc) \
+  f(bluestore_cache_data) \
+  f(bluestore_cache_onode) \
+  f(bluestore_cache_meta) \
+  f(bluestore_cache_other) \
+  f(bluestore_Buffer) \
+  f(bluestore_Extent) \
+  f(bluestore_Blob) \
+  f(bluestore_SharedBlob) \
+  f(bluestore_inline_bl) \
+  f(bluestore_fsck) \
+  f(bluestore_txc) \
+  f(bluestore_writing_deferred) \
+  f(bluestore_writing) \
+  f(bluefs) \
+  f(bluefs_file_reader) \
+  f(bluefs_file_writer) \
+  f(buffer_anon) \
+  f(buffer_meta) \
+  f(osd) \
+  f(osd_mapbl) \
+  f(osd_pglog) \
+  f(osdmap) \
+  f(osdmap_mapping) \
+  f(pgmap) \
+  f(mds_co) \
+  f(unittest_1) \
+  f(unittest_2)
+// give them integer ids
+//
+// Pool `x` becomes the enumerator mempool_x, numbered from 0 in list
+// order, so the trailing num_pools equals the number of pools.
+#define P(x) mempool_##x,
+enum pool_index_t {
+  DEFINE_MEMORY_POOLS_HELPER(P)
+  num_pools // Must be last.
+};
+#undef P
+// Global debug switch.  pool_allocator::init() reads it when an allocator
+// is constructed and, if it is true, registers the element type for
+// per-type accounting.
+extern bool debug_mode;
+// NOTE(review): presumably assigns debug_mode; the definition is not in
+// this header, so confirm against the .cc file.
+extern void set_debug_mode(bool d);
+// --------------------------------------------------------------
+// Forward declaration; the class itself is defined later in this header.
+class pool_t;
+// we shard pool stats across many shard_t's to reduce the amount
+// of cacheline ping pong.
+//
+// num_shard_bits is log2 of the shard count; num_shards (32) is the
+// length of the shard_t array that each pool_t carries.
+enum {
+  num_shard_bits = 5
+};
+enum {
+  num_shards = 1 << num_shard_bits
+};
+// One slice of a pool's counters.  The type is both aligned to and padded
+// out to 128 bytes (checked by the static_assert that follows), which the
+// code treats as the cacheline size, so adjacent shards in an array never
+// overlap within one such line.
+struct alignas(128) shard_t {
+  ceph::atomic<size_t> bytes = {0};   // bytes accounted to this shard
+  ceph::atomic<size_t> items = {0};   // items accounted to this shard
+  // explicit filler covering the rest of the 128-byte footprint
+  char __padding[128 - 2 * sizeof(ceph::atomic<size_t>)];
+};
+static_assert(sizeof(shard_t) == 128, "shard_t should be cacheline-sized");
+// Plain (non-atomic) pair of item and byte counts; pool_t::get_stats()
+// reports totals and per-type figures in this form.
+struct stats_t {
+  ssize_t items = 0;
+  ssize_t bytes = 0;
+  // Emit both counters on f as the integer fields "items" and "bytes".
+  void dump(ceph::Formatter *f) const {
+    f->dump_int("items", items);
+    f->dump_int("bytes", bytes);
+  }
+  // Add another set of counts into this one; returns *this for chaining.
+  stats_t& operator+=(const stats_t& o) {
+    items += o.items;
+    bytes += o.bytes;
+    return *this;
+  }
+};
+// Return the pool_t object for pool index ix (only declared here).
+pool_t& get_pool(pool_index_t ix);
+// Return the name of pool ix as a C string (only declared here).
+const char *get_pool_name(pool_index_t ix);
+// Per-type accounting record; pool_t keeps one of these per registered
+// type in its type_map.
+struct type_t {
+  const char *type_name = nullptr;    // name of the tracked type
+  size_t item_size = 0;               // size passed to pool_t::get_type()
+  ceph::atomic<ssize_t> items = {0};  // signed
+};
+// Hash functor for std::type_info keys; forwards to type_info::hash_code().
+struct type_info_hash {
+  std::size_t operator()(const std::type_info& k) const noexcept {
+    return k.hash_code();
+  }
+};
+// Accounting state for one memory pool: byte/item totals spread over
+// num_shards shards, plus a map of per-type records.
+class pool_t {
+  shard_t shard[num_shards];
+  mutable std::mutex lock; // only used for types list
+  // Keyed by the pointer returned from std::type_info::name().
+  std::unordered_map<const char *, type_t> type_map;
+public:
+  //
+  // How much this pool consumes. O(<num_shards>)
+  //
+  size_t allocated_bytes() const;
+  size_t allocated_items() const;
+  void adjust_count(ssize_t items, ssize_t bytes);
+  // Map the calling thread to a shard index in [0, num_shards).
+  static size_t pick_a_shard_int() {
+    // Dirt cheap: take the pthread_self() value, drop its low
+    // CEPH_PAGE_SHIFT bits and keep the next num_shard_bits bits.
+    size_t me = (size_t)pthread_self();
+    size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
+    return i;
+  }
+  // Shard that the calling thread should update.
+  shard_t* pick_a_shard() {
+    size_t i = pick_a_shard_int();
+    return &shard[i];
+  }
+  // Return the per-type record for ti, creating it with item size `size`
+  // on first use.  The lookup and insertion run under `lock`; the result
+  // points at the element stored inside type_map.
+  type_t *get_type(const std::type_info& ti, size_t size) {
+    std::lock_guard<std::mutex> l(lock);
+    const char *name = ti.name();
+    auto p = type_map.find(name);
+    if (p != type_map.end()) {
+      return &p->second;
+    }
+    type_t &t = type_map[name];
+    t.type_name = name;
+    t.item_size = size;
+    return &t;
+  }
+  // get pool stats. by_type is not populated if !debug
+  void get_stats(stats_t *total,
+                 std::map<std::string, stats_t> *by_type) const;
+  // Dump this pool to f.  NOTE(review): ptotal looks like an optional
+  // accumulator for a running total; confirm against the definition.
+  void dump(ceph::Formatter *f, stats_t *ptotal=nullptr) const;
+};
+// Dump information about all memory pools to f.
+void dump(ceph::Formatter *f);
+// STL allocator for use with containers.  An allocator object only holds
+// a pointer to the pool selected by pool_ix and, optionally, a pointer to
+// the per-type record for T.  The counters themselves live in those
+// shared objects, so containers can default-construct their allocator
+// and any two allocators of the same type compare equal.
+template<pool_index_t pool_ix, typename T>
+class pool_allocator {
+  pool_t *pool = nullptr;
+  type_t *type = nullptr;   // set only when per-type tracking was requested
+  // Charge n objects of T to the calling thread's shard and, if present,
+  // to the per-type record.
+  void note_alloc(size_t n) {
+    shard_t *shard = pool->pick_a_shard();
+    shard->bytes += sizeof(T) * n;
+    shard->items += n;
+    if (type) {
+      type->items += n;
+    }
+  }
+  // Reverse of note_alloc().  The shard is picked from the *calling*
+  // thread, which need not be the thread that allocated.
+  // NOTE(review): a single shard's counters therefore look meaningful
+  // only when summed across all shards; confirm against allocated_bytes().
+  void note_free(size_t n) {
+    shard_t *shard = pool->pick_a_shard();
+    shard->bytes -= sizeof(T) * n;
+    shard->items -= n;
+    if (type) {
+      type->items -= n;
+    }
+  }
+public:
+  typedef pool_allocator<pool_ix, T> allocator_type;
+  typedef T value_type;
+  typedef value_type *pointer;
+  typedef const value_type * const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+  template<typename U> struct rebind {
+    typedef pool_allocator<pool_ix,U> other;
+  };
+  // Bind to the pool for pool_ix.  T is registered for per-type
+  // accounting only if debug_mode is on or force_register is set.
+  void init(bool force_register) {
+    pool = &get_pool(pool_ix);
+    if (debug_mode || force_register) {
+      type = pool->get_type(typeid(T), sizeof(T));
+    }
+  }
+  pool_allocator(bool force_register=false) {
+    init(force_register);
+  }
+  // Rebinding copy: ignores the source allocator's state and binds
+  // afresh for T.
+  template<typename U>
+  pool_allocator(const pool_allocator<pool_ix,U>&) {
+    init(false);
+  }
+  // Allocate uninitialized storage for n objects of T.  The second
+  // argument (an allocation hint) is ignored.  The counters are updated
+  // only after the allocation succeeds, so an exception from operator
+  // new[] leaves the accounting untouched.
+  T* allocate(size_t n, void * /*hint*/ = nullptr) {
+    size_t total = sizeof(T) * n;
+    T* r = reinterpret_cast<T*>(new char[total]);
+    note_alloc(n);
+    return r;
+  }
+  // Release storage obtained from allocate(n) and un-account it.
+  void deallocate(T* p, size_t n) {
+    note_free(n);
+    delete[] reinterpret_cast<char*>(p);
+  }
+  // Like allocate(), but the storage comes from posix_memalign() with the
+  // requested alignment.  Throws std::bad_alloc if posix_memalign() fails;
+  // nothing is accounted in that case.
+  T* allocate_aligned(size_t n, size_t align, void * /*hint*/ = nullptr) {
+    size_t total = sizeof(T) * n;
+    void *ptr = nullptr;
+    int rc = ::posix_memalign(&ptr, align, total);
+    if (rc)
+      throw std::bad_alloc();
+    note_alloc(n);
+    return static_cast<T*>(ptr);
+  }
+  // Release storage obtained from allocate_aligned(n, ...) and
+  // un-account it.
+  void deallocate_aligned(T* p, size_t n) {
+    note_free(n);
+    aligned_free(p);
+  }
+  void destroy(T* p) {
+    p->~T();
+  }
+  template<class U>
+  void destroy(U *p) {
+    p->~U();
+  }
+  void construct(T* p, const T& val) {
+    ::new ((void *)p) T(val);
+  }
+  template<class U, class... Args> void construct(U* p,Args&&... args) {
+    ::new((void *)p) U(std::forward<Args>(args)...);
+  }
+  // Storage from one pool_allocator can be released through any other
+  // instance of the same type, so they always compare equal.
+  bool operator==(const pool_allocator&) const { return true; }
+  bool operator!=(const pool_allocator&) const { return false; }
+};
+// Namespace mempool
+#define P(x) \
+ namespace x { \
+ static const mempool::pool_index_t id = mempool::mempool_##x; \
+ template<typename v> \
+ using pool_allocator = mempool::pool_allocator<id,v>; \
+ \
+ using string = std::basic_string<char,std::char_traits<char>, \
+ pool_allocator<char>>; \
+ \
+ template<typename k,typename v, typename cmp = std::less<k> > \
+ using map = std::map<k, v, cmp, \
+ pool_allocator<std::pair<const k,v>>>; \
+ \
+ template<typename k,typename v, typename cmp = std::less<k> > \
+ using compact_map = compact_map<k, v, cmp, \
+ pool_allocator<std::pair<const k,v>>>; \
+ \
+ template<typename k,typename v, typename cmp = std::less<k> > \
+ using compact_multimap = compact_multimap<k, v, cmp, \
+ pool_allocator<std::pair<const k,v>>>; \
+ \
+ template<typename k, typename cmp = std::less<k> > \
+ using compact_set = compact_set<k, cmp, pool_allocator<k>>; \
+ \
+ template<typename k,typename v, typename cmp = std::less<k> > \
+ using multimap = std::multimap<k,v,cmp, \
+ pool_allocator<std::pair<const k, \
+ v>>>; \
+ \
+ template<typename k, typename cmp = std::less<k> > \
+ using set = std::set<k,cmp,pool_allocator<k>>; \
+ \
+ template<typename k, typename cmp = std::less<k> > \
+ using flat_set = boost::container::flat_set<k,cmp,pool_allocator<k>>; \
+ \
+ template<typename k, typename v, typename cmp = std::less<k> > \
+ using flat_map = boost::container::flat_map<k,v,cmp, \
+ pool_allocator<std::pair<k,v>>>; \
+ \
+ template<typename v> \
+ using list = std::list<v,pool_allocator<v>>; \
+ \
+ template<typename v> \
+ using vector = std::vector<v,pool_allocator<v>>; \
+ \
+ template<typename k, typename v, \
+ typename h=std::hash<k>, \
+ typename eq = std::equal_to<k>> \
+ using unordered_map = \
+ std::unordered_map<k,v,h,eq,pool_allocator<std::pair<const k,v>>>;\
+ \
+ inline size_t allocated_bytes() { \
+ return mempool::get_pool(id).allocated_bytes(); \
+ } \
+ inline size_t allocated_items() { \
+ return mempool::get_pool(id).allocated_items(); \
+ } \
+ };
+#undef P
+// the elements allocated by mempool are in the same memory space as the
+// ones allocated by the default allocator. so compare them in an efficient
+// way: libstdc++'s std::equal is specialized to use memcmp if T is integer
+// or pointer. this is good enough for our usecase. use
+// std::is_trivially_copyable<T> to expand the support to more types if
+// necessary.
+//
+// Element-wise equality between a default-allocator vector (lhs) and a
+// mempool vector (rhs) with the same element type: true when the sizes
+// match and every pair of elements compares equal.
+template<typename T, mempool::pool_index_t pool_index>
+bool operator==(const std::vector<T, std::allocator<T>>& lhs,
+                const std::vector<T, mempool::pool_allocator<pool_index, T>>& rhs)
+{
+  return (lhs.size() == rhs.size() &&
+          std::equal(lhs.begin(), lhs.end(), rhs.begin()));
+}
+// Inequality between a default-allocator vector (lhs) and a mempool
+// vector (rhs); the negation of the matching operator==.
+template<typename T, mempool::pool_index_t pool_index>
+bool operator!=(const std::vector<T, std::allocator<T>>& lhs,
+                const std::vector<T, mempool::pool_allocator<pool_index, T>>& rhs)
+{
+  return !(lhs == rhs);
+}
+// Equality with the operands the other way round (mempool vector on the
+// left); delegates to the (std::allocator, mempool) overload.
+template<typename T, mempool::pool_index_t pool_index>
+bool operator==(const std::vector<T, mempool::pool_allocator<pool_index, T>>& lhs,
+                const std::vector<T, std::allocator<T>>& rhs)
+{
+  return rhs == lhs;
+}
+// Inequality between a mempool vector (lhs) and a default-allocator
+// vector (rhs); the negation of the matching operator==.
+template<typename T, mempool::pool_index_t pool_index>
+bool operator!=(const std::vector<T, mempool::pool_allocator<pool_index, T>>& lhs,
+                const std::vector<T, std::allocator<T>>& rhs)
+{
+  return !(lhs == rhs);
+}
+// Use this for any type that is contained by a container (unless it
+// is a class you defined; see below).
+//
+// Expands to an extern declaration of the allocator object
+// mempool::<pool>::alloc_<factoryname>, whose element type is `obj`.
+#define MEMPOOL_DECLARE_FACTORY(obj, factoryname, pool) \
+  namespace mempool { namespace pool { \
+    extern pool_allocator<obj> alloc_##factoryname; \
+  } }
+// Defines the allocator object mempool::<pool>::alloc_<factoryname>.
+// It is constructed with force_register = true, so `obj` is registered
+// for per-type accounting whether or not debug mode is on.
+#define MEMPOOL_DEFINE_FACTORY(obj, factoryname, pool) \
+  namespace mempool { namespace pool { \
+    pool_allocator<obj> alloc_##factoryname = {true}; \
+  } }
+// Use this for each class that belongs to a mempool. For example,
+// class T {
+// ...
+// };
+ void *operator new(size_t size); \
+ void *operator new[](size_t size) noexcept { \
+ ceph_abort_msg("no array new"); \
+ return nullptr; } \
+ void operator delete(void *); \
+ void operator delete[](void *) { ceph_abort_msg("no array delete"); }
+// Use this in some particular .cc file to match each class with a
+// factory.  It defines the allocator object (via MEMPOOL_DEFINE_FACTORY)
+// and the class's operator new/delete in terms of it.
+//
+// Note that operator new ignores its `size` argument and always
+// allocates room for exactly one `obj`.
+#define MEMPOOL_DEFINE_OBJECT_FACTORY(obj,factoryname,pool) \
+  MEMPOOL_DEFINE_FACTORY(obj, factoryname, pool) \
+  void *obj::operator new(size_t size) { \
+    return mempool::pool::alloc_##factoryname.allocate(1); \
+  } \
+  void obj::operator delete(void *p) { \
+    mempool::pool::alloc_##factoryname.deallocate(static_cast<obj*>(p), 1); \
+  }