| | | |
|---|---|---|
| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
| commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 | |
| tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/os/bluestore/fastbmap_allocator_impl.h | |
| parent | Initial commit. | |
Adding upstream version 14.2.21. (refs: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/os/bluestore/fastbmap_allocator_impl.h')
| mode | path | lines |
|---|---|---|
| -rwxr-xr-x | src/os/bluestore/fastbmap_allocator_impl.h | 833 |
1 file changed, 833 insertions, 0 deletions
diff --git a/src/os/bluestore/fastbmap_allocator_impl.h b/src/os/bluestore/fastbmap_allocator_impl.h
new file mode 100755
index 00000000..52a1edee
--- /dev/null
+++ b/src/os/bluestore/fastbmap_allocator_impl.h
@@ -0,0 +1,833 @@

// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Bitmap based in-memory allocator implementation.
 * Author: Igor Fedotov, ifedotov@suse.com
 *
 */

#ifndef __FAST_BITMAP_ALLOCATOR_IMPL_H
#define __FAST_BITMAP_ALLOCATOR_IMPL_H
#include "include/intarith.h"

#include <vector>
#include <algorithm>
#include <mutex>

typedef uint64_t slot_t;

#ifdef NON_CEPH_BUILD
#include <assert.h>
struct interval_t
{
  uint64_t offset = 0;
  uint64_t length = 0;

  interval_t() {}
  interval_t(uint64_t o, uint64_t l) : offset(o), length(l) {}
  interval_t(const interval_t &ext) :
    offset(ext.offset), length(ext.length) {}
};
typedef std::vector<interval_t> interval_vector_t;
typedef std::vector<slot_t> slot_vector_t;
#else
#include "include/ceph_assert.h"
#include "common/likely.h"
#include "os/bluestore/bluestore_types.h"
#include "include/mempool.h"
#include "common/ceph_mutex.h"

typedef bluestore_interval_t<uint64_t, uint64_t> interval_t;
typedef PExtentVector interval_vector_t;

typedef mempool::bluestore_alloc::vector<slot_t> slot_vector_t;

#endif

// fitting into cache line on x86_64
static const size_t slots_per_slotset = 8; // 8 slots per set
static const size_t slotset_bytes = sizeof(slot_t) * slots_per_slotset;
static const size_t bits_per_slot = sizeof(slot_t) * 8;
static const size_t bits_per_slotset = slotset_bytes * 8;
static const slot_t all_slot_set = 0xffffffffffffffff;
static const slot_t all_slot_clear = 0;

inline size_t find_next_set_bit(slot_t slot_val, size_t start_pos)
{
#ifdef __GNUC__
  if (start_pos == 0) {
    start_pos = __builtin_ffsll(slot_val);
    return start_pos ? start_pos - 1 : bits_per_slot;
  }
#endif
  slot_t mask = slot_t(1) << start_pos;
  while (start_pos < bits_per_slot && !(slot_val & mask)) {
    mask <<= 1;
    ++start_pos;
  }
  return start_pos;
}


class AllocatorLevel
{
protected:

  virtual uint64_t _children_per_slot() const = 0;
  virtual uint64_t _level_granularity() const = 0;

public:
  static uint64_t l0_dives;
  static uint64_t l0_iterations;
  static uint64_t l0_inner_iterations;
  static uint64_t alloc_fragments;
  static uint64_t alloc_fragments_fast;
  static uint64_t l2_allocs;

  virtual ~AllocatorLevel()
  {}

  virtual void collect_stats(
    std::map<size_t, size_t>& bins_overall) = 0;

};

class AllocatorLevel01 : public AllocatorLevel
{
protected:
  slot_vector_t l0; // set bit means free entry
  slot_vector_t l1;
  uint64_t l0_granularity = 0; // space per entry
  uint64_t l1_granularity = 0; // space per entry

  size_t partial_l1_count = 0;
  size_t unalloc_l1_count = 0;

  double get_fragmentation() const {
    double res = 0.0;
    auto total = unalloc_l1_count + partial_l1_count;
    if (total) {
      res = double(partial_l1_count) / double(total);
    }
    return res;
  }

  uint64_t _level_granularity() const override
  {
    return l1_granularity;
  }

  inline bool _is_slot_fully_allocated(uint64_t idx) const {
    return l1[idx] == all_slot_clear;
  }
public:
  inline uint64_t get_min_alloc_size() const
  {
    return l0_granularity;
  }

};

template <class T>
class AllocatorLevel02;

class AllocatorLevel01Loose : public AllocatorLevel01
{
  enum {
    L1_ENTRY_WIDTH = 2,
    L1_ENTRY_MASK = (1 << L1_ENTRY_WIDTH) - 1,
    L1_ENTRY_FULL = 0x00,
    L1_ENTRY_PARTIAL = 0x01,
    L1_ENTRY_NOT_USED = 0x02,
    L1_ENTRY_FREE = 0x03,
    L1_ENTRIES_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, //32
    L0_ENTRIES_PER_SLOT = bits_per_slot, // 64
  };
  uint64_t _children_per_slot() const override
  {
    return L1_ENTRIES_PER_SLOT;
  }

  interval_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
    uint64_t min_length, interval_t* tail) const;

  inline void _fragment_and_emplace(uint64_t max_length, uint64_t offset,
    uint64_t len,
    interval_vector_t* res)
  {
    auto it = res->rbegin();
    if (max_length) {
      if (it != res->rend() && it->offset + it->length == offset) {
        auto l = max_length - it->length;
        if (l >= len) {
          it->length += len;
          return;
        } else {
          offset += l;
          len -= l;
          it->length += l;
        }
      }

      while (len > max_length) {
        res->emplace_back(offset, max_length);
        offset += max_length;
        len -= max_length;
      }
      res->emplace_back(offset, len);
      return;
    }

    if (it != res->rend() && it->offset + it->length == offset) {
      it->length += len;
    } else {
      res->emplace_back(offset, len);
    }
  }

  bool _allocate_l0(uint64_t length,
    uint64_t max_length,
    uint64_t l0_pos0, uint64_t l0_pos1,
    uint64_t* allocated,
    interval_vector_t* res)
  {
    uint64_t d0 = L0_ENTRIES_PER_SLOT;

    ++l0_dives;

    ceph_assert(l0_pos0 < l0_pos1);
    ceph_assert(length > *allocated);
    ceph_assert(0 == (l0_pos0 % (slots_per_slotset * d0)));
    ceph_assert(0 == (l0_pos1 % (slots_per_slotset * d0)));
    ceph_assert(((length - *allocated) % l0_granularity) == 0);

    uint64_t need_entries = (length - *allocated) / l0_granularity;

    for (auto idx = l0_pos0 / d0; (idx < l0_pos1 / d0) && (length > *allocated);
      ++idx) {
      ++l0_iterations;
      slot_t& slot_val = l0[idx];
      auto base = idx * d0;
      if (slot_val == all_slot_clear) {
        continue;
      } else if (slot_val == all_slot_set) {
        uint64_t to_alloc = std::min(need_entries, d0);
        *allocated += to_alloc * l0_granularity;
        ++alloc_fragments;
        need_entries -= to_alloc;

        _fragment_and_emplace(max_length, base * l0_granularity,
          to_alloc * l0_granularity, res);

        if (to_alloc == d0) {
          slot_val = all_slot_clear;
        } else {
          _mark_alloc_l0(base, base + to_alloc);
        }
        continue;
      }

      auto free_pos = find_next_set_bit(slot_val, 0);
      ceph_assert(free_pos < bits_per_slot);
      auto next_pos = free_pos + 1;
      while (next_pos < bits_per_slot &&
        (next_pos - free_pos) < need_entries) {
        ++l0_inner_iterations;

        if (0 == (slot_val & (slot_t(1) << next_pos))) {
          auto to_alloc = (next_pos - free_pos);
          *allocated += to_alloc * l0_granularity;
          ++alloc_fragments;
          need_entries -= to_alloc;
          _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity,
            to_alloc * l0_granularity, res);
          _mark_alloc_l0(base + free_pos, base + next_pos);
          free_pos = find_next_set_bit(slot_val, next_pos + 1);
          next_pos = free_pos + 1;
        } else {
          ++next_pos;
        }
      }
      if (need_entries && free_pos < bits_per_slot) {
        auto to_alloc = std::min(need_entries, d0 - free_pos);
        *allocated += to_alloc * l0_granularity;
        ++alloc_fragments;
        need_entries -= to_alloc;
        _fragment_and_emplace(max_length, (base + free_pos) * l0_granularity,
          to_alloc * l0_granularity, res);
        _mark_alloc_l0(base + free_pos, base + free_pos + to_alloc);
      }
    }
    return _is_empty_l0(l0_pos0, l0_pos1);
  }

protected:

  friend class AllocatorLevel02<AllocatorLevel01Loose>;

  void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true)
  {
    l0_granularity = _alloc_unit;
    // 512 bits at L0 mapped to L1 entry
    l1_granularity = l0_granularity * bits_per_slotset;

    // capacity to have slot alignment at l1
    auto aligned_capacity =
      p2roundup((int64_t)capacity,
        int64_t(l1_granularity * slots_per_slotset * _children_per_slot()));
    size_t slot_count =
      aligned_capacity / l1_granularity / _children_per_slot();
    // we use set bit(s) as a marker for (partially) free entry
    l1.resize(slot_count, mark_as_free ? all_slot_set : all_slot_clear);

    // l0 slot count
    size_t slot_count_l0 = aligned_capacity / _alloc_unit / bits_per_slot;
    // we use set bit(s) as a marker for (partially) free entry
    l0.resize(slot_count_l0, mark_as_free ? all_slot_set : all_slot_clear);

    partial_l1_count = unalloc_l1_count = 0;
    if (mark_as_free) {
      unalloc_l1_count = slot_count * _children_per_slot();
      auto l0_pos_no_use = p2roundup((int64_t)capacity, (int64_t)l0_granularity) / l0_granularity;
      _mark_alloc_l1_l0(l0_pos_no_use, aligned_capacity / l0_granularity);
    }
  }

  struct search_ctx_t
  {
    size_t partial_count = 0;
    size_t free_count = 0;
    uint64_t free_l1_pos = 0;

    uint64_t min_affordable_len = 0;
    uint64_t min_affordable_offs = 0;
    uint64_t affordable_len = 0;
    uint64_t affordable_offs = 0;

    bool fully_processed = false;

    void reset()
    {
      *this = search_ctx_t();
    }
  };
  enum {
    NO_STOP,
    STOP_ON_EMPTY,
    STOP_ON_PARTIAL,
  };
  void _analyze_partials(uint64_t pos_start, uint64_t pos_end,
    uint64_t length, uint64_t min_length, int mode,
    search_ctx_t* ctx);

  void _mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end);
  void _mark_alloc_l0(int64_t l0_pos_start, int64_t l0_pos_end);
  uint64_t _claim_free_to_left_l0(int64_t l0_pos_start);
  uint64_t _claim_free_to_right_l0(int64_t l0_pos_start);


  void _mark_alloc_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    _mark_alloc_l0(l0_pos_start, l0_pos_end);
    l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset));
    l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset));
    _mark_l1_on_l0(l0_pos_start, l0_pos_end);
  }

  void _mark_free_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    auto d0 = L0_ENTRIES_PER_SLOT;

    auto pos = l0_pos_start;
    slot_t bits = (slot_t)1 << (l0_pos_start % d0);
    slot_t* val_s = &l0[pos / d0];
    int64_t pos_e = std::min(l0_pos_end,
      p2roundup<int64_t>(l0_pos_start + 1, d0));
    while (pos < pos_e) {
      *val_s |= bits;
      bits <<= 1;
      pos++;
    }
    pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0));
    while (pos < pos_e) {
      *(++val_s) = all_slot_set;
      pos += d0;
    }
    bits = 1;
    ++val_s;
    while (pos < l0_pos_end) {
      *val_s |= bits;
      bits <<= 1;
      pos++;
    }
  }

  void _mark_free_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
  {
    _mark_free_l0(l0_pos_start, l0_pos_end);
    l0_pos_start = p2align(l0_pos_start, int64_t(bits_per_slotset));
    l0_pos_end = p2roundup(l0_pos_end, int64_t(bits_per_slotset));
    _mark_l1_on_l0(l0_pos_start, l0_pos_end);
  }

  bool _is_empty_l0(uint64_t l0_pos, uint64_t l0_pos_end)
  {
    bool no_free = true;
    uint64_t d = slots_per_slotset * L0_ENTRIES_PER_SLOT;
    ceph_assert(0 == (l0_pos % d));
    ceph_assert(0 == (l0_pos_end % d));

    auto idx = l0_pos / L0_ENTRIES_PER_SLOT;
    auto idx_end = l0_pos_end / L0_ENTRIES_PER_SLOT;
    while (idx < idx_end && no_free) {
      no_free = l0[idx] == all_slot_clear;
      ++idx;
    }
    return no_free;
  }
  bool _is_empty_l1(uint64_t l1_pos, uint64_t l1_pos_end)
  {
    bool no_free = true;
    uint64_t d = slots_per_slotset * _children_per_slot();
    ceph_assert(0 == (l1_pos % d));
    ceph_assert(0 == (l1_pos_end % d));

    auto idx = l1_pos / L1_ENTRIES_PER_SLOT;
    auto idx_end = l1_pos_end / L1_ENTRIES_PER_SLOT;
    while (idx < idx_end && no_free) {
      no_free = _is_slot_fully_allocated(idx);
      ++idx;
    }
    return no_free;
  }

  interval_t _allocate_l1_contiguous(uint64_t length,
    uint64_t min_length, uint64_t max_length,
    uint64_t pos_start, uint64_t pos_end);

  bool _allocate_l1(uint64_t length,
    uint64_t min_length, uint64_t max_length,
    uint64_t l1_pos_start, uint64_t l1_pos_end,
    uint64_t* allocated,
    interval_vector_t* res);

  uint64_t _mark_alloc_l1(uint64_t offset, uint64_t length)
  {
    uint64_t l0_pos_start = offset / l0_granularity;
    uint64_t l0_pos_end = p2roundup(offset + length, l0_granularity) / l0_granularity;
    _mark_alloc_l1_l0(l0_pos_start, l0_pos_end);
    return l0_granularity * (l0_pos_end - l0_pos_start);
  }

  uint64_t _free_l1(uint64_t offs, uint64_t len)
  {
    uint64_t l0_pos_start = offs / l0_granularity;
    uint64_t l0_pos_end = p2roundup(offs + len, l0_granularity) / l0_granularity;
    _mark_free_l1_l0(l0_pos_start, l0_pos_end);
    return l0_granularity * (l0_pos_end - l0_pos_start);
  }

  uint64_t claim_free_to_left_l1(uint64_t offs)
  {
    uint64_t l0_pos_end = offs / l0_granularity;
    uint64_t l0_pos_start = _claim_free_to_left_l0(l0_pos_end);
    if (l0_pos_start < l0_pos_end) {
      _mark_l1_on_l0(
        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
    return 0;
  }

  uint64_t claim_free_to_right_l1(uint64_t offs)
  {
    uint64_t l0_pos_start = offs / l0_granularity;
    uint64_t l0_pos_end = _claim_free_to_right_l0(l0_pos_start);

    if (l0_pos_start < l0_pos_end) {
      _mark_l1_on_l0(
        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
      return l0_granularity * (l0_pos_end - l0_pos_start);
    }
    return 0;
  }


public:
  uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    if (pos1 == 0) {
      pos1 = l1.size() * L1_ENTRIES_PER_SLOT;
    }
    auto avail = debug_get_free(pos0, pos1);
    return (pos1 - pos0) * l1_granularity - avail;
  }

  uint64_t debug_get_free(uint64_t l1_pos0 = 0, uint64_t l1_pos1 = 0)
  {
    ceph_assert(0 == (l1_pos0 % L1_ENTRIES_PER_SLOT));
    ceph_assert(0 == (l1_pos1 % L1_ENTRIES_PER_SLOT));

    auto idx0 = l1_pos0 * slots_per_slotset;
    auto idx1 = l1_pos1 * slots_per_slotset;

    if (idx1 == 0) {
      idx1 = l0.size();
    }

    uint64_t res = 0;
    for (uint64_t i = idx0; i < idx1; ++i) {
      auto v = l0[i];
      if (v == all_slot_set) {
        res += L0_ENTRIES_PER_SLOT;
      } else if (v != all_slot_clear) {
        size_t cnt = 0;
#ifdef __GNUC__
        cnt = __builtin_popcountll(v);
#else
        // Kernighan's Alg to count set bits
        while (v) {
          v &= (v - 1);
          cnt++;
        }
#endif
        res += cnt;
      }
    }
    return res * l0_granularity;
  }
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override;

  static inline ssize_t count_0s(slot_t slot_val, size_t start_pos);
  static inline ssize_t count_1s(slot_t slot_val, size_t start_pos);
  void dump(std::function<void(uint64_t offset, uint64_t length)> notify);
};


class AllocatorLevel01Compact : public AllocatorLevel01
{
  uint64_t _children_per_slot() const override
  {
    return 8;
  }
public:
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override
  {
    // not implemented
  }
};

template <class L1>
class AllocatorLevel02 : public AllocatorLevel
{
public:
  uint64_t debug_get_free(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    std::lock_guard l(lock);
    return l1.debug_get_free(pos0 * l1._children_per_slot() * bits_per_slot,
      pos1 * l1._children_per_slot() * bits_per_slot);
  }
  uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
  {
    std::lock_guard l(lock);
    return l1.debug_get_allocated(pos0 * l1._children_per_slot() * bits_per_slot,
      pos1 * l1._children_per_slot() * bits_per_slot);
  }

  uint64_t get_available()
  {
    std::lock_guard l(lock);
    return available;
  }
  inline uint64_t get_min_alloc_size() const
  {
    return l1.get_min_alloc_size();
  }
  void collect_stats(
    std::map<size_t, size_t>& bins_overall) override {

    std::lock_guard l(lock);
    l1.collect_stats(bins_overall);
  }
  uint64_t claim_free_to_left(uint64_t offset) {
    std::lock_guard l(lock);
    auto allocated = l1.claim_free_to_left_l1(offset);
    ceph_assert(available >= allocated);
    available -= allocated;

    uint64_t l2_pos = (offset - allocated) / l2_granularity;
    uint64_t l2_pos_end =
      p2roundup(int64_t(offset), int64_t(l2_granularity)) / l2_granularity;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
    return allocated;
  }

  uint64_t claim_free_to_right(uint64_t offset) {
    std::lock_guard l(lock);
    auto allocated = l1.claim_free_to_right_l1(offset);
    ceph_assert(available >= allocated);
    available -= allocated;

    uint64_t l2_pos = (offset) / l2_granularity;
    int64_t end = offset + allocated;
    uint64_t l2_pos_end = p2roundup(end, int64_t(l2_granularity)) / l2_granularity;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
    return allocated;
  }
protected:
  ceph::mutex lock = ceph::make_mutex("AllocatorLevel02::lock");
  L1 l1;
  slot_vector_t l2;
  uint64_t l2_granularity = 0; // space per entry
  uint64_t available = 0;
  uint64_t last_pos = 0;

  enum {
    L1_ENTRIES_PER_SLOT = bits_per_slot, // 64
  };

  uint64_t _children_per_slot() const override
  {
    return L1_ENTRIES_PER_SLOT;
  }
  uint64_t _level_granularity() const override
  {
    return l2_granularity;
  }

  void _init(uint64_t capacity, uint64_t _alloc_unit, bool mark_as_free = true)
  {
    ceph_assert(isp2(_alloc_unit));
    l1._init(capacity, _alloc_unit, mark_as_free);

    l2_granularity =
      l1._level_granularity() * l1._children_per_slot() * slots_per_slotset;

    // capacity to have slot alignment at l2
    auto aligned_capacity =
      p2roundup((int64_t)capacity, (int64_t)l2_granularity * L1_ENTRIES_PER_SLOT);
    size_t elem_count = aligned_capacity / l2_granularity / L1_ENTRIES_PER_SLOT;
    // we use set bit(s) as a marker for (partially) free entry
    l2.resize(elem_count, mark_as_free ? all_slot_set : all_slot_clear);

    if (mark_as_free) {
      // capacity to have slotset alignment at l1
      auto l2_pos_no_use =
        p2roundup((int64_t)capacity, (int64_t)l2_granularity) / l2_granularity;
      _mark_l2_allocated(l2_pos_no_use, aligned_capacity / l2_granularity);
      available = p2align(capacity, _alloc_unit);
    } else {
      available = 0;
    }
  }

  void _mark_l2_allocated(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    while (l2_pos < l2_pos_end) {
      l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d));
      ++l2_pos;
    }
  }

  void _mark_l2_free(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    while (l2_pos < l2_pos_end) {
      l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d));
      ++l2_pos;
    }
  }

  void _mark_l2_on_l1(int64_t l2_pos, int64_t l2_pos_end)
  {
    auto d = L1_ENTRIES_PER_SLOT;
    ceph_assert(0 <= l2_pos_end);
    ceph_assert((int64_t)l2.size() >= (l2_pos_end / d));

    auto idx = l2_pos * slots_per_slotset;
    auto idx_end = l2_pos_end * slots_per_slotset;
    bool all_allocated = true;
    while (idx < idx_end) {
      if (!l1._is_slot_fully_allocated(idx)) {
        all_allocated = false;
        idx = p2roundup(int64_t(++idx), int64_t(slots_per_slotset));
      }
      else {
        ++idx;
      }
      if ((idx % slots_per_slotset) == 0) {
        if (all_allocated) {
          l2[l2_pos / d] &= ~(slot_t(1) << (l2_pos % d));
        }
        else {
          l2[l2_pos / d] |= (slot_t(1) << (l2_pos % d));
        }
        all_allocated = true;
        ++l2_pos;
      }
    }
  }

  void _allocate_l2(uint64_t length,
    uint64_t min_length,
    uint64_t max_length,
    uint64_t hint,

    uint64_t* allocated,
    interval_vector_t* res)
  {
    uint64_t prev_allocated = *allocated;
    uint64_t d = L1_ENTRIES_PER_SLOT;
    ceph_assert(min_length <= l2_granularity);
    ceph_assert(max_length == 0 || max_length >= min_length);
    ceph_assert(max_length == 0 || (max_length % min_length) == 0);
    ceph_assert(length >= min_length);
    ceph_assert((length % min_length) == 0);

    uint64_t cap = 1ull << 31;
    if (max_length == 0 || max_length >= cap) {
      max_length = cap;
    }

    uint64_t l1_w = slots_per_slotset * l1._children_per_slot();

    std::lock_guard l(lock);

    if (available < min_length) {
      return;
    }
    if (hint != 0) {
      last_pos = (hint / (d * l2_granularity)) < l2.size() ? p2align(hint / l2_granularity, d) : 0;
    }
    auto l2_pos = last_pos;
    auto last_pos0 = last_pos;
    auto pos = last_pos / d;
    auto pos_end = l2.size();
    // outer loop below is intended to optimize the performance by
    // avoiding 'modulo' operations inside the internal loop.
    // Looks like they have negative impact on the performance
    for (auto i = 0; i < 2; ++i) {
      for(; length > *allocated && pos < pos_end; ++pos) {
        slot_t& slot_val = l2[pos];
        size_t free_pos = 0;
        bool all_set = false;
        if (slot_val == all_slot_clear) {
          l2_pos += d;
          last_pos = l2_pos;
          continue;
        } else if (slot_val == all_slot_set) {
          free_pos = 0;
          all_set = true;
        } else {
          free_pos = find_next_set_bit(slot_val, 0);
          ceph_assert(free_pos < bits_per_slot);
        }
        do {
          ceph_assert(length > *allocated);
          bool empty = l1._allocate_l1(length,
            min_length,
            max_length,
            (l2_pos + free_pos) * l1_w,
            (l2_pos + free_pos + 1) * l1_w,
            allocated,
            res);
          if (empty) {
            slot_val &= ~(slot_t(1) << free_pos);
          }
          if (length <= *allocated || slot_val == all_slot_clear) {
            break;
          }
          ++free_pos;
          if (!all_set) {
            free_pos = find_next_set_bit(slot_val, free_pos);
          }
        } while (free_pos < bits_per_slot);
        last_pos = l2_pos;
        l2_pos += d;
      }
      l2_pos = 0;
      pos = 0;
      pos_end = last_pos0 / d;
    }

    ++l2_allocs;
    auto allocated_here = *allocated - prev_allocated;
    ceph_assert(available >= allocated_here);
    available -= allocated_here;
  }

#ifndef NON_CEPH_BUILD
  // to provide compatibility with BlueStore's allocator interface
  void _free_l2(const interval_set<uint64_t> & rr)
  {
    uint64_t released = 0;
    std::lock_guard l(lock);
    for (auto r : rr) {
      released += l1._free_l1(r.first, r.second);
      uint64_t l2_pos = r.first / l2_granularity;
      uint64_t l2_pos_end = p2roundup(int64_t(r.first + r.second), int64_t(l2_granularity)) / l2_granularity;

      _mark_l2_free(l2_pos, l2_pos_end);
    }
    available += released;
  }
#endif

  template <typename T>
  void _free_l2(const T& rr)
  {
    uint64_t released = 0;
    std::lock_guard l(lock);
    for (auto r : rr) {
      released += l1._free_l1(r.offset, r.length);
      uint64_t l2_pos = r.offset / l2_granularity;
      uint64_t l2_pos_end = p2roundup(int64_t(r.offset + r.length), int64_t(l2_granularity)) / l2_granularity;

      _mark_l2_free(l2_pos, l2_pos_end);
    }
    available += released;
  }

  void _mark_allocated(uint64_t o, uint64_t len)
  {
    uint64_t l2_pos = o / l2_granularity;
    uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity;

    std::lock_guard l(lock);
    auto allocated = l1._mark_alloc_l1(o, len);
    ceph_assert(available >= allocated);
    available -= allocated;
    _mark_l2_on_l1(l2_pos, l2_pos_end);
  }

  void _mark_free(uint64_t o, uint64_t len)
  {
    uint64_t l2_pos = o / l2_granularity;
    uint64_t l2_pos_end = p2roundup(int64_t(o + len), int64_t(l2_granularity)) / l2_granularity;

    std::lock_guard l(lock);
    available += l1._free_l1(o, len);
    _mark_l2_free(l2_pos, l2_pos_end);
  }
  void _shutdown()
  {
    last_pos = 0;
  }
  double _get_fragmentation() {
    std::lock_guard l(lock);
    return l1.get_fragmentation();
  }
};

#endif
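For orientation, here is a small editor's sketch (not part of the commit) of the address-space coverage that falls out of the constants above, assuming a 4 KiB allocation unit is passed to `_init()`; the 4 KiB value is an assumption, any power of two behaves the same way.

```cpp
// Editor's sketch: level geometry implied by the header's constants,
// assuming a 4 KiB allocation unit (l0_granularity).
#include <cstdint>

constexpr uint64_t alloc_unit = 4096;                        // assumed l0_granularity
// bits_per_slotset = 8 slots * 64 bits = 512 L0 bits per L1 entry
constexpr uint64_t l1_granularity = alloc_unit * 512;        // 2 MiB per L1 entry
// one L2 bit covers a full L1 slotset: 32 two-bit L1 entries per slot * 8 slots
constexpr uint64_t l2_granularity = l1_granularity * 32 * 8; // 512 MiB per L2 bit

static_assert(l1_granularity == (2ull << 20), "L1 entry covers 2 MiB");
static_assert(l2_granularity == (512ull << 20), "L2 bit covers 512 MiB");
```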
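The allocation entry points (`_init()`, `_allocate_l2()`, `_free_l2()`, `_mark_free()`) are protected, so callers derive from `AllocatorLevel02`, as BlueStore's `BitmapAllocator` does in-tree. The following is a hedged, minimal driver sketch rather than the in-tree usage: it assumes compilation inside the Ceph source tree (so the includes resolve) and linking against the accompanying `fastbmap_allocator_impl.cc`, which supplies `_allocate_l1()`, `collect_stats()` and the static counters; `DemoAllocator` and its method names are hypothetical.

```cpp
// Editor's sketch: driving the two-level bitmap allocator through a thin
// wrapper that exposes the protected entry points.
#include "os/bluestore/fastbmap_allocator_impl.h"  // assumed include path (src/ on -I)
#include <iostream>

struct DemoAllocator : AllocatorLevel02<AllocatorLevel01Loose> {
  DemoAllocator(uint64_t capacity, uint64_t alloc_unit) {
    _init(capacity, alloc_unit);                   // mark the whole device free
  }
  void allocate(uint64_t want, uint64_t unit, interval_vector_t* out) {
    uint64_t allocated = 0;
    // length, min_length, max_length (0 = unbounded), hint (0 = none)
    _allocate_l2(want, unit, 0, 0, &allocated, out);
  }
  void release(uint64_t offset, uint64_t length) {
    _mark_free(offset, length);
  }
};

int main() {
  DemoAllocator a(1ull << 30 /* 1 GiB */, 4096 /* alloc unit */);
  interval_vector_t extents;
  a.allocate(1ull << 20, 4096, &extents);          // ask for 1 MiB in 4 KiB units
  for (auto& e : extents)
    std::cout << "0x" << std::hex << e.offset << "~" << e.length << std::dec << "\n";
  for (auto& e : extents)
    a.release(e.offset, e.length);                 // return everything
  std::cout << a.get_available() << " bytes available\n";
  return 0;
}
```

The wrapper keeps the example short; in Ceph itself the same calls are made from `BitmapAllocator`, which also implements the generic `Allocator` interface on top of them.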