Diffstat (limited to 'src/os/bluestore/fastbmap_allocator_impl.cc')
 src/os/bluestore/fastbmap_allocator_impl.cc | 717 ++++++++++++++++++++++++++++
 1 file changed, 717 insertions(+), 0 deletions(-)
diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc
new file mode 100644
index 000000000..595b12485
--- /dev/null
+++ b/src/os/bluestore/fastbmap_allocator_impl.cc
@@ -0,0 +1,717 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Bitmap based in-memory allocator implementation.
+ * Author: Igor Fedotov, ifedotov@suse.com
+ *
+ */
+
+#include "fastbmap_allocator_impl.h"
+
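+// Terminology used throughout this file:
+// - l0 is a bitmap with one bit per allocation unit (l0_granularity bytes);
+//   a set bit means the unit is free, a clear bit means it is allocated.
+// - l0 slots are grouped into slotsets; each slotset is summarized by a
+//   two-bit l1 entry (L1_ENTRY_FREE / L1_ENTRY_FULL / L1_ENTRY_PARTIAL)
+//   covering l1_granularity bytes.
+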
+uint64_t AllocatorLevel::l0_dives = 0;
+uint64_t AllocatorLevel::l0_iterations = 0;
+uint64_t AllocatorLevel::l0_inner_iterations = 0;
+uint64_t AllocatorLevel::alloc_fragments = 0;
+uint64_t AllocatorLevel::alloc_fragments_fast = 0;
+uint64_t AllocatorLevel::l2_allocs = 0;
+
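+// Returns the largest sub-interval of [offset, offset + len) whose start is
+// aligned to min_length and whose length is a multiple of min_length, or an
+// empty interval if no such sub-interval exists.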
+inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length)
+{
+ interval_t res;
+ if (len >= min_length) {
+ res.offset = p2roundup(offset, min_length);
+ auto delta_off = res.offset - offset;
+ if (len > delta_off) {
+ res.length = len - delta_off;
+ res.length = p2align<uint64_t>(res.length, min_length);
+ if (res.length) {
+ return res;
+ }
+ }
+ }
+ return interval_t();
+}
+
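+// Scans l0 entries in [pos0, pos1) and returns the longest free extent that
+// survives alignment to min_length (see _align2units), in bytes.
+// On entry *tail describes a free run ending exactly at pos0, which is
+// prepended to the scan; on exit *tail holds the free run (if any) touching
+// pos1, so the caller can merge it with the following range.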
+interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
+ uint64_t pos1, uint64_t min_length, interval_t* tail) const
+{
+ interval_t res;
+ if (pos0 >= pos1) {
+ return res;
+ }
+ auto pos = pos0;
+
+ interval_t res_candidate;
+ if (tail->length != 0) {
+ ceph_assert((tail->offset % l0_granularity) == 0);
+ ceph_assert((tail->length % l0_granularity) == 0);
+ res_candidate.offset = tail->offset / l0_granularity;
+ res_candidate.length = tail->length / l0_granularity;
+ }
+ *tail = interval_t();
+
+ auto d = bits_per_slot;
+ slot_t bits = l0[pos / d];
+ bits >>= pos % d;
+ bool end_loop = false;
+ auto min_granules = min_length / l0_granularity;
+
+ do {
+ if ((pos % d) == 0) {
+ bits = l0[pos / d];
+ if (pos1 - pos >= d) {
+ switch(bits) {
+ case all_slot_set:
+ // slot is totally free
+ if (!res_candidate.length) {
+ res_candidate.offset = pos;
+ }
+ res_candidate.length += d;
+ pos += d;
+ end_loop = pos >= pos1;
+ if (end_loop) {
+ *tail = res_candidate;
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+          if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ }
+ continue;
+ case all_slot_clear:
+ // slot is totally allocated
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ res_candidate = interval_t();
+ pos += d;
+ end_loop = pos >= pos1;
+ continue;
+ }
+ }
+    } // if ((pos % d) == 0)
+
+ end_loop = ++pos >= pos1;
+ if (bits & 1) {
+ // item is free
+ if (!res_candidate.length) {
+ res_candidate.offset = pos - 1;
+ }
+ ++res_candidate.length;
+ if (end_loop) {
+ *tail = res_candidate;
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ }
+ } else {
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ res_candidate = interval_t();
+ }
+ bits >>= 1;
+ } while (!end_loop);
+ res.offset *= l0_granularity;
+ res.length *= l0_granularity;
+ tail->offset *= l0_granularity;
+ tail->length *= l0_granularity;
+ return res;
+}
+
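+// Walks l1 entries in [pos_start, pos_end) and fills *ctx with allocation
+// candidates: the first run of fully free l1 entries (free_l1_pos /
+// free_count), the shortest partial extent of at least `length` bytes
+// (affordable_*) and the shortest aligned extent of at least min_length
+// bytes (min_affordable_*). `mode` permits an early return once a free
+// (STOP_ON_EMPTY) or partial (STOP_ON_PARTIAL) entry has been found;
+// ctx->fully_processed is set when the whole range was examined.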
+void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
+ uint64_t pos_end, uint64_t length, uint64_t min_length, int mode,
+ search_ctx_t* ctx)
+{
+ auto d = L1_ENTRIES_PER_SLOT;
+ ceph_assert((pos_start % d) == 0);
+ ceph_assert((pos_end % d) == 0);
+
+ uint64_t l0_w = slots_per_slotset * L0_ENTRIES_PER_SLOT;
+
+ uint64_t l1_pos = pos_start;
+ const interval_t empty_tail;
+ interval_t prev_tail;
+
+ uint64_t next_free_l1_pos = 0;
+ for (auto pos = pos_start / d; pos < pos_end / d; ++pos) {
+ slot_t slot_val = l1[pos];
+ // FIXME minor: code below can be optimized to check slot_val against
+ // all_slot_set(_clear) value
+
+ for (auto c = 0; c < d; c++) {
+ switch (slot_val & L1_ENTRY_MASK) {
+ case L1_ENTRY_FREE:
+ prev_tail = empty_tail;
+ if (!ctx->free_count) {
+ ctx->free_l1_pos = l1_pos;
+        } else if (l1_pos != next_free_l1_pos) {
+ auto o = ctx->free_l1_pos * l1_granularity;
+ auto l = ctx->free_count * l1_granularity;
+ // check if already found extent fits min_length after alignment
+ if (_align2units(o, l, min_length).length >= min_length) {
+ break;
+ }
+ // if not - proceed with the next one
+ ctx->free_l1_pos = l1_pos;
+ ctx->free_count = 0;
+ }
+ next_free_l1_pos = l1_pos + 1;
+ ++ctx->free_count;
+ if (mode == STOP_ON_EMPTY) {
+ return;
+ }
+ break;
+ case L1_ENTRY_FULL:
+ prev_tail = empty_tail;
+ break;
+ case L1_ENTRY_PARTIAL:
+ interval_t longest;
+ ++ctx->partial_count;
+
+ longest = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, min_length, &prev_tail);
+
+ if (longest.length >= length) {
+          if (ctx->affordable_len == 0 ||
+              longest.length < ctx->affordable_len) {
+ ctx->affordable_len = longest.length;
+ ctx->affordable_offs = longest.offset;
+ }
+ }
+ if (longest.length >= min_length &&
+ (ctx->min_affordable_len == 0 ||
+ (longest.length < ctx->min_affordable_len))) {
+
+ ctx->min_affordable_len = p2align<uint64_t>(longest.length, min_length);
+ ctx->min_affordable_offs = longest.offset;
+ }
+ if (mode == STOP_ON_PARTIAL) {
+ return;
+ }
+ break;
+ }
+ slot_val >>= L1_ENTRY_WIDTH;
+ ++l1_pos;
+ }
+ }
+ ctx->fully_processed = true;
+}
+
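+// Rebuilds the l1 entries covering the l0 range [l0_pos, l0_pos_end): each
+// slotset is classified as free, full or partial from its l0 slots, and the
+// corresponding l1 entry plus the unalloc_l1_count/partial_l1_count stats
+// are updated. The range must be aligned to slotset boundaries.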
+void AllocatorLevel01Loose::_mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end)
+{
+ if (l0_pos == l0_pos_end) {
+ return;
+ }
+ auto d0 = bits_per_slotset;
+ uint64_t l1_w = L1_ENTRIES_PER_SLOT;
+ // this should be aligned with slotset boundaries
+ ceph_assert(0 == (l0_pos % d0));
+ ceph_assert(0 == (l0_pos_end % d0));
+
+ int64_t idx = l0_pos / bits_per_slot;
+ int64_t idx_end = l0_pos_end / bits_per_slot;
+ slot_t mask_to_apply = L1_ENTRY_NOT_USED;
+
+ auto l1_pos = l0_pos / d0;
+
+ while (idx < idx_end) {
+    if (l0[idx] == all_slot_clear) {
+      ++idx;
+      if (mask_to_apply == L1_ENTRY_NOT_USED) {
+        mask_to_apply = L1_ENTRY_FULL;
+      } else if (mask_to_apply != L1_ENTRY_FULL) {
+        // not all the preceding slots of this slotset are allocated,
+        // so it is partial; skip the rest of the slotset
+        idx = p2roundup(idx, int64_t(slots_per_slotset));
+        mask_to_apply = L1_ENTRY_PARTIAL;
+      }
+    } else if (l0[idx] == all_slot_set) {
+      ++idx;
+      if (mask_to_apply == L1_ENTRY_NOT_USED) {
+        mask_to_apply = L1_ENTRY_FREE;
+      } else if (mask_to_apply != L1_ENTRY_FREE) {
+        // not all the preceding slots of this slotset are free,
+        // so it is partial; skip the rest of the slotset
+        idx = p2roundup(idx, int64_t(slots_per_slotset));
+        mask_to_apply = L1_ENTRY_PARTIAL;
+      }
+ } else {
+ // no need to check the current slot set, it's partial
+ mask_to_apply = L1_ENTRY_PARTIAL;
+ ++idx;
+ idx = p2roundup(idx, int64_t(slots_per_slotset));
+ }
+ if ((idx % slots_per_slotset) == 0) {
+ ceph_assert(mask_to_apply != L1_ENTRY_NOT_USED);
+ uint64_t shift = (l1_pos % l1_w) * L1_ENTRY_WIDTH;
+ slot_t& slot_val = l1[l1_pos / l1_w];
+ auto mask = slot_t(L1_ENTRY_MASK) << shift;
+
+ slot_t old_mask = (slot_val & mask) >> shift;
+ switch(old_mask) {
+ case L1_ENTRY_FREE:
+ unalloc_l1_count--;
+ break;
+ case L1_ENTRY_PARTIAL:
+ partial_l1_count--;
+ break;
+ }
+ slot_val &= ~mask;
+ slot_val |= slot_t(mask_to_apply) << shift;
+ switch(mask_to_apply) {
+ case L1_ENTRY_FREE:
+ unalloc_l1_count++;
+ break;
+ case L1_ENTRY_PARTIAL:
+ partial_l1_count++;
+ break;
+ }
+ mask_to_apply = L1_ENTRY_NOT_USED;
+ ++l1_pos;
+ }
+ }
+}
+
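+// Marks the l0 range [l0_pos_start, l0_pos_end) as allocated (clears the
+// bits): leading bits of the first slot one by one, whole slots in the
+// middle at once, then the trailing bits. l1 is left untouched; see
+// _mark_alloc_l1_l0 for the variant that updates both levels.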
+void AllocatorLevel01Loose::_mark_alloc_l0(int64_t l0_pos_start,
+ int64_t l0_pos_end)
+{
+ auto d0 = L0_ENTRIES_PER_SLOT;
+
+ int64_t pos = l0_pos_start;
+ slot_t bits = (slot_t)1 << (l0_pos_start % d0);
+ slot_t* val_s = l0.data() + (pos / d0);
+ int64_t pos_e = std::min(l0_pos_end, p2roundup<int64_t>(l0_pos_start + 1, d0));
+ while (pos < pos_e) {
+ (*val_s) &= ~bits;
+ bits <<= 1;
+ pos++;
+ }
+ pos_e = std::min(l0_pos_end, p2align<int64_t>(l0_pos_end, d0));
+ while (pos < pos_e) {
+ *(++val_s) = all_slot_clear;
+ pos += d0;
+ }
+ bits = 1;
+ ++val_s;
+ while (pos < l0_pos_end) {
+ (*val_s) &= ~bits;
+ bits <<= 1;
+ pos++;
+ }
+}
+
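+// Tries to carve a single contiguous extent of up to `length` bytes out of
+// the l1 range [pos_start, pos_end), falling back to shorter extents no
+// smaller than min_length. Different search strategies are used depending
+// on whether the request fits into one l0 unit, equals one l1 unit, or is
+// larger. Returns an empty interval on failure.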
+interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
+ uint64_t min_length, uint64_t max_length,
+ uint64_t pos_start, uint64_t pos_end)
+{
+ interval_t res = { 0, 0 };
+ uint64_t l0_w = slots_per_slotset * L0_ENTRIES_PER_SLOT;
+
+ if (unlikely(length <= l0_granularity)) {
+ search_ctx_t ctx;
+ _analyze_partials(pos_start, pos_end, l0_granularity, l0_granularity,
+ STOP_ON_PARTIAL, &ctx);
+
+ // check partially free slot sets first (including neighboring),
+ // full length match required.
+ if (ctx.affordable_len) {
+ // allocate as specified
+ ceph_assert(ctx.affordable_len >= length);
+ auto pos = ctx.affordable_offs / l0_granularity;
+ _mark_alloc_l1_l0(pos, pos + 1);
+ res = interval_t(ctx.affordable_offs, length);
+ return res;
+ }
+
+ // allocate from free slot sets
+ if (ctx.free_count) {
+ auto l = std::min(length, ctx.free_count * l1_granularity);
+ ceph_assert((l % l0_granularity) == 0);
+ auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;
+
+ _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
+ res = interval_t(ctx.free_l1_pos * l1_granularity, l);
+ return res;
+ }
+ } else if (unlikely(length == l1_granularity)) {
+ search_ctx_t ctx;
+ _analyze_partials(pos_start, pos_end, length, min_length, STOP_ON_EMPTY, &ctx);
+
+ // allocate using contiguous extent found at l1 if any
+ if (ctx.free_count) {
+
+ auto l = std::min(length, ctx.free_count * l1_granularity);
+ ceph_assert((l % l0_granularity) == 0);
+ auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;
+
+ _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
+ res = interval_t(ctx.free_l1_pos * l1_granularity, l);
+
+ return res;
+ }
+
+    // _analyze_partials() can return early only when it finds a free l1
+    // entry, and that case was handled above, so the scan must be complete
+ ceph_assert(ctx.fully_processed);
+
+ // check partially free slot sets first (including neighboring),
+ // full length match required.
+ if (ctx.affordable_len) {
+ ceph_assert(ctx.affordable_len >= length);
+ ceph_assert((length % l0_granularity) == 0);
+ auto pos_start = ctx.affordable_offs / l0_granularity;
+ auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ res = interval_t(ctx.affordable_offs, length);
+ return res;
+ }
+ if (ctx.min_affordable_len) {
+ auto pos_start = ctx.min_affordable_offs / l0_granularity;
+ auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
+ }
+ } else {
+ search_ctx_t ctx;
+ _analyze_partials(pos_start, pos_end, length, min_length, NO_STOP, &ctx);
+ ceph_assert(ctx.fully_processed);
+ // check partially free slot sets first (including neighboring),
+ // full length match required.
+ if (ctx.affordable_len) {
+ ceph_assert(ctx.affordable_len >= length);
+ ceph_assert((length % l0_granularity) == 0);
+ auto pos_start = ctx.affordable_offs / l0_granularity;
+ auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ res = interval_t(ctx.affordable_offs, length);
+ return res;
+ }
+ // allocate using contiguous extent found at l1 if affordable
+ // align allocated extent with min_length
+ if (ctx.free_count) {
+ auto o = ctx.free_l1_pos * l1_granularity;
+ auto l = ctx.free_count * l1_granularity;
+ interval_t aligned_extent = _align2units(o, l, min_length);
+ if (aligned_extent.length > 0) {
+ aligned_extent.length = std::min(length,
+ uint64_t(aligned_extent.length));
+ ceph_assert((aligned_extent.offset % l0_granularity) == 0);
+ ceph_assert((aligned_extent.length % l0_granularity) == 0);
+
+ auto pos_start = aligned_extent.offset / l0_granularity;
+ auto pos_end = (aligned_extent.offset + aligned_extent.length) / l0_granularity;
+
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ return aligned_extent;
+ }
+ }
+ if (ctx.min_affordable_len) {
+ auto pos_start = ctx.min_affordable_offs / l0_granularity;
+ auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
+ }
+ }
+ return res;
+}
+
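+// Allocates up to `length` bytes from the l1 range [l1_pos_start,
+// l1_pos_end), appending the resulting fragments (each capped at
+// max_length) to *res and adding their total to *allocated. When
+// min_length == l0_granularity a fast path walks the l1 slots directly;
+// otherwise contiguous extents are claimed via _allocate_l1_contiguous
+// until the request is satisfied or free space runs out. Returns whether
+// the range is left with no free entries at all.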
+bool AllocatorLevel01Loose::_allocate_l1(uint64_t length,
+ uint64_t min_length, uint64_t max_length,
+ uint64_t l1_pos_start, uint64_t l1_pos_end,
+ uint64_t* allocated,
+ interval_vector_t* res)
+{
+ uint64_t d0 = L0_ENTRIES_PER_SLOT;
+ uint64_t d1 = L1_ENTRIES_PER_SLOT;
+
+ ceph_assert(0 == (l1_pos_start % (slots_per_slotset * d1)));
+ ceph_assert(0 == (l1_pos_end % (slots_per_slotset * d1)));
+ if (min_length != l0_granularity) {
+    // probably not the most efficient way, but
+    // we don't care much about that at the moment
+ bool has_space = true;
+ while (length > *allocated && has_space) {
+ interval_t i =
+ _allocate_l1_contiguous(length - *allocated, min_length, max_length,
+ l1_pos_start, l1_pos_end);
+ if (i.length == 0) {
+ has_space = false;
+ } else {
+ _fragment_and_emplace(max_length, i.offset, i.length, res);
+ *allocated += i.length;
+ }
+ }
+ } else {
+ uint64_t l0_w = slots_per_slotset * d0;
+
+ for (auto idx = l1_pos_start / d1;
+ idx < l1_pos_end / d1 && length > *allocated;
+ ++idx) {
+ slot_t& slot_val = l1[idx];
+ if (slot_val == all_slot_clear) {
+ continue;
+ } else if (slot_val == all_slot_set) {
+ uint64_t to_alloc = std::min(length - *allocated,
+ l1_granularity * d1);
+ *allocated += to_alloc;
+ ++alloc_fragments_fast;
+ _fragment_and_emplace(max_length, idx * d1 * l1_granularity, to_alloc,
+ res);
+ _mark_alloc_l1_l0(idx * d1 * bits_per_slotset,
+ idx * d1 * bits_per_slotset + to_alloc / l0_granularity);
+ continue;
+ }
+ auto free_pos = find_next_set_bit(slot_val, 0);
+ ceph_assert(free_pos < bits_per_slot);
+ do {
+ ceph_assert(length > *allocated);
+
+        bool empty = _allocate_l0(length, max_length,
+          (idx * d1 + free_pos / L1_ENTRY_WIDTH) * l0_w,
+          (idx * d1 + free_pos / L1_ENTRY_WIDTH + 1) * l0_w,
+          allocated,
+          res);
+
+ auto mask = slot_t(L1_ENTRY_MASK) << free_pos;
+
+ slot_t old_mask = (slot_val & mask) >> free_pos;
+ switch(old_mask) {
+ case L1_ENTRY_FREE:
+ unalloc_l1_count--;
+ break;
+ case L1_ENTRY_PARTIAL:
+ partial_l1_count--;
+ break;
+ }
+ slot_val &= ~mask;
+ if (empty) {
+ // the next line is no op with the current L1_ENTRY_FULL but left
+ // as-is for the sake of uniformity and to avoid potential errors
+ // in future
+ slot_val |= slot_t(L1_ENTRY_FULL) << free_pos;
+ } else {
+ slot_val |= slot_t(L1_ENTRY_PARTIAL) << free_pos;
+ partial_l1_count++;
+ }
+ if (length <= *allocated || slot_val == all_slot_clear) {
+ break;
+ }
+ free_pos = find_next_set_bit(slot_val, free_pos + L1_ENTRY_WIDTH);
+ } while (free_pos < bits_per_slot);
+ }
+ }
+ return _is_empty_l1(l1_pos_start, l1_pos_end);
+}
+
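+// Builds a histogram of free extent sizes over the l0 bitmap: every
+// contiguous run of n free entries increments bins_overall[cbits(n) - 1],
+// i.e. bins are indexed by the bit width of the run length.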
+void AllocatorLevel01Loose::collect_stats(
+ std::map<size_t, size_t>& bins_overall)
+{
+ size_t free_seq_cnt = 0;
+ for (auto slot : l0) {
+ if (slot == all_slot_set) {
+ free_seq_cnt += L0_ENTRIES_PER_SLOT;
+    } else if (slot != all_slot_clear) {
+ size_t pos = 0;
+ do {
+ auto pos1 = find_next_set_bit(slot, pos);
+ if (pos1 == pos) {
+ free_seq_cnt++;
+ pos = pos1 + 1;
+ } else {
+ if (free_seq_cnt) {
+ bins_overall[cbits(free_seq_cnt) - 1]++;
+ free_seq_cnt = 0;
+ }
+ if (pos1 < bits_per_slot) {
+ free_seq_cnt = 1;
+ }
+ pos = pos1 + 1;
+ }
+ } while (pos < bits_per_slot);
+ } else if (free_seq_cnt) {
+ bins_overall[cbits(free_seq_cnt) - 1]++;
+ free_seq_cnt = 0;
+ }
+ }
+ if (free_seq_cnt) {
+ bins_overall[cbits(free_seq_cnt) - 1]++;
+ }
+}
+
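+// Counts the consecutive clear (count_0s) or set (count_1s) bits in
+// slot_val starting at start_pos, up to the end of the slot.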
+inline ssize_t AllocatorLevel01Loose::count_0s(slot_t slot_val, size_t start_pos)
+{
+#ifdef __GNUC__
+  size_t pos = __builtin_ffsll(slot_val >> start_pos);
+  if (pos == 0)
+    return sizeof(slot_t) * 8 - start_pos;
+  return pos - 1;
+#else
+  size_t pos = start_pos;
+  slot_t mask = slot_t(1) << pos;
+  while (pos < bits_per_slot && (slot_val & mask) == 0) {
+    mask <<= 1;
+    pos++;
+  }
+  return pos - start_pos;
+#endif
+}
+
+inline ssize_t AllocatorLevel01Loose::count_1s(slot_t slot_val, size_t start_pos)
+{
+  return count_0s(~slot_val, start_pos);
+}
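+
+// Invokes notify(offset, length) for every contiguous free extent, walking
+// the l1 entries and descending into l0 only for partial slotsets. Offsets
+// and lengths are expressed in l0 entry units, not bytes.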
+void AllocatorLevel01Loose::foreach_internal(
+ std::function<void(uint64_t offset, uint64_t length)> notify)
+{
+ size_t len = 0;
+ size_t off = 0;
+  for (size_t i = 0; i < l1.size(); i++) {
+    for (size_t j = 0; j < L1_ENTRIES_PER_SLOT * L1_ENTRY_WIDTH; j += L1_ENTRY_WIDTH) {
+      size_t w = (l1[i] >> j) & L1_ENTRY_MASK;
+ switch (w) {
+ case L1_ENTRY_FULL:
+ if (len > 0) {
+ notify(off, len);
+ len = 0;
+ }
+ break;
+ case L1_ENTRY_FREE:
+ if (len == 0)
+          off = ((bits_per_slot * i + j) / L1_ENTRY_WIDTH) * slots_per_slotset * bits_per_slot;
+ len += bits_per_slotset;
+ break;
+ case L1_ENTRY_PARTIAL:
+        size_t pos = ((bits_per_slot * i + j) / L1_ENTRY_WIDTH) * slots_per_slotset;
+ for (size_t t = 0; t < slots_per_slotset; t++) {
+ size_t p = 0;
+ slot_t allocation_pattern = l0[pos + t];
+ while (p < bits_per_slot) {
+ if (len == 0) {
+              // skip over allocated space, i.e. bits set to 0
+ ssize_t alloc_count = count_0s(allocation_pattern, p);
+ p += alloc_count;
+              // now we switch to expecting free space
+ if (p < bits_per_slot) {
+                // bits at @p are now 1s
+ ssize_t free_count = count_1s(allocation_pattern, p);
+                ceph_assert(free_count > 0);
+ len = free_count;
+ off = (pos + t) * bits_per_slot + p;
+ p += free_count;
+ }
+ } else {
+              // extend the current free region
+ ssize_t free_count = count_1s(allocation_pattern, p);
+ if (free_count == 0) {
+ notify(off, len);
+ len = 0;
+ } else {
+ p += free_count;
+ len += free_count;
+ }
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+ if (len > 0)
+ notify(off, len);
+}
+
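+// Claims (marks as allocated) the free l0 entries immediately to the left
+// of l0_pos_start, stopping at the first allocated entry. Returns the new
+// left boundary: the lowest claimed position, or l0_pos_start itself if the
+// neighboring entry is already allocated.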
+uint64_t AllocatorLevel01Loose::_claim_free_to_left_l0(int64_t l0_pos_start)
+{
+ int64_t d0 = L0_ENTRIES_PER_SLOT;
+
+ int64_t pos = l0_pos_start - 1;
+ slot_t bits = (slot_t)1 << (pos % d0);
+ int64_t idx = pos / d0;
+ slot_t* val_s = l0.data() + idx;
+
+ int64_t pos_e = p2align<int64_t>(pos, d0);
+
+ while (pos >= pos_e) {
+ if (0 == ((*val_s) & bits))
+ return pos + 1;
+ (*val_s) &= ~bits;
+ bits >>= 1;
+ --pos;
+ }
+ --idx;
+ val_s = l0.data() + idx;
+ while (idx >= 0 && (*val_s) == all_slot_set) {
+ *val_s = all_slot_clear;
+ --idx;
+ pos -= d0;
+ val_s = l0.data() + idx;
+ }
+
+ if (idx >= 0 &&
+ (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
+ int64_t pos_e = p2align<int64_t>(pos, d0);
+ slot_t bits = (slot_t)1 << (pos % d0);
+ while (pos >= pos_e) {
+ if (0 == ((*val_s) & bits))
+ return pos + 1;
+ (*val_s) &= ~bits;
+ bits >>= 1;
+ --pos;
+ }
+ }
+ return pos + 1;
+}
+
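+// Mirror of _claim_free_to_left_l0: claims the free l0 entries starting at
+// l0_pos_start and moving right, stopping at the first allocated entry or
+// the end of the map. Returns one past the last claimed position.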
+uint64_t AllocatorLevel01Loose::_claim_free_to_right_l0(int64_t l0_pos_start)
+{
+ auto d0 = L0_ENTRIES_PER_SLOT;
+
+ int64_t pos = l0_pos_start;
+ slot_t bits = (slot_t)1 << (pos % d0);
+ size_t idx = pos / d0;
+ if (idx >= l0.size()) {
+ return pos;
+ }
+ slot_t* val_s = l0.data() + idx;
+
+ int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);
+
+ while (pos < pos_e) {
+ if (0 == ((*val_s) & bits))
+ return pos;
+ (*val_s) &= ~bits;
+ bits <<= 1;
+ ++pos;
+ }
+ ++idx;
+ val_s = l0.data() + idx;
+ while (idx < l0.size() && (*val_s) == all_slot_set) {
+ *val_s = all_slot_clear;
+ ++idx;
+ pos += d0;
+ val_s = l0.data() + idx;
+ }
+
+ if (idx < l0.size() &&
+ (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
+ int64_t pos_e = p2roundup<int64_t>(pos + 1, d0);
+ slot_t bits = (slot_t)1 << (pos % d0);
+ while (pos < pos_e) {
+ if (0 == ((*val_s) & bits))
+ return pos;
+ (*val_s) &= ~bits;
+ bits <<= 1;
+ ++pos;
+ }
+ }
+ return pos;
+}