summaryrefslogtreecommitdiffstats
path: root/src/include/interval_set.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/interval_set.h')
-rw-r--r--src/include/interval_set.h824
1 files changed, 824 insertions, 0 deletions
diff --git a/src/include/interval_set.h b/src/include/interval_set.h
new file mode 100644
index 000000000..dfb2a306c
--- /dev/null
+++ b/src/include/interval_set.h
@@ -0,0 +1,824 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef CEPH_INTERVAL_SET_H
+#define CEPH_INTERVAL_SET_H
+
+#include <iterator>
+#include <map>
+#include <ostream>
+
+#include "encoding.h"
+
+/*
+ * *** NOTE ***
+ *
+ * This class is written to work with a variety of map-like containers,
+ * *include* ones that invalidate iterators when they are modified (e.g.,
+ * flat_map and btree_map).
+ */
+
+template<typename T, template<typename, typename, typename ...> class C = std::map>
+class interval_set {
+ public:
+ using Map = C<T, T>;
+ using value_type = typename Map::value_type;
+ using offset_type = T;
+ using length_type = T;
+ using reference = value_type&;
+ using const_reference = const value_type&;
+ using size_type = typename Map::size_type;
+
+ class const_iterator;
+
+ class iterator
+ {
+ public:
+ using difference_type = ssize_t;
+ using value_type = typename Map::value_type;
+ using pointer = typename Map::value_type*;
+ using reference = typename Map::value_type&;
+ using iterator_category = std::forward_iterator_tag;
+
+ explicit iterator(typename Map::iterator iter)
+ : _iter(iter)
+ { }
+
+ // For the copy constructor and assignment operator, the compiler-generated functions, which
+ // perform simple bitwise copying, should be fine.
+
+ bool operator==(const iterator& rhs) const {
+ return (_iter == rhs._iter);
+ }
+
+ bool operator!=(const iterator& rhs) const {
+ return (_iter != rhs._iter);
+ }
+
+ // Dereference this iterator to get a pair.
+ reference operator*() const {
+ return *_iter;
+ }
+
+ // Return the interval start.
+ offset_type get_start() const {
+ return _iter->first;
+ }
+
+ // Return the interval length.
+ length_type get_len() const {
+ return _iter->second;
+ }
+
+ offset_type get_end() const {
+ return _iter->first + _iter->second;
+ }
+
+ // Set the interval length.
+ void set_len(const length_type& len) {
+ _iter->second = len;
+ }
+
+ // Preincrement
+ iterator& operator++()
+ {
+ ++_iter;
+ return *this;
+ }
+
+ // Postincrement
+ iterator operator++(int)
+ {
+ iterator prev(_iter);
+ ++_iter;
+ return prev;
+ }
+
+ // Predecrement
+ iterator& operator--()
+ {
+ --_iter;
+ return *this;
+ }
+
+ // Postdecrement
+ iterator operator--(int)
+ {
+ iterator prev(_iter);
+ --_iter;
+ return prev;
+ }
+
+ friend class interval_set::const_iterator;
+
+ protected:
+ typename Map::iterator _iter;
+ friend class interval_set;
+ };
+
+ class const_iterator
+ {
+ public:
+ using difference_type = ssize_t;
+ using value_type = const typename Map::value_type;
+ using pointer = const typename Map::value_type*;
+ using reference = const typename Map::value_type&;
+ using iterator_category = std::forward_iterator_tag;
+
+ explicit const_iterator(typename Map::const_iterator iter)
+ : _iter(iter)
+ { }
+
+ const_iterator(const iterator &i)
+ : _iter(i._iter)
+ { }
+
+ // For the copy constructor and assignment operator, the compiler-generated functions, which
+ // perform simple bitwise copying, should be fine.
+
+ bool operator==(const const_iterator& rhs) const {
+ return (_iter == rhs._iter);
+ }
+
+ bool operator!=(const const_iterator& rhs) const {
+ return (_iter != rhs._iter);
+ }
+
+ // Dereference this iterator to get a pair.
+ reference operator*() const {
+ return *_iter;
+ }
+
+ // Return the interval start.
+ offset_type get_start() const {
+ return _iter->first;
+ }
+ offset_type get_end() const {
+ return _iter->first + _iter->second;
+ }
+
+ // Return the interval length.
+ length_type get_len() const {
+ return _iter->second;
+ }
+
+ // Preincrement
+ const_iterator& operator++()
+ {
+ ++_iter;
+ return *this;
+ }
+
+ // Postincrement
+ const_iterator operator++(int)
+ {
+ const_iterator prev(_iter);
+ ++_iter;
+ return prev;
+ }
+
+ // Predecrement
+ iterator& operator--()
+ {
+ --_iter;
+ return *this;
+ }
+
+ // Postdecrement
+ iterator operator--(int)
+ {
+ iterator prev(_iter);
+ --_iter;
+ return prev;
+ }
+
+ protected:
+ typename Map::const_iterator _iter;
+ };
+
+ interval_set() = default;
+ interval_set(Map&& other) {
+ m.swap(other);
+ for (const auto& p : m) {
+ _size += p.second;
+ }
+ }
+
+ size_type num_intervals() const
+ {
+ return m.size();
+ }
+
+ iterator begin() {
+ return iterator(m.begin());
+ }
+
+ iterator lower_bound(T start) {
+ return iterator(find_inc_m(start));
+ }
+
+ iterator end() {
+ return iterator(m.end());
+ }
+
+ const_iterator begin() const {
+ return const_iterator(m.begin());
+ }
+
+ const_iterator lower_bound(T start) const {
+ return const_iterator(find_inc(start));
+ }
+
+ const_iterator end() const {
+ return const_iterator(m.end());
+ }
+
+ // helpers
+ private:
+ auto find_inc(T start) const {
+ auto p = m.lower_bound(start); // p->first >= start
+ if (p != m.begin() &&
+ (p == m.end() || p->first > start)) {
+ --p; // might overlap?
+ if (p->first + p->second <= start)
+ ++p; // it doesn't.
+ }
+ return p;
+ }
+
+ auto find_inc_m(T start) {
+ auto p = m.lower_bound(start);
+ if (p != m.begin() &&
+ (p == m.end() || p->first > start)) {
+ --p; // might overlap?
+ if (p->first + p->second <= start)
+ ++p; // it doesn't.
+ }
+ return p;
+ }
+
+ auto find_adj(T start) const {
+ auto p = m.lower_bound(start);
+ if (p != m.begin() &&
+ (p == m.end() || p->first > start)) {
+ --p; // might touch?
+ if (p->first + p->second < start)
+ ++p; // it doesn't.
+ }
+ return p;
+ }
+
+ auto find_adj_m(T start) {
+ auto p = m.lower_bound(start);
+ if (p != m.begin() &&
+ (p == m.end() || p->first > start)) {
+ --p; // might touch?
+ if (p->first + p->second < start)
+ ++p; // it doesn't.
+ }
+ return p;
+ }
+
+ void intersection_size_asym(const interval_set &s, const interval_set &l) {
+ auto ps = s.m.begin();
+ ceph_assert(ps != s.m.end());
+ auto offset = ps->first;
+ bool first = true;
+ auto mi = m.begin();
+
+ while (1) {
+ if (first)
+ first = false;
+ auto pl = l.find_inc(offset);
+ if (pl == l.m.end())
+ break;
+ while (ps != s.m.end() && ps->first + ps->second <= pl->first)
+ ++ps;
+ if (ps == s.m.end())
+ break;
+ offset = pl->first + pl->second;
+ if (offset <= ps->first) {
+ offset = ps->first;
+ continue;
+ }
+
+ if (*ps == *pl) {
+ do {
+ mi = m.insert(mi, *ps);
+ _size += ps->second;
+ ++ps;
+ ++pl;
+ } while (ps != s.m.end() && pl != l.m.end() && *ps == *pl);
+ if (ps == s.m.end())
+ break;
+ offset = ps->first;
+ continue;
+ }
+
+ auto start = std::max<T>(ps->first, pl->first);
+ auto en = std::min<T>(ps->first + ps->second, offset);
+ ceph_assert(en > start);
+ mi = m.emplace_hint(mi, start, en - start);
+ _size += mi->second;
+ if (ps->first + ps->second <= offset) {
+ ++ps;
+ if (ps == s.m.end())
+ break;
+ offset = ps->first;
+ }
+ }
+ }
+
+ bool subset_size_sym(const interval_set &b) const {
+ auto pa = m.begin(), pb = b.m.begin();
+ const auto a_end = m.end(), b_end = b.m.end();
+
+ while (pa != a_end && pb != b_end) {
+ while (pb->first + pb->second <= pa->first) {
+ ++pb;
+ if (pb == b_end)
+ return false;
+ }
+
+ if (*pa == *pb) {
+ do {
+ ++pa;
+ ++pb;
+ } while (pa != a_end && pb != b_end && *pa == *pb);
+ continue;
+ }
+
+ // interval begins before other
+ if (pa->first < pb->first)
+ return false;
+ // interval is longer than other
+ if (pa->first + pa->second > pb->first + pb->second)
+ return false;
+
+ ++pa;
+ }
+
+ return pa == a_end;
+ }
+
+ public:
+ bool operator==(const interval_set& other) const {
+ return _size == other._size && m == other.m;
+ }
+
+ uint64_t size() const {
+ return _size;
+ }
+
+ void bound_encode(size_t& p) const {
+ denc_traits<Map>::bound_encode(m, p);
+ }
+ void encode(ceph::buffer::list::contiguous_appender& p) const {
+ denc(m, p);
+ }
+ void decode(ceph::buffer::ptr::const_iterator& p) {
+ denc(m, p);
+ _size = 0;
+ for (const auto& p : m) {
+ _size += p.second;
+ }
+ }
+ void decode(ceph::buffer::list::iterator& p) {
+ denc(m, p);
+ _size = 0;
+ for (const auto& p : m) {
+ _size += p.second;
+ }
+ }
+
+ void encode_nohead(ceph::buffer::list::contiguous_appender& p) const {
+ denc_traits<Map>::encode_nohead(m, p);
+ }
+ void decode_nohead(int n, ceph::buffer::ptr::const_iterator& p) {
+ denc_traits<Map>::decode_nohead(n, m, p);
+ _size = 0;
+ for (const auto& p : m) {
+ _size += p.second;
+ }
+ }
+
+ void clear() {
+ m.clear();
+ _size = 0;
+ }
+
+ bool contains(T i, T *pstart=0, T *plen=0) const {
+ auto p = find_inc(i);
+ if (p == m.end()) return false;
+ if (p->first > i) return false;
+ if (p->first+p->second <= i) return false;
+ ceph_assert(p->first <= i && p->first+p->second > i);
+ if (pstart)
+ *pstart = p->first;
+ if (plen)
+ *plen = p->second;
+ return true;
+ }
+ bool contains(T start, T len) const {
+ auto p = find_inc(start);
+ if (p == m.end()) return false;
+ if (p->first > start) return false;
+ if (p->first+p->second <= start) return false;
+ ceph_assert(p->first <= start && p->first+p->second > start);
+ if (p->first+p->second < start+len) return false;
+ return true;
+ }
+ bool intersects(T start, T len) const {
+ interval_set a;
+ a.insert(start, len);
+ interval_set i;
+ i.intersection_of( *this, a );
+ if (i.empty()) return false;
+ return true;
+ }
+
+ // outer range of set
+ bool empty() const {
+ return m.empty();
+ }
+ offset_type range_start() const {
+ ceph_assert(!empty());
+ auto p = m.begin();
+ return p->first;
+ }
+ offset_type range_end() const {
+ ceph_assert(!empty());
+ auto p = m.rbegin();
+ return p->first + p->second;
+ }
+
+ // interval start after p (where p not in set)
+ bool starts_after(T i) const {
+ ceph_assert(!contains(i));
+ auto p = find_inc(i);
+ if (p == m.end()) return false;
+ return true;
+ }
+ offset_type start_after(T i) const {
+ ceph_assert(!contains(i));
+ auto p = find_inc(i);
+ return p->first;
+ }
+
+ // interval end that contains start
+ offset_type end_after(T start) const {
+ ceph_assert(contains(start));
+ auto p = find_inc(start);
+ return p->first+p->second;
+ }
+
+ void insert(T val) {
+ insert(val, 1);
+ }
+
+ void insert(T start, T len, T *pstart=0, T *plen=0) {
+ //cout << "insert " << start << "~" << len << endl;
+ ceph_assert(len > 0);
+ _size += len;
+ auto p = find_adj_m(start);
+ if (p == m.end()) {
+ m[start] = len; // new interval
+ if (pstart)
+ *pstart = start;
+ if (plen)
+ *plen = len;
+ } else {
+ if (p->first < start) {
+
+ if (p->first + p->second != start) {
+ //cout << "p is " << p->first << "~" << p->second << ", start is " << start << ", len is " << len << endl;
+ ceph_abort();
+ }
+
+ p->second += len; // append to end
+
+ auto n = p;
+ ++n;
+ if (pstart)
+ *pstart = p->first;
+ if (n != m.end() &&
+ start+len == n->first) { // combine with next, too!
+ p->second += n->second;
+ if (plen)
+ *plen = p->second;
+ m.erase(n);
+ } else {
+ if (plen)
+ *plen = p->second;
+ }
+ } else {
+ if (start+len == p->first) {
+ if (pstart)
+ *pstart = start;
+ if (plen)
+ *plen = len + p->second;
+ T psecond = p->second;
+ m.erase(p);
+ m[start] = len + psecond; // append to front
+ } else {
+ ceph_assert(p->first > start+len);
+ if (pstart)
+ *pstart = start;
+ if (plen)
+ *plen = len;
+ m[start] = len; // new interval
+ }
+ }
+ }
+ }
+
+ void swap(interval_set& other) {
+ m.swap(other.m);
+ std::swap(_size, other._size);
+ }
+
+ void erase(const iterator &i) {
+ _size -= i.get_len();
+ m.erase(i._iter);
+ }
+
+ void erase(T val) {
+ erase(val, 1);
+ }
+
+ void erase(T start, T len,
+ std::function<bool(T, T)> claim = {}) {
+ auto p = find_inc_m(start);
+
+ _size -= len;
+
+ ceph_assert(p != m.end());
+ ceph_assert(p->first <= start);
+
+ T before = start - p->first;
+ ceph_assert(p->second >= before+len);
+ T after = p->second - before - len;
+ if (before) {
+ if (claim && claim(p->first, before)) {
+ _size -= before;
+ m.erase(p);
+ } else {
+ p->second = before; // shorten bit before
+ }
+ } else {
+ m.erase(p);
+ }
+ if (after) {
+ if (claim && claim(start + len, after)) {
+ _size -= after;
+ } else {
+ m[start + len] = after;
+ }
+ }
+ }
+
+ void subtract(const interval_set &a) {
+ for (const auto& [start, len] : a.m) {
+ erase(start, len);
+ }
+ }
+
+ void insert(const interval_set &a) {
+ for (const auto& [start, len] : a.m) {
+ insert(start, len);
+ }
+ }
+
+
+ void intersection_of(const interval_set &a, const interval_set &b) {
+ ceph_assert(&a != this);
+ ceph_assert(&b != this);
+ clear();
+
+ const interval_set *s, *l;
+
+ if (a.size() < b.size()) {
+ s = &a;
+ l = &b;
+ } else {
+ s = &b;
+ l = &a;
+ }
+
+ if (!s->size())
+ return;
+
+ /*
+ * Use the lower_bound algorithm for larger size ratios
+ * where it performs better, but not for smaller size
+ * ratios where sequential search performs better.
+ */
+ if (l->size() / s->size() >= 10) {
+ intersection_size_asym(*s, *l);
+ return;
+ }
+
+ auto pa = a.m.begin();
+ auto pb = b.m.begin();
+ auto mi = m.begin();
+
+ while (pa != a.m.end() && pb != b.m.end()) {
+ // passing?
+ if (pa->first + pa->second <= pb->first)
+ { pa++; continue; }
+ if (pb->first + pb->second <= pa->first)
+ { pb++; continue; }
+
+ if (*pa == *pb) {
+ do {
+ mi = m.insert(mi, *pa);
+ _size += pa->second;
+ ++pa;
+ ++pb;
+ } while (pa != a.m.end() && pb != b.m.end() && *pa == *pb);
+ continue;
+ }
+
+ T start = std::max(pa->first, pb->first);
+ T en = std::min(pa->first+pa->second, pb->first+pb->second);
+ ceph_assert(en > start);
+ mi = m.emplace_hint(mi, start, en - start);
+ _size += mi->second;
+ if (pa->first+pa->second > pb->first+pb->second)
+ pb++;
+ else
+ pa++;
+ }
+ }
+ void intersection_of(const interval_set& b) {
+ interval_set a;
+ swap(a);
+ intersection_of(a, b);
+ }
+
+ void union_of(const interval_set &a, const interval_set &b) {
+ ceph_assert(&a != this);
+ ceph_assert(&b != this);
+ clear();
+
+ //cout << "union_of" << endl;
+
+ // a
+ m = a.m;
+ _size = a._size;
+
+ // - (a*b)
+ interval_set ab;
+ ab.intersection_of(a, b);
+ subtract(ab);
+
+ // + b
+ insert(b);
+ return;
+ }
+ void union_of(const interval_set &b) {
+ interval_set a;
+ swap(a);
+ union_of(a, b);
+ }
+ void union_insert(T off, T len) {
+ interval_set a;
+ a.insert(off, len);
+ union_of(a);
+ }
+
+ bool subset_of(const interval_set &big) const {
+ if (!size())
+ return true;
+ if (size() > big.size())
+ return false;
+ if (range_end() > big.range_end())
+ return false;
+
+ /*
+ * Use the lower_bound algorithm for larger size ratios
+ * where it performs better, but not for smaller size
+ * ratios where sequential search performs better.
+ */
+ if (big.size() / size() < 10)
+ return subset_size_sym(big);
+
+ for (const auto& [start, len] : m) {
+ if (!big.contains(start, len)) return false;
+ }
+ return true;
+ }
+
+ /*
+ * build a subset of @other, starting at or after @start, and including
+ * @len worth of values, skipping holes. e.g.,
+ * span_of([5~10,20~5], 8, 5) -> [8~2,20~3]
+ */
+ void span_of(const interval_set &other, T start, T len) {
+ clear();
+ auto p = other.find_inc(start);
+ if (p == other.m.end())
+ return;
+ if (p->first < start) {
+ if (p->first + p->second < start)
+ return;
+ if (p->first + p->second < start + len) {
+ T howmuch = p->second - (start - p->first);
+ insert(start, howmuch);
+ len -= howmuch;
+ p++;
+ } else {
+ insert(start, len);
+ return;
+ }
+ }
+ while (p != other.m.end() && len > 0) {
+ if (p->second < len) {
+ insert(p->first, p->second);
+ len -= p->second;
+ p++;
+ } else {
+ insert(p->first, len);
+ return;
+ }
+ }
+ }
+
+ /*
+ * Move contents of m into another Map. Use that instead of
+ * encoding interval_set into bufferlist then decoding it back into Map.
+ */
+ Map detach() && {
+ return std::move(m);
+ }
+
+private:
+ // data
+ uint64_t _size = 0;
+ Map m; // map start -> len
+};
+
+// declare traits explicitly because (1) it's templatized, and (2) we
+// want to include _nohead variants.
+template<typename T, template<typename, typename, typename ...> class C>
+struct denc_traits<interval_set<T, C>> {
+private:
+ using container_t = interval_set<T, C>;
+public:
+ static constexpr bool supported = true;
+ static constexpr bool bounded = false;
+ static constexpr bool featured = false;
+ static constexpr bool need_contiguous = denc_traits<T, C<T,T>>::need_contiguous;
+ static void bound_encode(const container_t& v, size_t& p) {
+ v.bound_encode(p);
+ }
+ static void encode(const container_t& v,
+ ceph::buffer::list::contiguous_appender& p) {
+ v.encode(p);
+ }
+ static void decode(container_t& v, ceph::buffer::ptr::const_iterator& p) {
+ v.decode(p);
+ }
+ template<typename U=T>
+ static typename std::enable_if<sizeof(U) && !need_contiguous>::type
+ decode(container_t& v, ceph::buffer::list::iterator& p) {
+ v.decode(p);
+ }
+ static void encode_nohead(const container_t& v,
+ ceph::buffer::list::contiguous_appender& p) {
+ v.encode_nohead(p);
+ }
+ static void decode_nohead(size_t n, container_t& v,
+ ceph::buffer::ptr::const_iterator& p) {
+ v.decode_nohead(n, p);
+ }
+};
+
+
+template<typename T, template<typename, typename, typename ...> class C>
+inline std::ostream& operator<<(std::ostream& out, const interval_set<T,C> &s) {
+ out << "[";
+ bool first = true;
+ for (const auto& [start, len] : s) {
+ if (!first) out << ",";
+ out << start << "~" << len;
+ first = false;
+ }
+ out << "]";
+ return out;
+}
+
+
+#endif