Diffstat (limited to 'src/test/osd')
27 files changed, 18024 insertions, 0 deletions
diff --git a/src/test/osd/CMakeLists.txt b/src/test/osd/CMakeLists.txt
new file mode 100644
index 000000000..c9216c42d
--- /dev/null
+++ b/src/test/osd/CMakeLists.txt
@@ -0,0 +1,141 @@
+# test_rados
+add_executable(ceph_test_rados
+  TestRados.cc
+  TestOpStat.cc
+  Object.cc
+  RadosModel.cc
+  )
+target_link_libraries(ceph_test_rados
+  librados
+  global
+  ${BLKID_LIBRARIES}
+  ${CMAKE_DL_LIBS}
+  ${EXTRALIBS}
+  ${CMAKE_DL_LIBS}
+  cls_cas_internal
+  cls_cas_client
+  )
+install(TARGETS
+  ceph_test_rados
+  DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# test_stale_read
+add_executable(ceph_test_osd_stale_read
+  ceph_test_osd_stale_read.cc
+  )
+target_link_libraries(ceph_test_osd_stale_read
+  librados
+  global
+  ${CMAKE_DL_LIBS}
+  ${EXTRALIBS}
+  ${CMAKE_DL_LIBS}
+  ${UNITTEST_LIBS}
+  )
+install(TARGETS
+  ceph_test_osd_stale_read
+  DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# scripts
+add_ceph_test(safe-to-destroy.sh ${CMAKE_CURRENT_SOURCE_DIR}/safe-to-destroy.sh)
+
+# unittest_osdmap
+add_executable(unittest_osdmap
+  TestOSDMap.cc
+  )
+add_ceph_unittest(unittest_osdmap)
+target_link_libraries(unittest_osdmap global ${BLKID_LIBRARIES})
+
+# unittest_osd_types
+add_executable(unittest_osd_types
+  types.cc
+  )
+add_ceph_unittest(unittest_osd_types)
+target_link_libraries(unittest_osd_types global)
+
+# unittest_ecbackend
+add_executable(unittest_ecbackend
+  TestECBackend.cc
+  )
+add_ceph_unittest(unittest_ecbackend)
+target_link_libraries(unittest_ecbackend osd global)
+
+# unittest_osdscrub
+add_executable(unittest_osdscrub
+  TestOSDScrub.cc
+  $<TARGET_OBJECTS:unit-main>
+  )
+add_ceph_unittest(unittest_osdscrub)
+target_link_libraries(unittest_osdscrub osd os global ${CMAKE_DL_LIBS} mon ${BLKID_LIBRARIES})
+
+# unittest_scrubber_be
+add_executable(unittest_scrubber_be
+  test_scrubber_be.cc
+  scrubber_generators.cc
+  scrubber_test_datasets.cc
+  )
+add_ceph_unittest(unittest_scrubber_be)
+target_link_libraries(unittest_scrubber_be osd os global ${CMAKE_DL_LIBS} mon ${BLKID_LIBRARIES})
+
+# unittest_scrub_sched
+add_executable(unittest_scrub_sched
+  test_scrub_sched.cc
+  )
+add_ceph_unittest(unittest_scrub_sched)
+target_link_libraries(unittest_scrub_sched osd os global ${CMAKE_DL_LIBS} mon ${BLKID_LIBRARIES})
+
+# unittest_pglog
+add_executable(unittest_pglog
+  TestPGLog.cc
+  $<TARGET_OBJECTS:unit-main>
+  $<TARGET_OBJECTS:store_test_fixture>
+  )
+add_ceph_unittest(unittest_pglog)
+target_link_libraries(unittest_pglog osd os global ${CMAKE_DL_LIBS} ${BLKID_LIBRARIES})
+
+# unittest_hitset
+add_executable(unittest_hitset
+  hitset.cc
+  )
+add_ceph_unittest(unittest_hitset)
+target_link_libraries(unittest_hitset osd global ${BLKID_LIBRARIES})
+
+# unittest_osd_osdcap
+add_executable(unittest_osd_osdcap
+  osdcap.cc
+)
+if(HAS_VTA)
+  set_source_files_properties(osdcap.cc PROPERTIES
+    COMPILE_FLAGS -fno-var-tracking-assignments)
+endif()
+add_ceph_unittest(unittest_osd_osdcap)
+target_link_libraries(unittest_osd_osdcap osd global ${BLKID_LIBRARIES})
+
+# unittest ExtentCache
+add_executable(unittest_extent_cache
+  test_extent_cache.cc
+)
+add_ceph_unittest(unittest_extent_cache)
+target_link_libraries(unittest_extent_cache osd global ${BLKID_LIBRARIES})
+
+# unittest PGTransaction
+add_executable(unittest_pg_transaction
+  test_pg_transaction.cc
+)
+add_ceph_unittest(unittest_pg_transaction)
+target_link_libraries(unittest_pg_transaction osd global ${BLKID_LIBRARIES})
+
+# unittest ECTransaction
+add_executable(unittest_ec_transaction
+  test_ec_transaction.cc
+)
+add_ceph_unittest(unittest_ec_transaction)
+target_link_libraries(unittest_ec_transaction osd global ${BLKID_LIBRARIES})
+
+# unittest_mclock_scheduler
+add_executable(unittest_mclock_scheduler
+  TestMClockScheduler.cc
+)
+add_ceph_unittest(unittest_mclock_scheduler)
+target_link_libraries(unittest_mclock_scheduler
+  global osd dmclock os
+)
diff --git a/src/test/osd/Object.cc b/src/test/osd/Object.cc
new file mode 100644
index 000000000..9d914abd7
--- /dev/null
+++ b/src/test/osd/Object.cc
@@ -0,0 +1,200 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "include/interval_set.h"
+#include "include/buffer.h"
+#include <list>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "Object.h"
+
+void ContDesc::encode(bufferlist &bl) const
+{
+  ENCODE_START(1, 1, bl);
+  encode(objnum, bl);
+  encode(cursnap, bl);
+  encode(seqnum, bl);
+  encode(prefix, bl);
+  encode(oid, bl);
+  ENCODE_FINISH(bl);
+}
+
+void ContDesc::decode(bufferlist::const_iterator &bl)
+{
+  DECODE_START(1, bl);
+  decode(objnum, bl);
+  decode(cursnap, bl);
+  decode(seqnum, bl);
+  decode(prefix, bl);
+  decode(oid, bl);
+  DECODE_FINISH(bl);
+}
+
+std::ostream &operator<<(std::ostream &out, const ContDesc &rhs)
+{
+  return out << "(ObjNum " << rhs.objnum
+             << " snap " << rhs.cursnap
+             << " seq_num " << rhs.seqnum
+             << ")";
+}
+
+void AppendGenerator::get_ranges_map(
+  const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
+  RandWrap rand(cont.seqnum);
+  uint64_t pos = off;
+  uint64_t limit = off + get_append_size(cont);
+  while (pos < limit) {
+    uint64_t segment_length = round_up(
+      rand() % (max_append_size - min_append_size),
+      alignment) + min_append_size;
+    ceph_assert(segment_length >= min_append_size);
+    if (segment_length + pos > limit) {
+      segment_length = limit - pos;
+    }
+    if (alignment)
+      ceph_assert(segment_length % alignment == 0);
+    out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length));
+    pos += segment_length;
+  }
+}
+
+void VarLenGenerator::get_ranges_map(
+  const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
+  RandWrap rand(cont.seqnum);
+  uint64_t pos = 0;
+  uint64_t limit = get_length(cont);
+  bool include = false;
+  while (pos < limit) {
+    uint64_t segment_length = (rand() % (max_stride_size - min_stride_size)) + min_stride_size;
+    ceph_assert(segment_length < max_stride_size);
+    ceph_assert(segment_length >= min_stride_size);
+    if (segment_length + pos > limit) {
+      segment_length = limit - pos;
+    }
+    if (include) {
+      out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length));
+      include = false;
+    } else {
+      include = true;
+    }
+    pos += segment_length;
+  }
+}
+
+void ObjectDesc::iterator::adjust_stack() {
+  while (!stack.empty() && pos >= stack.top().second.next) {
+    ceph_assert(pos == stack.top().second.next);
+    size = stack.top().second.size;
+    current = stack.top().first;
+    stack.pop();
+  }
+
+  if (stack.empty()) {
+    cur_valid_till = std::numeric_limits<uint64_t>::max();
+  } else {
+    cur_valid_till = stack.top().second.next;
+  }
+
+  while (current != layers.end() && !current->covers(pos)) {
+    uint64_t next = current->next(pos);
+    if (next < cur_valid_till) {
+      stack.emplace(current, StackState{next, size});
+      cur_valid_till = next;
+    }
+
+    ++current;
+  }
+
+  if (current == layers.end()) {
+    size = 0;
+  } else {
+    current->iter.seek(pos);
+    size = std::min(size, current->get_size());
+    cur_valid_till = std::min(
+      current->valid_till(pos),
+      cur_valid_till);
+  }
+}
+
+const ContDesc &ObjectDesc::most_recent() {
+  return layers.begin()->second;
+}
+
+void ObjectDesc::update(ContentsGenerator *gen, const ContDesc &next) {
+  layers.push_front(std::pair<std::shared_ptr<ContentsGenerator>, ContDesc>(std::shared_ptr<ContentsGenerator>(gen), next));
+  return;
+}
+
+bool ObjectDesc::check(bufferlist &to_check) {
+  iterator objiter = begin();
+  uint64_t error_at = 0;
+  if (!objiter.check_bl_advance(to_check, &error_at)) {
+    std::cout << "incorrect buffer at pos " << error_at << std::endl;
+    return false;
+  }
+
+  uint64_t size = layers.begin()->first->get_length(layers.begin()->second);
+  if (to_check.length() < size) {
+    std::cout << "only read " << to_check.length()
+              << " out of size " << size << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool ObjectDesc::check_sparse(const std::map<uint64_t, uint64_t>& extents,
+                              bufferlist &to_check)
+{
+  uint64_t off = 0;
+  uint64_t pos = 0;
+  auto objiter = begin();
+  for (auto &&extiter : extents) {
+    // verify hole
+    {
+      bufferlist bl;
+      bl.append_zero(extiter.first - pos);
+      uint64_t error_at = 0;
+      if (!objiter.check_bl_advance(bl, &error_at)) {
+        std::cout << "sparse read omitted non-zero data at "
+                  << error_at << std::endl;
+        return false;
+      }
+    }
+
+    ceph_assert(off <= to_check.length());
+    pos = extiter.first;
+    objiter.seek(pos);
+
+    {
+      bufferlist bl;
+      bl.substr_of(
+        to_check,
+        off,
+        std::min(to_check.length() - off, extiter.second));
+      uint64_t error_at = 0;
+      if (!objiter.check_bl_advance(bl, &error_at)) {
+        std::cout << "incorrect buffer at pos " << error_at << std::endl;
+        return false;
+      }
+      off += extiter.second;
+      pos += extiter.second;
+    }
+
+    if (pos < extiter.first + extiter.second) {
+      std::cout << "reached end of iterator first" << std::endl;
+      return false;
+    }
+  }
+
+  // final hole
+  bufferlist bl;
+  uint64_t size = layers.begin()->first->get_length(layers.begin()->second);
+  bl.append_zero(size - pos);
+  uint64_t error_at;
+  if (!objiter.check_bl_advance(bl, &error_at)) {
+    std::cout << "sparse read omitted non-zero data at "
+              << error_at << std::endl;
+    return false;
+  }
+  return true;
+}
diff --git a/src/test/osd/Object.h b/src/test/osd/Object.h
new file mode 100644
index 000000000..76ce2d2a2
--- /dev/null
+++ b/src/test/osd/Object.h
@@ -0,0 +1,540 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "include/interval_set.h"
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include <list>
+#include <map>
+#include <set>
+#include <stack>
+#include <random>
+
+#ifndef OBJECT_H
+#define OBJECT_H
+
+/// describes an object
+class ContDesc {
+public:
+  int objnum;
+  int cursnap;
+  unsigned seqnum;
+  std::string prefix;
+  std::string oid;
+
+  ContDesc() :
+    objnum(0), cursnap(0),
+    seqnum(0), prefix("") {}
+
+  ContDesc(int objnum,
+           int cursnap,
+           unsigned seqnum,
+           const std::string &prefix) :
+    objnum(objnum), cursnap(cursnap),
+    seqnum(seqnum), prefix(prefix) {}
+
+  bool operator==(const ContDesc &rhs) {
+    return (rhs.objnum == objnum &&
+            rhs.cursnap == cursnap &&
+            rhs.seqnum == seqnum &&
+            rhs.prefix == prefix &&
+            rhs.oid == oid);
+  }
+
+  bool operator<(const ContDesc &rhs) const {
+    return seqnum < rhs.seqnum;
+  }
+
+  bool operator!=(const ContDesc &rhs) {
+    return !((*this) == rhs);
+  }
+  void encode(bufferlist &bl) const;
+  void decode(bufferlist::const_iterator &bp);
+};
+WRITE_CLASS_ENCODER(ContDesc)
+
+std::ostream &operator<<(std::ostream &out, const ContDesc &rhs);
+
+class ChunkDesc {
+public:
+  uint32_t offset;
+  uint32_t length;
+  std::string oid;
+};
+
+class ContentsGenerator {
+public:
+
+  class iterator_impl
{ + public: + virtual char operator*() = 0; + virtual iterator_impl &operator++() = 0; + virtual void seek(uint64_t pos) = 0; + virtual bool end() = 0; + virtual ContDesc get_cont() const = 0; + virtual uint64_t get_pos() const = 0; + virtual bufferlist gen_bl_advance(uint64_t s) { + bufferptr ret = buffer::create(s); + for (uint64_t i = 0; i < s; ++i, ++(*this)) { + ret[i] = **this; + } + bufferlist _ret; + _ret.push_back(ret); + return _ret; + } + /// walk through given @c bl + /// + /// @param[out] off the offset of the first byte which does not match + /// @returns true if @c bl matches with the content, false otherwise + virtual bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) { + uint64_t _off = 0; + for (bufferlist::iterator i = bl.begin(); + !i.end(); + ++i, ++_off, ++(*this)) { + if (*i != **this) { + if (off) + *off = _off; + return false; + } + } + return true; + } + virtual ~iterator_impl() {}; + }; + + class iterator { + public: + ContentsGenerator *parent; + iterator_impl *impl; + char operator *() { return **impl; } + iterator &operator++() { ++(*impl); return *this; }; + void seek(uint64_t pos) { impl->seek(pos); } + bool end() { return impl->end(); } + ~iterator() { parent->put_iterator_impl(impl); } + iterator(const iterator &rhs) : parent(rhs.parent) { + impl = parent->dup_iterator_impl(rhs.impl); + } + iterator &operator=(const iterator &rhs) { + iterator new_iter(rhs); + swap(new_iter); + return *this; + } + void swap(iterator &other) { + ContentsGenerator *otherparent = other.parent; + other.parent = parent; + parent = otherparent; + + iterator_impl *otherimpl = other.impl; + other.impl = impl; + impl = otherimpl; + } + bufferlist gen_bl_advance(uint64_t s) { + return impl->gen_bl_advance(s); + } + bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) { + return impl->check_bl_advance(bl, off); + } + iterator(ContentsGenerator *parent, iterator_impl *impl) : + parent(parent), impl(impl) {} + }; + + virtual uint64_t get_length(const ContDesc &in) = 0; + + virtual void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) = 0; + void get_ranges(const ContDesc &cont, interval_set<uint64_t> &out) { + std::map<uint64_t, uint64_t> ranges; + get_ranges_map(cont, ranges); + for (std::map<uint64_t, uint64_t>::iterator i = ranges.begin(); + i != ranges.end(); + ++i) { + out.insert(i->first, i->second); + } + } + + + virtual iterator_impl *get_iterator_impl(const ContDesc &in) = 0; + + virtual iterator_impl *dup_iterator_impl(const iterator_impl *in) = 0; + + virtual void put_iterator_impl(iterator_impl *in) = 0; + + virtual ~ContentsGenerator() {}; + + iterator get_iterator(const ContDesc &in) { + return iterator(this, get_iterator_impl(in)); + } +}; + +class RandGenerator : public ContentsGenerator { +public: + typedef std::minstd_rand0 RandWrap; + + class iterator_impl : public ContentsGenerator::iterator_impl { + public: + uint64_t pos; + ContDesc cont; + RandWrap rand; + RandGenerator *cont_gen; + char current; + iterator_impl(const ContDesc &cont, RandGenerator *cont_gen) : + pos(0), cont(cont), rand(cont.seqnum), cont_gen(cont_gen) { + current = rand(); + } + + ContDesc get_cont() const override { return cont; } + uint64_t get_pos() const override { return pos; } + + iterator_impl &operator++() override { + pos++; + current = rand(); + return *this; + } + + char operator*() override { + return current; + } + + void seek(uint64_t _pos) override { + if (_pos < pos) { + iterator_impl begin = iterator_impl(cont, cont_gen); + 
begin.seek(_pos); + *this = begin; + } + while (pos < _pos) { + ++(*this); + } + } + + bool end() override { + return pos >= cont_gen->get_length(cont); + } + }; + + ContentsGenerator::iterator_impl *get_iterator_impl(const ContDesc &in) override { + RandGenerator::iterator_impl *i = new iterator_impl(in, this); + return i; + } + + void put_iterator_impl(ContentsGenerator::iterator_impl *in) override { + delete in; + } + + ContentsGenerator::iterator_impl *dup_iterator_impl( + const ContentsGenerator::iterator_impl *in) override { + ContentsGenerator::iterator_impl *retval = get_iterator_impl(in->get_cont()); + retval->seek(in->get_pos()); + return retval; + } +}; + +class VarLenGenerator : public RandGenerator { + uint64_t max_length; + uint64_t min_stride_size; + uint64_t max_stride_size; +public: + VarLenGenerator( + uint64_t length, uint64_t min_stride_size, uint64_t max_stride_size) : + max_length(length), + min_stride_size(min_stride_size), + max_stride_size(max_stride_size) {} + void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override; + uint64_t get_length(const ContDesc &in) override { + RandWrap rand(in.seqnum); + if (max_length == 0) + return 0; + return (rand() % (max_length/2)) + ((max_length - 1)/2) + 1; + } +}; + +class AttrGenerator : public RandGenerator { + uint64_t max_len; + uint64_t big_max_len; +public: + AttrGenerator(uint64_t max_len, uint64_t big_max_len) + : max_len(max_len), big_max_len(big_max_len) {} + void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override { + out.insert(std::pair<uint64_t, uint64_t>(0, get_length(cont))); + } + uint64_t get_length(const ContDesc &in) override { + RandWrap rand(in.seqnum); + // make some attrs big + if (in.seqnum & 3) + return (rand() % max_len); + else + return (rand() % big_max_len); + } + bufferlist gen_bl(const ContDesc &in) { + bufferlist bl; + for (iterator i = get_iterator(in); !i.end(); ++i) { + bl.append(*i); + } + ceph_assert(bl.length() < big_max_len); + return bl; + } +}; + +class AppendGenerator : public RandGenerator { + uint64_t off; + uint64_t alignment; + uint64_t min_append_size; + uint64_t max_append_size; + uint64_t max_append_total; + + uint64_t round_up(uint64_t in, uint64_t by) { + if (by) + in += (by - (in % by)); + return in; + } + +public: + AppendGenerator( + uint64_t off, + uint64_t alignment, + uint64_t min_append_size, + uint64_t _max_append_size, + uint64_t max_append_multiple) : + off(off), alignment(alignment), + min_append_size(round_up(min_append_size, alignment)), + max_append_size(round_up(_max_append_size, alignment)) { + if (_max_append_size == min_append_size) + max_append_size += alignment; + max_append_total = max_append_multiple * max_append_size; + } + uint64_t get_append_size(const ContDesc &in) { + RandWrap rand(in.seqnum); + return round_up(rand() % max_append_total, alignment); + } + uint64_t get_length(const ContDesc &in) override { + return off + get_append_size(in); + } + void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override; +}; + +class ObjectDesc { +public: + ObjectDesc() + : exists(false), dirty(false), + version(0), flushed(false) {} + ObjectDesc(const ContDesc &init, ContentsGenerator *cont_gen) + : exists(false), dirty(false), + version(0), flushed(false) { + layers.push_front(std::pair<std::shared_ptr<ContentsGenerator>, ContDesc>(std::shared_ptr<ContentsGenerator>(cont_gen), init)); + } + + class iterator { + public: + uint64_t pos; + uint64_t size; + uint64_t 
cur_valid_till; + + class ContState { + interval_set<uint64_t> ranges; + const uint64_t size; + + public: + ContDesc cont; + std::shared_ptr<ContentsGenerator> gen; + ContentsGenerator::iterator iter; + + ContState( + const ContDesc &_cont, + std::shared_ptr<ContentsGenerator> _gen, + ContentsGenerator::iterator _iter) + : size(_gen->get_length(_cont)), cont(_cont), gen(_gen), iter(_iter) { + gen->get_ranges(cont, ranges); + } + + const interval_set<uint64_t> &get_ranges() { + return ranges; + } + + uint64_t get_size() { + return gen->get_length(cont); + } + + bool covers(uint64_t pos) { + return ranges.contains(pos) || (!ranges.starts_after(pos) && pos >= size); + } + + uint64_t next(uint64_t pos) { + ceph_assert(!covers(pos)); + return ranges.starts_after(pos) ? ranges.start_after(pos) : size; + } + + uint64_t valid_till(uint64_t pos) { + ceph_assert(covers(pos)); + return ranges.contains(pos) ? + ranges.end_after(pos) : + std::numeric_limits<uint64_t>::max(); + } + }; + // from latest to earliest + using layers_t = std::vector<ContState>; + layers_t layers; + + struct StackState { + const uint64_t next; + const uint64_t size; + }; + std::stack<std::pair<layers_t::iterator, StackState> > stack; + layers_t::iterator current; + + explicit iterator(ObjectDesc &obj) : + pos(0), + size(obj.layers.begin()->first->get_length(obj.layers.begin()->second)), + cur_valid_till(0) { + for (auto &&i : obj.layers) { + layers.push_back({i.second, i.first, i.first->get_iterator(i.second)}); + } + current = layers.begin(); + + adjust_stack(); + } + + void adjust_stack(); + iterator &operator++() { + ceph_assert(cur_valid_till >= pos); + ++pos; + if (pos >= cur_valid_till) { + adjust_stack(); + } + return *this; + } + + char operator*() { + if (current == layers.end()) { + return '\0'; + } else { + return pos >= size ? 
'\0' : *(current->iter); + } + } + + bool end() { + return pos >= size; + } + + // advance @c pos to given position + void seek(uint64_t _pos) { + if (_pos < pos) { + ceph_abort(); + } + while (pos < _pos) { + ceph_assert(cur_valid_till >= pos); + uint64_t next = std::min(_pos - pos, cur_valid_till - pos); + pos += next; + + if (pos >= cur_valid_till) { + ceph_assert(pos == cur_valid_till); + adjust_stack(); + } + } + ceph_assert(pos == _pos); + } + + // grab the bytes in the range of [pos, pos+s), and advance @c pos + // + // @returns the bytes in the specified range + bufferlist gen_bl_advance(uint64_t s) { + bufferlist ret; + while (s > 0) { + ceph_assert(cur_valid_till >= pos); + uint64_t next = std::min(s, cur_valid_till - pos); + if (current != layers.end() && pos < size) { + ret.append(current->iter.gen_bl_advance(next)); + } else { + ret.append_zero(next); + } + + pos += next; + ceph_assert(next <= s); + s -= next; + + if (pos >= cur_valid_till) { + ceph_assert(cur_valid_till == pos); + adjust_stack(); + } + } + return ret; + } + + // compare the range of [pos, pos+bl.length()) with given @c bl, and + // advance @pos if all bytes in the range match + // + // @param error_at the offset of the first byte which does not match + // @returns true if all bytes match, false otherwise + bool check_bl_advance(bufferlist &bl, uint64_t *error_at = nullptr) { + uint64_t off = 0; + while (off < bl.length()) { + ceph_assert(cur_valid_till >= pos); + uint64_t next = std::min(bl.length() - off, cur_valid_till - pos); + + bufferlist to_check; + to_check.substr_of(bl, off, next); + if (current != layers.end() && pos < size) { + if (!current->iter.check_bl_advance(to_check, error_at)) { + if (error_at) + *error_at += off; + return false; + } + } else { + uint64_t at = pos; + for (auto i = to_check.begin(); !i.end(); ++i, ++at) { + if (*i) { + if (error_at) + *error_at = at; + return false; + } + } + } + + pos += next; + off += next; + ceph_assert(off <= bl.length()); + + if (pos >= cur_valid_till) { + ceph_assert(cur_valid_till == pos); + adjust_stack(); + } + } + ceph_assert(off == bl.length()); + return true; + } + }; + + iterator begin() { + return iterator(*this); + } + + bool deleted() { + return !exists; + } + + bool has_contents() { + return layers.size(); + } + + // takes ownership of gen + void update(ContentsGenerator *gen, const ContDesc &next); + bool check(bufferlist &to_check); + bool check_sparse(const std::map<uint64_t, uint64_t>& extends, + bufferlist &to_check); + const ContDesc &most_recent(); + ContentsGenerator *most_recent_gen() { + return layers.begin()->first.get(); + } + std::map<std::string, ContDesc> attrs; // Both omap and xattrs + bufferlist header; + bool exists; + bool dirty; + + uint64_t version; + std::string redirect_target; + std::map<uint64_t, ChunkDesc> chunk_info; + bool flushed; +private: + std::list<std::pair<std::shared_ptr<ContentsGenerator>, ContDesc> > layers; +}; + +#endif diff --git a/src/test/osd/RadosModel.cc b/src/test/osd/RadosModel.cc new file mode 100644 index 000000000..501bf3b13 --- /dev/null +++ b/src/test/osd/RadosModel.cc @@ -0,0 +1,36 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "include/interval_set.h" +#include "include/buffer.h" +#include <list> +#include <map> +#include <set> +#include "include/rados/librados.h" +#include "RadosModel.h" +#include "TestOpStat.h" + + +void TestOp::begin() +{ + _begin(); +} + +void TestOp::finish(TestOp::CallbackInfo *info) +{ + 
_finish(info); +} + +void read_callback(librados::completion_t comp, void *arg) { + TestOp* op = static_cast<TestOp*>(arg); + op->finish(NULL); +} + +void write_callback(librados::completion_t comp, void *arg) { + std::pair<TestOp*, TestOp::CallbackInfo*> *args = + static_cast<std::pair<TestOp*, TestOp::CallbackInfo*> *>(arg); + TestOp* op = args->first; + TestOp::CallbackInfo *info = args->second; + op->finish(info); + delete args; + delete info; +} diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h new file mode 100644 index 000000000..1e5d0e908 --- /dev/null +++ b/src/test/osd/RadosModel.h @@ -0,0 +1,3520 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "include/int_types.h" + +#include "common/ceph_mutex.h" +#include "include/rados/librados.hpp" + +#include <iostream> +#include <iterator> +#include <sstream> +#include <map> +#include <set> +#include <list> +#include <string> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <time.h> +#include "Object.h" +#include "TestOpStat.h" +#include "test/librados/test.h" +#include "common/sharedptr_registry.hpp" +#include "common/errno.h" +#include "osd/HitSet.h" +#include "common/ceph_crypto.h" + +#include "cls/cas/cls_cas_client.h" +#include "cls/cas/cls_cas_internal.h" + +#ifndef RADOSMODEL_H +#define RADOSMODEL_H + +class RadosTestContext; +class TestOpStat; + +template <typename T> +typename T::iterator rand_choose(T &cont) { + if (std::empty(cont)) { + return std::end(cont); + } + return std::next(std::begin(cont), rand() % cont.size()); +} + +enum TestOpType { + TEST_OP_READ, + TEST_OP_WRITE, + TEST_OP_WRITE_EXCL, + TEST_OP_WRITESAME, + TEST_OP_DELETE, + TEST_OP_SNAP_CREATE, + TEST_OP_SNAP_REMOVE, + TEST_OP_ROLLBACK, + TEST_OP_SETATTR, + TEST_OP_RMATTR, + TEST_OP_WATCH, + TEST_OP_COPY_FROM, + TEST_OP_HIT_SET_LIST, + TEST_OP_UNDIRTY, + TEST_OP_IS_DIRTY, + TEST_OP_CACHE_FLUSH, + TEST_OP_CACHE_TRY_FLUSH, + TEST_OP_CACHE_EVICT, + TEST_OP_APPEND, + TEST_OP_APPEND_EXCL, + TEST_OP_SET_REDIRECT, + TEST_OP_UNSET_REDIRECT, + TEST_OP_CHUNK_READ, + TEST_OP_TIER_PROMOTE, + TEST_OP_TIER_FLUSH, + TEST_OP_SET_CHUNK, + TEST_OP_TIER_EVICT +}; + +class TestWatchContext : public librados::WatchCtx2 { + TestWatchContext(const TestWatchContext&); +public: + ceph::condition_variable cond; + uint64_t handle = 0; + bool waiting = false; + ceph::mutex lock = ceph::make_mutex("watch lock"); + TestWatchContext() = default; + void handle_notify(uint64_t notify_id, uint64_t cookie, + uint64_t notifier_id, + bufferlist &bl) override { + std::lock_guard l{lock}; + waiting = false; + cond.notify_all(); + } + void handle_error(uint64_t cookie, int err) override { + std::lock_guard l{lock}; + std::cout << "watch handle_error " << err << std::endl; + } + void start() { + std::lock_guard l{lock}; + waiting = true; + } + void wait() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return !waiting; }); + } + uint64_t &get_handle() { + return handle; + } +}; + +class TestOp { +public: + const int num; + RadosTestContext *context; + TestOpStat *stat; + bool done = false; + TestOp(int n, RadosTestContext *context, + TestOpStat *stat = 0) + : num(n), + context(context), + stat(stat) + {} + + virtual ~TestOp() {}; + + /** + * This struct holds data to be passed by a callback + * to a TestOp::finish method. 
+ */ + struct CallbackInfo { + uint64_t id; + explicit CallbackInfo(uint64_t id) : id(id) {} + virtual ~CallbackInfo() {}; + }; + + virtual void _begin() = 0; + + /** + * Called when the operation completes. + * This should be overridden by asynchronous operations. + * + * @param info information stored by a callback, or NULL - + * useful for multi-operation TestOps + */ + virtual void _finish(CallbackInfo *info) + { + return; + } + virtual std::string getType() = 0; + virtual bool finished() + { + return true; + } + + void begin(); + void finish(CallbackInfo *info); + virtual bool must_quiesce_other_ops() { return false; } +}; + +class TestOpGenerator { +public: + virtual ~TestOpGenerator() {}; + virtual TestOp *next(RadosTestContext &context) = 0; +}; + +class RadosTestContext { +public: + ceph::mutex state_lock = ceph::make_mutex("Context Lock"); + ceph::condition_variable wait_cond; + // snap => {oid => desc} + std::map<int, std::map<std::string,ObjectDesc> > pool_obj_cont; + std::set<std::string> oid_in_use; + std::set<std::string> oid_not_in_use; + std::set<std::string> oid_flushing; + std::set<std::string> oid_not_flushing; + std::set<std::string> oid_redirect_not_in_use; + std::set<std::string> oid_redirect_in_use; + std::set<std::string> oid_set_chunk_tgt_pool; + SharedPtrRegistry<int, int> snaps_in_use; + int current_snap; + std::string pool_name; + librados::IoCtx io_ctx; + librados::Rados rados; + int next_oid; + std::string prefix; + int errors; + int max_in_flight; + int seq_num; + std::map<int,uint64_t> snaps; + uint64_t seq; + const char *rados_id; + bool initialized; + std::map<std::string, TestWatchContext*> watches; + const uint64_t max_size; + const uint64_t min_stride_size; + const uint64_t max_stride_size; + AttrGenerator attr_gen; + const bool no_omap; + const bool no_sparse; + bool pool_snaps; + bool write_fadvise_dontneed; + std::string low_tier_pool_name; + librados::IoCtx low_tier_io_ctx; + int snapname_num; + std::map<std::string, std::string> redirect_objs; + bool enable_dedup; + std::string chunk_algo; + std::string chunk_size; + + RadosTestContext(const std::string &pool_name, + int max_in_flight, + uint64_t max_size, + uint64_t min_stride_size, + uint64_t max_stride_size, + bool no_omap, + bool no_sparse, + bool pool_snaps, + bool write_fadvise_dontneed, + const std::string &low_tier_pool_name, + bool enable_dedup, + std::string chunk_algo, + std::string chunk_size, + const char *id = 0) : + pool_obj_cont(), + current_snap(0), + pool_name(pool_name), + next_oid(0), + errors(0), + max_in_flight(max_in_flight), + seq_num(0), seq(0), + rados_id(id), initialized(false), + max_size(max_size), + min_stride_size(min_stride_size), max_stride_size(max_stride_size), + attr_gen(2000, 20000), + no_omap(no_omap), + no_sparse(no_sparse), + pool_snaps(pool_snaps), + write_fadvise_dontneed(write_fadvise_dontneed), + low_tier_pool_name(low_tier_pool_name), + snapname_num(0), + enable_dedup(enable_dedup), + chunk_algo(chunk_algo), + chunk_size(chunk_size) + { + } + + int init() + { + int r = rados.init(rados_id); + if (r < 0) + return r; + r = rados.conf_read_file(NULL); + if (r < 0) + return r; + r = rados.conf_parse_env(NULL); + if (r < 0) + return r; + r = rados.connect(); + if (r < 0) + return r; + r = rados.ioctx_create(pool_name.c_str(), io_ctx); + if (r < 0) { + rados.shutdown(); + return r; + } + if (!low_tier_pool_name.empty()) { + r = rados.ioctx_create(low_tier_pool_name.c_str(), low_tier_io_ctx); + if (r < 0) { + rados.shutdown(); + return r; + } + } + bufferlist 
inbl; + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"write_fadvise_dontneed\", \"val\": \"" + (write_fadvise_dontneed ? "true" : "false") + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + if (enable_dedup) { + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"fingerprint_algorithm\", \"val\": \"" + "sha256" + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"dedup_tier\", \"val\": \"" + low_tier_pool_name + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"dedup_chunk_algorithm\", \"val\": \"" + chunk_algo + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"dedup_cdc_chunk_size\", \"val\": \"" + chunk_size + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + } + + char hostname_cstr[100]; + gethostname(hostname_cstr, 100); + std::stringstream hostpid; + hostpid << hostname_cstr << getpid() << "-"; + prefix = hostpid.str(); + ceph_assert(!initialized); + initialized = true; + return 0; + } + + void shutdown() + { + if (initialized) { + rados.shutdown(); + } + } + + void loop(TestOpGenerator *gen) + { + ceph_assert(initialized); + std::list<TestOp*> inflight; + std::unique_lock state_locker{state_lock}; + + TestOp *next = gen->next(*this); + TestOp *waiting = NULL; + + while (next || !inflight.empty()) { + if (next && next->must_quiesce_other_ops() && !inflight.empty()) { + waiting = next; + next = NULL; // Force to wait for inflight to drain + } + if (next) { + inflight.push_back(next); + } + state_lock.unlock(); + if (next) { + (*inflight.rbegin())->begin(); + } + state_lock.lock(); + while (1) { + for (auto i = inflight.begin(); + i != inflight.end();) { + if ((*i)->finished()) { + std::cout << (*i)->num << ": done (" << (inflight.size()-1) << " left)" << std::endl; + delete *i; + inflight.erase(i++); + } else { + ++i; + } + } + + if (inflight.size() >= (unsigned) max_in_flight || (!next && !inflight.empty())) { + std::cout << " waiting on " << inflight.size() << std::endl; + wait_cond.wait(state_locker); + } else { + break; + } + } + if (waiting) { + next = waiting; + waiting = NULL; + } else { + next = gen->next(*this); + } + } + } + + void kick() + { + wait_cond.notify_all(); + } + + TestWatchContext *get_watch_context(const std::string &oid) { + return watches.count(oid) ? 
watches[oid] : 0; + } + + TestWatchContext *watch(const std::string &oid) { + ceph_assert(!watches.count(oid)); + return (watches[oid] = new TestWatchContext); + } + + void unwatch(const std::string &oid) { + ceph_assert(watches.count(oid)); + delete watches[oid]; + watches.erase(oid); + } + + ObjectDesc get_most_recent(const std::string &oid) { + ObjectDesc new_obj; + for (auto i = pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + std::map<std::string,ObjectDesc>::iterator j = i->second.find(oid); + if (j != i->second.end()) { + new_obj = j->second; + break; + } + } + return new_obj; + } + + void rm_object_attrs(const std::string &oid, const std::set<std::string> &attrs) + { + ObjectDesc new_obj = get_most_recent(oid); + for (std::set<std::string>::const_iterator i = attrs.begin(); + i != attrs.end(); + ++i) { + new_obj.attrs.erase(*i); + } + new_obj.dirty = true; + new_obj.flushed = false; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void remove_object_header(const std::string &oid) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.header = bufferlist(); + new_obj.dirty = true; + new_obj.flushed = false; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + + void update_object_header(const std::string &oid, const bufferlist &bl) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.header = bl; + new_obj.exists = true; + new_obj.dirty = true; + new_obj.flushed = false; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object_attrs(const std::string &oid, const std::map<std::string, ContDesc> &attrs) + { + ObjectDesc new_obj = get_most_recent(oid); + for (auto i = attrs.cbegin(); + i != attrs.cend(); + ++i) { + new_obj.attrs[i->first] = i->second; + } + new_obj.exists = true; + new_obj.dirty = true; + new_obj.flushed = false; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object(ContentsGenerator *cont_gen, + const std::string &oid, const ContDesc &contents) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.exists = true; + new_obj.dirty = true; + new_obj.flushed = false; + new_obj.update(cont_gen, + contents); + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object_full(const std::string &oid, const ObjectDesc &contents) + { + pool_obj_cont[current_snap].insert_or_assign(oid, contents); + pool_obj_cont[current_snap][oid].dirty = true; + } + + void update_object_undirty(const std::string &oid) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.dirty = false; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object_version(const std::string &oid, uint64_t version, + int snap = -1) + { + for (auto i = pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + if (snap != -1 && snap < i->first) + continue; + std::map<std::string,ObjectDesc>::iterator j = i->second.find(oid); + if (j != i->second.end()) { + if (version) + j->second.version = version; + std::cout << __func__ << " oid " << oid + << " v " << version << " " << j->second.most_recent() + << " " << (j->second.dirty ? "dirty" : "clean") + << " " << (j->second.exists ? 
"exists" : "dne") + << std::endl; + break; + } + } + } + + void remove_object(const std::string &oid) + { + ceph_assert(!get_watch_context(oid)); + ObjectDesc new_obj; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + bool find_object(const std::string &oid, ObjectDesc *contents, int snap = -1) const + { + for (auto i = pool_obj_cont.crbegin(); + i != pool_obj_cont.crend(); + ++i) { + if (snap != -1 && snap < i->first) continue; + if (i->second.count(oid) != 0) { + *contents = i->second.find(oid)->second; + return true; + } + } + return false; + } + + void update_object_redirect_target(const std::string &oid, const std::string &target) + { + redirect_objs[oid] = target; + } + + void update_object_chunk_target(const std::string &oid, uint64_t offset, const ChunkDesc &info) + { + for (auto i = pool_obj_cont.crbegin(); + i != pool_obj_cont.crend(); + ++i) { + if (i->second.count(oid) != 0) { + ObjectDesc obj_desc = i->second.find(oid)->second; + obj_desc.chunk_info[offset] = info; + update_object_full(oid, obj_desc); + return ; + } + } + return; + } + + bool object_existed_at(const std::string &oid, int snap = -1) const + { + ObjectDesc contents; + bool found = find_object(oid, &contents, snap); + return found && contents.exists; + } + + void remove_snap(int snap) + { + std::map<int, std::map<std::string,ObjectDesc> >::iterator next_iter = pool_obj_cont.find(snap); + ceph_assert(next_iter != pool_obj_cont.end()); + std::map<int, std::map<std::string,ObjectDesc> >::iterator current_iter = next_iter++; + ceph_assert(current_iter != pool_obj_cont.end()); + std::map<std::string,ObjectDesc> ¤t = current_iter->second; + std::map<std::string,ObjectDesc> &next = next_iter->second; + for (auto i = current.begin(); i != current.end(); ++i) { + if (next.count(i->first) == 0) { + next.insert(std::pair<std::string,ObjectDesc>(i->first, i->second)); + } + } + pool_obj_cont.erase(current_iter); + snaps.erase(snap); + } + + void add_snap(uint64_t snap) + { + snaps[current_snap] = snap; + current_snap++; + pool_obj_cont[current_snap]; + seq = snap; + } + + void roll_back(const std::string &oid, int snap) + { + ceph_assert(!get_watch_context(oid)); + ObjectDesc contents; + find_object(oid, &contents, snap); + contents.dirty = true; + contents.flushed = false; + pool_obj_cont.rbegin()->second.insert_or_assign(oid, contents); + } + + void update_object_tier_flushed(const std::string &oid, int snap) + { + for (auto i = pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + if (snap != -1 && snap < i->first) + continue; + std::map<std::string,ObjectDesc>::iterator j = i->second.find(oid); + if (j != i->second.end()) { + j->second.flushed = true; + break; + } + } + } + + bool check_oldest_snap_flushed(const std::string &oid, int snap) + { + for (auto i = pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + if (snap != -1 && snap < i->first) + continue; + std::map<std::string,ObjectDesc>::iterator j = i->second.find(oid); + if (j != i->second.end() && !j->second.flushed) { + std::cout << __func__ << " oid " << oid + << " v " << j->second.version << " " << j->second.most_recent() + << " " << (j->second.flushed ? 
"flushed" : "unflushed") + << " " << i->first << std::endl; + return false; + } + } + return true; + } + + bool check_chunks_refcount(librados::IoCtx &chunk_pool_ctx, librados::IoCtx &manifest_pool_ctx) + { + librados::ObjectCursor shard_start; + librados::ObjectCursor shard_end; + librados::ObjectCursor begin; + librados::ObjectCursor end; + begin = chunk_pool_ctx.object_list_begin(); + end = chunk_pool_ctx.object_list_end(); + + chunk_pool_ctx.object_list_slice( + begin, + end, + 1, + 1, + &shard_start, + &shard_end); + + librados::ObjectCursor c(shard_start); + while(c < shard_end) + { + std::vector<librados::ObjectItem> result; + int r = chunk_pool_ctx.object_list(c, shard_end, 12, {}, &result, &c); + if (r < 0) { + std::cerr << "error object_list : " << cpp_strerror(r) << std::endl; + return false; + } + + for (const auto & i : result) { + auto oid = i.oid; + chunk_refs_t refs; + { + bufferlist t; + r = chunk_pool_ctx.getxattr(oid, CHUNK_REFCOUNT_ATTR, t); + if (r < 0) { + continue; + } + auto p = t.cbegin(); + decode(refs, p); + } + ceph_assert(refs.get_type() == chunk_refs_t::TYPE_BY_OBJECT); + + chunk_refs_by_object_t *byo = + static_cast<chunk_refs_by_object_t*>(refs.r.get()); + + for (auto& pp : byo->by_object) { + int src_refcount = 0; + int dst_refcount = byo->by_object.count(pp); + for (int tries = 0; tries < 10; tries++) { + r = cls_cas_references_chunk(manifest_pool_ctx, pp.oid.name, oid); + if (r == -ENOENT || r == -ENOLINK) { + src_refcount = 0; + } else if (r == -EBUSY) { + sleep(10); + continue; + } else { + src_refcount = r; + } + break; + } + if (src_refcount > dst_refcount) { + std::cerr << " src_object " << pp + << ": src_refcount " << src_refcount + << ", dst_object " << oid + << ": dst_refcount " << dst_refcount + << std::endl; + return false; + } + } + } + } + return true; + } +}; + +void read_callback(librados::completion_t comp, void *arg); +void write_callback(librados::completion_t comp, void *arg); + +/// remove random xattrs from given object, and optionally remove omap +/// entries if @c no_omap is not specified in context +class RemoveAttrsOp : public TestOp { +public: + std::string oid; + librados::ObjectWriteOperation op; + librados::AioCompletion *comp; + RemoveAttrsOp(int n, RadosTestContext *context, + const std::string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), oid(oid), comp(NULL) + {} + + void _begin() override + { + ContDesc cont; + std::set<std::string> to_remove; + { + std::lock_guard l{context->state_lock}; + ObjectDesc obj; + if (!context->find_object(oid, &obj)) { + context->kick(); + done = true; + return; + } + cont = ContDesc(context->seq_num, context->current_snap, + context->seq_num, ""); + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + if (rand() % 30) { + ContentsGenerator::iterator iter = context->attr_gen.get_iterator(cont); + for (auto i = obj.attrs.begin(); + i != obj.attrs.end(); + ++i, ++iter) { + if (!(*iter % 3)) { + to_remove.insert(i->first); + op.rmxattr(i->first.c_str()); + } + } + if (to_remove.empty()) { + context->kick(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + done = true; + return; + } + if (!context->no_omap) { + op.omap_rm_keys(to_remove); + } + } else { + if (!context->no_omap) { + op.omap_clear(); + } + for (auto i = obj.attrs.begin(); + i != obj.attrs.end(); + ++i) { + op.rmxattr(i->first.c_str()); + to_remove.insert(i->first); + } + context->remove_object_header(oid); + } + context->rm_object_attrs(oid, to_remove); + } + + 
std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + done = true; + context->update_object_version(oid, comp->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "RemoveAttrsOp"; + } +}; + +/// add random xattrs to given object, and optionally add omap +/// entries if @c no_omap is not specified in context +class SetAttrsOp : public TestOp { +public: + std::string oid; + librados::ObjectWriteOperation op; + librados::AioCompletion *comp; + SetAttrsOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + oid(oid), comp(NULL) + {} + + void _begin() override + { + ContDesc cont; + { + std::lock_guard l{context->state_lock}; + cont = ContDesc(context->seq_num, context->current_snap, + context->seq_num, ""); + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + } + + std::map<std::string, bufferlist> omap_contents; + std::map<std::string, ContDesc> omap; + bufferlist header; + ContentsGenerator::iterator keygen = context->attr_gen.get_iterator(cont); + op.create(false); + while (!*keygen) ++keygen; + while (*keygen) { + if (*keygen != '_') + header.append(*keygen); + ++keygen; + } + for (int i = 0; i < 20; ++i) { + std::string key; + while (!*keygen) ++keygen; + while (*keygen && key.size() < 40) { + key.push_back((*keygen % 20) + 'a'); + ++keygen; + } + ContDesc val(cont); + val.seqnum += (unsigned)(*keygen); + val.prefix = ("oid: " + oid); + omap[key] = val; + bufferlist val_buffer = context->attr_gen.gen_bl(val); + omap_contents[key] = val_buffer; + op.setxattr(key.c_str(), val_buffer); + } + if (!context->no_omap) { + op.omap_set_header(header); + op.omap_set(omap_contents); + } + + { + std::lock_guard l{context->state_lock}; + context->update_object_header(oid, header); + context->update_object_attrs(oid, omap); + } + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + int r; + if ((r = comp->get_return_value())) { + std::cerr << "err " << r << std::endl; + ceph_abort(); + } + done = true; + context->update_object_version(oid, comp->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "SetAttrsOp"; + } +}; + +class WriteOp : public TestOp { +public: + const std::string oid; + ContDesc cont; + std::set<librados::AioCompletion *> waiting; + librados::AioCompletion *rcompletion = nullptr; + // numbers of async ops submitted + uint64_t waiting_on = 0; + uint64_t last_acked_tid = 0; + + librados::ObjectReadOperation read_op; + librados::ObjectWriteOperation write_op; + bufferlist rbuffer; + + const bool do_append; + const bool do_excl; + + WriteOp(int n, 
+ RadosTestContext *context, + const std::string &oid, + bool do_append, + bool do_excl, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), + do_append(do_append), + do_excl(do_excl) + {} + + void _begin() override + { + assert(!done); + std::stringstream acc; + std::lock_guard state_locker{context->state_lock}; + acc << context->prefix << "OID: " << oid << " snap " << context->current_snap << std::endl; + std::string prefix = acc.str(); + + cont = ContDesc(context->seq_num, context->current_snap, context->seq_num, prefix); + + ContentsGenerator *cont_gen; + if (do_append) { + ObjectDesc old_value; + bool found = context->find_object(oid, &old_value); + uint64_t prev_length = found && old_value.has_contents() ? + old_value.most_recent_gen()->get_length(old_value.most_recent()) : + 0; + bool requires_alignment; + int r = context->io_ctx.pool_requires_alignment2(&requires_alignment); + ceph_assert(r == 0); + uint64_t alignment = 0; + if (requires_alignment) { + r = context->io_ctx.pool_required_alignment2(&alignment); + ceph_assert(r == 0); + ceph_assert(alignment != 0); + } + cont_gen = new AppendGenerator( + prev_length, + alignment, + context->min_stride_size, + context->max_stride_size, + 3); + } else { + cont_gen = new VarLenGenerator( + context->max_size, context->min_stride_size, context->max_stride_size); + } + context->update_object(cont_gen, oid, cont); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + std::map<uint64_t, uint64_t> ranges; + + cont_gen->get_ranges_map(cont, ranges); + std::cout << num << ": seq_num " << context->seq_num << " ranges " << ranges << std::endl; + context->seq_num++; + + waiting_on = ranges.size(); + ContentsGenerator::iterator gen_pos = cont_gen->get_iterator(cont); + // assure that tid is greater than last_acked_tid + uint64_t tid = last_acked_tid + 1; + for (auto [offset, len] : ranges) { + gen_pos.seek(offset); + bufferlist to_write = gen_pos.gen_bl_advance(len); + ceph_assert(to_write.length() == len); + ceph_assert(to_write.length() > 0); + std::cout << num << ": writing " << context->prefix+oid + << " from " << offset + << " to " << len + offset << " tid " << tid << std::endl; + auto cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = + context->rados.aio_create_completion((void*) cb_arg, &write_callback); + waiting.insert(completion); + librados::ObjectWriteOperation op; + if (do_append) { + op.append(to_write); + } else { + op.write(offset, to_write); + } + if (do_excl && cb_arg->second->id == last_acked_tid + 1) + op.assert_exists(); + context->io_ctx.aio_operate( + context->prefix+oid, completion, + &op); + } + + bufferlist contbl; + encode(cont, contbl); + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting.insert(completion); + waiting_on++; + write_op.setxattr("_header", contbl); + if (!do_append) { + write_op.truncate(cont_gen->get_length(cont)); + } + context->io_ctx.aio_operate( + context->prefix+oid, completion, &write_op); + + cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + rcompletion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting_on++; + read_op.read(0, 1, &rbuffer, 0); + 
context->io_ctx.aio_operate( + context->prefix+oid, rcompletion, + &read_op, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + 0); + } + + void _finish(CallbackInfo *info) override + { + ceph_assert(info); + std::lock_guard state_locker{context->state_lock}; + uint64_t tid = info->id; + + std::cout << num << ": finishing write tid " << tid << " to " << context->prefix + oid << std::endl; + + if (tid <= last_acked_tid) { + std::cerr << "Error: finished tid " << tid + << " when last_acked_tid was " << last_acked_tid << std::endl; + ceph_abort(); + } + last_acked_tid = tid; + + ceph_assert(!done); + waiting_on--; + if (waiting_on == 0) { + uint64_t version = 0; + for (auto i = waiting.begin(); i != waiting.end();) { + ceph_assert((*i)->is_complete()); + if (int err = (*i)->get_return_value()) { + std::cerr << "Error: oid " << oid << " write returned error code " + << err << std::endl; + ceph_abort(); + } + if ((*i)->get_version64() > version) { + std::cout << num << ": oid " << oid << " updating version " << version + << " to " << (*i)->get_version64() << std::endl; + version = (*i)->get_version64(); + } else { + std::cout << num << ": oid " << oid << " version " << version + << " is already newer than " << (*i)->get_version64() << std::endl; + } + (*i)->release(); + waiting.erase(i++); + } + + context->update_object_version(oid, version); + ceph_assert(rcompletion->is_complete()); + int r = rcompletion->get_return_value(); + assertf(r >= 0, "r = %d", r); + if (rcompletion->get_version64() != version) { + std::cerr << "Error: racing read on " << oid << " returned version " + << rcompletion->get_version64() << " rather than version " + << version << std::endl; + ceph_abort_msg("racing read got wrong version"); + } + rcompletion->release(); + + { + ObjectDesc old_value; + ceph_assert(context->find_object(oid, &old_value, -1)); + if (old_value.deleted()) + std::cout << num << ": left oid " << oid << " deleted" << std::endl; + else + std::cout << num << ": left oid " << oid << " " + << old_value.most_recent() << std::endl; + } + + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "WriteOp"; + } +}; + +class WriteSameOp : public TestOp { +public: + std::string oid; + ContDesc cont; + std::set<librados::AioCompletion *> waiting; + librados::AioCompletion *rcompletion; + uint64_t waiting_on; + uint64_t last_acked_tid; + + librados::ObjectReadOperation read_op; + librados::ObjectWriteOperation write_op; + bufferlist rbuffer; + + WriteSameOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), rcompletion(NULL), waiting_on(0), + last_acked_tid(0) + {} + + void _begin() override + { + std::lock_guard state_locker{context->state_lock}; + done = 0; + std::stringstream acc; + acc << context->prefix << "OID: " << oid << " snap " << context->current_snap << std::endl; + std::string prefix = acc.str(); + + cont = ContDesc(context->seq_num, context->current_snap, context->seq_num, prefix); + + ContentsGenerator *cont_gen; + cont_gen = new VarLenGenerator( + context->max_size, context->min_stride_size, context->max_stride_size); + context->update_object(cont_gen, oid, cont); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + std::map<uint64_t, uint64_t> ranges; + + cont_gen->get_ranges_map(cont, ranges); + std::cout << 
num << ": seq_num " << context->seq_num << " ranges " << ranges << std::endl; + context->seq_num++; + + waiting_on = ranges.size(); + ContentsGenerator::iterator gen_pos = cont_gen->get_iterator(cont); + // assure that tid is greater than last_acked_tid + uint64_t tid = last_acked_tid + 1; + for (auto [offset, len] : ranges) { + gen_pos.seek(offset); + bufferlist to_write = gen_pos.gen_bl_advance(len); + ceph_assert(to_write.length() == len); + ceph_assert(to_write.length() > 0); + std::cout << num << ": writing " << context->prefix+oid + << " from " << offset + << " to " << offset + len << " tid " << tid << std::endl; + auto cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + waiting.insert(completion); + librados::ObjectWriteOperation op; + /* no writesame multiplication factor for now */ + op.writesame(offset, to_write.length(), to_write); + + context->io_ctx.aio_operate( + context->prefix+oid, completion, + &op); + } + + bufferlist contbl; + encode(cont, contbl); + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting.insert(completion); + waiting_on++; + write_op.setxattr("_header", contbl); + write_op.truncate(cont_gen->get_length(cont)); + context->io_ctx.aio_operate( + context->prefix+oid, completion, &write_op); + + cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + rcompletion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting_on++; + read_op.read(0, 1, &rbuffer, 0); + context->io_ctx.aio_operate( + context->prefix+oid, rcompletion, + &read_op, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + 0); + } + + void _finish(CallbackInfo *info) override + { + ceph_assert(info); + std::lock_guard state_locker{context->state_lock}; + uint64_t tid = info->id; + + std::cout << num << ": finishing writesame tid " << tid << " to " << context->prefix + oid << std::endl; + + if (tid <= last_acked_tid) { + std::cerr << "Error: finished tid " << tid + << " when last_acked_tid was " << last_acked_tid << std::endl; + ceph_abort(); + } + last_acked_tid = tid; + + ceph_assert(!done); + waiting_on--; + if (waiting_on == 0) { + uint64_t version = 0; + for (auto i = waiting.begin(); i != waiting.end();) { + ceph_assert((*i)->is_complete()); + if (int err = (*i)->get_return_value()) { + std::cerr << "Error: oid " << oid << " writesame returned error code " + << err << std::endl; + ceph_abort(); + } + if ((*i)->get_version64() > version) { + std::cout << "oid " << oid << "updating version " << version + << "to " << (*i)->get_version64() << std::endl; + version = (*i)->get_version64(); + } else { + std::cout << "oid " << oid << "version " << version + << "is already newer than " << (*i)->get_version64() << std::endl; + } + (*i)->release(); + waiting.erase(i++); + } + + context->update_object_version(oid, version); + ceph_assert(rcompletion->is_complete()); + int r = rcompletion->get_return_value(); + assertf(r >= 0, "r = %d", r); + if (rcompletion->get_version64() != version) { + std::cerr << "Error: racing read on " << oid << " returned version " + << rcompletion->get_version64() << " rather than version " + << version 
<< std::endl; + ceph_abort_msg("racing read got wrong version"); + } + rcompletion->release(); + + { + ObjectDesc old_value; + ceph_assert(context->find_object(oid, &old_value, -1)); + if (old_value.deleted()) + std::cout << num << ": left oid " << oid << " deleted" << std::endl; + else + std::cout << num << ": left oid " << oid << " " + << old_value.most_recent() << std::endl; + } + + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "WriteSameOp"; + } +}; + +class DeleteOp : public TestOp { +public: + std::string oid; + + DeleteOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), oid(oid) + {} + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + if (context->get_watch_context(oid)) { + context->kick(); + return; + } + + ObjectDesc contents; + context->find_object(oid, &contents); + bool present = !contents.deleted(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->seq_num++; + + context->remove_object(oid); + + interval_set<uint64_t> ranges; + state_locker.unlock(); + + int r = 0; + if (rand() % 2) { + librados::ObjectWriteOperation op; + op.assert_exists(); + op.remove(); + r = context->io_ctx.operate(context->prefix+oid, &op); + } else { + r = context->io_ctx.remove(context->prefix+oid); + } + if (r && !(r == -ENOENT && !present)) { + std::cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl; + ceph_abort(); + } + + state_locker.lock(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + + std::string getType() override + { + return "DeleteOp"; + } +}; + +class ReadOp : public TestOp { +public: + std::vector<librados::AioCompletion *> completions; + librados::ObjectReadOperation op; + std::string oid; + ObjectDesc old_value; + int snap; + bool balance_reads; + bool localize_reads; + + std::shared_ptr<int> in_use; + + std::vector<bufferlist> results; + std::vector<int> retvals; + std::vector<std::map<uint64_t, uint64_t>> extent_results; + std::vector<bool> is_sparse_read; + uint64_t waiting_on; + + std::vector<bufferlist> checksums; + std::vector<int> checksum_retvals; + + std::map<std::string, bufferlist> attrs; + int attrretval; + + std::set<std::string> omap_requested_keys; + std::map<std::string, bufferlist> omap_returned_values; + std::set<std::string> omap_keys; + std::map<std::string, bufferlist> omap; + bufferlist header; + + std::map<std::string, bufferlist> xattrs; + ReadOp(int n, + RadosTestContext *context, + const std::string &oid, + bool balance_reads, + bool localize_reads, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completions(3), + oid(oid), + snap(0), + balance_reads(balance_reads), + localize_reads(localize_reads), + results(3), + retvals(3), + extent_results(3), + is_sparse_read(3, false), + waiting_on(0), + checksums(3), + checksum_retvals(3), + attrretval(0) + {} + + void _do_read(librados::ObjectReadOperation& read_op, int index) { + uint64_t len = 0; + if (old_value.has_contents()) + len = old_value.most_recent_gen()->get_length(old_value.most_recent()); + if (context->no_sparse || rand() % 2) { + is_sparse_read[index] = false; + read_op.read(0, + len, + &results[index], + &retvals[index]); + bufferlist init_value_bl; + encode(static_cast<uint32_t>(-1), 
init_value_bl); + read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, 0, len, + 0, &checksums[index], &checksum_retvals[index]); + } else { + is_sparse_read[index] = true; + read_op.sparse_read(0, + len, + &extent_results[index], + &results[index], + &retvals[index]); + } + } + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + std::cout << num << ": read oid " << oid << " snap " << snap << std::endl; + done = 0; + for (uint32_t i = 0; i < 3; i++) { + completions[i] = context->rados.aio_create_completion((void *) this, &read_callback); + } + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + ceph_assert(context->find_object(oid, &old_value, snap)); + if (old_value.deleted()) + std::cout << num << ": expect deleted" << std::endl; + else + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + + TestWatchContext *ctx = context->get_watch_context(oid); + state_locker.unlock(); + if (ctx) { + ceph_assert(old_value.exists); + TestAlarm alarm; + std::cerr << num << ": about to start" << std::endl; + ctx->start(); + std::cerr << num << ": started" << std::endl; + bufferlist bl; + context->io_ctx.set_notify_timeout(600); + int r = context->io_ctx.notify2(context->prefix+oid, bl, 0, NULL); + if (r < 0) { + std::cerr << "r is " << r << std::endl; + ceph_abort(); + } + std::cerr << num << ": notified, waiting" << std::endl; + ctx->wait(); + } + state_locker.lock(); + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + _do_read(op, 0); + for (auto i = old_value.attrs.begin(); i != old_value.attrs.end(); ++i) { + if (rand() % 2) { + std::string key = i->first; + if (rand() % 2) + key.push_back((rand() % 26) + 'a'); + omap_requested_keys.insert(key); + } + } + if (!context->no_omap) { + op.omap_get_vals_by_keys(omap_requested_keys, &omap_returned_values, 0); + // NOTE: we're ignore pmore here, which assumes the OSD limit is high + // enough for us. + op.omap_get_keys2("", -1, &omap_keys, nullptr, nullptr); + op.omap_get_vals2("", -1, &omap, nullptr, nullptr); + op.omap_get_header(&header, 0); + } + op.getxattrs(&xattrs, 0); + + unsigned flags = 0; + if (balance_reads) + flags |= librados::OPERATION_BALANCE_READS; + if (localize_reads) + flags |= librados::OPERATION_LOCALIZE_READS; + + ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[0], &op, + flags, NULL)); + waiting_on++; + + // send 2 pipelined reads on the same object/snap. 
This can help testing + // OSD's read behavior in some scenarios + for (uint32_t i = 1; i < 3; ++i) { + librados::ObjectReadOperation pipeline_op; + _do_read(pipeline_op, i); + ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[i], &pipeline_op, 0)); + waiting_on++; + } + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::unique_lock state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(waiting_on > 0); + if (--waiting_on) { + return; + } + + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + int retval = completions[0]->get_return_value(); + for (auto it = completions.begin(); + it != completions.end(); ++it) { + ceph_assert((*it)->is_complete()); + uint64_t version = (*it)->get_version64(); + int err = (*it)->get_return_value(); + if (err != retval) { + std::cerr << num << ": Error: oid " << oid << " read returned different error codes: " + << retval << " and " << err << std::endl; + ceph_abort(); + } + if (err) { + if (!(err == -ENOENT && old_value.deleted())) { + std::cerr << num << ": Error: oid " << oid << " read returned error code " + << err << std::endl; + ceph_abort(); + } + } else if (version != old_value.version) { + std::cerr << num << ": oid " << oid << " version is " << version + << " and expected " << old_value.version << std::endl; + ceph_assert(version == old_value.version); + } + } + if (!retval) { + std::map<std::string, bufferlist>::iterator iter = xattrs.find("_header"); + bufferlist headerbl; + if (iter == xattrs.end()) { + if (old_value.has_contents()) { + std::cerr << num << ": Error: did not find header attr, has_contents: " + << old_value.has_contents() + << std::endl; + ceph_assert(!old_value.has_contents()); + } + } else { + headerbl = iter->second; + xattrs.erase(iter); + } + if (old_value.deleted()) { + std::cout << num << ": expect deleted" << std::endl; + ceph_abort_msg("expected deleted"); + } else { + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + } + if (old_value.has_contents()) { + ContDesc to_check; + auto p = headerbl.cbegin(); + decode(to_check, p); + if (to_check != old_value.most_recent()) { + std::cerr << num << ": oid " << oid << " found incorrect object contents " << to_check + << ", expected " << old_value.most_recent() << std::endl; + context->errors++; + } + for (unsigned i = 0; i < results.size(); i++) { + if (is_sparse_read[i]) { + if (!old_value.check_sparse(extent_results[i], results[i])) { + std::cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl; + context->errors++; + } + } else { + if (!old_value.check(results[i])) { + std::cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl; + context->errors++; + } + + uint32_t checksum = 0; + if (checksum_retvals[i] == 0) { + try { + auto bl_it = checksums[i].cbegin(); + uint32_t csum_count; + decode(csum_count, bl_it); + decode(checksum, bl_it); + } catch (const buffer::error &err) { + checksum_retvals[i] = -EBADMSG; + } + } + if (checksum_retvals[i] != 0 || checksum != results[i].crc32c(-1)) { + std::cerr << num << ": oid " << oid << " checksum " << checksums[i] + << " incorrect, expecting " << results[i].crc32c(-1) + << std::endl; + context->errors++; + } + } + } + if (context->errors) ceph_abort(); + } + + // Attributes + if (!context->no_omap) { + if (!(old_value.header == header)) { + std::cerr << num << ": oid " << oid << " header does not match, old size: " + << 
old_value.header.length() << " new size " << header.length() + << std::endl; + ceph_assert(old_value.header == header); + } + if (omap.size() != old_value.attrs.size()) { + std::cerr << num << ": oid " << oid << " omap.size() is " << omap.size() + << " and old is " << old_value.attrs.size() << std::endl; + ceph_assert(omap.size() == old_value.attrs.size()); + } + if (omap_keys.size() != old_value.attrs.size()) { + std::cerr << num << ": oid " << oid << " omap.size() is " << omap_keys.size() + << " and old is " << old_value.attrs.size() << std::endl; + ceph_assert(omap_keys.size() == old_value.attrs.size()); + } + } + if (xattrs.size() != old_value.attrs.size()) { + std::cerr << num << ": oid " << oid << " xattrs.size() is " << xattrs.size() + << " and old is " << old_value.attrs.size() << std::endl; + ceph_assert(xattrs.size() == old_value.attrs.size()); + } + for (auto iter = old_value.attrs.begin(); + iter != old_value.attrs.end(); + ++iter) { + bufferlist bl = context->attr_gen.gen_bl( + iter->second); + if (!context->no_omap) { + std::map<std::string, bufferlist>::iterator omap_iter = omap.find(iter->first); + ceph_assert(omap_iter != omap.end()); + ceph_assert(bl.length() == omap_iter->second.length()); + bufferlist::iterator k = bl.begin(); + for(bufferlist::iterator l = omap_iter->second.begin(); + !k.end() && !l.end(); + ++k, ++l) { + ceph_assert(*l == *k); + } + } + auto xattr_iter = xattrs.find(iter->first); + ceph_assert(xattr_iter != xattrs.end()); + ceph_assert(bl.length() == xattr_iter->second.length()); + bufferlist::iterator k = bl.begin(); + for (bufferlist::iterator j = xattr_iter->second.begin(); + !k.end() && !j.end(); + ++j, ++k) { + ceph_assert(*j == *k); + } + } + if (!context->no_omap) { + for (std::set<std::string>::iterator i = omap_requested_keys.begin(); + i != omap_requested_keys.end(); + ++i) { + if (!omap_returned_values.count(*i)) + ceph_assert(!old_value.attrs.count(*i)); + if (!old_value.attrs.count(*i)) + ceph_assert(!omap_returned_values.count(*i)); + } + for (auto i = omap_returned_values.begin(); + i != omap_returned_values.end(); + ++i) { + ceph_assert(omap_requested_keys.count(i->first)); + ceph_assert(omap.count(i->first)); + ceph_assert(old_value.attrs.count(i->first)); + ceph_assert(i->second == omap[i->first]); + } + } + } + for (auto it = completions.begin(); it != completions.end(); ++it) { + (*it)->release(); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "ReadOp"; + } +}; + +class SnapCreateOp : public TestOp { +public: + SnapCreateOp(int n, + RadosTestContext *context, + TestOpStat *stat = 0) + : TestOp(n, context, stat) + {} + + void _begin() override + { + uint64_t snap; + std::string snapname; + + if (context->pool_snaps) { + std::stringstream ss; + + ss << context->prefix << "snap" << ++context->snapname_num; + snapname = ss.str(); + + int ret = context->io_ctx.snap_create(snapname.c_str()); + if (ret) { + std::cerr << "snap_create returned " << ret << std::endl; + ceph_abort(); + } + ceph_assert(!context->io_ctx.snap_lookup(snapname.c_str(), &snap)); + + } else { + ceph_assert(!context->io_ctx.selfmanaged_snap_create(&snap)); + } + + std::unique_lock state_locker{context->state_lock}; + context->add_snap(snap); + + if (!context->pool_snaps) { + std::vector<uint64_t> snapset(context->snaps.size()); + + int j = 0; + for (auto i = context->snaps.rbegin(); + i != context->snaps.rend(); + ++i, ++j) { + snapset[j] = i->second; + } + + 
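+      // snapset was built newest-first (reverse iteration over context->snaps),
+      // i.e. the descending snap-id order a self-managed snap context expects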
state_locker.unlock(); + + int r = context->io_ctx.selfmanaged_snap_set_write_ctx(context->seq, snapset); + if (r) { + std::cerr << "r is " << r << " snapset is " << snapset << " seq is " << context->seq << std::endl; + ceph_abort(); + } + } + } + + std::string getType() override + { + return "SnapCreateOp"; + } + bool must_quiesce_other_ops() override { return context->pool_snaps; } +}; + +class SnapRemoveOp : public TestOp { +public: + int to_remove; + SnapRemoveOp(int n, RadosTestContext *context, + int snap, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + to_remove(snap) + {} + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + uint64_t snap = context->snaps[to_remove]; + context->remove_snap(to_remove); + + if (context->pool_snaps) { + std::string snapname; + + ceph_assert(!context->io_ctx.snap_get_name(snap, &snapname)); + ceph_assert(!context->io_ctx.snap_remove(snapname.c_str())); + } else { + ceph_assert(!context->io_ctx.selfmanaged_snap_remove(snap)); + + std::vector<uint64_t> snapset(context->snaps.size()); + int j = 0; + for (auto i = context->snaps.rbegin(); + i != context->snaps.rend(); + ++i, ++j) { + snapset[j] = i->second; + } + + int r = context->io_ctx.selfmanaged_snap_set_write_ctx(context->seq, snapset); + if (r) { + std::cerr << "r is " << r << " snapset is " << snapset << " seq is " << context->seq << std::endl; + ceph_abort(); + } + } + } + + std::string getType() override + { + return "SnapRemoveOp"; + } +}; + +class WatchOp : public TestOp { + std::string oid; +public: + WatchOp(int n, + RadosTestContext *context, + const std::string &_oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(_oid) + {} + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + ObjectDesc contents; + context->find_object(oid, &contents); + if (contents.deleted()) { + context->kick(); + return; + } + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + TestWatchContext *ctx = context->get_watch_context(oid); + state_locker.unlock(); + int r; + if (!ctx) { + { + std::lock_guard l{context->state_lock}; + ctx = context->watch(oid); + } + + r = context->io_ctx.watch2(context->prefix+oid, + &ctx->get_handle(), + ctx); + } else { + r = context->io_ctx.unwatch2(ctx->get_handle()); + { + std::lock_guard l{context->state_lock}; + context->unwatch(oid); + } + } + + if (r) { + std::cerr << "r is " << r << std::endl; + ceph_abort(); + } + + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + } + } + + std::string getType() override + { + return "WatchOp"; + } +}; + +class RollbackOp : public TestOp { +public: + std::string oid; + int roll_back_to; + librados::ObjectWriteOperation zero_write_op1; + librados::ObjectWriteOperation zero_write_op2; + librados::ObjectWriteOperation op; + std::vector<librados::AioCompletion *> comps; + std::shared_ptr<int> in_use; + int last_finished; + int outstanding; + + RollbackOp(int n, + RadosTestContext *context, + const std::string &_oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(_oid), roll_back_to(-1), + comps(3, NULL), + last_finished(-1), outstanding(3) + {} + + void _begin() override + { + context->state_lock.lock(); + if (context->get_watch_context(oid)) { + context->kick(); + context->state_lock.unlock(); + return; + } + + if (context->snaps.empty()) { + context->kick(); + context->state_lock.unlock(); + done = true; + return; + } + + context->oid_in_use.insert(oid); + 
context->oid_not_in_use.erase(oid); + + roll_back_to = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create( + roll_back_to, + roll_back_to); + + + std::cout << "rollback oid " << oid << " to " << roll_back_to << std::endl; + + bool existed_before = context->object_existed_at(oid); + bool existed_after = context->object_existed_at(oid, roll_back_to); + + context->roll_back(oid, roll_back_to); + uint64_t snap = context->snaps[roll_back_to]; + + outstanding -= (!existed_before) + (!existed_after); + + context->state_lock.unlock(); + + bufferlist bl, bl2; + zero_write_op1.append(bl); + zero_write_op2.append(bl2); + + if (context->pool_snaps) { + op.snap_rollback(snap); + } else { + op.selfmanaged_snap_rollback(snap); + } + + if (existed_before) { + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comps[0] = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate( + context->prefix+oid, comps[0], &zero_write_op1); + } + { + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(1)); + comps[1] = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate( + context->prefix+oid, comps[1], &op); + } + if (existed_after) { + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(2)); + comps[2] = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate( + context->prefix+oid, comps[2], &zero_write_op2); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + uint64_t tid = info->id; + std::cout << num << ": finishing rollback tid " << tid + << " to " << context->prefix + oid << std::endl; + ceph_assert((int)(info->id) > last_finished); + last_finished = info->id; + + int r; + if ((r = comps[last_finished]->get_return_value()) != 0) { + std::cerr << "err " << r << std::endl; + ceph_abort(); + } + if (--outstanding == 0) { + done = true; + context->update_object_version(oid, comps[tid]->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + in_use = std::shared_ptr<int>(); + context->kick(); + } + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "RollBackOp"; + } +}; + +class CopyFromOp : public TestOp { +public: + std::string oid, oid_src; + ObjectDesc src_value; + librados::ObjectWriteOperation op; + librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + librados::AioCompletion *comp_racing_read = nullptr; + std::shared_ptr<int> in_use; + int snap; + int done; + uint64_t version; + int r; + CopyFromOp(int n, + RadosTestContext *context, + const std::string &oid, + const std::string &oid_src, + TestOpStat *stat) + : TestOp(n, context, stat), + oid(oid), oid_src(oid_src), + comp(NULL), snap(-1), done(0), + version(0), r(0) + {} + + void _begin() override + { + ContDesc cont; + { + std::lock_guard l{context->state_lock}; + cont = ContDesc(context->seq_num, context->current_snap, + context->seq_num, ""); + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->oid_in_use.insert(oid_src); + context->oid_not_in_use.erase(oid_src); + + // choose source snap + if (0 && !(rand() % 4) && !context->snaps.empty()) { 
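+	// note: the leading 0 && makes this branch unreachable, so copy_from
+	// currently always reads the head object rather than a source snap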
+ snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + context->find_object(oid_src, &src_value, snap); + if (!src_value.deleted()) + context->update_object_full(oid, src_value); + } + + std::string src = context->prefix+oid_src; + op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0); + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + + // queue up a racing read, too. + std::pair<TestOp*, TestOp::CallbackInfo*> *read_cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(1)); + comp_racing_read = context->rados.aio_create_completion((void*) read_cb_arg, &write_callback); + rd_op.stat(NULL, NULL, NULL); + context->io_ctx.aio_operate(context->prefix+oid, comp_racing_read, &rd_op, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + NULL); + + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + // note that the read can (and atm will) come back before the + // write reply, but will reflect the update and the versions will + // match. + + if (info->id == 0) { + // copy_from + ceph_assert(comp->is_complete()); + std::cout << num << ": finishing copy_from to " << context->prefix + oid << std::endl; + if ((r = comp->get_return_value())) { + if (r == -ENOENT && src_value.deleted()) { + std::cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + std::cerr << "Error: oid " << oid << " copy_from " << oid_src << " returned error code " + << r << std::endl; + ceph_abort(); + } + } else { + ceph_assert(!version || comp->get_version64() == version); + version = comp->get_version64(); + context->update_object_version(oid, comp->get_version64()); + } + } else if (info->id == 1) { + // racing read + ceph_assert(comp_racing_read->is_complete()); + std::cout << num << ": finishing copy_from racing read to " << context->prefix + oid << std::endl; + if ((r = comp_racing_read->get_return_value())) { + if (!(r == -ENOENT && src_value.deleted())) { + std::cerr << "Error: oid " << oid << " copy_from " << oid_src << " returned error code " + << r << std::endl; + } + } else { + ceph_assert(comp_racing_read->get_return_value() == 0); + ceph_assert(!version || comp_racing_read->get_version64() == version); + version = comp_racing_read->get_version64(); + } + } + if (++done == 2) { + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->oid_in_use.erase(oid_src); + context->oid_not_in_use.insert(oid_src); + context->kick(); + } + } + + bool finished() override + { + return done == 2; + } + + std::string getType() override + { + return "CopyFromOp"; + } +}; + +class ChunkReadOp : public TestOp { +public: + std::vector<librados::AioCompletion *> completions; + librados::ObjectReadOperation op; + std::string oid; + ObjectDesc old_value; + ObjectDesc tgt_value; + int snap; + bool balance_reads; + bool localize_reads; + + std::shared_ptr<int> in_use; + + std::vector<bufferlist> results; + std::vector<int> retvals; + std::vector<bool> is_sparse_read; + uint64_t waiting_on; + + std::vector<bufferlist> checksums; + std::vector<int> checksum_retvals; + uint32_t offset = 0; + uint32_t length = 0; + std::string tgt_oid; + std::string tgt_pool_name; + 
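+  // offset within the target chunk object corresponding to `offset` in the source object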
uint32_t tgt_offset = 0; + + ChunkReadOp(int n, + RadosTestContext *context, + const std::string &oid, + const std::string &tgt_pool_name, + bool balance_reads, + bool localize_reads, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completions(2), + oid(oid), + snap(0), + balance_reads(balance_reads), + localize_reads(localize_reads), + results(2), + retvals(2), + waiting_on(0), + checksums(2), + checksum_retvals(2), + tgt_pool_name(tgt_pool_name) + {} + + void _do_read(librados::ObjectReadOperation& read_op, uint32_t offset, uint32_t length, int index) { + read_op.read(offset, + length, + &results[index], + &retvals[index]); + if (index != 0) { + bufferlist init_value_bl; + encode(static_cast<uint32_t>(-1), init_value_bl); + read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, offset, length, + 0, &checksums[index], &checksum_retvals[index]); + } + + } + + void _begin() override + { + context->state_lock.lock(); + std::cout << num << ": chunk read oid " << oid << " snap " << snap << std::endl; + done = 0; + for (uint32_t i = 0; i < 2; i++) { + completions[i] = context->rados.aio_create_completion((void *) this, &read_callback); + } + + context->find_object(oid, &old_value); + + if (old_value.chunk_info.size() == 0) { + std::cout << ": no chunks" << std::endl; + context->kick(); + context->state_lock.unlock(); + done = true; + return; + } + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + if (old_value.deleted()) { + std::cout << num << ": expect deleted" << std::endl; + } else { + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + } + + int rand_index = rand() % old_value.chunk_info.size(); + auto iter = old_value.chunk_info.begin(); + for (int i = 0; i < rand_index; i++) { + iter++; + } + offset = iter->first; + offset += (rand() % iter->second.length)/2; + uint32_t t_length = rand() % iter->second.length; + while (t_length + offset > iter->first + iter->second.length) { + t_length = rand() % iter->second.length; + } + length = t_length; + tgt_offset = iter->second.offset + offset - iter->first; + tgt_oid = iter->second.oid; + + std::cout << num << ": ori offset " << iter->first << " req offset " << offset + << " ori length " << iter->second.length << " req length " << length + << " ori tgt_offset " << iter->second.offset << " req tgt_offset " << tgt_offset + << " tgt_oid " << tgt_oid << std::endl; + + TestWatchContext *ctx = context->get_watch_context(oid); + context->state_lock.unlock(); + if (ctx) { + ceph_assert(old_value.exists); + TestAlarm alarm; + std::cerr << num << ": about to start" << std::endl; + ctx->start(); + std::cerr << num << ": started" << std::endl; + bufferlist bl; + context->io_ctx.set_notify_timeout(600); + int r = context->io_ctx.notify2(context->prefix+oid, bl, 0, NULL); + if (r < 0) { + std::cerr << "r is " << r << std::endl; + ceph_abort(); + } + std::cerr << num << ": notified, waiting" << std::endl; + ctx->wait(); + } + std::lock_guard state_locker{context->state_lock}; + + _do_read(op, offset, length, 0); + + unsigned flags = 0; + if (balance_reads) + flags |= librados::OPERATION_BALANCE_READS; + if (localize_reads) + flags |= librados::OPERATION_LOCALIZE_READS; + + ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[0], &op, + flags, NULL)); + waiting_on++; + + _do_read(op, tgt_offset, length, 1); + ceph_assert(!context->io_ctx.aio_operate(context->prefix+tgt_oid, completions[1], &op, + flags, NULL)); + + waiting_on++; + } + + void _finish(CallbackInfo *info) override 
+ { + std::lock_guard l{context->state_lock}; + ceph_assert(!done); + ceph_assert(waiting_on > 0); + if (--waiting_on) { + return; + } + + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + int retval = completions[0]->get_return_value(); + std::cout << ": finish!! ret: " << retval << std::endl; + context->find_object(tgt_oid, &tgt_value); + + for (int i = 0; i < 2; i++) { + ceph_assert(completions[i]->is_complete()); + int err = completions[i]->get_return_value(); + if (err != retval) { + std::cerr << num << ": Error: oid " << oid << " read returned different error codes: " + << retval << " and " << err << std::endl; + ceph_abort(); + } + if (err) { + if (!(err == -ENOENT && old_value.deleted())) { + std::cerr << num << ": Error: oid " << oid << " read returned error code " + << err << std::endl; + ceph_abort(); + } + } + } + + if (!retval) { + if (old_value.deleted()) { + std::cout << num << ": expect deleted" << std::endl; + ceph_abort_msg("expected deleted"); + } else { + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + } + if (tgt_value.has_contents()) { + uint32_t checksum[2] = {0}; + if (checksum_retvals[1] == 0) { + try { + auto bl_it = checksums[1].cbegin(); + uint32_t csum_count; + decode(csum_count, bl_it); + decode(checksum[1], bl_it); + } catch (const buffer::error &err) { + checksum_retvals[1] = -EBADMSG; + } + } + + if (checksum_retvals[1] != 0) { + std::cerr << num << ": oid " << oid << " checksum retvals " << checksums[0] + << " error " << std::endl; + context->errors++; + } + + checksum[0] = results[0].crc32c(-1); + + if (checksum[0] != checksum[1]) { + std::cerr << num << ": oid " << oid << " checksum src " << checksum[0] + << " chunksum tgt " << checksum[1] << " incorrect, expecting " + << results[0].crc32c(-1) + << std::endl; + context->errors++; + } + if (context->errors) ceph_abort(); + } + } + for (auto it = completions.begin(); it != completions.end(); ++it) { + (*it)->release(); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "ChunkReadOp"; + } +}; + +class CopyOp : public TestOp { +public: + std::string oid, oid_src, tgt_pool_name; + librados::ObjectWriteOperation op; + librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + ObjectDesc src_value, tgt_value; + int done; + int r; + CopyOp(int n, + RadosTestContext *context, + const std::string &oid_src, + const std::string &oid, + const std::string &tgt_pool_name, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), oid_src(oid_src), tgt_pool_name(tgt_pool_name), + comp(NULL), done(0), r(0) + {} + + void _begin() override + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.insert(oid_src); + context->oid_not_in_use.erase(oid_src); + + std::string src = context->prefix+oid_src; + context->find_object(oid_src, &src_value); + op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0); + + std::cout << "copy op oid " << oid_src << " to " << oid << " tgt_pool_name " << tgt_pool_name << std::endl; + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + if (tgt_pool_name == context->low_tier_pool_name) { + context->low_tier_io_ctx.aio_operate(context->prefix+oid, comp, &op); + } else { + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + } + } + + void 
_finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + if (info->id == 0) { + ceph_assert(comp->is_complete()); + std::cout << num << ": finishing copy op to oid " << oid << std::endl; + if ((r = comp->get_return_value())) { + std::cerr << "Error: oid " << oid << " write returned error code " + << r << std::endl; + ceph_abort(); + } + } + + if (++done == 1) { + context->oid_in_use.erase(oid_src); + context->oid_not_in_use.insert(oid_src); + context->kick(); + } + } + + bool finished() override + { + return done == 1; + } + + std::string getType() override + { + return "CopyOp"; + } +}; + +class SetChunkOp : public TestOp { +public: + std::string oid, oid_tgt; + ObjectDesc src_value, tgt_value; + librados::ObjectReadOperation op; + librados::AioCompletion *comp; + int done; + int r; + uint64_t offset; + uint32_t length; + uint32_t tgt_offset; + int snap; + std::shared_ptr<int> in_use; + SetChunkOp(int n, + RadosTestContext *context, + const std::string &oid, + const std::string &oid_tgt, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), oid_tgt(oid_tgt), + comp(NULL), done(0), + r(0), offset(0), length(0), + tgt_offset(0), + snap(0) + {} + + std::pair<uint64_t, uint64_t> get_rand_off_len(uint32_t max_len) { + std::pair<uint64_t, uint64_t> r (0, 0); + r.first = rand() % max_len; + r.second = rand() % max_len; + r.first = r.first - (r.first % 512); + r.second = r.second - (r.second % 512); + + while (r.first + r.second > max_len || r.second == 0) { + r.first = rand() % max_len; + r.second = rand() % max_len; + r.first = r.first - (r.first % 512); + r.second = r.second - (r.second % 512); + } + return r; + } + + void _begin() override + { + std::lock_guard l{context->state_lock}; + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + context->find_object(oid, &src_value, snap); + context->find_object(oid_tgt, &tgt_value); + + uint32_t max_len = 0; + if (src_value.deleted()) { + /* just random length to check ENOENT */ + max_len = context->max_size; + } else { + max_len = src_value.most_recent_gen()->get_length(src_value.most_recent()); + } + std::pair<uint64_t, uint64_t> off_len; // first: offset, second: length + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + off_len = get_rand_off_len(max_len); + } else if (src_value.version != 0 && !src_value.deleted()) { + op.assert_version(src_value.version); + off_len = get_rand_off_len(max_len); + } else if (src_value.deleted()) { + off_len.first = 0; + off_len.second = max_len; + } + offset = off_len.first; + length = off_len.second; + tgt_offset = offset; + + std::string target_oid; + if (!src_value.deleted() && oid_tgt.empty()) { + bufferlist bl; + int r = context->io_ctx.read(context->prefix+oid, bl, length, offset); + ceph_assert(r > 0); + std::string fp_oid = ceph::crypto::digest<ceph::crypto::SHA256>(bl).to_str(); + r = context->low_tier_io_ctx.write(fp_oid, bl, bl.length(), 0); + ceph_assert(r == 0); + target_oid = fp_oid; + tgt_offset = 0; + } else { + target_oid = context->prefix+oid_tgt; + } + + std::cout << num << ": " << "set_chunk oid " << oid << " offset: " << offset + << " length: " << length << " target oid " << target_oid + << " offset: " << tgt_offset << " snap " << snap << std::endl; + + op.set_chunk(offset, length, context->low_tier_io_ctx, + target_oid, 
tgt_offset, CEPH_OSD_OP_FLAG_WITH_REFERENCE); + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES, NULL); + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + if (info->id == 0) { + ceph_assert(comp->is_complete()); + std::cout << num << ": finishing set_chunk to oid " << oid << std::endl; + if ((r = comp->get_return_value())) { + if (r == -ENOENT && src_value.deleted()) { + std::cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else if (r == -ENOENT && context->oid_set_chunk_tgt_pool.find(oid_tgt) != + context->oid_set_chunk_tgt_pool.end()) { + std::cout << num << ": get expected ENOENT tgt oid " << oid_tgt << std::endl; + } else if (r == -ERANGE && src_value.deleted()) { + std::cout << num << ": got expected ERANGE (src dne)" << std::endl; + } else if (r == -EOPNOTSUPP) { + std::cout << "Range is overlapped: oid " << oid << " set_chunk " << oid_tgt << " returned error code " + << r << " offset: " << offset << " length: " << length << std::endl; + context->update_object_version(oid, comp->get_version64()); + } else { + std::cerr << "Error: oid " << oid << " set_chunk " << oid_tgt << " returned error code " + << r << std::endl; + ceph_abort(); + } + } else { + if (snap == -1) { + ChunkDesc info {tgt_offset, length, oid_tgt}; + context->update_object_chunk_target(oid, offset, info); + context->update_object_version(oid, comp->get_version64()); + } + } + } + + if (++done == 1) { + context->oid_set_chunk_tgt_pool.insert(oid_tgt); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + } + + bool finished() override + { + return done == 1; + } + + std::string getType() override + { + return "SetChunkOp"; + } +}; + +class SetRedirectOp : public TestOp { +public: + std::string oid, oid_tgt, tgt_pool_name; + ObjectDesc src_value, tgt_value; + librados::ObjectWriteOperation op; + librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + std::shared_ptr<int> in_use; + int done; + int r; + SetRedirectOp(int n, + RadosTestContext *context, + const std::string &oid, + const std::string &oid_tgt, + const std::string &tgt_pool_name, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), oid_tgt(oid_tgt), tgt_pool_name(tgt_pool_name), + comp(NULL), done(0), + r(0) + {} + + void _begin() override + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->oid_redirect_in_use.insert(oid_tgt); + context->oid_redirect_not_in_use.erase(oid_tgt); + + if (tgt_pool_name.empty()) ceph_abort(); + + context->find_object(oid, &src_value); + if(!context->redirect_objs[oid].empty()) { + /* copy_from oid --> oid_tgt */ + comp = context->rados.aio_create_completion(); + std::string src = context->prefix+oid; + op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0); + context->low_tier_io_ctx.aio_operate(context->prefix+oid_tgt, comp, &op, + librados::OPERATION_ORDER_READS_WRITES); + comp->wait_for_complete(); + if ((r = comp->get_return_value())) { + std::cerr << "Error: oid " << oid << " copy_from " << oid_tgt << " returned error code " + << r << std::endl; + ceph_abort(); + } + 
comp->release(); + + /* unset redirect target */ + comp = context->rados.aio_create_completion(); + bool present = !src_value.deleted(); + op.unset_manifest(); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT); + comp->wait_for_complete(); + if ((r = comp->get_return_value())) { + if (!(r == -ENOENT && !present) && r != -EOPNOTSUPP) { + std::cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl; + ceph_abort(); + } + } + comp->release(); + + context->oid_redirect_not_in_use.insert(context->redirect_objs[oid]); + context->oid_redirect_in_use.erase(context->redirect_objs[oid]); + } + + comp = context->rados.aio_create_completion(); + rd_op.stat(NULL, NULL, NULL); + context->io_ctx.aio_operate(context->prefix+oid, comp, &rd_op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT, + NULL); + comp->wait_for_complete(); + if ((r = comp->get_return_value()) && !src_value.deleted()) { + std::cerr << "Error: oid " << oid << " stat returned error code " + << r << std::endl; + ceph_abort(); + } + context->update_object_version(oid, comp->get_version64()); + comp->release(); + + comp = context->rados.aio_create_completion(); + rd_op.stat(NULL, NULL, NULL); + context->low_tier_io_ctx.aio_operate(context->prefix+oid_tgt, comp, &rd_op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT, + NULL); + comp->wait_for_complete(); + if ((r = comp->get_return_value())) { + std::cerr << "Error: oid " << oid_tgt << " stat returned error code " + << r << std::endl; + ceph_abort(); + } + uint64_t tgt_version = comp->get_version64(); + comp->release(); + + + context->find_object(oid, &src_value); + + if (src_value.version != 0 && !src_value.deleted()) + op.assert_version(src_value.version); + op.set_redirect(context->prefix+oid_tgt, context->low_tier_io_ctx, tgt_version); + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + if (info->id == 0) { + ceph_assert(comp->is_complete()); + std::cout << num << ": finishing set_redirect to oid " << oid << std::endl; + if ((r = comp->get_return_value())) { + if (r == -ENOENT && src_value.deleted()) { + std::cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + std::cerr << "Error: oid " << oid << " set_redirect " << oid_tgt << " returned error code " + << r << std::endl; + ceph_abort(); + } + } else { + context->update_object_redirect_target(oid, oid_tgt); + context->update_object_version(oid, comp->get_version64()); + } + } + + if (++done == 1) { + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + } + + bool finished() override + { + return done == 1; + } + + std::string getType() override + { + return "SetRedirectOp"; + } +}; + +class UnsetRedirectOp : public TestOp { +public: + std::string oid; + librados::ObjectWriteOperation op; + librados::AioCompletion *comp = nullptr; + + UnsetRedirectOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), oid(oid) + {} + + void _begin() override + { 
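+    // remove the head object itself (OPERATION_IGNORE_REDIRECT below bypasses any
+    // redirect target), then clear the model's redirect bookkeeping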
+ std::unique_lock state_locker{context->state_lock}; + if (context->get_watch_context(oid)) { + context->kick(); + return; + } + + ObjectDesc contents; + context->find_object(oid, &contents); + bool present = !contents.deleted(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->seq_num++; + + context->remove_object(oid); + + state_locker.unlock(); + + comp = context->rados.aio_create_completion(); + op.remove(); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT); + comp->wait_for_complete(); + int r = comp->get_return_value(); + if (r && !(r == -ENOENT && !present)) { + std::cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl; + ceph_abort(); + } + state_locker.lock(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + if(!context->redirect_objs[oid].empty()) { + context->oid_redirect_not_in_use.insert(context->redirect_objs[oid]); + context->oid_redirect_in_use.erase(context->redirect_objs[oid]); + context->update_object_redirect_target(oid, {}); + } + context->kick(); + } + + std::string getType() override + { + return "UnsetRedirectOp"; + } +}; + +class TierPromoteOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectWriteOperation op; + std::string oid; + std::shared_ptr<int> in_use; + ObjectDesc src_value; + + TierPromoteOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + void _begin() override + { + context->state_lock.lock(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + context->find_object(oid, &src_value); + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.tier_promote(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op); + ceph_assert(!r); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + + ObjectDesc oid_value; + context->find_object(oid, &oid_value); + int r = completion->get_return_value(); + std::cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // sucess + } else if (r == -ENOENT && src_value.deleted()) { + std::cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + ceph_abort_msg("shouldn't happen"); + } + context->update_object_version(oid, completion->get_version64()); + context->find_object(oid, &oid_value); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "TierPromoteOp"; + } +}; + +class TierFlushOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + std::string oid; + std::shared_ptr<int> in_use; + int snap; + ObjectDesc src_value; + + + TierFlushOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid), + snap(-1) + {} + + void _begin() override + { + context->state_lock.lock(); + + 
context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + if (0 && !(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + + std::cout << num << ": tier_flush oid " << oid << " snap " << snap << std::endl; + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + context->find_object(oid, &src_value, snap); + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.tier_flush(); + unsigned flags = librados::OPERATION_IGNORE_CACHE; + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, flags, NULL); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + context->state_lock.lock(); + ceph_assert(!done); + ceph_assert(completion->is_complete()); + + int r = completion->get_return_value(); + std::cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // sucess + context->update_object_tier_flushed(oid, snap); + context->update_object_version(oid, completion->get_version64(), snap); + } else if (r == -EBUSY) { + // could fail if snap is not oldest + ceph_assert(!context->check_oldest_snap_flushed(oid, snap)); + } else if (r == -ENOENT) { + // could fail if object is removed + if (src_value.deleted()) { + std::cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + std::cerr << num << ": got unexpected ENOENT" << std::endl; + ceph_abort(); + } + } else { + if (r != -ENOENT && src_value.deleted()) { + std::cerr << num << ": src dne, but r is not ENOENT" << std::endl; + } + ceph_abort_msg("shouldn't happen"); + } + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + context->state_lock.unlock(); + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "TierFlushOp"; + } +}; + +class TierEvictOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + std::string oid; + std::shared_ptr<int> in_use; + int snap; + ObjectDesc src_value; + + TierEvictOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid), + snap(-1) + {} + + void _begin() override + { + context->state_lock.lock(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + if (0 && !(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + + std::cout << num << ": tier_evict oid " << oid << " snap " << snap << std::endl; + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + context->find_object(oid, &src_value, snap); + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.cache_evict(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, librados::OPERATION_IGNORE_CACHE, + NULL); + 
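+    // aio_operate only queues the evict; the assert below checks submission,
+    // while the actual result is examined in _finish() via the completion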
ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + + int r = completion->get_return_value(); + std::cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // ok + } else if (r == -EINVAL) { + // modifying manifest object makes existing chunk_map clear + // as a result, the modified object is no longer manifest object + // this casues to return -EINVAL + } else if (r == -ENOENT) { + // could fail if object is removed + if (src_value.deleted()) { + std::cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + std::cerr << num << ": got unexpected ENOENT" << std::endl; + ceph_abort(); + } + } else { + if (r != -ENOENT && src_value.deleted()) { + std::cerr << num << ": src dne, but r is not ENOENT" << std::endl; + } + ceph_abort_msg("shouldn't happen"); + } + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "TierEvictOp"; + } +}; + +class HitSetListOp : public TestOp { + librados::AioCompletion *comp1, *comp2; + uint32_t hash; + std::list< std::pair<time_t, time_t> > ls; + bufferlist bl; + +public: + HitSetListOp(int n, + RadosTestContext *context, + uint32_t hash, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + comp1(NULL), comp2(NULL), + hash(hash) + {} + + void _begin() override + { + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp1 = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + int r = context->io_ctx.hit_set_list(hash, comp1, &ls); + ceph_assert(r == 0); + } + + void _finish(CallbackInfo *info) override { + std::lock_guard l{context->state_lock}; + if (!comp2) { + if (ls.empty()) { + std::cerr << num << ": no hitsets" << std::endl; + done = true; + } else { + std::cerr << num << ": hitsets are " << ls << std::endl; + int r = rand() % ls.size(); + auto p = ls.begin(); + while (r--) + ++p; + auto cb_arg = new std::pair<TestOp*, TestOp::CallbackInfo*>( + this, new TestOp::CallbackInfo(0)); + comp2 = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + r = context->io_ctx.hit_set_get(hash, comp2, p->second, &bl); + ceph_assert(r == 0); + } + } else { + int r = comp2->get_return_value(); + if (r == 0) { + HitSet hitset; + auto p = bl.cbegin(); + decode(hitset, p); + std::cout << num << ": got hitset of type " << hitset.get_type_name() + << " size " << bl.length() + << std::endl; + } else { + // FIXME: we could verify that we did in fact race with a trim... 
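+	// -ENOENT is tolerated here: the chosen hitset may have been trimmed
+	// between the hit_set_list and hit_set_get calls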
+ ceph_assert(r == -ENOENT); + } + done = true; + } + + context->kick(); + } + + bool finished() override { + return done; + } + + std::string getType() override { + return "HitSetListOp"; + } +}; + +class UndirtyOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectWriteOperation op; + std::string oid; + + UndirtyOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + void _begin() override + { + context->state_lock.lock(); + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->update_object_undirty(oid); + context->state_lock.unlock(); + + op.undirty(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, 0); + ceph_assert(!r); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->update_object_version(oid, completion->get_version64()); + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "UndirtyOp"; + } +}; + +class IsDirtyOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + std::string oid; + bool dirty; + ObjectDesc old_value; + int snap = 0; + std::shared_ptr<int> in_use; + + IsDirtyOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completion(NULL), + oid(oid), + dirty(false) + {} + + void _begin() override + { + context->state_lock.lock(); + + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + std::cout << num << ": is_dirty oid " << oid << " snap " << snap + << std::endl; + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->state_lock.unlock(); + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + op.is_dirty(&dirty, NULL); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, 0); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + + ceph_assert(context->find_object(oid, &old_value, snap)); + + int r = completion->get_return_value(); + if (r == 0) { + std::cout << num << ": " << (dirty ? 
"dirty" : "clean") << std::endl; + ceph_assert(!old_value.deleted()); + ceph_assert(dirty == old_value.dirty); + } else { + std::cout << num << ": got " << r << std::endl; + ceph_assert(r == -ENOENT); + ceph_assert(old_value.deleted()); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "IsDirtyOp"; + } +}; + + + +class CacheFlushOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + std::string oid; + bool blocking; + int snap; + bool can_fail; + std::shared_ptr<int> in_use; + + CacheFlushOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat, + bool b) + : TestOp(n, context, stat), + completion(NULL), + oid(oid), + blocking(b), + snap(0), + can_fail(false) + {} + + void _begin() override + { + context->state_lock.lock(); + + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + // not being particularly specific here about knowing which + // flushes are on the oldest clean snap and which ones are not. + can_fail = !blocking || !context->snaps.empty(); + // FIXME: we could fail if we've ever removed a snap due to + // the async snap trimming. + can_fail = true; + std::cout << num << ": " << (blocking ? "cache_flush" : "cache_try_flush") + << " oid " << oid << " snap " << snap << std::endl; + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->oid_flushing.insert(oid); + context->oid_not_flushing.erase(oid); + context->state_lock.unlock(); + + unsigned flags = librados::OPERATION_IGNORE_CACHE; + if (blocking) { + op.cache_flush(); + } else { + op.cache_try_flush(); + flags = librados::OPERATION_SKIPRWLOCKS; + } + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, flags, NULL); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + context->oid_flushing.erase(oid); + context->oid_not_flushing.insert(oid); + int r = completion->get_return_value(); + std::cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + context->update_object_version(oid, 0, snap); + } else if (r == -EBUSY) { + ceph_assert(can_fail); + } else if (r == -EINVAL) { + // caching not enabled? + } else if (r == -ENOENT) { + // may have raced with a remove? 
+ } else { + ceph_abort_msg("shouldn't happen"); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "CacheFlushOp"; + } +}; + +class CacheEvictOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + std::string oid; + std::shared_ptr<int> in_use; + + CacheEvictOp(int n, + RadosTestContext *context, + const std::string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + void _begin() override + { + context->state_lock.lock(); + + int snap; + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + std::cout << num << ": cache_evict oid " << oid << " snap " << snap << std::endl; + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + std::pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new std::pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.cache_evict(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, librados::OPERATION_IGNORE_CACHE, + NULL); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + + int r = completion->get_return_value(); + std::cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // yay! + } else if (r == -EBUSY) { + // raced with something that dirtied the object + } else if (r == -EINVAL) { + // caching not enabled? + } else if (r == -ENOENT) { + // may have raced with a remove? + } else { + ceph_abort_msg("shouldn't happen"); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + std::string getType() override + { + return "CacheEvictOp"; + } +}; + + +#endif diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc new file mode 100644 index 000000000..1c13fb4c9 --- /dev/null +++ b/src/test/osd/TestECBackend.cc @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank Storage, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include <iostream> +#include <sstream> +#include <errno.h> +#include <signal.h> +#include "osd/ECBackend.h" +#include "gtest/gtest.h" + +using namespace std; + +TEST(ECUtil, stripe_info_t) +{ + const uint64_t swidth = 4096; + const uint64_t ssize = 4; + + ECUtil::stripe_info_t s(ssize, swidth); + ASSERT_EQ(s.get_stripe_width(), swidth); + + ASSERT_EQ(s.logical_to_next_chunk_offset(0), 0u); + ASSERT_EQ(s.logical_to_next_chunk_offset(1), s.get_chunk_size()); + ASSERT_EQ(s.logical_to_next_chunk_offset(swidth - 1), + s.get_chunk_size()); + + ASSERT_EQ(s.logical_to_prev_chunk_offset(0), 0u); + ASSERT_EQ(s.logical_to_prev_chunk_offset(swidth), s.get_chunk_size()); + ASSERT_EQ(s.logical_to_prev_chunk_offset((swidth * 2) - 1), + s.get_chunk_size()); + + ASSERT_EQ(s.logical_to_next_stripe_offset(0), 0u); + ASSERT_EQ(s.logical_to_next_stripe_offset(swidth - 1), + s.get_stripe_width()); + + ASSERT_EQ(s.logical_to_prev_stripe_offset(swidth), s.get_stripe_width()); + ASSERT_EQ(s.logical_to_prev_stripe_offset(swidth), s.get_stripe_width()); + ASSERT_EQ(s.logical_to_prev_stripe_offset((swidth * 2) - 1), + s.get_stripe_width()); + + ASSERT_EQ(s.aligned_logical_offset_to_chunk_offset(2*swidth), + 2*s.get_chunk_size()); + ASSERT_EQ(s.aligned_chunk_offset_to_logical_offset(2*s.get_chunk_size()), + 2*s.get_stripe_width()); + + ASSERT_EQ(s.aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)), + make_pair(s.get_chunk_size(), 10*s.get_chunk_size())); + + ASSERT_EQ(s.offset_len_to_stripe_bounds(make_pair(swidth-10, (uint64_t)20)), + make_pair((uint64_t)0, 2*swidth)); +} + diff --git a/src/test/osd/TestMClockScheduler.cc b/src/test/osd/TestMClockScheduler.cc new file mode 100644 index 000000000..8291da268 --- /dev/null +++ b/src/test/osd/TestMClockScheduler.cc @@ -0,0 +1,256 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- + +#include <chrono> + +#include "gtest/gtest.h" + +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/common_init.h" + +#include "osd/scheduler/mClockScheduler.h" +#include "osd/scheduler/OpSchedulerItem.h" + +using namespace ceph::osd::scheduler; + +int main(int argc, char **argv) { + std::vector<const char*> args(argv, argv+argc); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + + +class mClockSchedulerTest : public testing::Test { +public: + int whoami; + uint32_t num_shards; + int shard_id; + bool is_rotational; + MonClient *monc; + mClockScheduler q; + + uint64_t client1; + uint64_t client2; + uint64_t client3; + + mClockSchedulerTest() : + whoami(0), + num_shards(1), + shard_id(0), + is_rotational(false), + monc(nullptr), + q(g_ceph_context, whoami, num_shards, shard_id, is_rotational, monc), + client1(1001), + client2(9999), + client3(100000001) + {} + + struct MockDmclockItem : public PGOpQueueable { + op_scheduler_class scheduler_class; + + MockDmclockItem(op_scheduler_class _scheduler_class) : + PGOpQueueable(spg_t()), + scheduler_class(_scheduler_class) {} + + MockDmclockItem() + : MockDmclockItem(op_scheduler_class::background_best_effort) {} + + ostream &print(ostream &rhs) const final { return rhs; } + + std::optional<OpRequestRef> maybe_get_op() const final { + return std::nullopt; + } + + op_scheduler_class get_scheduler_class() const final { + return scheduler_class; + } + + void run(OSD *osd, OSDShard *sdata, 
PGRef& pg, ThreadPool::TPHandle &handle) final {} + }; +}; + +template <typename... Args> +OpSchedulerItem create_item( + epoch_t e, uint64_t owner, Args&&... args) +{ + return OpSchedulerItem( + std::make_unique<mClockSchedulerTest::MockDmclockItem>( + std::forward<Args>(args)...), + 12, 12, + utime_t(), owner, e); +} + +template <typename... Args> +OpSchedulerItem create_high_prio_item( + unsigned priority, epoch_t e, uint64_t owner, Args&&... args) +{ + // Create high priority item for testing high prio queue + return OpSchedulerItem( + std::make_unique<mClockSchedulerTest::MockDmclockItem>( + std::forward<Args>(args)...), + 12, priority, + utime_t(), owner, e); +} + +OpSchedulerItem get_item(WorkItem item) +{ + return std::move(std::get<OpSchedulerItem>(item)); +} + +TEST_F(mClockSchedulerTest, TestEmpty) { + ASSERT_TRUE(q.empty()); + + for (unsigned i = 100; i < 105; i+=2) { + q.enqueue(create_item(i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + ASSERT_FALSE(q.empty()); + + std::list<OpSchedulerItem> reqs; + + reqs.push_back(get_item(q.dequeue())); + reqs.push_back(get_item(q.dequeue())); + + ASSERT_EQ(2u, reqs.size()); + ASSERT_FALSE(q.empty()); + + for (auto &&i : reqs) { + q.enqueue_front(std::move(i)); + } + reqs.clear(); + + ASSERT_FALSE(q.empty()); + + for (int i = 0; i < 3; ++i) { + ASSERT_FALSE(q.empty()); + q.dequeue(); + } + + ASSERT_TRUE(q.empty()); +} + +TEST_F(mClockSchedulerTest, TestSingleClientOrderedEnqueueDequeue) { + ASSERT_TRUE(q.empty()); + + for (unsigned i = 100; i < 105; ++i) { + q.enqueue(create_item(i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + auto r = get_item(q.dequeue()); + ASSERT_EQ(100u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(101u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(102u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(103u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(104u, r.get_map_epoch()); +} + +TEST_F(mClockSchedulerTest, TestMultiClientOrderedEnqueueDequeue) { + const unsigned NUM = 1000; + for (unsigned i = 0; i < NUM; ++i) { + for (auto &&c: {client1, client2, client3}) { + q.enqueue(create_item(i, c)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + } + + std::map<uint64_t, epoch_t> next; + for (auto &&c: {client1, client2, client3}) { + next[c] = 0; + } + for (unsigned i = 0; i < NUM * 3; ++i) { + ASSERT_FALSE(q.empty()); + auto r = get_item(q.dequeue()); + auto owner = r.get_owner(); + auto niter = next.find(owner); + ASSERT_FALSE(niter == next.end()); + ASSERT_EQ(niter->second, r.get_map_epoch()); + niter->second++; + } + ASSERT_TRUE(q.empty()); +} + +TEST_F(mClockSchedulerTest, TestHighPriorityQueueEnqueueDequeue) { + ASSERT_TRUE(q.empty()); + for (unsigned i = 200; i < 205; ++i) { + q.enqueue(create_high_prio_item(i, i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + ASSERT_FALSE(q.empty()); + // Higher priority ops should be dequeued first + auto r = get_item(q.dequeue()); + ASSERT_EQ(204u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(203u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(202u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(201u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(200u, r.get_map_epoch()); + + ASSERT_TRUE(q.empty()); +} + +TEST_F(mClockSchedulerTest, 
TestAllQueuesEnqueueDequeue) { + ASSERT_TRUE(q.empty()); + + // Insert ops into the mClock queue + for (unsigned i = 100; i < 102; ++i) { + q.enqueue(create_item(i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + // Insert Immediate ops + for (unsigned i = 103; i < 105; ++i) { + q.enqueue(create_item(i, client1, op_scheduler_class::immediate)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + // Insert ops into the high queue + for (unsigned i = 200; i < 202; ++i) { + q.enqueue(create_high_prio_item(i, i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + ASSERT_FALSE(q.empty()); + auto r = get_item(q.dequeue()); + // Ops classified as Immediate should be dequeued first + ASSERT_EQ(103u, r.get_map_epoch()); + r = get_item(q.dequeue()); + ASSERT_EQ(104u, r.get_map_epoch()); + + // High priority queue should be dequeued second + // higher priority operation first + r = get_item(q.dequeue()); + ASSERT_EQ(201u, r.get_map_epoch()); + r = get_item(q.dequeue()); + ASSERT_EQ(200u, r.get_map_epoch()); + + // mClock queue will be dequeued last + r = get_item(q.dequeue()); + ASSERT_EQ(100u, r.get_map_epoch()); + r = get_item(q.dequeue()); + ASSERT_EQ(101u, r.get_map_epoch()); + + ASSERT_TRUE(q.empty()); +} diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc new file mode 100644 index 000000000..c3c8a1531 --- /dev/null +++ b/src/test/osd/TestOSDMap.cc @@ -0,0 +1,2719 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include "gtest/gtest.h" +#include "osd/OSDMap.h" +#include "osd/OSDMapMapping.h" +#include "mon/OSDMonitor.h" +#include "mon/PGMap.h" + +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/common_init.h" +#include "common/ceph_argparse.h" +#include "common/ceph_json.h" + +#include <iostream> +#include <cmath> + +using namespace std; + +int main(int argc, char **argv) { + map<string,string> defaults = { + // make sure we have 3 copies, or some tests won't work + { "osd_pool_default_size", "3" }, + // our map is flat, so just try and split across OSDs, not hosts or whatever + { "osd_crush_chooseleaf_type", "0" }, + }; + std::vector<const char*> args(argv, argv+argc); + auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +class OSDMapTest : public testing::Test, + public ::testing::WithParamInterface<std::pair<int, int>> { + int num_osds = 6; +public: + OSDMap osdmap; + OSDMapMapping mapping; + const uint64_t my_ec_pool = 1; + const uint64_t my_rep_pool = 2; + + // Blacklist testing lists + // I pulled the first two ranges and their start/end points from + // https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation + static const string range_addrs[]; + static const string ip_addrs[]; + static const string unblocked_ip_addrs[]; + const string EC_RULE_NAME = "erasure"; + + OSDMapTest() {} + + void set_up_map(int new_num_osds = 6, bool no_default_pools = false) { + num_osds = new_num_osds; + uuid_d fsid; + osdmap.build_simple(g_ceph_context, 0, fsid, num_osds); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.fsid = osdmap.get_fsid(); + entity_addrvec_t sample_addrs; + sample_addrs.v.push_back(entity_addr_t()); + uuid_d sample_uuid; + for (int i = 0; i < 
num_osds; ++i) { + sample_uuid.generate_random(); + sample_addrs.v[0].nonce = i; + pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW; + pending_inc.new_up_client[i] = sample_addrs; + pending_inc.new_up_cluster[i] = sample_addrs; + pending_inc.new_hb_back_up[i] = sample_addrs; + pending_inc.new_hb_front_up[i] = sample_addrs; + pending_inc.new_weight[i] = CEPH_OSD_IN; + pending_inc.new_uuid[i] = sample_uuid; + } + osdmap.apply_incremental(pending_inc); + if (no_default_pools) // do not create any default pool(s) + return; + + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.new_pool_max = osdmap.get_pool_max(); + new_pool_inc.fsid = osdmap.get_fsid(); + // make an ec pool + set_ec_pool("ec", new_pool_inc); + // and a replicated pool + set_rep_pool("reppool",new_pool_inc); + osdmap.apply_incremental(new_pool_inc); + } + int get_ec_crush_rule() { + int r = osdmap.crush->get_rule_id(EC_RULE_NAME); + if (r < 0) { + r = osdmap.crush->add_simple_rule( + EC_RULE_NAME, "default", "osd", "", + "indep", pg_pool_t::TYPE_ERASURE, + &cerr); + } + return r; + } + uint64_t set_ec_pool(const string &name, OSDMap::Incremental &new_pool_inc, + bool assert_pool_id = true) { + pg_pool_t empty; + uint64_t pool_id = ++new_pool_inc.new_pool_max; + if (assert_pool_id) + ceph_assert(pool_id == my_ec_pool); + pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->set_pg_num(64); + p->set_pgp_num(64); + p->type = pg_pool_t::TYPE_ERASURE; + p->crush_rule = get_ec_crush_rule(); + new_pool_inc.new_pool_names[pool_id] = name;//"ec"; + return pool_id; + } + uint64_t set_rep_pool(const string name, OSDMap::Incremental &new_pool_inc, + bool assert_pool_id = true) { + pg_pool_t empty; + uint64_t pool_id = ++new_pool_inc.new_pool_max; + if (assert_pool_id) + ceph_assert(pool_id == my_rep_pool); + pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->set_pg_num(64); + p->set_pgp_num(64); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = 0; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_id] = name;//"reppool"; + return pool_id; + } + + unsigned int get_num_osds() { return num_osds; } + void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) { + bufferlist bl; + tmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT); + auto p = bl.cbegin(); + newcrush.decode(p); + } + int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) { + map<string,string> loc; + CrushWrapper::parse_loc_map(argvec, &loc); + CrushWrapper newcrush; + get_crush(tmap, newcrush); + if (!newcrush.name_exists(name)) { + return -ENOENT; + } + int id = newcrush.get_item_id(name); + int err; + if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { + if (id >= 0) { + err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc); + } else { + err = newcrush.move_bucket(g_ceph_context, id, loc); + } + if (err >= 0) { + OSDMap::Incremental pending_inc(tmap.get_epoch() + 1); + pending_inc.crush.clear(); + newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmap.apply_incremental(pending_inc); + err = 0; + } + } else { + // already there + err = 0; + } + return err; + } + int crush_rule_create_replicated(const string &name, + const string &root, + const string &type) { + if (osdmap.crush->rule_exists(name)) { + return osdmap.crush->get_rule_id(name); + } + CrushWrapper newcrush; + get_crush(osdmap, newcrush); + string device_class; + stringstream ss; + int ruleno = 
newcrush.add_simple_rule( + name, root, type, device_class, + "firstn", pg_pool_t::TYPE_REPLICATED, &ss); + if (ruleno >= 0) { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + return ruleno; + } + void test_mappings(int pool, + int num, + vector<int> *any, + vector<int> *first, + vector<int> *primary) { + mapping.update(osdmap); + for (int i=0; i<num; ++i) { + vector<int> up, acting; + int up_primary, acting_primary; + pg_t pgid(i, pool); + osdmap.pg_to_up_acting_osds(pgid, + &up, &up_primary, &acting, &acting_primary); + for (unsigned j=0; j<acting.size(); ++j) + (*any)[acting[j]]++; + if (!acting.empty()) + (*first)[acting[0]]++; + if (acting_primary >= 0) + (*primary)[acting_primary]++; + + // compare to precalc mapping + vector<int> up2, acting2; + int up_primary2, acting_primary2; + pgid = osdmap.raw_pg_to_pg(pgid); + mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2); + ASSERT_EQ(up, up2); + ASSERT_EQ(up_primary, up_primary2); + ASSERT_EQ(acting, acting2); + ASSERT_EQ(acting_primary, acting_primary2); + } + cout << "any: " << *any << std::endl;; + cout << "first: " << *first << std::endl;; + cout << "primary: " << *primary << std::endl;; + } + void clean_pg_upmaps(CephContext *cct, + const OSDMap& om, + OSDMap::Incremental& pending_inc) { + int cpu_num = 8; + int pgs_per_chunk = 256; + ThreadPool tp(cct, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num); + tp.start(); + ParallelPGMapper mapper(cct, &tp); + vector<pg_t> pgs_to_check; + om.get_upmap_pgs(&pgs_to_check); + OSDMonitor::CleanUpmapJob job(cct, om, pending_inc); + mapper.queue(&job, pgs_per_chunk, pgs_to_check); + job.wait(); + tp.stop(); + } + void set_primary_affinity_all(float pa) { + for (uint i = 0 ; i < get_num_osds() ; i++) { + osdmap.set_primary_affinity(i, int(pa * CEPH_OSD_MAX_PRIMARY_AFFINITY)); + } + } + bool score_in_range(float score, uint nosds = 0) { + if (nosds == 0) { + nosds = get_num_osds(); + } + return score >= 1.0 && score <= float(nosds); + } +}; + +TEST_F(OSDMapTest, Create) { + set_up_map(); + ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd()); + ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds()); +} + +TEST_F(OSDMapTest, Features) { + // with EC pool + set_up_map(); + uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2); + ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); + ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); + + // clients have a slightly different view + features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2); + ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); + ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); + + // remove teh EC pool, but leave the rule. add primary affinity. 
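+ // with the EC pool gone the map should no longer require CEPH_FEATURE_CRUSH_V2,
+ // while the new primary affinity keeps OSD_PRIMARY_AFFINITY (and the shared
+ // TUNABLES3 bit) required; the assertions below check exactly that.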
+ { + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec")); + new_pool_inc.new_primary_affinity[0] = 0x8000; + osdmap.apply_incremental(new_pool_inc); + } + + features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity + ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2); + ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); + ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); + + // FIXME: test tiering feature bits +} + +TEST_F(OSDMapTest, MapPG) { + set_up_map(); + + std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl; + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + vector<int> old_up_osds, old_acting_osds; + osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds); + ASSERT_EQ(old_up_osds, up_osds); + ASSERT_EQ(old_acting_osds, acting_osds); + + ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size()); +} + +TEST_F(OSDMapTest, MapFunctionsMatch) { + // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match + set_up_map(); + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + vector<int> up_osds_two, acting_osds_two; + + osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two); + + ASSERT_EQ(up_osds, up_osds_two); + ASSERT_EQ(acting_osds, acting_osds_two); + + int acting_primary_two; + osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two); + EXPECT_EQ(acting_osds, acting_osds_two); + EXPECT_EQ(acting_primary, acting_primary_two); + osdmap.pg_to_acting_osds(pgid, acting_osds_two); + EXPECT_EQ(acting_osds, acting_osds_two); +} + +/** This test must be removed or modified appropriately when we allow + * other ways to specify a primary. 
*/ +TEST_F(OSDMapTest, PrimaryIsFirst) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + EXPECT_EQ(up_osds[0], up_primary); + EXPECT_EQ(acting_osds[0], acting_primary); +} + +TEST_F(OSDMapTest, PGTempRespected) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + // copy and swap first and last element in acting_osds + vector<int> new_acting_osds(acting_osds); + int first = new_acting_osds[0]; + new_acting_osds[0] = *new_acting_osds.rbegin(); + *new_acting_osds.rbegin() = first; + + // apply pg_temp to osdmap + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); + pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>( + new_acting_osds.begin(), new_acting_osds.end()); + osdmap.apply_incremental(pgtemp_map); + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + EXPECT_EQ(new_acting_osds, acting_osds); +} + +TEST_F(OSDMapTest, PrimaryTempRespected) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds; + vector<int> acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + // make second OSD primary via incremental + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); + pgtemp_map.new_primary_temp[pgid] = acting_osds[1]; + osdmap.apply_incremental(pgtemp_map); + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + EXPECT_EQ(acting_primary, acting_osds[1]); +} + +TEST_F(OSDMapTest, CleanTemps) { + set_up_map(); + + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2); + pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool)); + { + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary, + &acting_osds, &acting_primary); + pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>( + up_osds.begin(), up_osds.end()); + pgtemp_map.new_primary_temp[pga] = up_primary; + } + pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool)); + { + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary, + &acting_osds, &acting_primary); + pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>( + up_osds.begin(), up_osds.end()); + pending_inc.new_primary_temp[pgb] = up_primary; + } + + osdmap.apply_incremental(pgtemp_map); + + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); + + EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) && + pending_inc.new_pg_temp[pga].size() == 0); + EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]); + + EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) && + !pending_inc.new_primary_temp.count(pgb)); +} + +TEST_F(OSDMapTest, KeepsNecessaryTemps) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + 
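+ // unlike CleanTemps above, this test builds a pg_temp/primary_temp pair that
+ // still differs from the calculated mapping (by pointing it at an otherwise
+ // unused OSD), so clean_temps() must leave it alone.  The entry is injected
+ // the same way as elsewhere in this file, e.g.:
+ //   OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+ //   inc.new_pg_temp[pgid] = mempool::osdmap::vector<int>(up.begin(), up.end());
+ //   inc.new_primary_temp[pgid] = up[1];
+ //   osdmap.apply_incremental(inc);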
osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + // find unused OSD and stick it in there + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); + // find an unused osd and put it in place of the first one + int i = 0; + for(; i != (int)get_num_osds(); ++i) { + bool in_use = false; + for (vector<int>::iterator osd_it = up_osds.begin(); + osd_it != up_osds.end(); + ++osd_it) { + if (i == *osd_it) { + in_use = true; + break; + } + } + if (!in_use) { + up_osds[1] = i; + break; + } + } + if (i == (int)get_num_osds()) + FAIL() << "did not find unused OSD for temp mapping"; + + pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>( + up_osds.begin(), up_osds.end()); + pgtemp_map.new_primary_temp[pgid] = up_osds[1]; + osdmap.apply_incremental(pgtemp_map); + + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); + EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid)); + EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid)); +} + +TEST_F(OSDMapTest, PrimaryAffinity) { + set_up_map(); + + int n = get_num_osds(); + for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin(); + p != osdmap.get_pools().end(); + ++p) { + int pool = p->first; + int expect_primary = 10000 / n; + cout << "pool " << pool << " size " << (int)p->second.size + << " expect_primary " << expect_primary << std::endl; + { + vector<int> any(n, 0); + vector<int> first(n, 0); + vector<int> primary(n, 0); + test_mappings(pool, 10000, &any, &first, &primary); + for (int i=0; i<n; ++i) { + ASSERT_LT(0, any[i]); + ASSERT_LT(0, first[i]); + ASSERT_LT(0, primary[i]); + } + } + + osdmap.set_primary_affinity(0, 0); + osdmap.set_primary_affinity(1, 0); + { + vector<int> any(n, 0); + vector<int> first(n, 0); + vector<int> primary(n, 0); + test_mappings(pool, 10000, &any, &first, &primary); + for (int i=0; i<n; ++i) { + ASSERT_LT(0, any[i]); + if (i >= 2) { + ASSERT_LT(0, first[i]); + ASSERT_LT(0, primary[i]); + } else { + if (p->second.is_replicated()) { + ASSERT_EQ(0, first[i]); + } + ASSERT_EQ(0, primary[i]); + } + } + } + + osdmap.set_primary_affinity(0, 0x8000); + osdmap.set_primary_affinity(1, 0); + { + vector<int> any(n, 0); + vector<int> first(n, 0); + vector<int> primary(n, 0); + test_mappings(pool, 10000, &any, &first, &primary); + int expect = (10000 / (n-2)) / 2; // half weight + cout << "expect " << expect << std::endl; + for (int i=0; i<n; ++i) { + ASSERT_LT(0, any[i]); + if (i >= 2) { + ASSERT_LT(0, first[i]); + ASSERT_LT(0, primary[i]); + } else if (i == 1) { + if (p->second.is_replicated()) { + ASSERT_EQ(0, first[i]); + } + ASSERT_EQ(0, primary[i]); + } else { + ASSERT_LT(expect *2/3, primary[0]); + ASSERT_GT(expect *4/3, primary[0]); + } + } + } + + osdmap.set_primary_affinity(0, 0x10000); + osdmap.set_primary_affinity(1, 0x10000); + } +} + +TEST_F(OSDMapTest, get_osd_crush_node_flags) { + set_up_map(); + + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i)); + } + + OSDMap::Incremental inc(osdmap.get_epoch() + 1); + inc.new_crush_node_flags[-1] = 123u; + osdmap.apply_incremental(inc); + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(123u, osdmap.get_osd_crush_node_flags(i)); + } + ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000)); + + OSDMap::Incremental inc3(osdmap.get_epoch() + 1); + inc3.new_crush_node_flags[-1] = 456u; + 
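+ // as above, the flag is keyed by crush node id: -1 addresses the root bucket
+ // of the simple map built by set_up_map(), so every OSD underneath reports it,
+ // while the non-existent osd.1000 keeps reporting 0.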
osdmap.apply_incremental(inc3); + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(456u, osdmap.get_osd_crush_node_flags(i)); + } + ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000)); + + OSDMap::Incremental inc2(osdmap.get_epoch() + 1); + inc2.new_crush_node_flags[-1] = 0; + osdmap.apply_incremental(inc2); + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(0u, osdmap.get_crush_node_flags(i)); + } +} + +TEST_F(OSDMapTest, parse_osd_id_list) { + set_up_map(); + set<int> out; + set<int> all; + osdmap.get_all_osds(all); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout)); + ASSERT_EQ(1u, out.size()); + ASSERT_EQ(0, *out.begin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout)); + ASSERT_EQ(1u, out.size()); + ASSERT_EQ(1, *out.begin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout)); + ASSERT_EQ(2u, out.size()); + ASSERT_EQ(0, *out.begin()); + ASSERT_EQ(1, *out.rbegin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout)); + ASSERT_EQ(2u, out.size()); + ASSERT_EQ(0, *out.begin()); + ASSERT_EQ(1, *out.rbegin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout)); + ASSERT_EQ(all.size(), out.size()); + ASSERT_EQ(all, out); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout)); + ASSERT_EQ(all, out); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout)); + ASSERT_EQ(all, out); + + ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout)); + ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout)); +} + +TEST_F(OSDMapTest, CleanPGUpmaps) { + set_up_map(); + + // build a crush rule of type host + const int expected_host_num = 3; + int osd_per_host = get_num_osds() / expected_host_num; + ASSERT_GE(2, osd_per_host); + int index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + host_name << "host-" << index; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + int r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + const string upmap_rule = "upmap"; + int upmap_rule_no = crush_rule_create_replicated( + upmap_rule, "default", "host"); + ASSERT_LT(0, upmap_rule_no); + + // create a replicated pool which references the above rule + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.new_pool_max = osdmap.get_pool_max(); + new_pool_inc.fsid = osdmap.get_fsid(); + pg_pool_t empty; + uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty); + p->size = 2; + p->set_pg_num(64); + p->set_pgp_num(64); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = upmap_rule_no; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool"; + osdmap.apply_incremental(new_pool_inc); + + pg_t rawpg(0, upmap_pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_LT(1U, up.size()); + { + // validate we won't have two OSDs from a same host + int parent_0 = osdmap.crush->get_parent_of_type(up[0], + osdmap.crush->get_type_id("host")); + int parent_1 = osdmap.crush->get_parent_of_type(up[1], + osdmap.crush->get_type_id("host")); + ASSERT_TRUE(parent_0 != parent_1); + } + + { + // cancel stale upmaps + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + int from = -1; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(up.begin(), up.end(), i) == up.end()) { + from = i; + break; + } + } + ASSERT_TRUE(from >= 0); + int to = -1; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(up.begin(), up.end(), i) == up.end() && i != from) { + to = i; + break; + } + } + ASSERT_TRUE(to >= 0); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + OSDMap nextmap; + nextmap.deepish_copy_from(osdmap); + nextmap.apply_incremental(pending_inc); + ASSERT_TRUE(nextmap.have_pg_upmaps(pgid)); + OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, nextmap, new_pending_inc); + nextmap.apply_incremental(new_pending_inc); + ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid)); + } + + { + // https://tracker.ceph.com/issues/37493 + pg_t ec_pg(0, my_ec_pool); + pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg); + OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map.. 
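+ // scenario for 37493: insert a valid pg_upmap_item, then mark its target OSD
+ // down (but not out); clean_pg_upmaps must leave the item in place.  The next
+ // block (37501) covers the out case, where the item does get removed.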
+ int from = -1; + int to = -1; + { + // insert a valid pg_upmap_item + vector<int> ec_up; + int ec_up_primary; + osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); + ASSERT_TRUE(!ec_up.empty()); + from = *(ec_up.begin()); + ASSERT_TRUE(from >= 0); + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { + to = i; + break; + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[ec_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // mark one of the target OSDs of the above pg_upmap_item as down + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + pending_inc.new_state[to] = CEPH_OSD_UP; + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(!tmpmap.is_up(to)); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // confirm *clean_pg_upmaps* won't do anything bad + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + } + + { + // http://tracker.ceph.com/issues/37501 + pg_t ec_pg(0, my_ec_pool); + pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg); + OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map.. + int from = -1; + int to = -1; + { + // insert a valid pg_upmap_item + vector<int> ec_up; + int ec_up_primary; + osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); + ASSERT_TRUE(!ec_up.empty()); + from = *(ec_up.begin()); + ASSERT_TRUE(from >= 0); + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { + to = i; + break; + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[ec_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // mark one of the target OSDs of the above pg_upmap_item as out + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + pending_inc.new_weight[to] = CEPH_OSD_OUT; + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.is_out(to)); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // *clean_pg_upmaps* should be able to remove the above *bad* mapping + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid)); + } + } + + { + // http://tracker.ceph.com/issues/37968 + + // build a temporary crush topology of 2 hosts, 3 osds per host + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. 
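+ // scenario for 37968: with a rule that chooses 2 hosts and then 2 OSDs per
+ // host, a pg_upmap_item that swaps an OSD for another OSD under the *same*
+ // host still satisfies the rule, so clean_pg_upmaps must not cancel it.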
+ tmp.deepish_copy_from(osdmap); + const int expected_host_num = 2; + int osd_per_host = get_num_osds() / expected_host_num; + ASSERT_GE(osd_per_host, 3); + int index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." << i; + host_name << "host-" << index; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "rule_37968"; + int rule_type = pg_pool_t::TYPE_ERASURE; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + string root_name = "default"; + int root = crush.get_item_id(root_name); + int steps = 6; + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host*/); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + + // create a erasuce-coded pool referencing the above rule + int64_t pool_37968; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_37968 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty); + p->size = 4; + p->set_pg_num(8); + p->set_pgp_num(8); + p->type = pg_pool_t::TYPE_ERASURE; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_37968] = "pool_37968"; + tmp.apply_incremental(new_pool_inc); + } + + pg_t ec_pg(0, pool_37968); + pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg); + int from = -1; + int to = -1; + { + // insert a valid pg_upmap_item + vector<int> ec_up; + int ec_up_primary; + tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); + ASSERT_TRUE(ec_up.size() == 4); + from = *(ec_up.begin()); + ASSERT_TRUE(from >= 0); + auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno); + ASSERT_TRUE(parent < 0); + // pick an osd of the same parent with *from* + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { + auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno); + if (p == parent) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[ec_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + 
tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid)); + } + { + // *clean_pg_upmaps* should not remove the above upmap_item + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmp, pending_inc); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid)); + } + } + + { + // TEST pg_upmap + { + // STEP-1: enumerate all children of up[0]'s parent, + // replace up[1] with one of them (other than up[0]) + int parent = osdmap.crush->get_parent_of_type(up[0], + osdmap.crush->get_type_id("host")); + set<int> candidates; + osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates); + ASSERT_LT(1U, candidates.size()); + int replaced_by = -1; + for (auto c: candidates) { + if (c != up[0]) { + replaced_by = c; + break; + } + } + { + // Check we can handle a negative pg_upmap value + vector<int32_t> new_pg_upmap; + new_pg_upmap.push_back(up[0]); + new_pg_upmap.push_back(-823648512); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_pg_upmap.begin(), new_pg_upmap.end()); + osdmap.apply_incremental(pending_inc); + vector<int> new_up; + int new_up_primary; + // crucial call - _apply_upmap should ignore the negative value + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + } + ASSERT_NE(-1, replaced_by); + // generate a new pg_upmap item and apply + vector<int32_t> new_pg_upmap; + new_pg_upmap.push_back(up[0]); + new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_pg_upmap.begin(), new_pg_upmap.end()); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap is there + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up.size(), up.size()); + ASSERT_EQ(new_up[0], new_pg_upmap[0]); + ASSERT_EQ(new_up[1], new_pg_upmap[1]); + // and we shall have two OSDs from a same host now.. 
+ int parent_0 = osdmap.crush->get_parent_of_type(new_up[0], + osdmap.crush->get_type_id("host")); + int parent_1 = osdmap.crush->get_parent_of_type(new_up[1], + osdmap.crush->get_type_id("host")); + ASSERT_EQ(parent_0, parent_1); + } + } + { + // STEP-2: apply cure + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap is gone (reverted) + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up, up); + ASSERT_EQ(new_up_primary, up_primary); + } + } + } + + { + // TEST pg_upmap_items + // enumerate all used hosts first + set<int> parents; + for (auto u: up) { + int parent = osdmap.crush->get_parent_of_type(u, + osdmap.crush->get_type_id("host")); + ASSERT_GT(0, parent); + parents.insert(parent); + } + int candidate_parent = 0; + set<int> candidate_children; + vector<int> up_after_out; + { + // STEP-1: try mark out up[1] and all other OSDs from the same host + int parent = osdmap.crush->get_parent_of_type(up[1], + osdmap.crush->get_type_id("host")); + set<int> children; + osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), + &children); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + for (auto c: children) { + pending_inc.new_weight[c] = CEPH_OSD_OUT; + } + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + vector<int> new_up; + int new_up_primary; + tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + // verify that we'll have OSDs from a different host.. + int will_choose = -1; + for (auto o: new_up) { + int parent = tmpmap.crush->get_parent_of_type(o, + osdmap.crush->get_type_id("host")); + if (!parents.count(parent)) { + will_choose = o; + candidate_parent = parent; // record + break; + } + } + ASSERT_LT(-1, will_choose); // it is an OSD! + ASSERT_NE(candidate_parent, 0); + osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent), + &candidate_children); + ASSERT_TRUE(candidate_children.count(will_choose)); + candidate_children.erase(will_choose); + ASSERT_FALSE(candidate_children.empty()); + up_after_out = new_up; // needed for verification.. 
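+ // clean_pg_upmaps (STEP-4 below) is expected to revert the pg_upmap_items
+ // entry added in STEP-2, so the final mapping should equal this post-out set.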
+ } + { + // Make sure we can handle a negative pg_upmap_item + int victim = up[0]; + int replaced_by = -823648512; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); + // apply + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + vector<int> new_up; + int new_up_primary; + // crucial call - _apply_upmap should ignore the negative value + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + } + { + // STEP-2: generating a new pg_upmap_items entry by + // replacing up[0] with one coming from candidate_children + int victim = up[0]; + int replaced_by = *candidate_children.begin(); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); + // apply + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap_items is there + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up.size(), up.size()); + ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) != + new_up.end()); + // and up[1] too + ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) != + new_up.end()); + } + } + { + // STEP-3: mark out up[1] and all other OSDs from the same host + int parent = osdmap.crush->get_parent_of_type(up[1], + osdmap.crush->get_type_id("host")); + set<int> children; + osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), + &children); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + for (auto c: children) { + pending_inc.new_weight[c] = CEPH_OSD_OUT; + } + osdmap.apply_incremental(pending_inc); + { + // validate we have two OSDs from the same host now.. + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(up.size(), new_up.size()); + int parent_0 = osdmap.crush->get_parent_of_type(new_up[0], + osdmap.crush->get_type_id("host")); + int parent_1 = osdmap.crush->get_parent_of_type(new_up[1], + osdmap.crush->get_type_id("host")); + ASSERT_EQ(parent_0, parent_1); + } + } + { + // STEP-4: apply cure + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap_items is gone (reverted) + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up, up_after_out); + } + } + } +} + +TEST_F(OSDMapTest, BUG_38897) { + // http://tracker.ceph.com/issues/38897 + // build a fresh map with 12 OSDs, without any default pools + set_up_map(12, true); + const string pool_1("pool1"); + const string pool_2("pool2"); + int64_t pool_1_id = -1; + + { + // build customized crush rule for "pool1" + string host_name = "host_for_pool_1"; + // build a customized host to capture osd.1~5 + for (int i = 1; i < 5; i++) { + stringstream osd_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name; + move_to.push_back(host_loc); + auto r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + CrushWrapper crush; + get_crush(osdmap, crush); + auto host_id = crush.get_item_id(host_name); + ASSERT_TRUE(host_id < 0); + string rule_name = "rule_for_pool1"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + int steps = 7; + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + // then pick any other random osds + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool1" + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pool_1_id = pool_id; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(3); + p->set_pgp_num(3); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_1; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1); + { + for (unsigned i = 0; i < 3; i++) { + // 1.x -> [1] + pg_t rawpg(i, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + ASSERT_TRUE(up[0] == 0); + + // insert a new pg_upmap + vector<int32_t> new_up; + // and remap 1.x to osd.1 only + // this way osd.0 is deemed to be *underfull* + // and osd.1 is deemed to be *overfull* + new_up.push_back(1); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_up.begin(), new_up.end()); + osdmap.apply_incremental(pending_inc); + } + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 1); + ASSERT_TRUE(up[0] == 1); + } + } + } + + { + // build customized crush rule for "pool2" + string host_name = "host_for_pool_2"; + // build a customized host to capture osd.6~11 + for (int i = 6; i < (int)get_num_osds(); i++) { + stringstream osd_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name; + move_to.push_back(host_loc); + auto r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + CrushWrapper crush; + get_crush(osdmap, crush); + auto host_id = crush.get_item_id(host_name); + ASSERT_TRUE(host_id < 0); + string rule_name = "rule_for_pool2"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + int steps = 7; + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + // then pick any other random osds + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool2" + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + // include a single PG + p->set_pg_num(1); + p->set_pgp_num(1); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_2; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2); + pg_t rawpg(0, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid)); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + ASSERT_TRUE(up[0] == 0); + + { + // build a pg_upmap_item that will + // remap pg out from *underfull* osd.0 + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10 + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_upmaps(pgid)); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + ASSERT_TRUE(up[0] == 10); + } + } + + // ready to go + { + set<int64_t> only_pools; + ASSERT_TRUE(pool_1_id >= 0); + only_pools.insert(pool_1_id); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + // require perfect distribution! 
(max deviation 0) + osdmap.calc_pg_upmaps(g_ceph_context, + 0, // so we can force optimizing + 100, + only_pools, + &pending_inc); + osdmap.apply_incremental(pending_inc); + } +} + +TEST_F(OSDMapTest, BUG_40104) { + // http://tracker.ceph.com/issues/40104 + int big_osd_num = 5000; + int big_pg_num = 10000; + set_up_map(big_osd_num, true); + int pool_id; + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(big_pg_num); + p->set_pgp_num(big_pg_num); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = 0; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = "big_pool"; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == "big_pool"); + } + { + // generate pg_upmap_items for each pg + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + for (int i = 0; i < big_pg_num; i++) { + pg_t rawpg(i, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + int victim = up[0]; + int replaced_by = random() % big_osd_num; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + // note that it might or might not be valid, we don't care + new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + } + osdmap.apply_incremental(pending_inc); + } + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + auto start = mono_clock::now(); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + auto latency = mono_clock::now() - start; + std::cout << "clean_pg_upmaps (~" << big_pg_num + << " pg_upmap_items) latency:" << timespan_str(latency) + << std::endl; + } +} + +TEST_F(OSDMapTest, BUG_42052) { + // https://tracker.ceph.com/issues/42052 + set_up_map(6, true); + const string pool_name("pool"); + // build customized crush rule for "pool" + CrushWrapper crush; + get_crush(osdmap, crush); + string rule_name = "rule"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + int steps = 8; + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0, osd.1, osd.2 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 1); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 2); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool" 
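+ // the pool gets a single PG whose raw mapping is pinned by the rule above;
+ // the test then stacks both a pg_upmap and a pg_upmap_items entry on that PG
+ // and expects clean_pg_upmaps to cancel them both.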
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(1); + p->set_pgp_num(1); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_name; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_name); + pg_t rawpg(0, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + { + // pg_upmap 1.0 [2,3,5] + vector<int32_t> new_up{2,3,5}; + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_up.begin(), new_up.end()); + osdmap.apply_incremental(pending_inc); + } + { + // pg_upmap_items 1.0 [0,3,4,5] + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(0, 3)); + new_pg_upmap_items.push_back(make_pair(4, 5)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + } + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + ASSERT_FALSE(osdmap.have_pg_upmaps(pgid)); + } +} + +TEST_F(OSDMapTest, BUG_42485) { + set_up_map(60); + { + // build a temporary crush topology of 2datacenters, 3racks per dc, + // 1host per rack, 10osds per host + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. + tmp.deepish_copy_from(osdmap); + const int expected_host_num = 6; + int osd_per_host = (int)get_num_osds() / expected_host_num; + ASSERT_GE(osd_per_host, 10); + int host_per_dc = 3; + int index = 0; + int dc_index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + if (i && i % (host_per_dc * osd_per_host) == 0) { + ++dc_index; + } + stringstream osd_name; + stringstream host_name; + stringstream rack_name; + stringstream dc_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + host_name << "host-" << index; + rack_name << "rack-" << index; + dc_name << "dc-" << dc_index; + move_to.push_back("root=default"); + string dc_loc = "datacenter=" + dc_name.str(); + move_to.push_back(dc_loc); + string rack_loc = "rack=" + rack_name.str(); + move_to.push_back(rack_loc); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "rule_xeus_993_1"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + string root_name = "default"; + string dc_1 = "dc-0"; + int dc1 = crush.get_item_id(dc_1); + string dc_2 = "dc-1"; + int dc2 = crush.get_item_id(dc_2); + int steps = 8; + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc1, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc2, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + // create a replicated pool referencing the above rule + int64_t pool_xeus_993; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_xeus_993 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_xeus_993, &empty); + p->size = 4; + p->set_pg_num(4096); + p->set_pgp_num(4096); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_xeus_993] = "pool_xeus_993"; + tmp.apply_incremental(new_pool_inc); + } + + pg_t rep_pg(0, pool_xeus_993); + pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); + { + int from = -1; + int to = -1; + vector<int> rep_up; + int rep_up_primary; + tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); + std::cout << "pgid " << rep_pgid << " up " << rep_up << std::endl; + ASSERT_TRUE(rep_up.size() == 4); + from = *(rep_up.begin()); + ASSERT_TRUE(from >= 0); + auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); + if (dc_parent == dc1) + dc_parent = dc2; + else + dc_parent = dc1; + auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); + ASSERT_TRUE(dc_parent < 0); + ASSERT_TRUE(rack_parent < 0); + set<int> rack_parents; + for (auto &i: rep_up) { + if (i == from) continue; + auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + rack_parents.insert(rack_parent); + } + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { + auto dc_p = 
tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); + auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + if (dc_p == dc_parent && + rack_parents.find(rack_p) == rack_parents.end()) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + std::cout << "from " << from << " to " << to << std::endl; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[rep_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + pg_t rep_pg2(2, pool_xeus_993); + pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2); + { + pg_t rep_pgid = rep_pgid2; + vector<int> from_osds{-1, -1}; + vector<int> rep_up; + int rep_up_primary; + tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); + ASSERT_TRUE(rep_up.size() == 4); + from_osds[0] = *(rep_up.begin()); + from_osds[1] = *(rep_up.rbegin()); + std::cout << "pgid " << rep_pgid2 << " up " << rep_up << std::endl; + ASSERT_TRUE(*(from_osds.begin()) >= 0); + ASSERT_TRUE(*(from_osds.rbegin()) >= 0); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + for (auto &from: from_osds) { + int to = -1; + auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); + if (dc_parent == dc1) + dc_parent = dc2; + else + dc_parent = dc1; + auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); + ASSERT_TRUE(dc_parent < 0); + ASSERT_TRUE(rack_parent < 0); + set<int> rack_parents; + for (auto &i: rep_up) { + if (i == from) continue; + auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + rack_parents.insert(rack_parent); + } + for (auto &i: new_pg_upmap_items) { + auto rack_from = tmp.crush->get_parent_of_type(i.first, 3, rno); + auto rack_to = tmp.crush->get_parent_of_type(i.second, 3, rno); + rack_parents.insert(rack_from); + rack_parents.insert(rack_to); + } + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { + auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); + auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + if (dc_p == dc_parent && + rack_parents.find(rack_p) == rack_parents.end()) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + std::cout << "from " << from << " to " << to << std::endl; + new_pg_upmap_items.push_back(make_pair(from, to)); + } + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[rep_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + { + // *maybe_remove_pg_upmaps* should remove the above upmap_item + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmp, pending_inc); + tmp.apply_incremental(pending_inc); + ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid)); + ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid2)); + } + } +} + +TEST(PGTempMap, basic) +{ + PGTempMap m; + pg_t a(1,1); + for (auto i=3; i<1000; ++i) { + pg_t x(i, 1); + m.set(x, {static_cast<int>(i)}); + } + pg_t b(2,1); + m.set(a, {1, 2}); + ASSERT_NE(m.find(a), m.end()); + ASSERT_EQ(m.find(a), m.begin()); + ASSERT_EQ(m.find(b), m.end()); + ASSERT_EQ(998u, m.size()); +} + +TEST_F(OSDMapTest, 
BUG_43124) { + set_up_map(200); + { + // https://tracker.ceph.com/issues/43124 + + // build a temporary crush topology of 5racks, + // 4 hosts per rack, 10osds per host + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. + tmp.deepish_copy_from(osdmap); + const int expected_host_num = 20; + int osd_per_host = (int)get_num_osds() / expected_host_num; + ASSERT_GE(osd_per_host, 10); + int host_per_rack = 4; + int index = 0; + int rack_index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + if (i && i % (host_per_rack * osd_per_host) == 0) { + ++rack_index; + } + stringstream osd_name; + stringstream host_name; + stringstream rack_name; + vector<string> move_to; + osd_name << "osd." << i; + host_name << "host-" << index; + rack_name << "rack-" << rack_index; + move_to.push_back("root=default"); + string rack_loc = "rack=" + rack_name.str(); + move_to.push_back(rack_loc); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "rule_angel_1944"; + int rule_type = pg_pool_t::TYPE_ERASURE; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + int steps = 6; + string root_name = "default"; + int root = crush.get_item_id(root_name); + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_FIRSTN, 4, 3 /* rack */); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_INDEP, 3, 1 /* host */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + { + stringstream oss; + crush.dump_tree(&oss, NULL); + std::cout << oss.str() << std::endl; + Formatter *f = Formatter::create("json-pretty"); + f->open_object_section("crush_rules"); + crush.dump_rules(f); + f->close_section(); + f->flush(cout); + delete f; + } + // create an erasure-coded pool referencing the above rule + int64_t pool_angel_1944; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_angel_1944 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_angel_1944, &empty); + p->size = 12; + p->set_pg_num(4096); + p->set_pgp_num(4096); + p->type = pg_pool_t::TYPE_ERASURE; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_angel_1944] = "pool_angel_1944"; + tmp.apply_incremental(new_pool_inc); + } + + pg_t rep_pg(0, pool_angel_1944); + pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); + { + // insert a pg_upmap_item + int from = -1; + int to = -1; + vector<int> rep_up; + int rep_up_primary; + tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); + std::cout << "pgid " << 
rep_pgid << " up " << rep_up << std::endl; + ASSERT_TRUE(rep_up.size() == 12); + from = *(rep_up.begin()); + ASSERT_TRUE(from >= 0); + auto from_rack = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); + set<int> failure_domains; + for (auto &osd : rep_up) { + failure_domains.insert(tmp.crush->get_parent_of_type(osd, 1 /* host */, rno)); + } + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { + auto to_rack = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + auto to_host = tmp.crush->get_parent_of_type(i, 1 /* host */, rno); + if (to_rack != from_rack && failure_domains.count(to_host) == 0) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + std::cout << "from " << from << " to " << to << std::endl; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[rep_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + { + // *maybe_remove_pg_upmaps* should not remove the above upmap_item + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmp, pending_inc); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + } +} + +TEST_F(OSDMapTest, BUG_48884) +{ + + set_up_map(12); + + unsigned int host_index = 1; + for (unsigned int x=0; x < get_num_osds();) { + // Create three hosts with four osds each + for (unsigned int y=0; y < 4; y++) { + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." 
<< x; + host_name << "host-" << host_index; + move_to.push_back("root=default"); + move_to.push_back("rack=localrack"); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + int r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + x++; + } + host_index++; + } + + CrushWrapper crush; + get_crush(osdmap, crush); + auto host_id = crush.get_item_id("localhost"); + crush.remove_item(g_ceph_context, host_id, false); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + + PGMap pgmap; + osd_stat_t stats, stats_null; + stats.statfs.total = 500000; + stats.statfs.available = 50000; + stats.statfs.omap_allocated = 50000; + stats.statfs.internal_metadata = 50000; + stats_null.statfs.total = 0; + stats_null.statfs.available = 0; + stats_null.statfs.omap_allocated = 0; + stats_null.statfs.internal_metadata = 0; + for (unsigned int x=0; x < get_num_osds(); x++) { + if (x > 3 && x < 8) { + pgmap.osd_stat.insert({x,stats_null}); + } else { + pgmap.osd_stat.insert({x,stats}); + } + } + + stringstream ss; + boost::scoped_ptr<Formatter> f(Formatter::create("json-pretty")); + print_osd_utilization(osdmap, pgmap, ss, f.get(), true, "root"); + JSONParser parser; + parser.parse(ss.str().c_str(), static_cast<int>(ss.str().size())); + auto iter = parser.find_first(); + for (const auto& bucket : (*iter)->get_array_elements()) { + JSONParser parser2; + parser2.parse(bucket.c_str(), static_cast<int>(bucket.size())); + auto* obj = parser2.find_obj("name"); + if (obj->get_data().compare("localrack") == 0) { + obj = parser2.find_obj("kb"); + ASSERT_EQ(obj->get_data(), "3904"); + obj = parser2.find_obj("kb_used"); + ASSERT_EQ(obj->get_data(), "3512"); + obj = parser2.find_obj("kb_used_omap"); + ASSERT_EQ(obj->get_data(), "384"); + obj = parser2.find_obj("kb_used_meta"); + ASSERT_EQ(obj->get_data(), "384"); + obj = parser2.find_obj("kb_avail"); + ASSERT_EQ(obj->get_data(), "384"); + } + } +} + +TEST_P(OSDMapTest, BUG_51842) { + set_up_map(3, true); + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. + tmp.deepish_copy_from(osdmap); + for (int i = 0; i < (int)get_num_osds(); i++) { + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + host_name << "host=host-" << i; + move_to.push_back("root=infra-1706"); + move_to.push_back(host_name.str()); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "infra-1706"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + string root_bucket = "infra-1706"; + int root = crush.get_item_id(root_bucket); + int steps = 5; + crush_rule *rule = crush_make_rule(steps, rule_type); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + // note: it's ok to set like 'step chooseleaf_firstn 0 host' + std::pair<int, int> param = GetParam(); + int rep_num = std::get<0>(param); + int domain = std::get<1>(param); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, rep_num, domain); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + { + stringstream oss; + crush.dump_tree(&oss, NULL); + std::cout << oss.str() << std::endl; + Formatter *f = Formatter::create("json-pretty"); + f->open_object_section("crush_rules"); + crush.dump_rules(f); + f->close_section(); + f->flush(cout); + delete f; + } + // create a replicated pool referencing the above rule + int64_t pool_infra_1706; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_infra_1706 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_infra_1706, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(256); + p->set_pgp_num(256); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_infra_1706] = "pool_infra_1706"; + tmp.apply_incremental(new_pool_inc); + } + + // add upmaps + pg_t rep_pg(3, pool_infra_1706); + pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); + pg_t rep_pg2(4, pool_infra_1706); + pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2); + pg_t rep_pg3(6, pool_infra_1706); + pg_t rep_pgid3 = tmp.raw_pg_to_pg(rep_pg3); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap[rep_pgid] = mempool::osdmap::vector<int32_t>({1,0,2}); + pending_inc.new_pg_upmap[rep_pgid2] = mempool::osdmap::vector<int32_t>({1,2,0}); + pending_inc.new_pg_upmap[rep_pgid3] = mempool::osdmap::vector<int32_t>({1,2,0}); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid2)); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid3)); + } + + { + // now, set pool size to 1 + OSDMap tmpmap; + tmpmap.deepish_copy_from(tmp); + OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1); + pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706); + p.size = 1; + p.last_change = new_pool_inc.epoch; + new_pool_inc.new_pools[pool_infra_1706] = p; + 
tmpmap.apply_incremental(new_pool_inc); + + OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc); + tmpmap.apply_incremental(new_pending_inc); + // check pg upmaps + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3)); + } + { + // now, set pool size to 4 + OSDMap tmpmap; + tmpmap.deepish_copy_from(tmp); + OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1); + pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706); + p.size = 4; + p.last_change = new_pool_inc.epoch; + new_pool_inc.new_pools[pool_infra_1706] = p; + tmpmap.apply_incremental(new_pool_inc); + + OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc); + tmpmap.apply_incremental(new_pending_inc); + // check pg upmaps + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3)); + } +} + +const string OSDMapTest::range_addrs[] = {"198.51.100.0/22", "10.2.5.102/32", "2001:db8::/48", + "3001:db8::/72", "4001:db8::/30", "5001:db8::/64", "6001:db8::/128", "7001:db8::/127"}; +const string OSDMapTest::ip_addrs[] = {"198.51.100.14", "198.51.100.0", "198.51.103.255", + "10.2.5.102", + "2001:db8:0:0:0:0:0:0", "2001:db8:0:0:0:0001:ffff:ffff", + "2001:db8:0:ffff:ffff:ffff:ffff:ffff", + "3001:db8:0:0:0:0:0:0", "3001:db8:0:0:0:0001:ffff:ffff", + "3001:db8:0:0:00ff:ffff:ffff:ffff", + "4001:db8::", "4001:db8:0:0:0:0001:ffff:ffff", + "4001:dbb:ffff:ffff:ffff:ffff:ffff:ffff", + "5001:db8:0:0:0:0:0:0", "5001:db8:0:0:0:0:ffff:ffff", + "5001:db8:0:0:ffff:ffff:ffff:ffff", + "6001:db8:0:0:0:0:0:0", + "7001:db8:0:0:0:0:0:0", "7001:db8:0:0:0:0:0:0001" +}; +const string OSDMapTest::unblocked_ip_addrs[] = { "0.0.0.0", "1.1.1.1", "192.168.1.1", + "198.51.99.255", "198.51.104.0", + "10.2.5.101", "10.2.5.103", + "2001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "2001:db8:0001::", + "3001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "3001:db8:0:0:0100::", + "4001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "4001:dbc::", + "5001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "5001:db8:0:0001:0:0:0:0", + "6001:db8:0:0:0:0:0:0001", + "7001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "7001:db8:0:0:0:0:0:0002" +}; + +TEST_F(OSDMapTest, blocklisting_ips) { + set_up_map(6); //whatever + + OSDMap::Incremental new_blocklist_inc(osdmap.get_epoch() + 1); + for (const auto& a : ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + new_blocklist_inc.new_blocklist[addr] = ceph_clock_now(); + } + osdmap.apply_incremental(new_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_TRUE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + + OSDMap::Incremental rm_blocklist_inc(osdmap.get_epoch() + 1); + for (const auto& a : ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + rm_blocklist_inc.old_blocklist.push_back(addr); + } + osdmap.apply_incremental(rm_blocklist_inc); + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); + 
} + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (blocklisted) { + cout << "erroneously blocklisted " << addr << std::endl; + } + EXPECT_FALSE(blocklisted); + } +} + +TEST_F(OSDMapTest, blocklisting_ranges) { + set_up_map(6); //whatever + OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1); + for (const auto& a : range_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.type = entity_addr_t::TYPE_CIDR; + range_blocklist_inc.new_range_blocklist[addr] = ceph_clock_now(); + } + osdmap.apply_incremental(range_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (blocklisted) { + cout << "erroneously blocklisted " << addr << std::endl; + } + EXPECT_FALSE(blocklisted); + } + + OSDMap::Incremental rm_range_blocklist(osdmap.get_epoch() + 1); + for (const auto& a : range_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.type = entity_addr_t::TYPE_CIDR; + rm_range_blocklist.old_range_blocklist.push_back(addr); + } + osdmap.apply_incremental(rm_range_blocklist); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (blocklisted) { + cout << "erroneously blocklisted " << addr << std::endl; + } + EXPECT_FALSE(blocklisted); + } +} + +TEST_F(OSDMapTest, blocklisting_everything) { + set_up_map(6); //whatever + OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1); + entity_addr_t baddr; + baddr.parse("2001:db8::/0"); + baddr.type = entity_addr_t::TYPE_CIDR; + range_blocklist_inc.new_range_blocklist[baddr] = ceph_clock_now(); + osdmap.apply_incremental(range_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + + OSDMap::Incremental swap_blocklist_inc(osdmap.get_epoch()+1); + swap_blocklist_inc.old_range_blocklist.push_back(baddr); + + entity_addr_t caddr; + caddr.parse("1.1.1.1/0"); + caddr.type = entity_addr_t::TYPE_CIDR; + swap_blocklist_inc.new_range_blocklist[caddr] = ceph_clock_now(); + osdmap.apply_incremental(swap_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + 
addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (!addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (!addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } +} + +TEST_F(OSDMapTest, ReadBalanceScore1) { + std::srand ( unsigned ( std::time(0) ) ); + uint osd_rand = rand() % 13; + set_up_map(6 + osd_rand); //whatever + auto pools = osdmap.get_pools(); + for (auto &[pid, pg_pool] : pools) { + const pg_pool_t *pi = osdmap.get_pg_pool(pid); + if (pi->is_replicated()) { + //cout << "pool " << pid << " " << pg_pool << std::endl; + auto replica_count = pi->get_size(); + OSDMap::read_balance_info_t rbi; + auto rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi); + + // "Normal" score is between 1 and num_osds + ASSERT_TRUE(rc == 0); + ASSERT_TRUE(score_in_range(rbi.adjusted_score)); + ASSERT_TRUE(score_in_range(rbi.acting_adj_score)); + ASSERT_TRUE(rbi.err_msg.empty()); + + // When all OSDs have primary_affinity 0, score should be 0 + auto num_osds = get_num_osds(); + set_primary_affinity_all(0.); + + rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi); + ASSERT_TRUE(rc < 0); + ASSERT_TRUE(rbi.adjusted_score == 0.); + ASSERT_TRUE(rbi.acting_adj_score == 0.); + ASSERT_FALSE(rbi.err_msg.empty()); + + std::vector<uint> osds; + for (uint i = 0 ; i < num_osds ; i++) { + osds.push_back(i); + } + + // Change primary_affinity of some OSDs to 1 others are 0 + float fratio = 1. 
/ (float)replica_count; + for (int iter = 0 ; iter < 100 ; iter++) { // run the test 100 times + // Create random shuffle of OSDs + std::random_device seed; + std::default_random_engine generator(seed()); + std::shuffle(osds.begin(), osds.end(), generator); + for (uint i = 0 ; i < num_osds ; i++) { + if ((float(i + 1) / float(num_osds)) < fratio) { + ASSERT_TRUE(osds[i] < num_osds); + osdmap.set_primary_affinity(osds[i], CEPH_OSD_MAX_PRIMARY_AFFINITY); + rc = osdmap.calc_read_balance_score(g_ceph_context, pid, &rbi); + + ASSERT_TRUE(rc < 0); + ASSERT_TRUE(rbi.adjusted_score == 0.); + ASSERT_TRUE(rbi.acting_adj_score == 0.); + ASSERT_FALSE(rbi.err_msg.empty()); + } + else { + if (rc < 0) { + ASSERT_TRUE(rbi.adjusted_score == 0.); + ASSERT_TRUE(rbi.acting_adj_score == 0.); + ASSERT_FALSE(rbi.err_msg.empty()); + } + else { + ASSERT_TRUE(score_in_range(rbi.acting_adj_score, i + 1)); + ASSERT_TRUE(rbi.err_msg.empty()); + } + } + } + set_primary_affinity_all(0.); + } + } + } + + } + +TEST_F(OSDMapTest, ReadBalanceScore2) { + std::srand ( unsigned ( std::time(0) ) ); + uint osd_num = 6 + rand() % 13; + set_up_map(osd_num, true); + for (int i = 0 ; i < 100 ; i++) { //running 100 random tests + uint num_pa_osds = 0; + float pa_sum = 0.; + OSDMap::read_balance_info_t rbi; + + // set pa for all osds + for (uint j = 0 ; j < osd_num ; j++) { + uint pa = 1 + rand() % 100; + if (pa > 80) + pa = 100; + if (pa < 20) + pa = 0; + float fpa = (float)pa / 100.; + if (pa > 0) { + num_pa_osds++; + pa_sum += fpa; + } + osdmap.set_primary_affinity(j, int(fpa * CEPH_OSD_MAX_PRIMARY_AFFINITY)); + } + float pa_ratio = pa_sum / (float) osd_num; + + // create a pool with the current osdmap configuration + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.new_pool_max = osdmap.get_pool_max(); + new_pool_inc.fsid = osdmap.get_fsid(); + string pool_name = "rep_pool" + stringify(i); + uint64_t new_pid = set_rep_pool(pool_name, new_pool_inc, false); + ASSERT_TRUE(new_pid > 0); + osdmap.apply_incremental(new_pool_inc); + + // now run the test on the pool. + const pg_pool_t *pi = osdmap.get_pg_pool(new_pid); + ASSERT_NE(pi, nullptr); + ASSERT_TRUE(pi->is_replicated()); + float fratio = 1. / (float)pi->get_size(); + auto rc = osdmap.calc_read_balance_score(g_ceph_context, new_pid, &rbi); + if (pa_ratio < fratio) { + ASSERT_TRUE(rc < 0); + ASSERT_FALSE(rbi.err_msg.empty()); + ASSERT_TRUE(rbi.acting_adj_score == 0.); + ASSERT_TRUE(rbi.adjusted_score == 0.); + } + else { + if (rc < 0) { + ASSERT_TRUE(rbi.adjusted_score == 0.); + ASSERT_TRUE(rbi.acting_adj_score == 0.); + ASSERT_FALSE(rbi.err_msg.empty()); + } + else { + if (rbi.err_msg.empty()) { + ASSERT_TRUE(score_in_range(rbi.acting_adj_score, num_pa_osds)); + } + } + } + + } + //TODO add ReadBalanceScore3 - with weighted osds. 
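+    // The assertions above encode the expected contract of calc_read_balance_score():
+    // it fails (rc < 0, zero scores, non-empty err_msg) whenever the average primary
+    // affinity across the OSDs (pa_ratio) falls below 1/size of the pool (fratio),
+    // i.e. there are too few primary-eligible OSDs; otherwise a successful call must
+    // yield an acting_adj_score within the expected range (see score_in_range()).
+    //
+    // One possible shape for the ReadBalanceScore3 TODO above, assuming
+    // OSDMap::Incremental::new_weight is the right way to down-weight an OSD in
+    // this fixture (only a sketch):
+    //
+    //   OSDMap::Incremental wi(osdmap.get_epoch() + 1);
+    //   wi.new_weight[0] = CEPH_OSD_IN / 2;   // run osd.0 at half weight
+    //   osdmap.apply_incremental(wi);
+    //   OSDMap::read_balance_info_t rbi3;
+    //   auto rc3 = osdmap.calc_read_balance_score(g_ceph_context,
+    //                                             my_rep_pool, &rbi3);
+    //   // then compare rbi3 against a weight-adjusted desired primary distribution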
+ + } + +TEST_F(OSDMapTest, read_balance_small_map) { + // Set up a map with 4 OSDs and default pools + set_up_map(4); + + const vector<string> test_cases = {"basic", "prim_affinity"}; + for (const auto & test : test_cases) { + if (test == "prim_affinity") { + // Make osd.0 off-limits for primaries by giving it prim affinity 0 + OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1); + pending_inc0.new_primary_affinity[0] = 0; + osdmap.apply_incremental(pending_inc0); + + // Ensure osd.0 has no primaries assigned to it + map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd); + ASSERT_TRUE(prim_pgs_by_osd[0].size() == 0); + ASSERT_TRUE(acting_prims_by_osd[0].size() == 0); + } + + // Make sure capacity is balanced first + set<int64_t> only_pools; + only_pools.insert(my_rep_pool); + OSDMap::Incremental pending_inc(osdmap.get_epoch()+1); + osdmap.calc_pg_upmaps(g_ceph_context, + 0, + 100, + only_pools, + &pending_inc); + osdmap.apply_incremental(pending_inc); + + // Get read balance score before balancing + OSDMap::read_balance_info_t rb_info; + auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); + ASSERT_TRUE(rc >= 0); + float read_balance_score_before = rb_info.adjusted_score; + + // Calculate desired prim distributions to verify later + map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2); + vector<uint64_t> osds_to_check; + for (const auto & [osd, pgs] : prim_pgs_by_osd_2) { + osds_to_check.push_back(osd); + } + map<uint64_t,float> desired_prim_dist; + rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool, + osds_to_check, desired_prim_dist); + ASSERT_TRUE(rc >= 0); + + // Balance reads + OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1); + int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap); + osdmap.apply_incremental(pending_inc_2); + + if (test == "prim_affinity") { + // Ensure osd.0 still has no primaries assigned to it + map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3); + ASSERT_TRUE(prim_pgs_by_osd_3[0].size() == 0); + ASSERT_TRUE(acting_prims_by_osd_3[0].size() == 0); + } + + // Get read balance score after balancing + rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); + ASSERT_TRUE(rc >= 0); + float read_balance_score_after = rb_info.adjusted_score; + + // Ensure the score hasn't gotten worse + ASSERT_TRUE(read_balance_score_after <= read_balance_score_before); + + // Check for improvements + if (num_changes > 0) { + ASSERT_TRUE(read_balance_score_after < read_balance_score_before); + + // Check num primaries for each OSD is within range + map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4); + for (const auto & [osd, primaries] : prim_pgs_by_osd_4) { + ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); + ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); + } + } + } +} + +TEST_F(OSDMapTest, read_balance_large_map) { + // Set up a map with 60 OSDs and default pools + set_up_map(60); + + const vector<string> test_cases = {"basic", "prim_affinity"}; + for (const auto & test : test_cases) { + if (test 
== "prim_affinity") { + // Make osd.0 off-limits for primaries by giving it prim affinity 0 + OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1); + pending_inc0.new_primary_affinity[0] = 0; + osdmap.apply_incremental(pending_inc0); + + // Ensure osd.0 has no primaries assigned to it + map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd); + ASSERT_TRUE(prim_pgs_by_osd[0].size() == 0); + ASSERT_TRUE(acting_prims_by_osd[0].size() == 0); + } + + // Make sure capacity is balanced first + set<int64_t> only_pools; + only_pools.insert(my_rep_pool); + OSDMap::Incremental pending_inc(osdmap.get_epoch()+1); + osdmap.calc_pg_upmaps(g_ceph_context, + 0, + 100, + only_pools, + &pending_inc); + osdmap.apply_incremental(pending_inc); + + // Get read balance score before balancing + OSDMap::read_balance_info_t rb_info; + auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); + ASSERT_TRUE(rc >= 0); + float read_balance_score_before = rb_info.adjusted_score; + + // Calculate desired prim distributions to verify later + map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2); + vector<uint64_t> osds_to_check; + for (auto [osd, pgs] : prim_pgs_by_osd_2) { + osds_to_check.push_back(osd); + } + map<uint64_t,float> desired_prim_dist; + rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool, + osds_to_check, desired_prim_dist); + ASSERT_TRUE(rc >= 0); + + // Balance reads + OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1); + int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap); + osdmap.apply_incremental(pending_inc_2); + + if (test == "prim_affinity") { + // Ensure osd.0 still has no primaries assigned to it + map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3); + ASSERT_TRUE(prim_pgs_by_osd_3[0].size() == 0); + ASSERT_TRUE(acting_prims_by_osd_3[0].size() == 0); + } + + // Get read balance score after balancing + rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); + ASSERT_TRUE(rc >= 0); + float read_balance_score_after = rb_info.adjusted_score; + + // Ensure the score hasn't gotten worse + ASSERT_TRUE(read_balance_score_after <= read_balance_score_before); + + // Check for improvements + if (num_changes > 0) { + ASSERT_TRUE(read_balance_score_after < read_balance_score_before); + + // Check num primaries for each OSD is within range + map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4); + for (const auto & [osd, primaries] : prim_pgs_by_osd_4) { + ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); + ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); + } + } + } +} + +TEST_F(OSDMapTest, read_balance_random_map) { + // Set up map with random number of OSDs + std::srand ( unsigned ( std::time(0) ) ); + uint num_osds = 3 + (rand() % 10); + ASSERT_TRUE(num_osds >= 3); + set_up_map(num_osds); + + const vector<string> test_cases = {"basic", "prim_affinity"}; + for (const auto & test : test_cases) { + uint rand_osd = rand() % num_osds; + if (test == "prim_affinity") { + // Make a random OSD off-limits for primaries by giving it prim affinity 
0 + ASSERT_TRUE(rand_osd < num_osds); + OSDMap::Incremental pending_inc0(osdmap.get_epoch() + 1); + pending_inc0.new_primary_affinity[rand_osd] = 0; + osdmap.apply_incremental(pending_inc0); + + // Ensure the random OSD has no primaries assigned to it + map<uint64_t,set<pg_t>> prim_pgs_by_osd, acting_prims_by_osd; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd, &acting_prims_by_osd); + ASSERT_TRUE(prim_pgs_by_osd[rand_osd].size() == 0); + ASSERT_TRUE(acting_prims_by_osd[rand_osd].size() == 0); + } + + // Make sure capacity is balanced first + set<int64_t> only_pools; + only_pools.insert(my_rep_pool); + OSDMap::Incremental pending_inc(osdmap.get_epoch()+1); + osdmap.calc_pg_upmaps(g_ceph_context, + 0, + 100, + only_pools, + &pending_inc); + osdmap.apply_incremental(pending_inc); + + // Get read balance score before balancing + OSDMap::read_balance_info_t rb_info; + auto rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); + ASSERT_TRUE(rc >= 0); + float read_balance_score_before = rb_info.adjusted_score; + + // Calculate desired prim distributions to verify later + map<uint64_t,set<pg_t>> prim_pgs_by_osd_2, acting_prims_by_osd_2; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_2, &acting_prims_by_osd_2); + vector<uint64_t> osds_to_check; + for (const auto & [osd, pgs] : prim_pgs_by_osd_2) { + osds_to_check.push_back(osd); + } + map<uint64_t,float> desired_prim_dist; + rc = osdmap.calc_desired_primary_distribution(g_ceph_context, my_rep_pool, + osds_to_check, desired_prim_dist); + ASSERT_TRUE(rc >= 0); + + // Balance reads + OSDMap::Incremental pending_inc_2(osdmap.get_epoch()+1); + int num_changes = osdmap.balance_primaries(g_ceph_context, my_rep_pool, &pending_inc_2, osdmap); + osdmap.apply_incremental(pending_inc_2); + + if (test == "prim_affinity") { + // Ensure the random OSD still has no primaries assigned to it + map<uint64_t,set<pg_t>> prim_pgs_by_osd_3, acting_prims_by_osd_3; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_3, &acting_prims_by_osd_3); + ASSERT_TRUE(prim_pgs_by_osd_3[rand_osd].size() == 0); + ASSERT_TRUE(acting_prims_by_osd_3[rand_osd].size() == 0); + } + + // Get read balance score after balancing + rc = osdmap.calc_read_balance_score(g_ceph_context, my_rep_pool, &rb_info); + ASSERT_TRUE(rc >= 0); + float read_balance_score_after = rb_info.adjusted_score; + + // Ensure the score hasn't gotten worse + ASSERT_TRUE(read_balance_score_after <= read_balance_score_before); + + // Check for improvements + if (num_changes > 0) { + ASSERT_TRUE(read_balance_score_after < read_balance_score_before); + + // Check num primaries for each OSD is within range + map<uint64_t,set<pg_t>> prim_pgs_by_osd_4, acting_prims_by_osd_4; + osdmap.get_pgs_by_osd(g_ceph_context, my_rep_pool, &prim_pgs_by_osd_4, &acting_prims_by_osd_4); + for (auto [osd, primaries] : prim_pgs_by_osd_4) { + ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); + ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); + } + for (auto [osd, primaries] : prim_pgs_by_osd_4) { + ASSERT_TRUE(primaries.size() >= floor(desired_prim_dist[osd] - 1)); + ASSERT_TRUE(primaries.size() <= ceil(desired_prim_dist[osd] + 1)); + } + } + } +} + +INSTANTIATE_TEST_SUITE_P( + OSDMap, + OSDMapTest, + ::testing::Values( + std::make_pair<int, int>(0, 1), // chooseleaf firstn 0 host + std::make_pair<int, int>(3, 1), // chooseleaf firstn 3 host + std::make_pair<int, int>(0, 0), // chooseleaf firstn 0 osd + std::make_pair<int, 
int>(3, 0) // chooseleaf firstn 3 osd + ) +); diff --git a/src/test/osd/TestOSDScrub.cc b/src/test/osd/TestOSDScrub.cc new file mode 100644 index 000000000..4c6d4ccee --- /dev/null +++ b/src/test/osd/TestOSDScrub.cc @@ -0,0 +1,203 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library Public License for more details. + * + */ + +#include <stdio.h> +#include <signal.h> +#include <gtest/gtest.h> +#include "common/async/context_pool.h" +#include "osd/OSD.h" +#include "os/ObjectStore.h" +#include "mon/MonClient.h" +#include "common/ceph_argparse.h" +#include "msg/Messenger.h" + +class TestOSDScrub: public OSD { + +public: + TestOSDScrub(CephContext *cct_, + std::unique_ptr<ObjectStore> store_, + int id, + Messenger *internal, + Messenger *external, + Messenger *hb_front_client, + Messenger *hb_back_client, + Messenger *hb_front_server, + Messenger *hb_back_server, + Messenger *osdc_messenger, + MonClient *mc, const std::string &dev, const std::string &jdev, + ceph::async::io_context_pool& ictx) : + OSD(cct_, std::move(store_), id, internal, external, + hb_front_client, hb_back_client, + hb_front_server, hb_back_server, + osdc_messenger, mc, dev, jdev, ictx) + { + } + + bool scrub_time_permit(utime_t now) { + return service.get_scrub_services().scrub_time_permit(now); + } +}; + +TEST(TestOSDScrub, scrub_time_permit) { + ceph::async::io_context_pool icp(1); + std::unique_ptr<ObjectStore> store = ObjectStore::create(g_ceph_context, + g_conf()->osd_objectstore, + g_conf()->osd_data, + g_conf()->osd_journal); + std::string cluster_msgr_type = g_conf()->ms_cluster_type.empty() ? 
g_conf().get_val<std::string>("ms_type") : g_conf()->ms_cluster_type; + Messenger *ms = Messenger::create(g_ceph_context, cluster_msgr_type, + entity_name_t::OSD(0), "make_checker", + getpid()); + ms->set_cluster_protocol(CEPH_OSD_PROTOCOL); + ms->set_default_policy(Messenger::Policy::stateless_server(0)); + ms->bind(g_conf()->public_addr); + MonClient mc(g_ceph_context, icp); + mc.build_initial_monmap(); + TestOSDScrub* osd = new TestOSDScrub(g_ceph_context, std::move(store), 0, ms, ms, ms, ms, ms, ms, ms, &mc, "", "", icp); + + // These are now invalid + int err = g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "24"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_begin_hour = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_begin_hour"); + + err = g_ceph_context->_conf.set_val("osd_scrub_end_hour", "24"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_end_hour = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_end_hour"); + + err = g_ceph_context->_conf.set_val("osd_scrub_begin_week_day", "7"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_begin_week_day = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_begin_week_day"); + + err = g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "7"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_end_week_day = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_end_week_day"); + + // Test all day + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "0"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "0"); + g_ceph_context->_conf.apply_changes(nullptr); + tm tm; + tm.tm_isdst = -1; + strptime("2015-01-16 12:05:13", "%Y-%m-%d %H:%M:%S", &tm); + utime_t now = utime_t(mktime(&tm), 0); + bool ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 01:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 20:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 08:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 20:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 00:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", 
"07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // Sun = 0, Mon = 1, Tue = 2, Wed = 3, Thu = 4m, Fri = 5, Sat = 6 + // Jan 16, 2015 is a Friday (5) + // every day + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "0"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "0"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // test Sun - Thu + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "0"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "5"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + // test Fri - Sat + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "5"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "0"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // Jan 14, 2015 is a Wednesday (3) + // test Tue - Fri + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "2"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "6"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-14 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // Test Sat - Sun + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "6"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "1"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-14 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); +} + +// Local Variables: +// compile-command: "cd ../.. 
; make unittest_osdscrub ; ./unittest_osdscrub --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " +// End: diff --git a/src/test/osd/TestOpStat.cc b/src/test/osd/TestOpStat.cc new file mode 100644 index 000000000..eb13e1d55 --- /dev/null +++ b/src/test/osd/TestOpStat.cc @@ -0,0 +1,58 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include "include/interval_set.h" +#include "include/buffer.h" +#include <list> +#include <map> +#include <set> +#include "RadosModel.h" +#include "TestOpStat.h" + +void TestOpStat::begin(TestOp *in) { + std::lock_guard l{stat_lock}; + stats[in->getType()].begin(in); +} + +void TestOpStat::end(TestOp *in) { + std::lock_guard l{stat_lock}; + stats[in->getType()].end(in); +} + +void TestOpStat::TypeStatus::export_latencies(std::map<double,uint64_t> &in) const +{ + auto i = in.begin(); + auto j = latencies.begin(); + int count = 0; + while (j != latencies.end() && i != in.end()) { + count++; + if ((((double)count)/((double)latencies.size())) * 100 >= i->first) { + i->second = *j; + ++i; + } + ++j; + } +} + +std::ostream & operator<<(std::ostream &out, const TestOpStat &rhs) +{ + std::lock_guard l{rhs.stat_lock}; + for (auto i = rhs.stats.begin(); + i != rhs.stats.end(); + ++i) { + std::map<double,uint64_t> latency; + latency[10] = 0; + latency[50] = 0; + latency[90] = 0; + latency[99] = 0; + i->second.export_latencies(latency); + + out << i->first << " latency: " << std::endl; + for (auto j = latency.begin(); + j != latency.end(); + ++j) { + if (j->second == 0) break; + out << "\t" << j->first << "th percentile: " + << j->second / 1000 << "ms" << std::endl; + } + } + return out; +} diff --git a/src/test/osd/TestOpStat.h b/src/test/osd/TestOpStat.h new file mode 100644 index 000000000..2c680558f --- /dev/null +++ b/src/test/osd/TestOpStat.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include "common/ceph_mutex.h" +#include "common/Cond.h" +#include "include/rados/librados.hpp" + +#ifndef TESTOPSTAT_H +#define TESTOPSTAT_H + +class TestOp; + +class TestOpStat { +public: + mutable ceph::mutex stat_lock = ceph::make_mutex("TestOpStat lock"); + + TestOpStat() = default; + + static uint64_t gettime() + { + timeval t; + gettimeofday(&t,0); + return (1000000*t.tv_sec) + t.tv_usec; + } + + class TypeStatus { + public: + std::map<TestOp*,uint64_t> inflight; + std::multiset<uint64_t> latencies; + void begin(TestOp *in) + { + ceph_assert(!inflight.count(in)); + inflight[in] = gettime(); + } + + void end(TestOp *in) + { + ceph_assert(inflight.count(in)); + uint64_t curtime = gettime(); + latencies.insert(curtime - inflight[in]); + inflight.erase(in); + } + + void export_latencies(std::map<double,uint64_t> &in) const; + }; + std::map<std::string,TypeStatus> stats; + + void begin(TestOp *in); + void end(TestOp *in); + friend std::ostream & operator<<(std::ostream &, const TestOpStat &); +}; + +std::ostream & operator<<(std::ostream &out, const TestOpStat &rhs); + +#endif diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc new file mode 100644 index 000000000..1fff469d1 --- /dev/null +++ b/src/test/osd/TestPGLog.cc @@ -0,0 +1,3249 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the 
terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library Public License for more details. + * + */ + +#include <stdio.h> +#include <signal.h> +#include "gtest/gtest.h" +#include "osd/PGLog.h" +#include "osd/OSDMap.h" +#include "include/coredumpctl.h" +#include "../objectstore/store_test_fixture.h" + +using namespace std; + +struct PGLogTestBase { + static hobject_t mk_obj(unsigned id) { + hobject_t hoid; + stringstream ss; + ss << "obj_" << id; + hoid.oid = ss.str(); + hoid.set_hash(id); + hoid.pool = 1; + return hoid; + } + static eversion_t mk_evt(unsigned ep, unsigned v) { + return eversion_t(ep, v); + } + static pg_log_entry_t mk_ple_mod( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::MODIFY; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_dt( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::DELETE; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_ldt( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::LOST_DELETE; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + return e; + } + static pg_log_entry_t mk_ple_mod_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.op = pg_log_entry_t::MODIFY; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_dt_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.op = pg_log_entry_t::DELETE; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_err( + const hobject_t &hoid, eversion_t v, osd_reqid_t reqid) { + pg_log_entry_t e; + e.op = pg_log_entry_t::ERROR; + e.soid = hoid; + e.version = v; + e.prior_version = eversion_t(0, 0); + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_mod( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_mod(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_dt( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_dt(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_mod_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_mod_rb(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_dt_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_dt_rb(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_err( + const hobject_t &hoid, eversion_t v) { + return mk_ple_err(hoid, v, osd_reqid_t()); + } +}; // PGLogTestBase + + +class PGLogTest : virtual public ::testing::Test, protected PGLog, public PGLogTestBase { +public: + PGLogTest() : PGLog(g_ceph_context) {} + void SetUp() override { + missing.may_include_deletes = true; + } + +#include "common/ceph_context.h" +#include "common/config.h" + + void TearDown() 
override { + clear(); + } + + + struct TestCase { + list<pg_log_entry_t> base; + list<pg_log_entry_t> auth; + list<pg_log_entry_t> div; + + pg_missing_t init; + pg_missing_t final; + + set<hobject_t> toremove; + list<pg_log_entry_t> torollback; + bool deletes_during_peering; + + private: + IndexedLog fullauth; + IndexedLog fulldiv; + pg_info_t authinfo; + pg_info_t divinfo; + public: + TestCase() : deletes_during_peering(false) {} + void setup() { + init.may_include_deletes = !deletes_during_peering; + final.may_include_deletes = !deletes_during_peering; + fullauth.log.insert(fullauth.log.end(), base.begin(), base.end()); + fullauth.log.insert(fullauth.log.end(), auth.begin(), auth.end()); + fulldiv.log.insert(fulldiv.log.end(), base.begin(), base.end()); + fulldiv.log.insert(fulldiv.log.end(), div.begin(), div.end()); + + fullauth.head = authinfo.last_update = fullauth.log.rbegin()->version; + authinfo.last_complete = fullauth.log.rbegin()->version; + authinfo.log_tail = fullauth.log.begin()->version; + authinfo.log_tail.version--; + fullauth.tail = authinfo.log_tail; + authinfo.last_backfill = hobject_t::get_max(); + + fulldiv.head = divinfo.last_update = fulldiv.log.rbegin()->version; + divinfo.last_complete = eversion_t(); + divinfo.log_tail = fulldiv.log.begin()->version; + divinfo.log_tail.version--; + fulldiv.tail = divinfo.log_tail; + divinfo.last_backfill = hobject_t::get_max(); + + if (init.get_items().empty()) { + divinfo.last_complete = divinfo.last_update; + } else { + eversion_t fmissing = init.get_items().at(init.get_rmissing().begin()->second).need; + for (list<pg_log_entry_t>::const_iterator i = fulldiv.log.begin(); + i != fulldiv.log.end(); + ++i) { + if (i->version < fmissing) + divinfo.last_complete = i->version; + else + break; + } + } + + fullauth.index(); + fulldiv.index(); + } + void set_div_bounds(eversion_t head, eversion_t tail) { + fulldiv.tail = divinfo.log_tail = tail; + fulldiv.head = divinfo.last_update = head; + } + void set_auth_bounds(eversion_t head, eversion_t tail) { + fullauth.tail = authinfo.log_tail = tail; + fullauth.head = authinfo.last_update = head; + } + const IndexedLog &get_fullauth() const { return fullauth; } + const IndexedLog &get_fulldiv() const { return fulldiv; } + const pg_info_t &get_authinfo() const { return authinfo; } + const pg_info_t &get_divinfo() const { return divinfo; } + }; // struct TestCase + + struct LogHandler : public PGLog::LogEntryHandler { + set<hobject_t> removed; + list<pg_log_entry_t> rolledback; + + void rollback( + const pg_log_entry_t &entry) override { + rolledback.push_back(entry); + } + void rollforward( + const pg_log_entry_t &entry) override {} + void remove( + const hobject_t &hoid) override { + removed.insert(hoid); + } + void try_stash(const hobject_t &, version_t) override { + // lost/unfound cases are not tested yet + } + void trim( + const pg_log_entry_t &entry) override {} + }; + + template <typename missing_t> + void verify_missing( + const TestCase &tcase, + const missing_t &missing) { + ASSERT_EQ(tcase.final.get_items().size(), missing.get_items().size()); + for (auto i = missing.get_items().begin(); + i != missing.get_items().end(); + ++i) { + EXPECT_TRUE(tcase.final.get_items().count(i->first)); + EXPECT_EQ(tcase.final.get_items().find(i->first)->second.need, i->second.need); + EXPECT_EQ(tcase.final.get_items().find(i->first)->second.have, i->second.have); + } + bool correct = missing.debug_verify_from_init(tcase.init, &(std::cout)); + ASSERT_TRUE(correct); + } + + void verify_sideeffects( + 
const TestCase &tcase, + const LogHandler &handler) { + ASSERT_EQ(tcase.toremove.size(), handler.removed.size()); + ASSERT_EQ(tcase.torollback.size(), handler.rolledback.size()); + + { + list<pg_log_entry_t>::const_iterator titer = tcase.torollback.begin(); + list<pg_log_entry_t>::const_iterator hiter = handler.rolledback.begin(); + for (; titer != tcase.torollback.end(); ++titer, ++hiter) { + EXPECT_EQ(titer->version, hiter->version); + } + } + + { + set<hobject_t>::const_iterator titer = tcase.toremove.begin(); + set<hobject_t>::const_iterator hiter = handler.removed.begin(); + for (; titer != tcase.toremove.end(); ++titer, ++hiter) { + EXPECT_EQ(*titer, *hiter); + } + } + } + + void test_merge_log(const TestCase &tcase) { + clear(); + log = tcase.get_fulldiv(); + pg_info_t info = tcase.get_divinfo(); + + missing = tcase.init; + missing.flush(); + + IndexedLog olog; + olog = tcase.get_fullauth(); + pg_info_t oinfo = tcase.get_authinfo(); + + LogHandler h; + bool dirty_info = false; + bool dirty_big_info = false; + merge_log( + oinfo, std::move(olog), pg_shard_t(1, shard_id_t(0)), info, + &h, dirty_info, dirty_big_info); + + ASSERT_EQ(info.last_update, oinfo.last_update); + verify_missing(tcase, missing); + verify_sideeffects(tcase, h); + } + + void test_proc_replica_log(const TestCase &tcase) { + clear(); + log = tcase.get_fullauth(); + pg_info_t info = tcase.get_authinfo(); + + pg_missing_t omissing = tcase.init; + + IndexedLog olog; + olog = tcase.get_fulldiv(); + pg_info_t oinfo = tcase.get_divinfo(); + + proc_replica_log( + oinfo, olog, omissing, pg_shard_t(1, shard_id_t(0))); + + ceph_assert(oinfo.last_update >= log.tail); + + if (!tcase.base.empty()) { + ASSERT_EQ(tcase.base.rbegin()->version, oinfo.last_update); + } + + for (list<pg_log_entry_t>::const_iterator i = tcase.auth.begin(); + i != tcase.auth.end(); + ++i) { + if (i->version > oinfo.last_update) { + if (i->is_delete() && tcase.deletes_during_peering) { + omissing.rm(i->soid, i->version); + } else { + omissing.add_next_event(*i); + } + } + } + verify_missing(tcase, omissing); + } // test_proc_replica_log + + void run_test_case(const TestCase &tcase) { + test_merge_log(tcase); + test_proc_replica_log(tcase); + } +}; // class PGLogTest + +struct TestHandler : public PGLog::LogEntryHandler { + list<hobject_t> &removed; + explicit TestHandler(list<hobject_t> &removed) : removed(removed) {} + + void rollback( + const pg_log_entry_t &entry) override {} + void rollforward( + const pg_log_entry_t &entry) override {} + void remove( + const hobject_t &hoid) override { + removed.push_back(hoid); + } + void cant_rollback(const pg_log_entry_t &entry) {} + void try_stash(const hobject_t &, version_t) override { + // lost/unfound cases are not tested yet + } + void trim( + const pg_log_entry_t &entry) override {} +}; + +TEST_F(PGLogTest, rewind_divergent_log) { + /* +----------------+ + | log | + +--------+-------+ + | |object | + |version | hash | + | | | + tail > (1,1) | x5 | + | | | + | | | + | (1,4) | x9 < newhead + | MODIFY | | + | | | + head > (1,5) | x9 | + | DELETE | | + | | | + +--------+-------+ + + */ + { + clear(); + + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + eversion_t divergent_version; + eversion_t newhead; + + hobject_t divergent; + divergent.set_hash(0x9); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version 
= newhead = eversion_t(1, 4); + e.soid = divergent; + e.op = pg_log_entry_t::MODIFY; + log.log.push_back(e); + e.version = divergent_version = eversion_t(1, 5); + e.prior_version = eversion_t(1, 4); + e.soid = divergent; + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + info.last_complete = log.head; + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(3U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_EQ(log.head, info.last_complete); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + rewind_divergent_log(newhead, info, &h, + dirty_info, dirty_big_info); + + EXPECT_TRUE(log.objects.count(divergent)); + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(2U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(newhead, info.last_update); + EXPECT_EQ(newhead, info.last_complete); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +----------------+ + | log | + +--------+-------+ + | |object | + |version | hash | + | | | + tail > (1,1) | NULL | + | | | + | (1,4) | NULL < newhead + | | | + head > (1,5) | x9 | + | | | + +--------+-------+ + + */ + { + clear(); + + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + eversion_t divergent_version; + eversion_t prior_version; + eversion_t newhead; + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + info.log_tail = log.tail = eversion_t(1, 1); + newhead = eversion_t(1, 3); + e.version = divergent_version = eversion_t(1, 5); + e.soid.set_hash(0x9); + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + e.prior_version = prior_version = eversion_t(0, 2); + log.log.push_back(e); + log.head = e.version; + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + rewind_divergent_log(newhead, info, &h, + dirty_info, dirty_big_info); + + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(0U, log.objects.count(divergent_object)); + EXPECT_TRUE(log.empty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + // Test for 13965 + { + clear(); + + list<hobject_t> remove_snap; + pg_info_t info; + info.log_tail = log.tail = eversion_t(1, 5); + info.last_update = eversion_t(1, 6); + bool dirty_info = false; + bool dirty_big_info = false; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + add(e); + } + { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.version = eversion_t(1, 6); + e.soid.set_hash(0x10); + add(e); + } + TestHandler h(remove_snap); + roll_forward_to(eversion_t(1, 6), &h); + rewind_divergent_log(eversion_t(1, 5), info, &h, + dirty_info, dirty_big_info); + pg_log_t log; + reset_backfill_claim_log(log, &h); + } +} + +TEST_F(PGLogTest, merge_old_entry) { + // entries > last_backfill are silently ignored + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.last_backfill = 
hobject_t(); + info.last_backfill.set_hash(100); + oe.soid.set_hash(2); + ASSERT_GT(oe.soid, info.last_backfill); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + } + + // the new entry (from the logs) has a version that is higher than + // the old entry (from the log entry given in argument) : do + // nothing and return false + { + clear(); + + ObjectStore::Transaction t; + pg_info_t info; + list<hobject_t> remove_snap; + + pg_log_entry_t ne; + ne.mark_unrollbackable(); + ne.version = eversion_t(2,1); + log.add(ne); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + EXPECT_EQ(ne.version, log.log.front().version); + + // the newer entry ( from the logs ) can be DELETE + { + log.log.front().op = pg_log_entry_t::DELETE; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + oe.version = eversion_t(1,1); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + } + + // if the newer entry is not DELETE, the object must be in missing + { + pg_log_entry_t &ne = log.log.front(); + ne.op = pg_log_entry_t::MODIFY; + missing.add_next_event(ne); + pg_log_entry_t oe; + oe.mark_unrollbackable(); + oe.version = eversion_t(1,1); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.rm(ne.soid, ne.version); + } + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + EXPECT_EQ(ne.version, log.log.front().version); + + } + + // the new entry (from the logs) has a version that is lower than + // the old entry (from the log entry given in argument) and + // old and new are delete : do nothing and return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + pg_log_entry_t ne; + ne.mark_unrollbackable(); + ne.version = eversion_t(1,1); + ne.op = pg_log_entry_t::DELETE; + log.add(ne); + + oe.version = eversion_t(2,1); + oe.op = pg_log_entry_t::DELETE; + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + } + + // the new entry (from the logs) has a version that is lower than + // the old entry (from the log entry given in argument) and + // old is update and new is DELETE : + // if the object is in missing, it is removed + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + pg_log_entry_t ne; + ne.mark_unrollbackable(); + ne.version = eversion_t(1,1); + ne.op = pg_log_entry_t::DELETE; + log.add(ne); + + oe.version = eversion_t(2,1); + oe.op = pg_log_entry_t::MODIFY; + missing.add_next_event(oe); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + 
EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_EQ(1U, log.log.size()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.size() > 0); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + } + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry prior_version is greater than the tail of the log : + // do nothing and return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(1,1); + oe.op = pg_log_entry_t::MODIFY; + oe.prior_version = eversion_t(2,1); + missing_add(oe.soid, oe.prior_version, eversion_t()); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(log.empty()); + } + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry (from the log entry given in argument) is not a DELETE and + // the old entry prior_version is lower than the tail of the log : + // add the old object to the remove_snap list and + // add the old object to divergent priors and + // add or update the prior_version of the object to missing and + // return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(2,1); + oe.soid.set_hash(1); + oe.op = pg_log_entry_t::MODIFY; + oe.prior_version = eversion_t(1,1); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_TRUE(is_dirty()); + EXPECT_EQ(oe.soid, remove_snap.front()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_TRUE(log.empty()); + } + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry (from the log entry given in argument) is a DELETE and + // the old entry prior_version is lower than the tail of the log : + // add the old object to divergent priors and + // add or update the prior_version of the object to missing and + // return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(2,1); + oe.soid.set_hash(1); + oe.op = pg_log_entry_t::DELETE; + oe.prior_version = eversion_t(1,1); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_TRUE(log.empty()); + } + + + // there is no new entry (from the logs) and + // the 
old entry (from the log entry given in argument) is not a CLONE and + // the old entry (from the log entry given in argument) is not a DELETE and + // the old entry prior_version is eversion_t() : + // add the old object to the remove_snap list and + // remove the prior_version of the object from missing, if any and + // return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(10,1); + oe.soid.set_hash(1); + oe.op = pg_log_entry_t::MODIFY; + oe.prior_version = eversion_t(); + + missing.add(oe.soid, eversion_t(1,1), eversion_t(), false); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_EQ(oe.soid, remove_snap.front()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + } + +} + +TEST_F(PGLogTest, merge_log) { + // head and tail match, last_backfill is set: + // noop + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, ""); + info.last_backfill = last_backfill; + eversion_t stat_version(10, 1); + info.stats.version = stat_version; + log.tail = olog.tail = eversion_t(1, 1); + log.head = olog.head = eversion_t(2, 1); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(last_backfill, info.last_backfill); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + } + + // head and tail match, last_backfill is not set: info.stats is + // copied from oinfo.stats but info.stats.reported_* is guaranteed to + // never be replaced by a lower version + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + eversion_t stat_version(10, 1); + oinfo.stats.version = stat_version; + info.stats.reported_seq = 1; + info.stats.reported_epoch = 10; + oinfo.stats.reported_seq = 1; + oinfo.stats.reported_epoch = 1; + log.tail = olog.tail = eversion_t(1, 1); + log.head = olog.head = eversion_t(2, 1); + missing.may_include_deletes = false; + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(eversion_t(), info.stats.version); + EXPECT_EQ(1ull, info.stats.reported_seq); + EXPECT_EQ(10u, info.stats.reported_epoch); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.last_backfill.is_max()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler 
h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_EQ(1ull, info.stats.reported_seq); + EXPECT_EQ(10u, info.stats.reported_epoch); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + } + + /* Before + +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + | | x5 | (1,1) < tail + | | | | + | | | | + tail > (1,4) | x7 | | + | | | | + | | | | + head > (1,5) | x9 | (1,5) < head + | | | | + | | | | + +--------+-------+---------+ + + After + +----------------- + | log | + +--------+-------+ + | |object | + |version | hash | + | | | + tail > (1,1) | x5 | + | | | + | | | + | (1,4) | x7 | + | | | + | | | + head > (1,5) | x9 | + | | | + | | | + +--------+-------+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + missing.may_include_deletes = false; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 4); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + olog.log.push_back(e); + olog.head = e.version; + } + + hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, ""); + info.last_backfill = last_backfill; + eversion_t stat_version(10, 1); + info.stats.version = stat_version; + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(2U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(last_backfill, info.last_backfill); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(3U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x5 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) < lower_bound + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) | + | | | MODIFY | + | | | | + | | x7 | (2,4) < head + | | | DELETE | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 but the olog entry (2,3) modifies + it and is authoritative : the log entry (1,3) is divergent. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + missing.may_include_deletes = true; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(1,3); + e.soid.set_hash(0x9); + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(2, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + e.version = eversion_t(2, 4); + e.soid.set_hash(0x7); + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + } + + snapid_t purged_snap(1); + { + oinfo.last_update = olog.head; + oinfo.purged_snaps.insert(purged_snap); + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(3U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + /* When the divergent entry is a DELETE and the authoritative + entry is a MODIFY, the object will be added to missing : it is + a verifiable side effect proving the entry was identified + to be divergent. + */ + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(4U, log.log.size()); + /* DELETE entries from olog that are appended to the hed of the + log, and the divergent version of the object is removed (added + to remove_snap) + */ + EXPECT_EQ(0x9U, remove_snap.front().get_hash()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.contains(purged_snap)); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x5 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) < lower_bound + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) | + | | | MODIFY | + | | | | + | | x7 | (2,4) < head + | | | DELETE | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 but the olog entry (2,3) modifies + it and is authoritative : the log entry (1,3) is divergent. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(1,3); + e.soid.set_hash(0x9); + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(2, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + e.version = eversion_t(2, 4); + e.soid.set_hash(0x7); + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + } + + snapid_t purged_snap(1); + { + oinfo.last_update = olog.head; + oinfo.purged_snaps.insert(purged_snap); + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(3U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + missing.may_include_deletes = false; + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + /* When the divergent entry is a DELETE and the authoritative + entry is a MODIFY, the object will be added to missing : it is + a verifiable side effect proving the entry was identified + to be divergent. + */ + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(4U, log.log.size()); + /* DELETE entries from olog that are appended to the hed of the + log, and the divergent version of the object is removed (added + to remove_snap). When peering handles deletes, it is the earlier + version that is in the removed list. + */ + EXPECT_EQ(0x7U, remove_snap.front().get_hash()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.contains(purged_snap)); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x5 | (1,1) < tail + | | | | + | | | | + | (1,4) | x7 | (1,4) < head + | | | | + | | | | + head > (1,5) | x9 | | + | | | | + | | | | + +--------+-------+---------+ + + The head of the log entry (1,5) is divergent because it is greater than the + head of olog. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 4); + e.soid.set_hash(0x7); + log.log.push_back(e); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 4); + e.soid.set_hash(0x7); + olog.log.push_back(e); + olog.head = e.version; + } + + hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, ""); + info.last_backfill = last_backfill; + eversion_t stat_version(10, 1); + info.stats.version = stat_version; + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(3U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(last_backfill, info.last_backfill); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + missing.may_include_deletes = false; + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(2U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_EQ(0x9U, remove_snap.front().get_hash()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + +} + +TEST_F(PGLogTest, proc_replica_log) { + // empty log : no side effect + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 1); + log.head = olog.head = oinfo.last_update = last_update; + eversion_t last_complete(1, 1); + oinfo.last_complete = last_complete; + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(last_update, oinfo.last_update); + EXPECT_EQ(last_complete, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(last_update, oinfo.last_update); + EXPECT_EQ(last_update, oinfo.last_complete); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + | | x3 | (1,1) < tail + | | | | + | | | | + tail > (1,2) | x5 | | + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) < head + | | | DELETE | + | | | | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 and the olog entry + (2,3) also deletes it : do nothing. The olog tail is ignored + because it is before the log tail. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 2); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x3); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(2, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_FALSE(omissing.have_missing()); + } + + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + hobject_t divergent_object; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + { + e.soid = divergent_object; + e.soid.set_hash(0x1); + e.version = eversion_t(1, 1); + log.tail = e.version; + log.log.push_back(e); + + e.soid = divergent_object; + e.prior_version = eversion_t(1, 1); + e.version = eversion_t(1, 2); + log.tail = e.version; + log.log.push_back(e); + + e.soid.set_hash(0x3); + e.version = eversion_t(1, 4); + log.log.push_back(e); + + e.soid.set_hash(0x7); + e.version = eversion_t(1, 5); + log.log.push_back(e); + + e.soid.set_hash(0x8); + e.version = eversion_t(1, 6); + log.log.push_back(e); + + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + e.version = eversion_t(2, 7); + log.log.push_back(e); + + e.soid.set_hash(0xa); + e.version = eversion_t(2, 8); + log.head = e.version; + log.log.push_back(e); + } + log.index(); + + { + e.soid = divergent_object; + e.soid.set_hash(0x1); + e.version = eversion_t(1, 1); + olog.tail = e.version; + olog.log.push_back(e); + + e.soid = divergent_object; + e.prior_version = eversion_t(1, 1); + e.version = eversion_t(1, 2); + olog.log.push_back(e); + + e.prior_version = eversion_t(0, 0); + e.soid.set_hash(0x3); + e.version = eversion_t(1, 4); + olog.log.push_back(e); + + e.soid.set_hash(0x7); + e.version = eversion_t(1, 5); + olog.log.push_back(e); + + e.soid.set_hash(0x8); + e.version = eversion_t(1, 6); + olog.log.push_back(e); + + e.soid.set_hash(0x9); // should not be added to missing, create + e.op = pg_log_entry_t::MODIFY; + e.version = eversion_t(1, 7); + olog.log.push_back(e); + + e.soid = divergent_object; // should be added to missing at 1,2 + e.op = pg_log_entry_t::MODIFY; + e.version = eversion_t(1, 8); + e.prior_version = eversion_t(1, 2); + olog.log.push_back(e); + olog.head = e.version; + } + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(eversion_t(1, 2), omissing.get_items().at(divergent_object).need); + EXPECT_EQ(eversion_t(1, 6), oinfo.last_update); + EXPECT_EQ(eversion_t(1, 1), oinfo.last_complete); + } + + /* +--------------------------+ + | olog log | + +--------+-------+---------+ 
+ | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x9 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) | + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) < head + | | | DELETE | + | | | | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 and the olog entry + (2,3) also deletes it : do nothing. + + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 2); + hobject_t divergent_object; + divergent_object.set_hash(0x9); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + log.tail = e.version; + log.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(2, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + olog.tail = e.version; + olog.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(1, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(omissing.get_items().at(divergent_object).have, eversion_t(0, 0)); + EXPECT_EQ(omissing.get_items().at(divergent_object).need, eversion_t(1, 1)); + EXPECT_EQ(last_update, oinfo.last_update); + } + + /* +--------------------------+ + | olog log | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x9 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) | + | | | | + | | | | + head > (1,3) | x9 | | + | MODIFY | | | + | | | | + | | x9 | (2,3) < head + | | | DELETE | + | | | | + +--------+-------+---------+ + + The log entry (2,3) deletes the object x9 but the olog entry + (1,3) modifies it : remove it from omissing. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 2); + hobject_t divergent_object; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + log.tail = e.version; + log.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(2, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + olog.tail = e.version; + olog.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(1, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + divergent_object = e.soid; + omissing.add(divergent_object, e.version, eversion_t(), false); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(eversion_t(1, 3), omissing.get_items().at(divergent_object).need); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(omissing.get_items().at(divergent_object).have, eversion_t(0, 0)); + EXPECT_EQ(omissing.get_items().at(divergent_object).need, eversion_t(1, 1)); + EXPECT_EQ(last_update, oinfo.last_update); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x9 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) | + | | | | + | | | | + | | x9 | (1,3) < head + | | | MODIFY | + | | | | + head > (2,3) | x9 | | + | DELETE | | | + | | | | + +--------+-------+---------+ + + The log entry (2,3) deletes the object x9 but the olog entry + (1,3) modifies it : proc_replica_log should adjust missing to + 1,1 for that object until add_next_event in PG::activate processes + the delete. 
+ */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 2); + hobject_t divergent_object; + eversion_t new_version(2, 3); + eversion_t divergent_version(1, 3); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x9); + log.tail = e.version; + log.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = new_version; + e.prior_version = eversion_t(1, 1); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.op = pg_log_entry_t::MODIFY; + e.version = eversion_t(1, 1); + e.soid.set_hash(0x9); + olog.tail = e.version; + olog.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = divergent_version; + e.prior_version = eversion_t(1, 1); + e.soid.set_hash(0x9); + divergent_object = e.soid; + omissing.add(divergent_object, e.version, eversion_t(), false); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(divergent_version, omissing.get_items().at(divergent_object).need); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.get_items().begin()->second.need == eversion_t(1, 1)); + EXPECT_EQ(last_update, oinfo.last_update); + EXPECT_EQ(eversion_t(0, 0), oinfo.last_complete); + } + +} + +TEST_F(PGLogTest, merge_log_1) { + TestCase t; + t.base.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100))); + + t.final.add(mk_obj(1), mk_evt(10, 100), mk_evt(0, 0), false); + + t.toremove.insert(mk_obj(1)); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_2) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100))); + t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101))); + + t.torollback.insert( + t.torollback.begin(), t.div.rbegin(), t.div.rend()); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_3) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100))); + t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101))); + + t.final.add(mk_obj(1), mk_evt(10, 100), mk_evt(0, 0), false); + + t.toremove.insert(mk_obj(1)); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_4) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100))); + t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101))); + + t.init.add(mk_obj(1), mk_evt(10, 102), mk_evt(0, 0), false); + t.final.add(mk_obj(1), mk_evt(10, 100), mk_evt(0, 0), false); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_5) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + 
+ t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100))); + t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101))); + + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100))); + + t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(0, 0), false); + + t.toremove.insert(mk_obj(1)); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_6) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100))); + + t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100), false); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_7) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100))); + + t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false); + t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(8, 80), false); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_8) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.auth.push_back(mk_ple_dt(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100))); + + t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false); + t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(8, 80), true); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_9) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.auth.push_back(mk_ple_dt(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100))); + + t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false); + t.toremove.insert(mk_obj(1)); + t.deletes_during_peering = true; + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_10) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.auth.push_back(mk_ple_ldt(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100))); + + t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false); + t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(8, 80), true); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_prior_version_have) { + TestCase t; + t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80))); + + t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100))); + + t.init.add(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100), false); + + t.setup(); + run_test_case(t); +} + +TEST_F(PGLogTest, merge_log_split_missing_entries_at_head) { + TestCase t; + t.auth.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + t.auth.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100))); + + t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(8, 70), mk_evt(8, 65))); + + t.setup(); + t.set_div_bounds(mk_evt(9, 79), mk_evt(8, 69)); + t.set_auth_bounds(mk_evt(15, 160), mk_evt(9, 77)); + t.final.add(mk_obj(1), mk_evt(15, 150), mk_evt(8, 70), false); + run_test_case(t); +} + +TEST_F(PGLogTest, olog_tail_gt_log_tail_split) { + TestCase t; + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100))); + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 155), mk_evt(15, 150))); + + t.setup(); + t.set_div_bounds(mk_evt(15, 153), mk_evt(15, 151)); + t.set_auth_bounds(mk_evt(15, 156), mk_evt(10, 99)); + t.final.add(mk_obj(1), mk_evt(15, 155), mk_evt(15, 150), false); + run_test_case(t); +} + 
+TEST_F(PGLogTest, olog_tail_gt_log_tail_split2) { + TestCase t; + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100))); + t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(16, 155), mk_evt(15, 150))); + t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 153), mk_evt(15, 150))); + + t.setup(); + t.set_div_bounds(mk_evt(15, 153), mk_evt(15, 151)); + t.set_auth_bounds(mk_evt(16, 156), mk_evt(10, 99)); + t.final.add(mk_obj(1), mk_evt(16, 155), mk_evt(0, 0), false); + t.toremove.insert(mk_obj(1)); + run_test_case(t); +} + +TEST_F(PGLogTest, filter_log_1) { + { + clear(); + + int osd_id = 1; + epoch_t epoch = 40; + int64_t pool_id = 1; + int bits = 2; + int max_osd = 4; + int pg_num = max_osd << bits; + int num_objects = 1000; + int num_internal = 10; + + // Set up splitting map + std::unique_ptr<OSDMap> osdmap(new OSDMap); + uuid_d test_uuid; + test_uuid.generate_random(); + osdmap->build_simple_with_pool(g_ceph_context, epoch, test_uuid, max_osd, bits, bits); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + + const string hit_set_namespace("internal"); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::MODIFY; + e.soid.pool = pool_id; + + uuid_d uuid_name; + int i; + for (i = 1; i <= num_objects; ++i) { + e.version = eversion_t(epoch, i); + // Use this to generate random file names + uuid_name.generate_random(); + ostringstream name; + name << uuid_name; + e.soid.oid.name = name.str(); + // First has no namespace + if (i != 1) { + // num_internal have the internal namspace + if (i <= num_internal + 1) { + e.soid.nspace = hit_set_namespace; + } else { // rest have different namespaces + ostringstream ns; + ns << "ns" << i; + e.soid.nspace = ns.str(); + } + } + log.log.push_back(e); + if (i == 1) + log.tail = e.version; + } + log.head = e.version; + log.index(); + } + + spg_t pgid(pg_t(2, pool_id), shard_id_t::NO_SHARD); + + // See if we created the right number of entries + int total = log.log.size(); + ASSERT_EQ(total, num_objects); + + // Some should be removed + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } + EXPECT_LE(log.log.size(), (size_t)total); + + // If we filter a second time, there should be the same total + total = log.log.size(); + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } + EXPECT_EQ(log.log.size(), (size_t)total); + + // Increase pg_num as if there would be a split + int new_pg_num = pg_num * 16; + OSDMap::Incremental inc(epoch + 1); + inc.fsid = test_uuid; + const pg_pool_t *pool = osdmap->get_pg_pool(pool_id); + pg_pool_t newpool; + newpool = *pool; + newpool.set_pg_num(new_pg_num); + newpool.set_pgp_num(new_pg_num); + inc.new_pools[pool_id] = newpool; + int ret = osdmap->apply_incremental(inc); + ASSERT_EQ(ret, 0); + + // We should have fewer entries after a filter + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } + EXPECT_LE(log.log.size(), (size_t)total); + + // Make sure all internal entries are retained + int count = 0; + for (list<pg_log_entry_t>::iterator i = log.log.begin(); + i != log.log.end(); ++i) { + if (i->soid.nspace == hit_set_namespace) count++; + } + EXPECT_EQ(count, num_internal); + } +} + +TEST_F(PGLogTest, get_request) { + clear(); + + 
// make sure writes, deletes, and errors are found + vector<pg_log_entry_t> entries; + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4), + 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1), + utime_t(0,1), -ENOENT)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::MODIFY, oid, eversion_t(6,3), eversion_t(3,4), + 2, osd_reqid_t(entity_name_t::CLIENT(777), 8, 2), + utime_t(1,2), 0)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::DELETE, oid, eversion_t(7,4), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 3), + utime_t(10,2), 0)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4), + utime_t(20,1), -ENOENT)); + + for (auto &entry : entries) { + log.add(entry); + } + + for (auto &entry : entries) { + eversion_t replay_version; + version_t user_version; + int return_code = 0; + vector<pg_log_op_return_item_t> op_returns; + bool got = log.get_request( + entry.reqid, &replay_version, &user_version, &return_code, &op_returns); + EXPECT_TRUE(got); + EXPECT_EQ(entry.return_code, return_code); + EXPECT_EQ(entry.version, replay_version); + EXPECT_EQ(entry.user_version, user_version); + } +} + +TEST_F(PGLogTest, ErrorNotIndexedByObject) { + clear(); + + // make sure writes, deletes, and errors are found + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + log.add( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4), + 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1), + utime_t(0,1), -ENOENT)); + + EXPECT_FALSE(log.logged_object(oid)); + + pg_log_entry_t modify(pg_log_entry_t::MODIFY, oid, eversion_t(6,3), + eversion_t(3,4), 2, + osd_reqid_t(entity_name_t::CLIENT(777), 8, 2), + utime_t(1,2), 0); + log.add(modify); + + EXPECT_TRUE(log.logged_object(oid)); + pg_log_entry_t *entry = log.objects[oid]; + EXPECT_EQ(modify.op, entry->op); + EXPECT_EQ(modify.version, entry->version); + EXPECT_EQ(modify.prior_version, entry->prior_version); + EXPECT_EQ(modify.user_version, entry->user_version); + EXPECT_EQ(modify.reqid, entry->reqid); + + pg_log_entry_t del(pg_log_entry_t::DELETE, oid, eversion_t(7,4), + eversion_t(7,4), 3, + osd_reqid_t(entity_name_t::CLIENT(777), 8, 3), + utime_t(10,2), 0); + log.add(del); + + EXPECT_TRUE(log.logged_object(oid)); + entry = log.objects[oid]; + EXPECT_EQ(del.op, entry->op); + EXPECT_EQ(del.version, entry->version); + EXPECT_EQ(del.prior_version, entry->prior_version); + EXPECT_EQ(del.user_version, entry->user_version); + EXPECT_EQ(del.reqid, entry->reqid); + + log.add( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4), + utime_t(20,1), -ENOENT)); + + EXPECT_TRUE(log.logged_object(oid)); + entry = log.objects[oid]; + EXPECT_EQ(del.op, entry->op); + EXPECT_EQ(del.version, entry->version); + EXPECT_EQ(del.prior_version, entry->prior_version); + EXPECT_EQ(del.user_version, entry->user_version); + EXPECT_EQ(del.reqid, entry->reqid); +} + +TEST_F(PGLogTest, split_into_preserves_may_include_deletes) { + clear(); + + { + may_include_deletes_in_missing_dirty = false; + missing.may_include_deletes = true; + PGLog child_log(cct); + pg_t child_pg; + split_into(child_pg, 6, &child_log); + ASSERT_TRUE(child_log.get_missing().may_include_deletes); + ASSERT_TRUE(child_log.get_may_include_deletes_in_missing_dirty()); + } + + { + 
may_include_deletes_in_missing_dirty = false; + missing.may_include_deletes = false; + PGLog child_log(cct); + pg_t child_pg; + split_into(child_pg, 6, &child_log); + ASSERT_FALSE(child_log.get_missing().may_include_deletes); + ASSERT_FALSE(child_log.get_may_include_deletes_in_missing_dirty()); + } +} + +class PGLogTestRebuildMissing : public PGLogTest, public StoreTestFixture { +public: + PGLogTestRebuildMissing() : PGLogTest(), StoreTestFixture("memstore") {} + void SetUp() override { + StoreTestFixture::SetUp(); + ObjectStore::Transaction t; + test_coll = coll_t(spg_t(pg_t(1, 1))); + ch = store->create_new_collection(test_coll); + t.create_collection(test_coll, 0); + store->queue_transaction(ch, std::move(t)); + existing_oid = mk_obj(0); + nonexistent_oid = mk_obj(1); + ghobject_t existing_ghobj(existing_oid); + object_info_t existing_info; + existing_info.version = eversion_t(6, 2); + bufferlist enc_oi; + encode(existing_info, enc_oi, 0); + ObjectStore::Transaction t2; + t2.touch(test_coll, ghobject_t(existing_oid)); + t2.setattr(test_coll, ghobject_t(existing_oid), OI_ATTR, enc_oi); + ASSERT_EQ(0, store->queue_transaction(ch, std::move(t2))); + info.last_backfill = hobject_t::get_max(); + info.last_complete = eversion_t(); + } + + void TearDown() override { + clear(); + missing.may_include_deletes = false; + StoreTestFixture::TearDown(); + } + + pg_info_t info; + coll_t test_coll; + hobject_t existing_oid, nonexistent_oid; + + void run_rebuild_missing_test(const map<hobject_t, pg_missing_item> &expected_missing_items) { + rebuild_missing_set_with_deletes(store.get(), ch, info); + ASSERT_EQ(expected_missing_items, missing.get_items()); + } +}; + +TEST_F(PGLogTestRebuildMissing, EmptyLog) { + missing.add(existing_oid, mk_evt(6, 2), mk_evt(6, 3), false); + missing.add(nonexistent_oid, mk_evt(7, 4), mk_evt(0, 0), false); + map<hobject_t, pg_missing_item> orig_missing = missing.get_items(); + run_rebuild_missing_test(orig_missing); +} + +TEST_F(PGLogTestRebuildMissing, SameVersionMod) { + missing.add(existing_oid, mk_evt(6, 2), mk_evt(6, 1), false); + log.add(mk_ple_mod(existing_oid, mk_evt(6, 2), mk_evt(6, 1))); + map<hobject_t, pg_missing_item> empty_missing; + run_rebuild_missing_test(empty_missing); +} + +TEST_F(PGLogTestRebuildMissing, DelExisting) { + missing.add(existing_oid, mk_evt(6, 3), mk_evt(6, 2), false); + log.add(mk_ple_dt(existing_oid, mk_evt(7, 5), mk_evt(7, 4))); + map<hobject_t, pg_missing_item> expected; + expected[existing_oid] = pg_missing_item(mk_evt(7, 5), mk_evt(6, 2), true); + run_rebuild_missing_test(expected); +} + +TEST_F(PGLogTestRebuildMissing, DelNonexistent) { + log.add(mk_ple_dt(nonexistent_oid, mk_evt(7, 5), mk_evt(7, 4))); + map<hobject_t, pg_missing_item> expected; + expected[nonexistent_oid] = pg_missing_item(mk_evt(7, 5), mk_evt(0, 0), true); + run_rebuild_missing_test(expected); +} + +TEST_F(PGLogTestRebuildMissing, MissingNotInLog) { + missing.add(mk_obj(10), mk_evt(8, 12), mk_evt(8, 10), false); + log.add(mk_ple_dt(nonexistent_oid, mk_evt(7, 5), mk_evt(7, 4))); + map<hobject_t, pg_missing_item> expected; + expected[nonexistent_oid] = pg_missing_item(mk_evt(7, 5), mk_evt(0, 0), true); + expected[mk_obj(10)] = pg_missing_item(mk_evt(8, 12), mk_evt(8, 10), false); + run_rebuild_missing_test(expected); +} + + +class PGLogMergeDupsTest : protected PGLog, public StoreTestFixture { + +public: + + PGLogMergeDupsTest() : PGLog(g_ceph_context), StoreTestFixture("memstore") { } + + void SetUp() override { + StoreTestFixture::SetUp(); + 
ObjectStore::Transaction t; + test_coll = coll_t(spg_t(pg_t(1, 1))); + auto ch = store->create_new_collection(test_coll); + t.create_collection(test_coll, 0); + store->queue_transaction(ch, std::move(t)); + } + + void TearDown() override { + test_disk_roundtrip(); + clear(); + StoreTestFixture::TearDown(); + } + + static pg_log_dup_t create_dup_entry(uint a, uint b) { + // make each dup_entry unique by using different client id's + static uint client_id = 777; + return pg_log_dup_t(eversion_t(a, b), + a, + osd_reqid_t(entity_name_t::CLIENT(client_id++), 8, 1), + 0); + } + + static std::vector<pg_log_dup_t> example_dups_1() { + std::vector<pg_log_dup_t> result = { + create_dup_entry(10, 11), + create_dup_entry(10, 12), + create_dup_entry(11, 1), + create_dup_entry(12, 3), + create_dup_entry(13, 99) + }; + return result; + } + + static std::vector<pg_log_dup_t> example_dups_2() { + std::vector<pg_log_dup_t> result = { + create_dup_entry(12, 3), + create_dup_entry(13, 99), + create_dup_entry(15, 11), + create_dup_entry(16, 14), + create_dup_entry(16, 32) + }; + return result; + } + + void add_dups(uint a, uint b) { + log.dups.push_back(create_dup_entry(a, b)); + write_from_dups = std::min(write_from_dups, log.dups.back().version); + } + + void add_dups(const std::vector<pg_log_dup_t>& l) { + for (auto& i : l) { + log.dups.push_back(i); + write_from_dups = std::min(write_from_dups, log.dups.back().version); + } + } + + static void add_dups(IndexedLog& log, const std::vector<pg_log_dup_t>& dups) { + for (auto& i : dups) { + log.dups.push_back(i); + } + } + + void check_order() { + eversion_t prev(0, 0); + + for (auto& i : log.dups) { + EXPECT_LT(prev, i.version) << "verify versions monotonically increase"; + prev = i.version; + } + } + + void check_index() { + EXPECT_EQ(log.dups.size(), log.dup_index.size()); + for (auto& i : log.dups) { + EXPECT_EQ(1u, log.dup_index.count(i.reqid)); + } + } + + void test_disk_roundtrip() { + ObjectStore::Transaction t; + hobject_t hoid; + hoid.pool = 1; + hoid.oid = "log"; + ghobject_t log_oid(hoid); + map<string, bufferlist> km; + write_log_and_missing(t, &km, test_coll, log_oid, false); + if (!km.empty()) { + t.omap_setkeys(test_coll, log_oid, km); + } + auto ch = store->open_collection(test_coll); + ASSERT_EQ(0, store->queue_transaction(ch, std::move(t))); + + auto orig_dups = log.dups; + clear(); + ostringstream err; + read_log_and_missing(store.get(), ch, log_oid, + pg_info_t(), err, false); + ASSERT_EQ(orig_dups.size(), log.dups.size()); + ASSERT_EQ(orig_dups, log.dups); + auto dups_it = log.dups.begin(); + for (auto orig_dup : orig_dups) { + ASSERT_EQ(orig_dup, *dups_it); + ++dups_it; + } + } + + coll_t test_coll; +}; + +TEST_F(PGLogMergeDupsTest, OtherEmpty) { + log.tail = eversion_t(14, 5); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + + bool changed = merge_log_dups(olog); + + EXPECT_FALSE(changed); + EXPECT_EQ(5u, log.dups.size()); + + if (5 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + EXPECT_EQ(13u, log.dups.back().version.epoch); + EXPECT_EQ(99u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + +TEST_F(PGLogMergeDupsTest, AmEmpty) { + log.tail = eversion_t(14, 5); + index(); + + IndexedLog olog; + + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(5u, log.dups.size()); + + if (5 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + 
EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(13u, log.dups.back().version.epoch); + EXPECT_EQ(99u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + +TEST_F(PGLogMergeDupsTest, AmEmptyOverlap) { + log.tail = eversion_t(12, 3); + index(); + + IndexedLog olog; + + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(4u, log.dups.size()); + + if (4 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(12u, log.dups.back().version.epoch); + EXPECT_EQ(3u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + +TEST_F(PGLogMergeDupsTest, Same) { + log.tail = eversion_t(14, 1); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_FALSE(changed); + EXPECT_EQ(5u, log.dups.size()); + + if (5 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(13u, log.dups.back().version.epoch); + EXPECT_EQ(99u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +TEST_F(PGLogMergeDupsTest, Later) { + log.tail = eversion_t(16, 14); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + add_dups(olog, example_dups_2()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(7u, log.dups.size()); + + if (7 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(16u, log.dups.back().version.epoch); + EXPECT_EQ(14u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +TEST_F(PGLogMergeDupsTest, Earlier) { + log.tail = eversion_t(17, 2); + + IndexedLog olog; + + add_dups(example_dups_2()); + index(); + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(8u, log.dups.size()); + + if (6 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(16u, log.dups.back().version.epoch); + EXPECT_EQ(32u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +TEST_F(PGLogMergeDupsTest, Superset) { + log.tail = eversion_t(17, 2); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + + olog.dups.push_back(create_dup_entry(9, 5)); + olog.dups.push_back(create_dup_entry(15, 11)); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(7u, log.dups.size()); + + if (7 == log.dups.size()) { + EXPECT_EQ(9u, log.dups.front().version.epoch); + EXPECT_EQ(5u, log.dups.front().version.version); + + EXPECT_EQ(15u, log.dups.back().version.epoch); + EXPECT_EQ(11u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +struct PGLogTrimTest : + public ::testing::Test, + public PGLogTestBase, + public PGLog::IndexedLog +{ + CephContext *cct = g_ceph_context; + + using ::testing::Test::SetUp; + void SetUp(unsigned dup_track) { + constexpr size_t size = 10; + + char dup_track_s[size]; + + snprintf(dup_track_s, size, "%u", dup_track); + + cct->_conf.set_val_or_die("osd_pg_log_dups_tracked", dup_track_s); + } +}; // struct PGLogTrimTest + + +TEST_F(PGLogTrimTest, TestMakingCephContext) +{ + SetUp(5); + + EXPECT_EQ(5u, cct->_conf->osd_pg_log_dups_tracked); +} + + 
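+// The trim tests below all hinge on how many of the trimmed entries survive
+// as dups. A rough model that matches every assertion that follows (inferred
+// from the tests themselves, not a statement of the PGLog implementation):
+// with osd_pg_log_dups_tracked = N and log head version H, a trimmed entry of
+// version v is kept as a dup only if v >= H - N + 1, and write_from_dups ends
+// up as the smallest such v. A hypothetical helper expressing that rule:
+//
+//   static bool kept_as_dup(version_t v, version_t head, uint64_t tracked) {
+//     return v + tracked > head;  // i.e. v >= head - tracked + 1
+//   }
+//
+// For example, in TestPartialTrim below: head 21'167, tracked 20, so trimmed
+// entries 150 and 155 survive as dups while 100 does not.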
+TEST_F(PGLogTrimTest, TestPartialTrim) +{ + SetUp(20); + PGLog::IndexedLog log; + log.head = mk_evt(24, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + std::set<eversion_t> trimmed; + std::set<std::string> trimmed_dups; + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(19, 157), &trimmed, &trimmed_dups, &write_from_dups); + + EXPECT_EQ(eversion_t(15, 150), write_from_dups); + EXPECT_EQ(3u, log.log.size()); + EXPECT_EQ(3u, trimmed.size()); + EXPECT_EQ(2u, log.dups.size()); + EXPECT_EQ(0u, trimmed_dups.size()); + + SetUp(15); + + std::set<eversion_t> trimmed2; + std::set<std::string> trimmed_dups2; + eversion_t write_from_dups2 = eversion_t::max(); + + log.trim(cct, mk_evt(20, 164), &trimmed2, &trimmed_dups2, &write_from_dups2); + + EXPECT_EQ(eversion_t(19, 160), write_from_dups2); + EXPECT_EQ(2u, log.log.size()); + EXPECT_EQ(1u, trimmed2.size()); + EXPECT_EQ(3u, log.dups.size()); + EXPECT_EQ(0u, trimmed_dups2.size()); +} + + +TEST_F(PGLogTrimTest, TestTrimNoTrimmed) { + SetUp(20); + PGLog::IndexedLog log; + log.head = mk_evt(20, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(19, 157), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(15, 150), write_from_dups); + EXPECT_EQ(3u, log.log.size()); + EXPECT_EQ(2u, log.dups.size()); +} + + +TEST_F(PGLogTrimTest, TestTrimNoDups) +{ + SetUp(10); + PGLog::IndexedLog log; + log.head = mk_evt(20, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + std::set<eversion_t> trimmed; + std::set<std::string> trimmed_dups; + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(19, 157), &trimmed, &trimmed_dups, &write_from_dups); + + EXPECT_EQ(eversion_t::max(), write_from_dups); + EXPECT_EQ(3u, log.log.size()); + EXPECT_EQ(3u, trimmed.size()); + EXPECT_EQ(0u, log.dups.size()); + EXPECT_EQ(0u, trimmed_dups.size()); +} + +TEST_F(PGLogTrimTest, TestNoTrim) +{ + SetUp(20); + PGLog::IndexedLog log; + log.head = mk_evt(24, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100))); + 
log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + std::set<eversion_t> trimmed; + std::set<std::string> trimmed_dups; + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(9, 99), &trimmed, &trimmed_dups, &write_from_dups); + + EXPECT_EQ(eversion_t::max(), write_from_dups); + EXPECT_EQ(6u, log.log.size()); + EXPECT_EQ(0u, trimmed.size()); + EXPECT_EQ(0u, log.dups.size()); + EXPECT_EQ(0u, trimmed_dups.size()); +} + +TEST_F(PGLogTrimTest, TestTrimAll) +{ + SetUp(20); + PGLog::IndexedLog log; + EXPECT_EQ(0u, log.dup_index.size()); // Sanity check + log.head = mk_evt(24, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + std::set<eversion_t> trimmed; + std::set<std::string> trimmed_dups; + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(22, 180), &trimmed, &trimmed_dups, &write_from_dups); + + EXPECT_EQ(eversion_t(15, 150), write_from_dups); + EXPECT_EQ(0u, log.log.size()); + EXPECT_EQ(6u, trimmed.size()); + EXPECT_EQ(5u, log.dups.size()); + EXPECT_EQ(0u, trimmed_dups.size()); + EXPECT_EQ(0u, log.dup_index.size()); // dup_index entry should be trimmed +} + + +TEST_F(PGLogTrimTest, TestGetRequest) { + SetUp(20); + PGLog::IndexedLog log; + log.head = mk_evt(20, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166), + osd_reqid_t(client, 8, 6))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(19, 157), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(15, 150), write_from_dups); + EXPECT_EQ(3u, log.log.size()); + EXPECT_EQ(2u, log.dups.size()); + + eversion_t version; + version_t user_version; + int return_code; + vector<pg_log_op_return_item_t> op_returns; + + osd_reqid_t log_reqid = osd_reqid_t(client, 8, 5); + osd_reqid_t dup_reqid = osd_reqid_t(client, 8, 3); + osd_reqid_t bad_reqid = osd_reqid_t(client, 8, 1); + + bool result; + + result = log.get_request(log_reqid, &version, &user_version, &return_code, + &op_returns); + EXPECT_EQ(true, result); + EXPECT_EQ(mk_evt(21, 165), version); + + result = log.get_request(dup_reqid, &version, &user_version, &return_code, + &op_returns); + EXPECT_EQ(true, result); + EXPECT_EQ(mk_evt(15, 155), version); + + result = log.get_request(bad_reqid, &version, &user_version, &return_code, + &op_returns); + 
EXPECT_FALSE(result); +} + +TEST_F(PGLogTest, _merge_object_divergent_entries) { + { + // Test for issue 20843 + clear(); + hobject_t hoid(object_t(/*name*/"notify.7"), + /*key*/string(""), + /*snap*/7, + /*hash*/77, + /*pool*/5, + /*nspace*/string("")); + mempool::osd_pglog::list<pg_log_entry_t> orig_entries; + orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952))); + orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 958))); + orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 959))); + orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 960), eversion_t(8336, 957))); + log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070))); + missing.add(hoid, + /*need*/eversion_t(8971, 1070), + /*have*/eversion_t(8336, 952), + false); + pg_info_t oinfo; + LogHandler rollbacker; + _merge_object_divergent_entries(log, hoid, + orig_entries, oinfo, + log.get_can_rollback_to(), + missing, &rollbacker, + this); + // No core dump + } + { + // skip leading error entries + clear(); + hobject_t hoid(object_t(/*name*/"notify.7"), + /*key*/string(""), + /*snap*/7, + /*hash*/77, + /*pool*/5, + /*nspace*/string("")); + mempool::osd_pglog::list<pg_log_entry_t> orig_entries; + orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 956))); + orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952))); + log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070))); + missing.add(hoid, + /*need*/eversion_t(8971, 1070), + /*have*/eversion_t(8336, 952), + false); + pg_info_t oinfo; + LogHandler rollbacker; + _merge_object_divergent_entries(log, hoid, + orig_entries, oinfo, + log.get_can_rollback_to(), + missing, &rollbacker, + this); + // No core dump + } +} + +TEST(eversion_t, get_key_name) { + eversion_t a(1234, 5678); + std::string a_key_name = a.get_key_name(); + EXPECT_EQ("0000001234.00000000000000005678", a_key_name); +} + +TEST(pg_log_dup_t, get_key_name) { + pg_log_dup_t a(eversion_t(1234, 5678), + 13, + osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), + 15); + std::string a_key_name = a.get_key_name(); + EXPECT_EQ("dup_0000001234.00000000000000005678", a_key_name); +} + + +// This tests trim() to make copies of +// 2 log entries (107, 106) and 3 additional for a total +// of 5 dups. Nothing from the original dups is copied. 
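+// (Worked through under the inferred accounting above: tracked is 5 and the
+// final head is 21'107, so trimmed entries 103, 104 and 105 are kept as dups
+// next to the pre-existing 9'99 dup -- hence the 4 dups and
+// write_from_dups == 20'103 asserted below.)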
+TEST_F(PGLogTrimTest, TestTrimDups) { + SetUp(5); + PGLog::IndexedLog log; + log.head = mk_evt(21, 107); + log.skip_can_rollback_to_to_head(); + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(21, 105), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; + EXPECT_EQ(2u, log.log.size()) << log; + EXPECT_EQ(4u, log.dups.size()) << log; +} + +// This tests trim() to make copies of +// 4 log entries (107, 106, 105, 104) and 5 additional for a total +// of 9 dups. Only 1 of 2 existing dups are copied. +TEST_F(PGLogTrimTest, TestTrimDups2) { + SetUp(9); + PGLog::IndexedLog log; + log.head = mk_evt(21, 107); + log.skip_can_rollback_to_to_head(); + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(20, 103), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; + EXPECT_EQ(4u, log.log.size()) << log; + EXPECT_EQ(6u, log.dups.size()) << log; +} + +// This tests copy_up_to() to make copies of +// 2 log entries (107, 106) and 3 additional for a total +// of 5 dups. Nothing from the original dups is copied. 
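+// (Same accounting: copy.head becomes 21'107 and, with 5 dups tracked,
+// versions 103-105 come across as the copy's 3 dups; the pre-existing 9'99
+// dup falls outside that window, which is why nothing from log.dups is
+// copied.)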
+TEST_F(PGLogTrimTest, TestCopyUpTo) { + SetUp(5); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_up_to(cct, log, 2); + + EXPECT_EQ(2u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(21, 105)) << copy; + // Tracking 5 means 3 additional as dups + EXPECT_EQ(3u, copy.dups.size()) << copy; +} + +// This tests copy_up_to() to make copies of +// 4 log entries (107, 106, 105, 104) and 5 additional for a total +// of 5 dups. Only 1 of 2 existing dups are copied. +TEST_F(PGLogTrimTest, TestCopyUpTo2) { + SetUp(9); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_up_to(cct, log, 4); + + EXPECT_EQ(4u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(20, 103)) << copy; + // Tracking 5 means 3 additional as dups + EXPECT_EQ(5u, copy.dups.size()) << copy; +} + +// This tests copy_after() by specifying a version that copies +// 2 log entries (107, 106) and 3 additional for a total +// of 5 dups. Nothing of the original dups is copied. 
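+// (copy_after(v) takes only entries strictly newer than v and sets the copy's
+// tail to v itself -- see the EXPECT_EQ(copy.tail, mk_evt(21, 105)) below;
+// the dup accounting then matches the copy_up_to tests above.)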
+TEST_F(PGLogTrimTest, TestCopyAfter) { + SetUp(5); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_after(cct, log, mk_evt(21, 105)); + + EXPECT_EQ(2u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(21, 105)) << copy; + // Tracking 5 means 3 additional as dups + EXPECT_EQ(3u, copy.dups.size()) << copy; +} + +// This copies everything dups and log because of the large max dups +// and value passed to copy_after(). +TEST_F(PGLogTrimTest, TestCopyAfter2) { + SetUp(3000); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 93), mk_evt(8, 92), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 94), mk_evt(8, 93), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 95), mk_evt(8, 94), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 96), mk_evt(8, 95), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 97), mk_evt(8, 96), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_after(cct, log, mk_evt(9, 99)); + + EXPECT_EQ(8u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(9, 99)) << copy; + // Tracking 3000 is larger than all entries, so all dups copied + 
EXPECT_EQ(7u, copy.dups.size()) << copy; +} + +// Local Variables: +// compile-command: "cd ../.. ; make unittest_pglog ; ./unittest_pglog --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " +// End: diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc new file mode 100644 index 000000000..1a1389b2a --- /dev/null +++ b/src/test/osd/TestRados.cc @@ -0,0 +1,729 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "common/Cond.h" +#include "common/errno.h" +#include "common/version.h" + +#include <iostream> +#include <sstream> +#include <map> +#include <numeric> +#include <string> +#include <vector> +#include <stdlib.h> +#include <unistd.h> + +#include "test/osd/RadosModel.h" + +using namespace std; + +class WeightedTestGenerator : public TestOpGenerator +{ +public: + + WeightedTestGenerator(int ops, + int objects, + map<TestOpType, unsigned int> op_weights, + TestOpStat *stats, + int max_seconds, + bool ec_pool, + bool balance_reads, + bool localize_reads, + bool set_redirect, + bool set_chunk, + bool enable_dedup) : + m_nextop(NULL), m_op(0), m_ops(ops), m_seconds(max_seconds), + m_objects(objects), m_stats(stats), + m_total_weight(0), + m_ec_pool(ec_pool), + m_balance_reads(balance_reads), + m_localize_reads(localize_reads), + m_set_redirect(set_redirect), + m_set_chunk(set_chunk), + m_enable_dedup(enable_dedup) + { + m_start = time(0); + for (map<TestOpType, unsigned int>::const_iterator it = op_weights.begin(); + it != op_weights.end(); + ++it) { + m_total_weight += it->second; + m_weight_sums.insert(pair<TestOpType, unsigned int>(it->first, + m_total_weight)); + } + if (m_set_redirect || m_set_chunk) { + if (m_set_redirect) { + m_ops = ops+m_objects+m_objects; + } else { + /* create 10 chunks per an object*/ + m_ops = ops+m_objects+m_objects*10; + } + } + } + + TestOp *next(RadosTestContext &context) override + { + TestOp *retval = NULL; + + ++m_op; + if (m_op <= m_objects && !m_set_redirect && !m_set_chunk ) { + stringstream oid; + oid << m_op; + /*if (m_op % 2) { + // make it a long name + oid << " " << string(300, 'o'); + }*/ + cout << m_op << ": write initial oid " << oid.str() << std::endl; + context.oid_not_flushing.insert(oid.str()); + if (m_ec_pool) { + return new WriteOp(m_op, &context, oid.str(), true, true); + } else { + return new WriteOp(m_op, &context, oid.str(), false, true); + } + } else if (m_op >= m_ops) { + return NULL; + } + + if (m_set_redirect || m_set_chunk) { + if (init_extensible_tier(context, retval)) { + return retval; + } + } + + if (m_nextop) { + retval = m_nextop; + m_nextop = NULL; + return retval; + } + + while (retval == NULL) { + unsigned int rand_val = rand() % m_total_weight; + + time_t now = time(0); + if (m_seconds && now - m_start > m_seconds) + break; + + for (map<TestOpType, unsigned int>::const_iterator it = m_weight_sums.begin(); + it != m_weight_sums.end(); + ++it) { + if (rand_val < it->second) { + retval = gen_op(context, it->first); + break; + } + } + } + return retval; + } + + bool init_extensible_tier(RadosTestContext &context, TestOp *& op) { + /* + * set-redirect or set-chunk test (manifest test) + * 0. make default objects (using create op) + * 1. set-redirect or set-chunk + * 2. initialize target objects (using write op) + * 3. 
wait for set-* completion + */ + int copy_manifest_end = 0; + if (m_set_chunk) { + copy_manifest_end = m_objects*2; + } else { + copy_manifest_end = m_objects*3; + } + int make_manifest_end = copy_manifest_end; + if (m_set_chunk) { + /* make 10 chunks per an object*/ + make_manifest_end = make_manifest_end + m_objects * 10; + } else { + /* redirect */ + make_manifest_end = make_manifest_end + m_objects; + } + + if (m_op <= m_objects) { + stringstream oid; + oid << m_op; + /*if (m_op % 2) { + oid << " " << string(300, 'o'); + }*/ + cout << m_op << ": write initial oid " << oid.str() << std::endl; + context.oid_not_flushing.insert(oid.str()); + if (m_ec_pool) { + op = new WriteOp(m_op, &context, oid.str(), true, true); + } else { + op = new WriteOp(m_op, &context, oid.str(), false, true); + } + return true; + } else if (m_op <= copy_manifest_end) { + stringstream oid, oid2; + //int _oid = m_op-m_objects; + int _oid = m_op % m_objects + 1; + oid << _oid; + /*if ((_oid) % 2) { + oid << " " << string(300, 'o'); + }*/ + + if (context.oid_in_use.count(oid.str())) { + /* previous write is not finished */ + op = NULL; + m_op--; + cout << m_op << " wait for completion of write op! " << std::endl; + return true; + } + + int _oid2 = m_op - m_objects + 1; + if (_oid2 > copy_manifest_end - m_objects) { + _oid2 -= (copy_manifest_end - m_objects); + } + oid2 << _oid2 << " " << context.low_tier_pool_name; + if ((_oid2) % 2) { + oid2 << " " << string(300, 'm'); + } + cout << m_op << ": " << "copy oid " << oid.str() << " target oid " + << oid2.str() << std::endl; + op = new CopyOp(m_op, &context, oid.str(), oid2.str(), context.low_tier_pool_name); + return true; + } else if (m_op <= make_manifest_end) { + if (m_set_redirect) { + stringstream oid, oid2; + int _oid = m_op-copy_manifest_end; + oid << _oid; + /*if ((_oid) % 2) { + oid << " " << string(300, 'o'); + }*/ + oid2 << _oid << " " << context.low_tier_pool_name; + if ((_oid) % 2) { + oid2 << " " << string(300, 'm'); + } + if (context.oid_in_use.count(oid.str())) { + /* previous copy is not finished */ + op = NULL; + m_op--; + cout << m_op << " retry set_redirect !" << std::endl; + return true; + } + cout << m_op << ": " << "set_redirect oid " << oid.str() << " target oid " + << oid2.str() << std::endl; + op = new SetRedirectOp(m_op, &context, oid.str(), oid2.str(), context.pool_name); + return true; + } else if (m_set_chunk) { + stringstream oid; + int _oid = m_op % m_objects +1; + oid << _oid; + /*if ((_oid) % 2) { + oid << " " << string(300, 'o'); + }*/ + if (context.oid_in_use.count(oid.str())) { + /* previous set-chunk is not finished */ + op = NULL; + m_op--; + cout << m_op << " retry set_chunk !" 
<< std::endl; + return true; + } + stringstream oid2; + oid2 << _oid << " " << context.low_tier_pool_name; + if ((_oid) % 2) { + oid2 << " " << string(300, 'm'); + } + + cout << m_op << ": " << "set_chunk oid " << oid.str() + << " target oid " << oid2.str() << std::endl; + op = new SetChunkOp(m_op, &context, oid.str(), oid2.str(), m_stats); + return true; + } + } else if (m_op == make_manifest_end + 1) { + int set_size = context.oid_not_in_use.size(); + int set_manifest_size = context.oid_redirect_not_in_use.size(); + cout << m_op << " oid_not_in_use " << set_size << " oid_redirect_not_in_use " << set_manifest_size << std::endl; + /* wait for redirect or set_chunk initialization */ + if (set_size != m_objects || set_manifest_size != 0) { + op = NULL; + m_op--; + cout << m_op << " wait for manifest initialization " << std::endl; + return true; + } + for (int t_op = m_objects+1; t_op <= m_objects*2; t_op++) { + stringstream oid; + oid << t_op << " " << context.low_tier_pool_name; + if (t_op % 2) { + oid << " " << string(300, 'm'); + } + cout << " redirect_not_in_use: " << oid.str() << std::endl; + context.oid_redirect_not_in_use.insert(oid.str()); + } + } + + return false; + } + +private: + + TestOp *gen_op(RadosTestContext &context, TestOpType type) + { + string oid, oid2; + ceph_assert(context.oid_not_in_use.size()); + + switch (type) { + case TEST_OP_READ: + oid = *(rand_choose(context.oid_not_in_use)); + return new ReadOp(m_op, &context, oid, m_balance_reads, m_localize_reads, + m_stats); + + case TEST_OP_WRITE: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "write oid " << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteOp(m_op, &context, oid, false, false, m_stats); + + case TEST_OP_WRITE_EXCL: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "write (excl) oid " + << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteOp(m_op, &context, oid, false, true, m_stats); + + case TEST_OP_WRITESAME: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "writesame oid " + << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteSameOp(m_op, &context, oid, m_stats); + + case TEST_OP_DELETE: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "delete oid " << oid << " current snap is " + << context.current_snap << std::endl; + return new DeleteOp(m_op, &context, oid, m_stats); + + case TEST_OP_SNAP_CREATE: + cout << m_op << ": " << "snap_create" << std::endl; + return new SnapCreateOp(m_op, &context, m_stats); + + case TEST_OP_SNAP_REMOVE: + if (context.snaps.size() <= context.snaps_in_use.size()) { + return NULL; + } + while (true) { + int snap = rand_choose(context.snaps)->first; + if (context.snaps_in_use.lookup(snap)) + continue; // in use; try again! 
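+        // this snap is not referenced by any in-flight op, so it is safe to remove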
+ cout << m_op << ": " << "snap_remove snap " << snap << std::endl; + return new SnapRemoveOp(m_op, &context, snap, m_stats); + } + + case TEST_OP_ROLLBACK: + { + string oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "rollback oid " << oid << " current snap is " + << context.current_snap << std::endl; + return new RollbackOp(m_op, &context, oid); + } + + case TEST_OP_SETATTR: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "setattr oid " << oid + << " current snap is " << context.current_snap << std::endl; + return new SetAttrsOp(m_op, &context, oid, m_stats); + + case TEST_OP_RMATTR: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "rmattr oid " << oid + << " current snap is " << context.current_snap << std::endl; + return new RemoveAttrsOp(m_op, &context, oid, m_stats); + + case TEST_OP_WATCH: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "watch oid " << oid + << " current snap is " << context.current_snap << std::endl; + return new WatchOp(m_op, &context, oid, m_stats); + + case TEST_OP_COPY_FROM: + oid = *(rand_choose(context.oid_not_in_use)); + do { + oid2 = *(rand_choose(context.oid_not_in_use)); + } while (oid == oid2); + cout << m_op << ": " << "copy_from oid " << oid << " from oid " << oid2 + << " current snap is " << context.current_snap << std::endl; + return new CopyFromOp(m_op, &context, oid, oid2, m_stats); + + case TEST_OP_HIT_SET_LIST: + { + uint32_t hash = rjhash32(rand()); + cout << m_op << ": " << "hit_set_list " << hash << std::endl; + return new HitSetListOp(m_op, &context, hash, m_stats); + } + + case TEST_OP_UNDIRTY: + { + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "undirty oid " << oid << std::endl; + return new UndirtyOp(m_op, &context, oid, m_stats); + } + + case TEST_OP_IS_DIRTY: + { + oid = *(rand_choose(context.oid_not_flushing)); + return new IsDirtyOp(m_op, &context, oid, m_stats); + } + + case TEST_OP_CACHE_FLUSH: + { + oid = *(rand_choose(context.oid_not_in_use)); + return new CacheFlushOp(m_op, &context, oid, m_stats, true); + } + + case TEST_OP_CACHE_TRY_FLUSH: + { + oid = *(rand_choose(context.oid_not_in_use)); + return new CacheFlushOp(m_op, &context, oid, m_stats, false); + } + + case TEST_OP_CACHE_EVICT: + { + oid = *(rand_choose(context.oid_not_in_use)); + return new CacheEvictOp(m_op, &context, oid, m_stats); + } + + case TEST_OP_APPEND: + oid = *(rand_choose(context.oid_not_in_use)); + cout << "append oid " << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteOp(m_op, &context, oid, true, false, m_stats); + + case TEST_OP_APPEND_EXCL: + oid = *(rand_choose(context.oid_not_in_use)); + cout << "append oid (excl) " << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteOp(m_op, &context, oid, true, true, m_stats); + + case TEST_OP_CHUNK_READ: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "chunk read oid " << oid << " target oid " << oid2 << std::endl; + return new ChunkReadOp(m_op, &context, oid, context.pool_name, false, m_stats); + + case TEST_OP_TIER_PROMOTE: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "tier_promote oid " << oid << std::endl; + return new TierPromoteOp(m_op, &context, oid, m_stats); + + case TEST_OP_TIER_FLUSH: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "tier_flush oid " << oid << std::endl; + return new TierFlushOp(m_op, &context, oid, m_stats); 
+ + case TEST_OP_SET_REDIRECT: + oid = *(rand_choose(context.oid_not_in_use)); + oid2 = *(rand_choose(context.oid_redirect_not_in_use)); + cout << m_op << ": " << "set_redirect oid " << oid << " target oid " << oid2 << std::endl; + return new SetRedirectOp(m_op, &context, oid, oid2, context.pool_name, m_stats); + + case TEST_OP_UNSET_REDIRECT: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "unset_redirect oid " << oid << std::endl; + return new UnsetRedirectOp(m_op, &context, oid, m_stats); + + case TEST_OP_SET_CHUNK: + { + ceph_assert(m_enable_dedup); + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "set_chunk oid " << oid + << " target oid " << std::endl; + return new SetChunkOp(m_op, &context, oid, "", m_stats); + } + + case TEST_OP_TIER_EVICT: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "tier_evict oid " << oid << std::endl; + return new TierEvictOp(m_op, &context, oid, m_stats); + + default: + cerr << m_op << ": Invalid op type " << type << std::endl; + ceph_abort(); + return nullptr; + } + } + + TestOp *m_nextop; + int m_op; + int m_ops; + int m_seconds; + int m_objects; + time_t m_start; + TestOpStat *m_stats; + map<TestOpType, unsigned int> m_weight_sums; + unsigned int m_total_weight; + bool m_ec_pool; + bool m_balance_reads; + bool m_localize_reads; + bool m_set_redirect; + bool m_set_chunk; + bool m_enable_dedup; +}; + +int main(int argc, char **argv) +{ + int ops = 1000; + int objects = 50; + int max_in_flight = 16; + int64_t size = 4000000; // 4 MB + int64_t min_stride_size = -1, max_stride_size = -1; + int max_seconds = 0; + bool pool_snaps = false; + bool write_fadvise_dontneed = false; + + struct { + TestOpType op; + const char *name; + bool ec_pool_valid; + } op_types[] = { + { TEST_OP_READ, "read", true }, + { TEST_OP_WRITE, "write", false }, + { TEST_OP_WRITE_EXCL, "write_excl", false }, + { TEST_OP_WRITESAME, "writesame", false }, + { TEST_OP_DELETE, "delete", true }, + { TEST_OP_SNAP_CREATE, "snap_create", true }, + { TEST_OP_SNAP_REMOVE, "snap_remove", true }, + { TEST_OP_ROLLBACK, "rollback", true }, + { TEST_OP_SETATTR, "setattr", true }, + { TEST_OP_RMATTR, "rmattr", true }, + { TEST_OP_WATCH, "watch", true }, + { TEST_OP_COPY_FROM, "copy_from", true }, + { TEST_OP_HIT_SET_LIST, "hit_set_list", true }, + { TEST_OP_IS_DIRTY, "is_dirty", true }, + { TEST_OP_UNDIRTY, "undirty", true }, + { TEST_OP_CACHE_FLUSH, "cache_flush", true }, + { TEST_OP_CACHE_TRY_FLUSH, "cache_try_flush", true }, + { TEST_OP_CACHE_EVICT, "cache_evict", true }, + { TEST_OP_APPEND, "append", true }, + { TEST_OP_APPEND_EXCL, "append_excl", true }, + { TEST_OP_SET_REDIRECT, "set_redirect", true }, + { TEST_OP_UNSET_REDIRECT, "unset_redirect", true }, + { TEST_OP_CHUNK_READ, "chunk_read", true }, + { TEST_OP_TIER_PROMOTE, "tier_promote", true }, + { TEST_OP_TIER_FLUSH, "tier_flush", true }, + { TEST_OP_SET_CHUNK, "set_chunk", true }, + { TEST_OP_TIER_EVICT, "tier_evict", true }, + { TEST_OP_READ /* grr */, NULL }, + }; + + struct { + const char *name; + } chunk_algo_types[] = { + { "fastcdc" }, + { "fixcdc" }, + }; + + map<TestOpType, unsigned int> op_weights; + string pool_name = "rbd"; + string low_tier_pool_name = ""; + bool ec_pool = false; + bool no_omap = false; + bool no_sparse = false; + bool balance_reads = false; + bool localize_reads = false; + bool set_redirect = false; + bool set_chunk = false; + bool enable_dedup = false; + string chunk_algo = ""; + string chunk_size = ""; + + + for (int i = 1; i < argc; 
++i) { + if (strcmp(argv[i], "--max-ops") == 0) + ops = atoi(argv[++i]); + else if (strcmp(argv[i], "--pool") == 0) + pool_name = argv[++i]; + else if (strcmp(argv[i], "--max-seconds") == 0) + max_seconds = atoi(argv[++i]); + else if (strcmp(argv[i], "--objects") == 0) + objects = atoi(argv[++i]); + else if (strcmp(argv[i], "--max-in-flight") == 0) + max_in_flight = atoi(argv[++i]); + else if (strcmp(argv[i], "--size") == 0) + size = atoi(argv[++i]); + else if (strcmp(argv[i], "--min-stride-size") == 0) + min_stride_size = atoi(argv[++i]); + else if (strcmp(argv[i], "--max-stride-size") == 0) + max_stride_size = atoi(argv[++i]); + else if (strcmp(argv[i], "--no-omap") == 0) + no_omap = true; + else if (strcmp(argv[i], "--no-sparse") == 0) + no_sparse = true; + else if (strcmp(argv[i], "--balance-reads") == 0) + balance_reads = true; + else if (strcmp(argv[i], "--localize-reads") == 0) + localize_reads = true; + else if (strcmp(argv[i], "--pool-snaps") == 0) + pool_snaps = true; + else if (strcmp(argv[i], "--write-fadvise-dontneed") == 0) + write_fadvise_dontneed = true; + else if (strcmp(argv[i], "--ec-pool") == 0) { + if (!op_weights.empty()) { + cerr << "--ec-pool must be specified prior to any ops" << std::endl; + exit(1); + } + ec_pool = true; + no_omap = true; + no_sparse = true; + } else if (strcmp(argv[i], "--op") == 0) { + i++; + if (i == argc) { + cerr << "Missing op after --op" << std::endl; + return 1; + } + int j; + for (j = 0; op_types[j].name; ++j) { + if (strcmp(op_types[j].name, argv[i]) == 0) { + break; + } + } + if (!op_types[j].name) { + cerr << "unknown op " << argv[i] << std::endl; + exit(1); + } + i++; + if (i == argc) { + cerr << "Weight unspecified." << std::endl; + return 1; + } + int weight = atoi(argv[i]); + if (weight < 0) { + cerr << "Weights must be nonnegative." << std::endl; + return 1; + } else if (weight > 0) { + if (ec_pool && !op_types[j].ec_pool_valid) { + cerr << "Error: cannot use op type " << op_types[j].name + << " with --ec-pool" << std::endl; + exit(1); + } + cout << "adding op weight " << op_types[j].name << " -> " << weight << std::endl; + op_weights.insert(pair<TestOpType, unsigned int>(op_types[j].op, weight)); + } + } else if (strcmp(argv[i], "--set_redirect") == 0) { + set_redirect = true; + } else if (strcmp(argv[i], "--set_chunk") == 0) { + set_chunk = true; + } else if (strcmp(argv[i], "--low_tier_pool") == 0) { + /* + * disallow redirect or chunk object into the same pool + * to prevent the race. 
see https://github.com/ceph/ceph/pull/20096 + */ + low_tier_pool_name = argv[++i]; + } else if (strcmp(argv[i], "--enable_dedup") == 0) { + enable_dedup = true; + } else if (strcmp(argv[i], "--dedup_chunk_algo") == 0) { + i++; + if (i == argc) { + cerr << "Missing chunking algorithm after --dedup_chunk_algo" << std::endl; + return 1; + } + int j; + for (j = 0; chunk_algo_types[j].name; ++j) { + if (strcmp(chunk_algo_types[j].name, argv[i]) == 0) { + break; + } + } + if (!chunk_algo_types[j].name) { + cerr << "unknown op " << argv[i] << std::endl; + exit(1); + } + chunk_algo = chunk_algo_types[j].name; + } else if (strcmp(argv[i], "--dedup_chunk_size") == 0) { + chunk_size = argv[++i]; + } else { + cerr << "unknown arg " << argv[i] << std::endl; + exit(1); + } + } + + if (set_redirect || set_chunk) { + if (low_tier_pool_name == "") { + cerr << "low_tier_pool is needed" << std::endl; + exit(1); + } + } + + if (enable_dedup) { + if (chunk_algo == "" || chunk_size == "") { + cerr << "Missing chunking algorithm: " << chunk_algo + << " or chunking size: " << chunk_size << std::endl; + exit(1); + } + } + + if (op_weights.empty()) { + cerr << "No operations specified" << std::endl; + exit(1); + } + + if (min_stride_size < 0) + min_stride_size = size / 10; + if (max_stride_size < 0) + max_stride_size = size / 5; + + cout << pretty_version_to_str() << std::endl; + cout << "Configuration:" << std::endl + << "\tNumber of operations: " << ops << std::endl + << "\tNumber of objects: " << objects << std::endl + << "\tMax in flight operations: " << max_in_flight << std::endl + << "\tObject size (in bytes): " << size << std::endl + << "\tWrite stride min: " << min_stride_size << std::endl + << "\tWrite stride max: " << max_stride_size << std::endl; + + if (min_stride_size >= max_stride_size) { + cerr << "Error: max_stride_size must be more than min_stride_size" + << std::endl; + return 1; + } + + if (min_stride_size > size || max_stride_size > size) { + cerr << "Error: min_stride_size and max_stride_size must be " + << "smaller than object size" << std::endl; + return 1; + } + + if (max_in_flight * 2 > objects) { + cerr << "Error: max_in_flight must be <= than the number of objects / 2" + << std::endl; + return 1; + } + + char *id = getenv("CEPH_CLIENT_ID"); + RadosTestContext context( + pool_name, + max_in_flight, + size, + min_stride_size, + max_stride_size, + no_omap, + no_sparse, + pool_snaps, + write_fadvise_dontneed, + low_tier_pool_name, + enable_dedup, + chunk_algo, + chunk_size, + id); + + TestOpStat stats; + WeightedTestGenerator gen = WeightedTestGenerator( + ops, objects, + op_weights, &stats, max_seconds, + ec_pool, balance_reads, localize_reads, + set_redirect, set_chunk, enable_dedup); + int r = context.init(); + if (r < 0) { + cerr << "Error initializing rados test context: " + << cpp_strerror(r) << std::endl; + exit(1); + } + context.loop(&gen); + if (enable_dedup) { + if (!context.check_chunks_refcount(context.low_tier_io_ctx, context.io_ctx)) { + cerr << " Invalid refcount " << std::endl; + exit(1); + } + } + + context.shutdown(); + cerr << context.errors << " errors." 
<< std::endl; + cerr << stats << std::endl; + return 0; +} diff --git a/src/test/osd/ceph_test_osd_stale_read.cc b/src/test/osd/ceph_test_osd_stale_read.cc new file mode 100644 index 000000000..7ee1255ed --- /dev/null +++ b/src/test/osd/ceph_test_osd_stale_read.cc @@ -0,0 +1,177 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "gtest/gtest.h" + +#include "mds/mdstypes.h" +#include "include/buffer.h" +#include "include/rbd_types.h" +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "include/types.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/ceph_argparse.h" +#include "common/common_init.h" +#include "common/Cond.h" +#include "json_spirit/json_spirit.h" + +#include <errno.h> +#include <map> +#include <sstream> +#include <string> + +using namespace std; +using namespace librados; + +int get_primary_osd(Rados& rados, const string& pool_name, + const string& oid, int *pprimary) +{ + bufferlist inbl; + string cmd = string("{\"prefix\": \"osd map\",\"pool\":\"") + + pool_name + + string("\",\"object\": \"") + + oid + + string("\",\"format\": \"json\"}"); + bufferlist outbl; + if (int r = rados.mon_command(cmd, inbl, &outbl, nullptr); + r < 0) { + return r; + } + string outstr(outbl.c_str(), outbl.length()); + json_spirit::Value v; + if (!json_spirit::read(outstr, v)) { + cerr <<" unable to parse json " << outstr << std::endl; + return -1; + } + + json_spirit::Object& o = v.get_obj(); + for (json_spirit::Object::size_type i=0; i<o.size(); i++) { + json_spirit::Pair& p = o[i]; + if (p.name_ == "acting_primary") { + cout << "primary = " << p.value_.get_int() << std::endl; + *pprimary = p.value_.get_int(); + return 0; + } + } + cerr << "didn't find primary in " << outstr << std::endl; + return -1; +} + +int fence_osd(Rados& rados, int osd) +{ + bufferlist inbl, outbl; + string cmd("{\"prefix\": \"injectargs\",\"injected_args\":[" + "\"--ms-blackhole-osd\", " + "\"--ms-blackhole-mon\"]}"); + return rados.osd_command(osd, cmd, inbl, &outbl, NULL); +} + +int mark_down_osd(Rados& rados, int osd) +{ + bufferlist inbl, outbl; + string cmd("{\"prefix\": \"osd down\",\"ids\":[\"" + + stringify(osd) + "\"]}"); + return rados.mon_command(cmd, inbl, &outbl, NULL); +} + +TEST(OSD, StaleRead) { + // create two rados instances, one pool + Rados rados1, rados2; + IoCtx ioctx1, ioctx2; + int r; + + r = rados1.init_with_context(g_ceph_context); + ASSERT_EQ(0, r); + r = rados1.connect(); + ASSERT_EQ(0, r); + + srand(time(0)); + string pool_name = "read-hole-test-" + stringify(rand()); + r = rados1.pool_create(pool_name.c_str()); + ASSERT_EQ(0, r); + + r = rados1.ioctx_create(pool_name.c_str(), ioctx1); + ASSERT_EQ(0, r); + + r = rados2.init_with_context(g_ceph_context); + ASSERT_EQ(0, r); + r = rados2.connect(); + ASSERT_EQ(0, r); + r = rados2.ioctx_create(pool_name.c_str(), ioctx2); + ASSERT_EQ(0, r); + + string oid = "foo"; + bufferlist one; + one.append("one"); + { + cout << "client1: writing 'one'" << std::endl; + r = ioctx1.write_full(oid, one); + ASSERT_EQ(0, r); + } + + // make sure 2 can read it + { + cout << "client2: reading 'one'" << std::endl; + bufferlist bl; + r = ioctx2.read(oid, bl, 3, 0); + ASSERT_EQ(3, r); + ASSERT_EQ('o', bl[0]); + ASSERT_EQ('n', bl[1]); + ASSERT_EQ('e', bl[2]); + } + + // find the primary + int primary; + r = get_primary_osd(rados1, pool_name, oid, &primary); + ASSERT_EQ(0, r); + + // fence it + cout << 
"client1: fencing primary" << std::endl; + fence_osd(rados1, primary); + mark_down_osd(rados1, primary); + rados1.wait_for_latest_osdmap(); + + // should still be able to read the old value on 2 + { + cout << "client2: reading 'one' again from old primary" << std::endl; + bufferlist bl; + r = ioctx2.read(oid, bl, 3, 0); + ASSERT_EQ(3, r); + ASSERT_EQ('o', bl[0]); + ASSERT_EQ('n', bl[1]); + ASSERT_EQ('e', bl[2]); + } + + // update object on 1 + bufferlist two; + two.append("two"); + { + cout << "client1: writing 'two' to new acting set" << std::endl; + r = ioctx1.write_full(oid, two); + ASSERT_EQ(0, r); + } + + // make sure we can't still read the old value on 2 + { + cout << "client2: reading again from old primary" << std::endl; + bufferlist bl; + r = ioctx2.read(oid, bl, 3, 0); + ASSERT_EQ(3, r); + ASSERT_EQ('t', bl[0]); + ASSERT_EQ('w', bl[1]); + ASSERT_EQ('o', bl[2]); + } + + rados1.shutdown(); + rados2.shutdown(); +} + +int main(int argc, char **argv) { + auto args = argv_to_vec(argc, argv); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/test/osd/hitset.cc b/src/test/osd/hitset.cc new file mode 100644 index 000000000..6234bdaba --- /dev/null +++ b/src/test/osd/hitset.cc @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * Copyright 2013 Inktank + */ + +#include "gtest/gtest.h" +#include "osd/HitSet.h" +#include <iostream> + +class HitSetTestStrap { +public: + HitSet *hitset; + + explicit HitSetTestStrap(HitSet *h) : hitset(h) {} + + void fill(unsigned count) { + char buf[50]; + for (unsigned i = 0; i < count; ++i) { + sprintf(buf, "hitsettest_%u", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + hitset->insert(obj); + } + EXPECT_EQ(count, hitset->insert_count()); + } + void verify_fill(unsigned count) { + char buf[50]; + for (unsigned i = 0; i < count; ++i) { + sprintf(buf, "hitsettest_%u", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + EXPECT_TRUE(hitset->contains(obj)); + } + } + +}; + +class BloomHitSetTest : public testing::Test, public HitSetTestStrap { +public: + + BloomHitSetTest() : HitSetTestStrap(new HitSet(new BloomHitSet)) {} + + void rebuild(double fp, uint64_t target, uint64_t seed) { + BloomHitSet::Params *bparams = new BloomHitSet::Params(fp, target, seed); + HitSet::Params param(bparams); + HitSet new_set(param); + *hitset = new_set; + } + + BloomHitSet *get_hitset() { return static_cast<BloomHitSet*>(hitset->impl.get()); } +}; + +TEST_F(BloomHitSetTest, Params) { + BloomHitSet::Params params(0.01, 100, 5); + EXPECT_EQ(.01, params.get_fpp()); + EXPECT_EQ((unsigned)100, params.target_size); + EXPECT_EQ((unsigned)5, params.seed); + params.set_fpp(0.1); + EXPECT_EQ(0.1, params.get_fpp()); + + bufferlist bl; + params.encode(bl); + BloomHitSet::Params p2; + auto iter = bl.cbegin(); + p2.decode(iter); + EXPECT_EQ(0.1, p2.get_fpp()); + EXPECT_EQ((unsigned)100, p2.target_size); + EXPECT_EQ((unsigned)5, p2.seed); +} + +TEST_F(BloomHitSetTest, Construct) { + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_BLOOM); + // success! 
+} + +TEST_F(BloomHitSetTest, Rebuild) { + rebuild(0.1, 100, 1); + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_BLOOM); +} + +TEST_F(BloomHitSetTest, InsertsMatch) { + rebuild(0.1, 100, 1); + fill(50); + /* + * the approx unique count is atrocious on bloom filters. Empirical + * evidence suggests the current test will produce a value of 62 + * regardless of hitset size + */ + EXPECT_TRUE(hitset->approx_unique_insert_count() >= 50 && + hitset->approx_unique_insert_count() <= 62); + verify_fill(50); + EXPECT_FALSE(hitset->is_full()); +} + +TEST_F(BloomHitSetTest, FillsUp) { + rebuild(0.1, 20, 1); + fill(20); + verify_fill(20); + EXPECT_TRUE(hitset->is_full()); +} + +TEST_F(BloomHitSetTest, RejectsNoMatch) { + rebuild(0.001, 100, 1); + fill(100); + verify_fill(100); + EXPECT_TRUE(hitset->is_full()); + + char buf[50]; + int matches = 0; + for (int i = 100; i < 200; ++i) { + sprintf(buf, "hitsettest_%d", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + if (hitset->contains(obj)) + ++matches; + } + // we set a 1 in 1000 false positive; allow one in our 100 + EXPECT_LT(matches, 2); +} + +class ExplicitHashHitSetTest : public testing::Test, public HitSetTestStrap { +public: + + ExplicitHashHitSetTest() : HitSetTestStrap(new HitSet(new ExplicitHashHitSet)) {} + + ExplicitHashHitSet *get_hitset() { return static_cast<ExplicitHashHitSet*>(hitset->impl.get()); } +}; + +TEST_F(ExplicitHashHitSetTest, Construct) { + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_EXPLICIT_HASH); + // success! +} + +TEST_F(ExplicitHashHitSetTest, InsertsMatch) { + fill(50); + verify_fill(50); + EXPECT_EQ((unsigned)50, hitset->approx_unique_insert_count()); + EXPECT_FALSE(hitset->is_full()); +} + +TEST_F(ExplicitHashHitSetTest, RejectsNoMatch) { + fill(100); + verify_fill(100); + EXPECT_FALSE(hitset->is_full()); + + char buf[50]; + int matches = 0; + for (int i = 100; i < 200; ++i) { + sprintf(buf, "hitsettest_%d", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + if (hitset->contains(obj)) { + ++matches; + } + } + EXPECT_EQ(matches, 0); +} + +class ExplicitObjectHitSetTest : public testing::Test, public HitSetTestStrap { +public: + + ExplicitObjectHitSetTest() : HitSetTestStrap(new HitSet(new ExplicitObjectHitSet)) {} + + ExplicitObjectHitSet *get_hitset() { return static_cast<ExplicitObjectHitSet*>(hitset->impl.get()); } +}; + +TEST_F(ExplicitObjectHitSetTest, Construct) { + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_EXPLICIT_OBJECT); + // success! 
+} + +TEST_F(ExplicitObjectHitSetTest, InsertsMatch) { + fill(50); + verify_fill(50); + EXPECT_EQ((unsigned)50, hitset->approx_unique_insert_count()); + EXPECT_FALSE(hitset->is_full()); +} + +TEST_F(ExplicitObjectHitSetTest, RejectsNoMatch) { + fill(100); + verify_fill(100); + EXPECT_FALSE(hitset->is_full()); + + char buf[50]; + int matches = 0; + for (int i = 100; i < 200; ++i) { + sprintf(buf, "hitsettest_%d", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + if (hitset->contains(obj)) { + ++matches; + } + } + EXPECT_EQ(matches, 0); +} diff --git a/src/test/osd/osdcap.cc b/src/test/osd/osdcap.cc new file mode 100644 index 000000000..4d961a426 --- /dev/null +++ b/src/test/osd/osdcap.cc @@ -0,0 +1,1398 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2012 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <iostream> + +#include "include/stringify.h" +#include "osd/OSDCap.h" + +#include "gtest/gtest.h" + +using namespace std; + +const char *parse_good[] = { + "allow *", + "allow r", + "allow rwx", + "allow r pool foo ", + "allow r pool=foo", + "allow wx pool taco", + "allow pool foo r", + "allow pool taco wx", + "allow wx pool taco object_prefix obj", + "allow wx pool taco object_prefix obj_with_underscores_and_no_quotes", + "allow pool taco object_prefix obj wx", + "allow pool taco object_prefix obj_with_underscores_and_no_quotes wx", + "allow rwx pool 'weird name'", + "allow rwx pool \"weird name with ''s\"", + "allow rwx pool foo, allow r pool bar", + "allow rwx pool foo ; allow r pool bar", + "allow rwx pool foo ;allow r pool bar", + "allow rwx pool foo; allow r pool bar", + "allow pool foo rwx, allow pool bar r", + "allow pool foo.froo.foo rwx, allow pool bar r", + "allow pool foo rwx ; allow pool bar r", + "allow pool foo rwx ;allow pool bar r", + "allow pool foo rwx; allow pool bar r", + "allow pool data rw, allow pool rbd rwx, allow pool images class rbd foo", + "allow class-read", + "allow class-write", + "allow class-read class-write", + "allow r class-read pool foo", + "allow rw class-read class-write pool foo", + "allow r class-read pool foo", + "allow pool bar rwx; allow pool baz r class-read", + "allow class foo", + "allow class clsname \"clsthingidon'tunderstand\"", + " allow rwx pool foo; allow r pool bar ", + " allow rwx pool foo; allow r pool bar ", + " allow pool foo rwx; allow pool bar r ", + " allow pool foo rwx; allow pool bar r ", + " allow wx pool taco", + "\tallow\nwx\tpool \n taco\t", + "allow class-read object_prefix rbd_children, allow pool libvirt-pool-test rwx", + "allow class-read object_prefix rbd-children, allow pool libvirt_pool_test rwx", + "allow pool foo namespace nfoo rwx, allow pool bar namespace=nbar r", + "allow pool foo namespace=nfoo rwx ; allow pool bar namespace=nbar r", + "allow pool foo namespace nfoo rwx ;allow pool bar namespace nbar r", + "allow pool foo namespace=nfoo rwx; allow pool bar namespace nbar object_prefix rbd r", + "allow rwx namespace=nfoo tag cephfs data=cephfs_a", + "allow rwx namespace foo tag cephfs data =cephfs_a", + "allow pool foo namespace=nfoo* rwx", + "allow pool foo namespace=\"\" rwx; allow pool bar namespace='' object_prefix rbd r", + "allow pool foo namespace \"\" rwx; allow pool bar namespace '' 
object_prefix rbd r", + "profile abc, profile abc pool=bar, profile abc pool=bar namespace=foo", + "allow rwx tag application key=value", + "allow rwx tag application key = value", + "allow rwx tag application key =value", + "allow rwx tag application key= value", + "allow rwx tag application key = value", + "allow all tag application all=all", + "allow rwx network 127.0.0.1/8", + "allow rwx network ::1/128", + "allow rwx network [ff::1]/128", + "profile foo network 127.0.0.1/8", + "allow rwx namespace foo tag cephfs data =cephfs_a network 127.0.0.1/8", + "allow pool foo rwx network 1.2.3.4/24", + 0 +}; + +TEST(OSDCap, ParseGood) { + for (int i=0; parse_good[i]; i++) { + string str = parse_good[i]; + OSDCap cap; + std::cout << "Testing good input: '" << str << "'" << std::endl; + ASSERT_TRUE(cap.parse(str, &cout)); + } +} + +const char *parse_bad[] = { + "allow r poolfoo", + "allow r w", + "ALLOW r", + "allow rwx,", + "allow rwx x", + "allow r pool foo r", + "allow wwx pool taco", + "allow wwx pool taco^funny&chars", + "allow rwx pool 'weird name''", + "allow rwx object_prefix \"beforepool\" pool weird", + "allow rwx auid 123 pool asdf", + "allow xrwx pool foo,, allow r pool bar", + ";allow rwx pool foo rwx ; allow r pool bar", + "allow rwx pool foo ;allow r pool bar gibberish", + "allow rwx auid 123 pool asdf namespace=foo", + "allow rwx auid 123 namespace", + "allow rwx namespace", + "allow namespace", + "allow namespace=foo", + "allow namespace=f*oo", + "allow rwx auid 123 namespace asdf", + "allow wwx pool ''", + "allow rwx tag application key value", + "allow rwx auid 123", + "allow auid 123 rwx", + "allow r pool foo object_prefix blah ; allow w auid 5", + 0 +}; + +TEST(OSDCap, ParseBad) { + for (int i=0; parse_bad[i]; i++) { + string str = parse_bad[i]; + OSDCap cap; + std::cout << "Testing bad input: '" << str << "'" << std::endl; + ASSERT_FALSE(cap.parse(str, &cout)); + } +} + +TEST(OSDCap, AllowAll) { + OSDCap cap; + entity_addr_t addr; + ASSERT_FALSE(cap.allow_all()); + + ASSERT_TRUE(cap.parse("allow r", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow w", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow x", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rwx", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rw", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rx", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow wx", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow *", NULL)); + ASSERT_TRUE(cap.allow_all()); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + 
ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + // 'allow *' overrides allow list + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); +} + +TEST(OSDCap, AllowPool) { + OSDCap cap; + entity_addr_t addr; + bool r = cap.parse("allow rwx pool foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + // true->false for classes not on allow list + ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, AllowPools) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow rwx pool foo, allow r pool bar", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", 
"value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "ns", {}, "", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "ns", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); +} + +TEST(OSDCap, AllowPools2) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow r, allow rwx pool foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "", true, false, {}, addr)); +} + +TEST(OSDCap, ObjectPrefix) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow rwx object_prefix foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "_foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, " foo ", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "fo", true, true, {{"cls", "", true, true, true}}, 
addr)); +} + +TEST(OSDCap, ObjectPoolAndPrefix) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow rwx pool bar object_prefix foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "fo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, Namespace) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw namespace=nfoo")); + + ASSERT_TRUE(cap.is_capable("bar", "nfoo", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "nfoobar", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, NamespaceGlob) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw namespace=nfoo*")); + + ASSERT_TRUE(cap.is_capable("bar", "nfoo", {}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "nfoobar", {}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "nfo", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow r", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow w", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow x", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + 
ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + // true->false when class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicRW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, BasicRX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, true, true}}, addr)); + // true->false for class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow wx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + // true->false for class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + 
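+  // Note: the following is inferred from the assertions in this test file, not
+  // from OSDCap.h itself. The is_capable() calls in these tests follow the
+  // argument pattern: pool, namespace, pool application tags, object name,
+  // op-may-read, op-may-write, a list of {class, method, class-read,
+  // class-write, allow-listed} entries for any rados classes the op invokes,
+  // and the client address. For example, {{"cls", "", true, true, false}}
+  // models a read/write call into class "cls" that is not on the allow list.
+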
+TEST(OSDCap, BasicRWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false for class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); +} + +TEST(OSDCap, BasicRWClassRClassW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw class-read class-write", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); +} + +TEST(OSDCap, ClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class-read", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", 
true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, ClassW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class-write", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, ClassRW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class-read class-write", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, BasicRClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow r class-read", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, 
addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "any", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "any", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "any", {}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, PoolClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow pool bar r class-read, allow pool foo rwx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", 
"ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, false}}, 
addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); +} + +TEST(OSDCap, PoolClassRNS) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow pool bar namespace='' r class-read, allow pool foo namespace=ns rwx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", 
"other", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {{"cls", "", 
false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); +} + +TEST(OSDCap, NSClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow namespace '' rw class-read class-write, allow namespace test r", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, 
false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "test", {}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "test", {}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {{"application", {{"key", "value"}}}}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); +} + +TEST(OSDCap, PoolTagBasic) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application key=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", 
"", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", false, false, true}}, addr)); +} + +TEST(OSDCap, PoolTagWildK) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application *=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {}}}, "foo", true, 
true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, PoolTagWildV) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application key=*", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, PoolTagWildKV) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application *=*", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, NSPool) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx namespace ns tag application key=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns2", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value2"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + 
ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, NSPoolGlob) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx namespace ns* tag application key=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "ns2", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value2"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, OutputParsed) +{ + entity_addr_t addr; + struct CapsTest { + const char *input; + const char *output; + }; + CapsTest test_values[] = { + {"allow *", + "osdcap[grant(*)]"}, + {"allow r", + "osdcap[grant(r)]"}, + {"allow rx", + "osdcap[grant(rx)]"}, + {"allow rwx", + "osdcap[grant(rwx)]"}, + {"allow rw class-read class-write", + "osdcap[grant(rwx)]"}, + {"allow rw class-read", + "osdcap[grant(rw class-read)]"}, + {"allow rw class-write", + "osdcap[grant(rw class-write)]"}, + {"allow rwx pool images", + "osdcap[grant(pool images rwx)]"}, + {"allow r pool images", + "osdcap[grant(pool images r)]"}, + {"allow pool images rwx", + "osdcap[grant(pool images rwx)]"}, + {"allow pool images r", + "osdcap[grant(pool images r)]"}, + {"allow pool images w", + "osdcap[grant(pool images w)]"}, + {"allow pool images x", + "osdcap[grant(pool images x)]"}, + {"allow r pool images namespace ''", + "osdcap[grant(pool images namespace \"\" r)]"}, + {"allow r pool images namespace foo", + "osdcap[grant(pool images namespace foo r)]"}, + {"allow r pool images namespace \"\"", + "osdcap[grant(pool images namespace \"\" r)]"}, + {"allow r namespace foo", + "osdcap[grant(namespace foo r)]"}, + {"allow pool images r; allow pool rbd rwx", + "osdcap[grant(pool images r),grant(pool rbd rwx)]"}, + {"allow pool images r, allow pool rbd rwx", + "osdcap[grant(pool images r),grant(pool rbd rwx)]"}, + {"allow class-read object_prefix rbd_children, allow pool libvirt-pool-test rwx", + "osdcap[grant(object_prefix rbd_children class-read),grant(pool libvirt-pool-test rwx)]"}, + {"allow rwx tag application key=value", + "osdcap[grant(app application key key val value rwx)]"}, + {"allow rwx namespace ns* tag application key=value", + "osdcap[grant(namespace ns* app application key key val value rwx)]"}, + {"allow all", + "osdcap[grant(*)]"}, + {"allow rwx tag application all=all", + "osdcap[grant(app application key * val * rwx)]"}, + {"allow rwx network 1.2.3.4/24", + "osdcap[grant(rwx network 1.2.3.4/24)]"}, + }; + + size_t num_tests = sizeof(test_values) / sizeof(*test_values); + for (size_t i = 0; i < num_tests; ++i) { + OSDCap cap; + std::cout << "Testing input '" << test_values[i].input << "'" << std::endl; + ASSERT_TRUE(cap.parse(test_values[i].input)); + ASSERT_EQ(test_values[i].output, stringify(cap)); + } +} + +TEST(OSDCap, AllowClass) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo", NULL)); + + // can call any method on class foo regardless of allow list status + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", 
"", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}}, addr)); + + // does not permit invoking class bar + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); +} + +TEST(OSDCap, AllowClassMethod) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo xyz", NULL)); + + // can call the xyz method on class foo regardless of allow list status + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, true, false}}, addr)); + + // does not permit invoking class bar + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, true, false}}, addr)); +} + +TEST(OSDCap, AllowClass2) { + entity_addr_t addr; + OSDCap cap; 
+ ASSERT_TRUE(cap.parse("allow class foo, allow class bar", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); +} + +TEST(OSDCap, AllowClassRWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx, allow class foo", NULL)); + + // can call any method on class foo regardless of allow list status + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}}, addr)); + + // does not permit invoking class bar + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); + + // allows class bar if it is allow listed + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); +} + +TEST(OSDCap, AllowClassMulti) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo", NULL)); + + // can call any method on foo, but not bar, so the entire op is rejected + // bar with allow list is rejected because it still needs rwx/class-read,write + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", 
false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, 
false, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, 
{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr)); + + // these are OK because 'bar' is on the allow list BUT the calls don't read or write + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr)); + + // can call any method on foo or bar regardless of allow list status + OSDCap cap2; + ASSERT_TRUE(cap2.parse("allow class foo, allow class bar", NULL)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr)); + 
ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr)); + 
ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, false}}, addr)); + 
ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr)); +} + +TEST(OSDCap, AllowClassMultiRWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx, allow class foo", NULL)); + + // can call anything on foo, but only allow listed methods on bar + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr)); + + // fails because bar not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", 
true, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, 
{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr)); +} + +TEST(OSDCap, AllowProfile) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("profile read-only, profile read-write pool abc", NULL)); + ASSERT_FALSE(cap.allow_all()); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", false, true, {}, addr)); + + // RBD + cap.grants.clear(); + ASSERT_TRUE(cap.parse("profile rbd pool abc", NULL)); + ASSERT_FALSE(cap.allow_all()); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "rbd_children", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "rbd_children", false, false, + {{"rbd", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", true, true, + {{"rbd", "", true, true, true}}, addr)); + + cap.grants.clear(); + ASSERT_TRUE(cap.parse("profile rbd-read-only pool abc", NULL)); + ASSERT_FALSE(cap.allow_all()); + ASSERT_FALSE(cap.is_capable("foo", "", {}, 
"rbd_children", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", true, false, + {{"rbd", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("abc", "", {}, "asdf", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false, + {{"rbd", "child_attach", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false, + {{"rbd", "child_detach", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false, + {{"rbd", "other function", true, true, true}}, addr)); + + cap.grants.clear(); + ASSERT_TRUE(cap.parse("profile rbd pool pool1 namespace ns1", nullptr)); + ASSERT_TRUE(cap.is_capable("pool1", "", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_TRUE(cap.is_capable("pool1", "ns1", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool1", "ns2", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool2", "", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool1", "", {}, "asdf", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool1", "", {}, "rbd_info", false, false, + {{"rbd", "other_method", true, false, true}}, + addr)); + + cap.grants.clear(); + ASSERT_TRUE(cap.parse("profile rbd-read-only pool pool1 namespace ns1", + nullptr)); + ASSERT_TRUE(cap.is_capable("pool1", "", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_TRUE(cap.is_capable("pool1", "ns1", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool1", "ns2", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool2", "", {}, "rbd_info", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool1", "", {}, "asdf", false, false, + {{"rbd", "metadata_list", true, false, true}}, + addr)); + ASSERT_FALSE(cap.is_capable("pool1", "", {}, "rbd_info", false, false, + {{"rbd", "other_method", true, false, true}}, + addr)); +} + +TEST(OSDCap, network) { + entity_addr_t a, b, c; + a.parse("10.1.2.3"); + b.parse("192.168.2.3"); + c.parse("192.167.2.3"); + + OSDCap cap; + ASSERT_TRUE(cap.parse("allow * network 192.168.0.0/16, allow * network 10.0.0.0/8", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, a)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, b)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, c)); +} diff --git a/src/test/osd/safe-to-destroy.sh b/src/test/osd/safe-to-destroy.sh new file mode 100755 index 000000000..08afc8e8d --- /dev/null +++ b/src/test/osd/safe-to-destroy.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +set -e + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:$(get_unused_port)" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + set -e + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs 
; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_safe_to_destroy() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + run_osd $dir 3 + + flush_pg_stats + + ceph osd safe-to-destroy 0 + ceph osd safe-to-destroy 1 + ceph osd safe-to-destroy 2 + ceph osd safe-to-destroy 3 + + ceph osd pool create foo 128 + sleep 2 + flush_pg_stats + wait_for_clean + + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 0 + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 1 + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 2 + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 3 + + ceph osd out 0 + sleep 2 + flush_pg_stats + wait_for_clean + + ceph osd safe-to-destroy 0 + + # even osds without osd_stat are ok if all pgs are active+clean + id=`ceph osd create` + ceph osd safe-to-destroy $id +} + +function TEST_ok_to_stop() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + run_osd $dir 3 + + ceph osd pool create foo 128 + ceph osd pool set foo size 3 + ceph osd pool set foo min_size 2 + sleep 1 + flush_pg_stats + wait_for_clean + + ceph osd ok-to-stop 0 + ceph osd ok-to-stop 1 + ceph osd ok-to-stop 2 + ceph osd ok-to-stop 3 + expect_failure $dir bad_become_inactive ceph osd ok-to-stop 0 1 + + ceph osd pool set foo min_size 1 + sleep 1 + flush_pg_stats + wait_for_clean + ceph osd ok-to-stop 0 1 + ceph osd ok-to-stop 1 2 + ceph osd ok-to-stop 2 3 + ceph osd ok-to-stop 3 4 + expect_failure $dir bad_become_inactive ceph osd ok-to-stop 0 1 2 + expect_failure $dir bad_become_inactive ceph osd ok-to-stop 0 1 2 3 +} + +main safe-to-destroy "$@" diff --git a/src/test/osd/scrubber_generators.cc b/src/test/osd/scrubber_generators.cc new file mode 100644 index 000000000..0f2f371e7 --- /dev/null +++ b/src/test/osd/scrubber_generators.cc @@ -0,0 +1,168 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/osd/scrubber_generators.h" + +#include <fmt/ranges.h> + +using namespace ScrubGenerator; + +// ref: PGLogTestRebuildMissing() +bufferptr create_object_info(const ScrubGenerator::RealObj& objver) +{ + object_info_t oi{}; + oi.soid = objver.ghobj.hobj; + oi.version = eversion_t(objver.ghobj.generation, 0); + oi.size = objver.data.size; + + bufferlist bl; + oi.encode(bl, + 0 /*get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, nullptr)*/); + bufferptr bp(bl.c_str(), bl.length()); + return bp; +} + +std::pair<bufferptr, std::vector<snapid_t>> create_object_snapset( + const ScrubGenerator::RealObj& robj, + const SnapsetMockData* snapset_mock_data) +{ + if (!snapset_mock_data) { + return {bufferptr(), {}}; + } + /// \todo fill in missing version/osd details from the robj + auto sns = snapset_mock_data->make_snapset(); + bufferlist bl; + encode(sns, bl); + bufferptr bp = bufferptr(bl.c_str(), bl.length()); + + // extract the set of object snaps + return {bp, sns.snaps}; +} + +RealObjsConfList ScrubGenerator::make_real_objs_conf( + int64_t pool_id, + const RealObjsConf& blueprint, + std::vector<int32_t> active_osds) +{ + RealObjsConfList all_osds; + + for (auto osd : active_osds) { + RealObjsConfRef this_osd_fakes = std::make_unique<RealObjsConf>(blueprint); + // now - fix & corrupt every "object" in the blueprint + for (RealObj& robj : this_osd_fakes->objs) { + + robj.ghobj.hobj.pool = pool_id; + } + + all_osds[osd] = std::move(this_osd_fakes); + } + 
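+ // each OSD now holds its own mutable copy of the blueprint (with the pool id
+ // fixed up above); the per-OSD corruption recipes are only applied later, when
+ // add_object() builds the ScrubMap entries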
return all_osds; // reconsider (maybe add a move ctor?) +} + +///\todo dispose of the created buffer pointers + +ScrubGenerator::SmapEntry ScrubGenerator::make_smobject( + const ScrubGenerator::RealObj& blueprint, + int osd_num) +{ + ScrubGenerator::SmapEntry ret; + + ret.ghobj = blueprint.ghobj; + ret.smobj.attrs[OI_ATTR] = create_object_info(blueprint); + if (blueprint.snapset_mock_data) { + auto [bp, snaps] = + create_object_snapset(blueprint, blueprint.snapset_mock_data); + ret.smobj.attrs[SS_ATTR] = bp; + std::cout << fmt::format("{}: ({}) osd:{} snaps:{}", + __func__, + ret.ghobj.hobj, + osd_num, + snaps) + << std::endl; + } + + for (const auto& [at_k, at_v] : blueprint.data.attrs) { + ret.smobj.attrs[at_k] = ceph::buffer::copy(at_v.c_str(), at_v.size()); + { + // verifying (to be removed after dev phase) + auto bk = ret.smobj.attrs[at_k].begin_deep().get_ptr( + ret.smobj.attrs[at_k].length()); + std::string bkstr{bk.raw_c_str(), bk.raw_length()}; + std::cout << fmt::format("{}: verification: {}", __func__, bkstr) + << std::endl; + } + } + ret.smobj.size = blueprint.data.size; + ret.smobj.digest = blueprint.data.hash; + /// \todo handle the 'present' etc' + + ret.smobj.object_omap_keys = blueprint.data.omap.size(); + ret.smobj.object_omap_bytes = blueprint.data.omap_bytes; + return ret; +} + +all_clones_snaps_t ScrubGenerator::all_clones( + const ScrubGenerator::RealObj& head_obj) +{ + std::cout << fmt::format("{}: head_obj.ghobj.hobj:{}", + __func__, + head_obj.ghobj.hobj) + << std::endl; + + std::map<hobject_t, std::vector<snapid_t>> ret; + + for (const auto& clone : head_obj.snapset_mock_data->clones) { + auto clone_set_it = head_obj.snapset_mock_data->clone_snaps.find(clone); + if (clone_set_it == head_obj.snapset_mock_data->clone_snaps.end()) { + std::cout << "note: no clone_snaps for " << clone << std::endl; + continue; + } + auto clone_set = clone_set_it->second; + hobject_t clone_hobj{head_obj.ghobj.hobj}; + clone_hobj.snap = clone; + + ret[clone_hobj] = clone_set_it->second; + std::cout << fmt::format("{}: clone:{} clone_set:{}", + __func__, + clone_hobj, + clone_set) + << std::endl; + } + + return ret; +} + +void ScrubGenerator::add_object(ScrubMap& map, + const ScrubGenerator::RealObj& real_obj, + int osd_num) +{ + // do we have data corruption recipe for this OSD? 
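+ // (a CorruptFunc takes the blueprint object plus the OSD number and returns a
+ // possibly-modified copy; corrupt_object_size() in scrubber_test_datasets.cc is
+ // a non-trivial example, while crpt_do_nothing below is the identity)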
+ /// \todo c++20: use contains() + CorruptFunc relevant_fix = crpt_do_nothing; + + auto p = real_obj.corrupt_funcs->find(osd_num); + if (p != real_obj.corrupt_funcs->end()) { + // yes, we have a corruption recepie for this OSD + // \todo c++20: use at() + relevant_fix = p->second; + } + + // create a possibly-corrupted copy of the "real object" + auto modified_obj = (relevant_fix)(real_obj, osd_num); + + std::cout << fmt::format("{}: modified: osd:{} ho:{} key:{}", + __func__, + osd_num, + modified_obj.ghobj.hobj, + modified_obj.ghobj.hobj.get_key()) + << std::endl; + + auto entry = make_smobject(modified_obj, osd_num); + std::cout << fmt::format("{}: osd:{} smap entry: {} {}", + __func__, + osd_num, + entry.smobj.size, + entry.smobj.attrs.size()) + << std::endl; + map.objects[entry.ghobj.hobj] = entry.smobj; +} diff --git a/src/test/osd/scrubber_generators.h b/src/test/osd/scrubber_generators.h new file mode 100644 index 000000000..d0cbb22c4 --- /dev/null +++ b/src/test/osd/scrubber_generators.h @@ -0,0 +1,266 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +/// \file generating scrub-related maps & objects for unit tests + +#include <functional> +#include <map> +#include <sstream> +#include <string> +#include <variant> +#include <vector> + +#include "include/buffer.h" +#include "include/buffer_raw.h" +#include "include/object_fmt.h" +#include "osd/osd_types_fmt.h" +#include "osd/scrubber/pg_scrubber.h" + +namespace ScrubGenerator { + +/// \todo enhance the MockLog to capture the log messages +class MockLog : public LoggerSinkSet { + public: + void debug(std::stringstream& s) final + { + std::cout << "\n<<debug>> " << s.str() << std::endl; + } + void info(std::stringstream& s) final + { + std::cout << "\n<<info>> " << s.str() << std::endl; + } + void sec(std::stringstream& s) final + { + std::cout << "\n<<sec>> " << s.str() << std::endl; + } + void warn(std::stringstream& s) final + { + std::cout << "\n<<warn>> " << s.str() << std::endl; + } + void error(std::stringstream& s) final + { + err_count++; + std::cout << "\n<<error>> " << s.str() << std::endl; + } + OstreamTemp info() final { return OstreamTemp(CLOG_INFO, this); } + OstreamTemp warn() final { return OstreamTemp(CLOG_WARN, this); } + OstreamTemp error() final { return OstreamTemp(CLOG_ERROR, this); } + OstreamTemp sec() final { return OstreamTemp(CLOG_ERROR, this); } + OstreamTemp debug() final { return OstreamTemp(CLOG_DEBUG, this); } + + void do_log(clog_type prio, std::stringstream& ss) final + { + switch (prio) { + case CLOG_DEBUG: + debug(ss); + break; + case CLOG_INFO: + info(ss); + break; + case CLOG_SEC: + sec(ss); + break; + case CLOG_WARN: + warn(ss); + break; + case CLOG_ERROR: + default: + error(ss); + break; + } + } + + void do_log(clog_type prio, const std::string& ss) final + { + switch (prio) { + case CLOG_DEBUG: + debug() << ss; + break; + case CLOG_INFO: + info() << ss; + break; + case CLOG_SEC: + sec() << ss; + break; + case CLOG_WARN: + warn() << ss; + break; + case CLOG_ERROR: + default: + error() << ss; + break; + } + } + + virtual ~MockLog() {} + + int err_count{0}; + int expected_err_count{0}; + void set_expected_err_count(int c) { expected_err_count = c; } +}; + +// ///////////////////////////////////////////////////////////////////////// // +// ///////////////////////////////////////////////////////////////////////// // + +struct pool_conf_t { + int pg_num{3}; + int pgp_num{3}; + int size{3}; + int min_size{3}; + std::string 
name{"rep_pool"}; +}; + +using attr_t = std::map<std::string, std::string>; + +using all_clones_snaps_t = std::map<hobject_t, std::vector<snapid_t>>; + +struct RealObj; + +// a function to manipulate (i.e. corrupt) an object in a specific OSD +using CorruptFunc = + std::function<RealObj(const RealObj& s, [[maybe_unused]] int osd_num)>; +using CorruptFuncList = std::map<int, CorruptFunc>; // per OSD + +struct SnapsetMockData { + + using CookedCloneSnaps = + std::tuple<std::map<snapid_t, uint64_t>, + std::map<snapid_t, std::vector<snapid_t>>, + std::map<snapid_t, interval_set<uint64_t>>>; + + // an auxiliary function to cook the data for the SnapsetMockData + using clone_snaps_cooker = CookedCloneSnaps (*)(); + + snapid_t seq; + std::vector<snapid_t> snaps; // descending + std::vector<snapid_t> clones; // ascending + + std::map<snapid_t, interval_set<uint64_t>> clone_overlap; // overlap w/ next + // newest + std::map<snapid_t, uint64_t> clone_size; + std::map<snapid_t, std::vector<snapid_t>> clone_snaps; // descending + + + SnapsetMockData(snapid_t seq, + std::vector<snapid_t> snaps, + std::vector<snapid_t> clones, + std::map<snapid_t, interval_set<uint64_t>> clone_overlap, + std::map<snapid_t, uint64_t> clone_size, + std::map<snapid_t, std::vector<snapid_t>> clone_snaps) + : seq(seq) + , snaps(snaps) + , clones(clones) + , clone_overlap(clone_overlap) + , clone_size(clone_size) + , clone_snaps(clone_snaps) + {} + + SnapsetMockData(snapid_t seq, + std::vector<snapid_t> snaps, + std::vector<snapid_t> clones, + clone_snaps_cooker func) + : seq{seq} + , snaps{snaps} + , clones(clones) + { + auto [clone_size_, clone_snaps_, clone_overlap_] = func(); + clone_size = clone_size_; + clone_snaps = clone_snaps_; + clone_overlap = clone_overlap_; + } + + SnapSet make_snapset() const + { + SnapSet ss; + ss.seq = seq; + ss.snaps = snaps; + ss.clones = clones; + ss.clone_overlap = clone_overlap; + ss.clone_size = clone_size; + ss.clone_snaps = clone_snaps; + return ss; + } +}; + +// an object in our "DB" - with its versioned snaps, "data" (size and hash), +// and "omap" (size and hash) + +struct RealData { + // not needed at this level of "data falsification": std::byte data; + uint64_t size; + uint32_t hash; + uint32_t omap_digest; + uint32_t omap_bytes; + attr_t omap; + attr_t attrs; +}; + +struct RealObj { + // the ghobject - oid, version, snap, hash, pool + ghobject_t ghobj; + RealData data; + const CorruptFuncList* corrupt_funcs; + const SnapsetMockData* snapset_mock_data; +}; + +static inline RealObj crpt_do_nothing(const RealObj& s, int osdn) +{ + return s; +} + +struct SmapEntry { + ghobject_t ghobj; + ScrubMap::object smobj; +}; + + +ScrubGenerator::SmapEntry make_smobject( + const ScrubGenerator::RealObj& blueprint, // the whole set of versions + int osd_num); + + +/** + * returns the object's snap-set + */ +void add_object(ScrubMap& map, const RealObj& obj_versions, int osd_num); + +struct RealObjsConf { + std::vector<RealObj> objs; +}; + +using RealObjsConfRef = std::unique_ptr<RealObjsConf>; + +// RealObjsConf will be "developed" into the following of per-osd sets, +// now with the correct pool ID, and with the corrupting functions +// activated on the data +using RealObjsConfList = std::map<int, RealObjsConfRef>; + +RealObjsConfList make_real_objs_conf(int64_t pool_id, + const RealObjsConf& blueprint, + std::vector<int32_t> active_osds); + +/** + * create the snap-ids set for all clones appearing in the head + * object's snapset (those will be injected into the scrubber's mock, + * to be 
used as the 'snap_mapper') + */ +all_clones_snaps_t all_clones(const RealObj& head_obj); +} // namespace ScrubGenerator + +template <> +struct fmt::formatter<ScrubGenerator::RealObj> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const ScrubGenerator::RealObj& rlo, FormatContext& ctx) + { + using namespace ScrubGenerator; + return fmt::format_to(ctx.out(), + "RealObj(gh:{}, dt:{}, snaps:{})", + rlo.ghobj, + rlo.data.size, + (rlo.snapset_mock_data ? rlo.snapset_mock_data->snaps + : std::vector<snapid_t>{})); + } +}; diff --git a/src/test/osd/scrubber_test_datasets.cc b/src/test/osd/scrubber_test_datasets.cc new file mode 100644 index 000000000..478fd25fe --- /dev/null +++ b/src/test/osd/scrubber_test_datasets.cc @@ -0,0 +1,120 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/// \file data-sets used by the scrubber unit tests + +#include "./scrubber_test_datasets.h" + + +using namespace ScrubGenerator; +using namespace std::string_literals; + +namespace ScrubDatasets { + +static RealObj corrupt_object_size(const RealObj& s, [[maybe_unused]] int osdn) +{ + RealObj ret = s; + ret.data.size = s.data.size + 1; + return ret; +} + +static RealObj corrupt_nothing(const RealObj& s, int osdn) +{ + return s; +} + + +static CorruptFuncList crpt_funcs_set0 = {{0, &corrupt_nothing}}; + +CorruptFuncList crpt_funcs_set1 = {{0, &corrupt_object_size}, + {1, &corrupt_nothing}}; + + +// object with head & two snaps + +static hobject_t hobj_ms1{object_t{"hobj_ms1"}, + "keykey", // key + CEPH_NOSNAP, // snap_id + 0, // hash + 0, // pool + ""s}; // nspace + +SnapsetMockData::CookedCloneSnaps ms1_fn() +{ + std::map<snapid_t, uint64_t> clnsz; + clnsz[0x20] = 222; + clnsz[0x30] = 333; + + std::map<snapid_t, std::vector<snapid_t>> clnsn; + clnsn[0x20] = {0x20}; + clnsn[0x30] = {0x30}; + + std::map<snapid_t, interval_set<uint64_t>> overlaps; + overlaps[0x20] = {}; + overlaps[0x30] = {}; + return {clnsz, clnsn, overlaps}; +} + +static SnapsetMockData hobj_ms1_snapset{/* seq */ 0x40, + /* snaps */ {0x30, 0x20}, + /* clones */ {0x20, 0x30}, + ms1_fn}; + +hobject_t hobj_ms1_snp30{object_t{"hobj_ms1"}, + "keykey", // key + 0x30, // snap_id + 0, // hash + 0, // pool + ""s}; // nspace + +static hobject_t hobj_ms1_snp20{object_t{"hobj_ms1"}, + "keykey", // key + 0x20, // snap_id + 0, // hash + 0, // pool + ""s}; // nspace + + +ScrubGenerator::RealObjsConf minimal_snaps_configuration{ + /* RealObjsConf::objs */ { + + /* Clone 30 */ { + ghobject_t{hobj_ms1_snp30, 0, shard_id_t{0}}, + RealData{ + 333, + 0x17, + 17, + 21, + attr_t{/*{"_om1k", "om1v"}, {"om1k", "om1v"},*/ {"om3k", "om3v"}}, + attr_t{{"_at1k", "_at1v"}, {"_at2k", "at2v"}, {"at3k", "at3v"}}}, + &crpt_funcs_set0, + nullptr}, + + /* Clone 20 */ + {ghobject_t{hobj_ms1_snp20, 0, shard_id_t{0}}, + RealData{222, + 0x17, + 17, + 21, + attr_t{/*{"_om1k", "om1v"}, {"om1k", "om1v"},*/ {"om3k", "om3v"}}, + attr_t{{"_at1k", "_at1v"}, {"_at2k", "at2v"}, {"at3k", "at3v"}}}, + &crpt_funcs_set0, + nullptr}, + + /* Head */ + {ghobject_t{hobj_ms1, 0, shard_id_t{0}}, + RealData{100, + 0x17, + 17, + 21, + attr_t{{"_om1k", "om1v"}, {"om1k", "om1v"}, {"om3k", "om3v"}}, + attr_t{{"_at1k", "_at1v"}, {"_at2k", "at2v"}, {"at3k", "at3v"}} + + + }, + &crpt_funcs_set0, + &hobj_ms1_snapset}} + +}; + +} // namespace ScrubDatasets diff --git a/src/test/osd/scrubber_test_datasets.h b/src/test/osd/scrubber_test_datasets.h new file mode 100644 index 
000000000..181528568 --- /dev/null +++ b/src/test/osd/scrubber_test_datasets.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +/// \file data-sets used by the scrubber unit tests + +#include "./scrubber_generators.h" + +namespace ScrubDatasets { +/* + * Two objects with some clones. No inconsitencies. + */ +extern ScrubGenerator::RealObjsConf minimal_snaps_configuration; + +// and a part of this configuration, one that we will corrupt in a test: +extern hobject_t hobj_ms1_snp30; + +// a manipulation set used in TestTScrubberBe_data_2: +extern ScrubGenerator::CorruptFuncList crpt_funcs_set1; + +} // namespace ScrubDatasets diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc new file mode 100644 index 000000000..98669667a --- /dev/null +++ b/src/test/osd/test_ec_transaction.cc @@ -0,0 +1,124 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <gtest/gtest.h> +#include "osd/PGTransaction.h" +#include "osd/ECTransaction.h" + +#include "test/unit.cc" + +struct mydpp : public DoutPrefixProvider { + std::ostream& gen_prefix(std::ostream& out) const override { return out << "foo"; } + CephContext *get_cct() const override { return g_ceph_context; } + unsigned get_subsys() const override { return ceph_subsys_osd; } +} dpp; + +#define dout_context g_ceph_context + +TEST(ectransaction, two_writes_separated) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + t->create(h); + a.append_zero(565760); + t->write(h, 0, a.length(), a, 0); + b.append_zero(2437120); + t->write(h, 669856, b.length(), b, 0); + + ECUtil::stripe_info_t sinfo(2, 8192); + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} + +TEST(ectransaction, two_writes_nearby) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + t->create(h); + + // two nearby writes, both partly touching the same 8192-byte stripe + ECUtil::stripe_info_t sinfo(2, 8192); + a.append_zero(565760); + t->write(h, 0, a.length(), a, 0); + b.append_zero(2437120); + t->write(h, 569856, b.length(), b, 0); + + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} + +TEST(ectransaction, many_writes) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + a.append_zero(512); + b.append_zero(4096); + t->create(h); + + ECUtil::stripe_info_t sinfo(2, 8192); + // write 2801664~512 + // write 2802176~512 + // write 2802688~512 + // write 2803200~512 + t->write(h, 2801664, 
a.length(), a, 0); + t->write(h, 2802176, a.length(), a, 0); + t->write(h, 2802688, a.length(), a, 0); + t->write(h, 2803200, a.length(), a, 0); + + // write 2805760~4096 + // write 2809856~4096 + // write 2813952~4096 + t->write(h, 2805760, b.length(), b, 0); + t->write(h, 2809856, b.length(), b, 0); + t->write(h, 2813952, b.length(), b, 0); + + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} diff --git a/src/test/osd/test_extent_cache.cc b/src/test/osd/test_extent_cache.cc new file mode 100644 index 000000000..9c789ca32 --- /dev/null +++ b/src/test/osd/test_extent_cache.cc @@ -0,0 +1,282 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#include <gtest/gtest.h> +#include "osd/ExtentCache.h" +#include <iostream> + +using namespace std; + +extent_map imap_from_vector(vector<pair<uint64_t, uint64_t> > &&in) +{ + extent_map out; + for (auto &&tup: in) { + bufferlist bl; + bl.append_zero(tup.second); + out.insert(tup.first, bl.length(), bl); + } + return out; +} + +extent_map imap_from_iset(const extent_set &set) +{ + extent_map out; + for (auto &&iter: set) { + bufferlist bl; + bl.append_zero(iter.second); + out.insert(iter.first, iter.second, bl); + } + return out; +} + +extent_set iset_from_vector(vector<pair<uint64_t, uint64_t> > &&in) +{ + extent_set out; + for (auto &&tup: in) { + out.insert(tup.first, tup.second); + } + return out; +} + +TEST(extentcache, simple_write) +{ + hobject_t oid; + + ExtentCache c; + ExtentCache::write_pin pin; + c.open_write_pin(pin); + + auto to_read = iset_from_vector( + {{0, 2}, {8, 2}, {20, 2}}); + auto to_write = iset_from_vector( + {{0, 10}, {20, 4}}); + auto must_read = c.reserve_extents_for_rmw( + oid, pin, to_write, to_read); + ASSERT_EQ( + must_read, + to_read); + + c.print(std::cerr); + + auto got = imap_from_iset(must_read); + auto pending_read = to_read; + pending_read.subtract(must_read); + + auto pending = c.get_remaining_extents_for_rmw( + oid, + pin, + pending_read); + ASSERT_TRUE(pending.empty()); + + auto write_map = imap_from_iset(to_write); + c.present_rmw_update( + oid, + pin, + write_map); + + c.release_write_pin(pin); +} + +TEST(extentcache, write_write_overlap) +{ + hobject_t oid; + + ExtentCache c; + ExtentCache::write_pin pin; + c.open_write_pin(pin); + + // start write 1 + auto to_read = iset_from_vector( + {{0, 2}, {8, 2}, {20, 2}}); + auto to_write = iset_from_vector( + {{0, 10}, {20, 4}}); + auto must_read = c.reserve_extents_for_rmw( + oid, pin, to_write, to_read); + ASSERT_EQ( + must_read, + to_read); + + c.print(std::cerr); + + // start write 2 + ExtentCache::write_pin pin2; + c.open_write_pin(pin2); + auto to_read2 = iset_from_vector( + {{2, 4}, {10, 4}, {18, 4}}); + auto to_write2 = iset_from_vector( + {{2, 12}, {18, 12}}); + auto must_read2 = c.reserve_extents_for_rmw( + oid, pin2, to_write2, to_read2); + ASSERT_EQ( + must_read2, + 
iset_from_vector({{10, 4}, {18, 2}})); + + c.print(std::cerr); + + // complete read for write 1 and start commit + auto got = imap_from_iset(must_read); + auto pending_read = to_read; + pending_read.subtract(must_read); + auto pending = c.get_remaining_extents_for_rmw( + oid, + pin, + pending_read); + ASSERT_TRUE(pending.empty()); + + auto write_map = imap_from_iset(to_write); + c.present_rmw_update( + oid, + pin, + write_map); + + c.print(std::cerr); + + // complete read for write 2 and start commit + auto pending_read2 = to_read2; + pending_read2.subtract(must_read2); + auto pending2 = c.get_remaining_extents_for_rmw( + oid, + pin2, + pending_read2); + ASSERT_EQ( + pending2, + imap_from_iset(pending_read2)); + + auto write_map2 = imap_from_iset(to_write2); + c.present_rmw_update( + oid, + pin2, + write_map2); + + c.print(std::cerr); + + c.release_write_pin(pin); + + c.print(std::cerr); + + c.release_write_pin(pin2); +} + +TEST(extentcache, write_write_overlap2) +{ + hobject_t oid; + + ExtentCache c; + ExtentCache::write_pin pin; + c.open_write_pin(pin); + + // start write 1 + auto to_read = extent_set(); + auto to_write = iset_from_vector( + {{659456, 4096}}); + auto must_read = c.reserve_extents_for_rmw( + oid, pin, to_write, to_read); + ASSERT_EQ( + must_read, + to_read); + + c.print(std::cerr); + + // start write 2 + ExtentCache::write_pin pin2; + c.open_write_pin(pin2); + auto to_read2 = extent_set(); + auto to_write2 = iset_from_vector( + {{663552, 4096}}); + auto must_read2 = c.reserve_extents_for_rmw( + oid, pin2, to_write2, to_read2); + ASSERT_EQ( + must_read2, + to_read2); + + + // start write 3 + ExtentCache::write_pin pin3; + c.open_write_pin(pin3); + auto to_read3 = iset_from_vector({{659456, 8192}}); + auto to_write3 = iset_from_vector({{659456, 8192}}); + auto must_read3 = c.reserve_extents_for_rmw( + oid, pin3, to_write3, to_read3); + ASSERT_EQ( + must_read3, + extent_set()); + + c.print(std::cerr); + + // complete read for write 1 and start commit + auto got = imap_from_iset(must_read); + auto pending_read = to_read; + pending_read.subtract(must_read); + auto pending = c.get_remaining_extents_for_rmw( + oid, + pin, + pending_read); + ASSERT_TRUE(pending.empty()); + + auto write_map = imap_from_iset(to_write); + c.present_rmw_update( + oid, + pin, + write_map); + + c.print(std::cerr); + + // complete read for write 2 and start commit + auto pending_read2 = to_read2; + pending_read2.subtract(must_read2); + auto pending2 = c.get_remaining_extents_for_rmw( + oid, + pin2, + pending_read2); + ASSERT_EQ( + pending2, + imap_from_iset(pending_read2)); + + auto write_map2 = imap_from_iset(to_write2); + c.present_rmw_update( + oid, + pin2, + write_map2); + + // complete read for write 2 and start commit + auto pending_read3 = to_read3; + pending_read3.subtract(must_read3); + auto pending3 = c.get_remaining_extents_for_rmw( + oid, + pin3, + pending_read3); + ASSERT_EQ( + pending3, + imap_from_iset(pending_read3)); + + auto write_map3 = imap_from_iset(to_write3); + c.present_rmw_update( + oid, + pin3, + write_map3); + + + c.print(std::cerr); + + c.release_write_pin(pin); + + c.print(std::cerr); + + c.release_write_pin(pin2); + + c.print(std::cerr); + + c.release_write_pin(pin3); +} diff --git a/src/test/osd/test_pg_transaction.cc b/src/test/osd/test_pg_transaction.cc new file mode 100644 index 000000000..6aa26920d --- /dev/null +++ b/src/test/osd/test_pg_transaction.cc @@ -0,0 +1,131 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab 
+/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <gtest/gtest.h> +#include "osd/PGTransaction.h" + +using namespace std; + +TEST(pgtransaction, simple) +{ + hobject_t h; + PGTransaction t; + ASSERT_TRUE(t.empty()); + t.nop(h); + ASSERT_FALSE(t.empty()); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + ASSERT_EQ(p.first, h); + using T = PGTransaction::ObjectOperation::Init; + ASSERT_TRUE(boost::get<T::None>(&p.second.init_type)); + ++num; + }); + ASSERT_EQ(num, 1u); +} + +TEST(pgtransaction, clone_safe_create_traverse) +{ + hobject_t h, h2; + h2.snap = 1; + PGTransaction t; + ASSERT_TRUE(t.empty()); + t.nop(h2); + ASSERT_FALSE(t.empty()); + t.clone(h, h2); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + using T = PGTransaction::ObjectOperation::Init; + if (num == 0) { + ASSERT_EQ(p.first, h); + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h2); + } else if (num == 1) { + ASSERT_EQ(p.first, h2); + ASSERT_TRUE(boost::get<T::None>(&p.second.init_type)); + } else { + ASSERT_LT(num, 2u); + } + ++num; + }); +} + +TEST(pgtransaction, clone_safe_create_traverse2) +{ + hobject_t h, h2, h3; + h.snap = 10; + h2.snap = 5; + h3.snap = 3; + PGTransaction t; + ASSERT_TRUE(t.empty()); + t.nop(h3); + ASSERT_FALSE(t.empty()); + t.clone(h, h2); + t.remove(h2); + t.clone(h2, h3); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + using T = PGTransaction::ObjectOperation::Init; + if (num == 0) { + ASSERT_EQ(p.first, h); + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h2); + } else if (num == 1) { + ASSERT_EQ(p.first, h2); + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h3); + } else if (num == 2) { + ASSERT_EQ(p.first, h3); + ASSERT_TRUE(boost::get<T::None>(&p.second.init_type)); + } else { + ASSERT_LT(num, 3u); + } + ++num; + }); +} + +TEST(pgtransaction, clone_safe_create_traverse3) +{ + hobject_t h, h2, h3; + h.snap = 10; + h2.snap = 5; + h3.snap = 3; + PGTransaction t; + t.remove(h); + t.remove(h2); + t.clone(h2, h3); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + using T = PGTransaction::ObjectOperation::Init; + if (p.first == h) { + ASSERT_TRUE(p.second.is_delete()); + } else if (p.first == h2) { + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h3); + } + ASSERT_LT(num, 2u); + ++num; + }); +} diff --git a/src/test/osd/test_scrub_sched.cc b/src/test/osd/test_scrub_sched.cc new file mode 100644 index 000000000..efd7ba324 --- /dev/null +++ b/src/test/osd/test_scrub_sched.cc @@ -0,0 +1,402 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/// \file testing the scrub scheduling algorithm + +#include <gtest/gtest.h> + +#include <algorithm> +#include <map> + +#include "common/async/context_pool.h" 
+#include "common/ceph_argparse.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "include/utime_fmt.h" +#include "mon/MonClient.h" +#include "msg/Messenger.h" +#include "os/ObjectStore.h" +#include "osd/PG.h" +#include "osd/osd_types.h" +#include "osd/osd_types_fmt.h" +#include "osd/scrubber/osd_scrub_sched.h" +#include "osd/scrubber_common.h" + +int main(int argc, char** argv) +{ + std::map<std::string, std::string> defaults = { + // make sure we have 3 copies, or some tests won't work + {"osd_pool_default_size", "3"}, + // our map is flat, so just try and split across OSDs, not hosts or whatever + {"osd_crush_chooseleaf_type", "0"}, + }; + std::vector<const char*> args(argv, argv + argc); + auto cct = global_init(&defaults, + args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +using schedule_result_t = Scrub::schedule_result_t; +using ScrubJobRef = ScrubQueue::ScrubJobRef; +using qu_state_t = ScrubQueue::qu_state_t; + +/// enabling access into ScrubQueue internals +class ScrubSchedTestWrapper : public ScrubQueue { + public: + ScrubSchedTestWrapper(Scrub::ScrubSchedListener& osds) + : ScrubQueue(g_ceph_context, osds) + {} + + void rm_unregistered_jobs() + { + ScrubQueue::rm_unregistered_jobs(to_scrub); + ScrubQueue::rm_unregistered_jobs(penalized); + } + + ScrubQContainer collect_ripe_jobs() + { + return ScrubQueue::collect_ripe_jobs(to_scrub, time_now()); + } + + /** + * unit-test support for faking the current time. When + * not activated specifically - the default is to use ceph_clock_now() + */ + void set_time_for_testing(long faked_now) + { + m_time_for_testing = utime_t{timeval{faked_now}}; + } + void clear_time_for_testing() { m_time_for_testing.reset(); } + mutable std::optional<utime_t> m_time_for_testing; + + utime_t time_now() const final + { + if (m_time_for_testing) { + m_time_for_testing->tv.tv_nsec += 1'000'000; + } + return m_time_for_testing.value_or(ceph_clock_now()); + } + + ~ScrubSchedTestWrapper() override = default; +}; + + +/** + * providing the small number of OSD services used when scheduling + * a scrub + */ +class FakeOsd : public Scrub::ScrubSchedListener { + public: + FakeOsd(int osd_num) : m_osd_num(osd_num) {} + + int get_nodeid() const final { return m_osd_num; } + + schedule_result_t initiate_a_scrub(spg_t pgid, + bool allow_requested_repair_only) final + { + std::ignore = allow_requested_repair_only; + auto res = m_next_response.find(pgid); + if (res == m_next_response.end()) { + return schedule_result_t::no_such_pg; + } + return m_next_response[pgid]; + } + + void set_initiation_response(spg_t pgid, schedule_result_t result) + { + m_next_response[pgid] = result; + } + + private: + int m_osd_num; + std::map<spg_t, schedule_result_t> m_next_response; +}; + + +/// the static blueprint for creating a scrub job in the scrub queue +struct sjob_config_t { + spg_t spg; + bool are_stats_valid; + + utime_t history_scrub_stamp; + std::optional<double> pool_conf_min; + std::optional<double> pool_conf_max; + bool is_must; + bool is_need_auto; + ScrubQueue::scrub_schedule_t initial_schedule; +}; + + +/** + * the runtime configuration for a scrub job. 
Created basde on the blueprint + * above (sjob_config_t) + */ +struct sjob_dynamic_data_t { + sjob_config_t initial_config; + pg_info_t mocked_pg_info; + pool_opts_t mocked_pool_opts; + requested_scrub_t request_flags; + ScrubQueue::ScrubJobRef job; +}; + +class TestScrubSched : public ::testing::Test { + public: + TestScrubSched() = default; + + protected: + int m_osd_num{1}; + FakeOsd m_osds{m_osd_num}; + std::unique_ptr<ScrubSchedTestWrapper> m_sched{ + new ScrubSchedTestWrapper(m_osds)}; + + /// the pg-info is queried for stats validity and for the last-scrub-stamp + pg_info_t pg_info{}; + + /// the pool configuration holds some per-pool scrub timing settings + pool_opts_t pool_opts{}; + + /** + * the scrub-jobs created for the tests, along with their corresponding + * "pg info" and pool configuration. In real life - the scrub jobs + * are owned by the respective PGs. + */ + std::vector<sjob_dynamic_data_t> m_scrub_jobs; + + protected: + sjob_dynamic_data_t create_scrub_job(const sjob_config_t& sjob_data) + { + sjob_dynamic_data_t dyn_data; + dyn_data.initial_config = sjob_data; + + // populate the 'pool options' object with the scrub timing settings + if (sjob_data.pool_conf_min) { + dyn_data.mocked_pool_opts.set<double>(pool_opts_t::SCRUB_MIN_INTERVAL, + sjob_data.pool_conf_min.value()); + } + if (sjob_data.pool_conf_max) { + dyn_data.mocked_pool_opts.set(pool_opts_t::SCRUB_MAX_INTERVAL, + sjob_data.pool_conf_max.value()); + } + + // create the 'pg info' object with the stats + dyn_data.mocked_pg_info = pg_info_t{sjob_data.spg}; + + dyn_data.mocked_pg_info.history.last_scrub_stamp = + sjob_data.history_scrub_stamp; + dyn_data.mocked_pg_info.stats.stats_invalid = !sjob_data.are_stats_valid; + + // fake hust the required 'requested-scrub' flags + std::cout << "request_flags: sjob_data.is_must " << sjob_data.is_must + << std::endl; + dyn_data.request_flags.must_scrub = sjob_data.is_must; + dyn_data.request_flags.need_auto = sjob_data.is_need_auto; + + // create the scrub job + dyn_data.job = ceph::make_ref<ScrubQueue::ScrubJob>(g_ceph_context, + sjob_data.spg, + m_osd_num); + m_scrub_jobs.push_back(dyn_data); + return dyn_data; + } + + void register_job_set(const std::vector<sjob_config_t>& job_configs) + { + std::for_each(job_configs.begin(), + job_configs.end(), + [this](const sjob_config_t& sj) { + auto dynjob = create_scrub_job(sj); + m_sched->register_with_osd( + dynjob.job, + m_sched->determine_scrub_time(dynjob.request_flags, + dynjob.mocked_pg_info, + dynjob.mocked_pool_opts)); + }); + } + + /// count the scrub-jobs that are currently in a specific state + int count_scrub_jobs_in_state(qu_state_t state) + { + return std::count_if(m_scrub_jobs.begin(), + m_scrub_jobs.end(), + [state](const sjob_dynamic_data_t& sj) { + return sj.job->state == state; + }); + } + + void list_testers_jobs(std::string hdr) + { + std::cout << fmt::format("{}: {} jobs created for the test:", + hdr, + m_scrub_jobs.size()) + << std::endl; + for (const auto& job : m_scrub_jobs) { + std::cout << fmt::format("\t{}: job {}", hdr, *job.job) << std::endl; + } + } + + void print_all_states(std::string hdr) + { + std::cout << fmt::format( + "{}: Created:{}. 
Per state: not-reg:{} reg:{} unreg:{}", + hdr, + m_scrub_jobs.size(), + count_scrub_jobs_in_state(qu_state_t::not_registered), + count_scrub_jobs_in_state(qu_state_t::registered), + count_scrub_jobs_in_state(qu_state_t::unregistering)) + << std::endl; + } + + void debug_print_jobs(std::string hdr, + const ScrubQueue::ScrubQContainer& jobs) + { + std::cout << fmt::format("{}: time now {}", hdr, m_sched->time_now()) + << std::endl; + for (const auto& job : jobs) { + std::cout << fmt::format( + "\t{}: job {} ({}): scheduled {}", + hdr, + job->pgid, + job->scheduling_state(m_sched->time_now(), false), + job->get_sched_time()) + << std::endl; + } + } +}; + +// /////////////////////////////////////////////////////////////////////////// +// test data. Scrub-job creation requires a PG-id, and a set of 'scrub request' +// flags + +namespace { + +// the times used during the tests are offset to 1.1.2000, so that +// utime_t formatting will treat them as absolute (not as a relative time) +static const auto epoch_2000 = 946'684'800; + +std::vector<sjob_config_t> sjob_configs = { + { + spg_t{pg_t{1, 1}}, + true, // PG has valid stats + utime_t{std::time_t(epoch_2000 + 1'000'000), 0}, // last-scrub-stamp + 100.0, // min scrub delay in pool config + std::nullopt, // max scrub delay in pool config + false, // must-scrub + false, // need-auto + ScrubQueue::scrub_schedule_t{} // initial schedule + }, + + {spg_t{pg_t{4, 1}}, + true, + utime_t{epoch_2000 + 1'000'000, 0}, + 100.0, + std::nullopt, + true, + false, + ScrubQueue::scrub_schedule_t{}}, + + {spg_t{pg_t{7, 1}}, + true, + utime_t{}, + 1.0, + std::nullopt, + false, + false, + ScrubQueue::scrub_schedule_t{}}, + + {spg_t{pg_t{5, 1}}, + true, + utime_t{epoch_2000 + 1'900'000, 0}, + 1.0, + std::nullopt, + false, + false, + ScrubQueue::scrub_schedule_t{}}}; + +} // anonymous namespace + +// //////////////////////////// tests //////////////////////////////////////// + +/// basic test: scheduling simple jobs, validating their calculated schedule +TEST_F(TestScrubSched, populate_queue) +{ + ASSERT_EQ(0, m_sched->list_registered_jobs().size()); + + auto dynjob_0 = create_scrub_job(sjob_configs[0]); + auto suggested = m_sched->determine_scrub_time(dynjob_0.request_flags, + dynjob_0.mocked_pg_info, + dynjob_0.mocked_pool_opts); + m_sched->register_with_osd(dynjob_0.job, suggested); + std::cout << fmt::format("scheduled at: {}", dynjob_0.job->get_sched_time()) + << std::endl; + + auto dynjob_1 = create_scrub_job(sjob_configs[1]); + suggested = m_sched->determine_scrub_time(dynjob_1.request_flags, + dynjob_1.mocked_pg_info, + dynjob_1.mocked_pool_opts); + m_sched->register_with_osd(dynjob_1.job, suggested); + std::cout << fmt::format("scheduled at: {}", dynjob_1.job->get_sched_time()) + << std::endl; + + EXPECT_EQ(dynjob_1.job->get_sched_time(), utime_t(1, 1)); + EXPECT_EQ(2, m_sched->list_registered_jobs().size()); +} + +/// validate the states of the scrub-jobs (as set in the jobs themselves) +TEST_F(TestScrubSched, states) +{ + m_sched->set_time_for_testing(epoch_2000); + register_job_set(sjob_configs); + list_testers_jobs("testing states"); + EXPECT_EQ(sjob_configs.size(), m_sched->list_registered_jobs().size()); + + // check the initial state of the jobs + print_all_states("<initial state>"); + m_sched->rm_unregistered_jobs(); + EXPECT_EQ(0, count_scrub_jobs_in_state(qu_state_t::not_registered)); + + // now - remove a couple of them + m_sched->remove_from_osd_queue(m_scrub_jobs[2].job); + m_sched->remove_from_osd_queue(m_scrub_jobs[1].job); + 
m_sched->remove_from_osd_queue(m_scrub_jobs[2].job); // should have no effect + + print_all_states("<w/ 2 jobs removed>"); + EXPECT_EQ(2, count_scrub_jobs_in_state(qu_state_t::registered)); + EXPECT_EQ(2, count_scrub_jobs_in_state(qu_state_t::unregistering)); + + m_sched->rm_unregistered_jobs(); + EXPECT_EQ(2, count_scrub_jobs_in_state(qu_state_t::not_registered)); + std::cout << fmt::format("inp size: {}. In list-registered: {}", + sjob_configs.size(), + m_sched->list_registered_jobs().size()) + << std::endl; + EXPECT_EQ(sjob_configs.size() - 2, m_sched->list_registered_jobs().size()); +} + +/// jobs that are ripe should be in the ready list, sorted by their scheduled +/// time +TEST_F(TestScrubSched, ready_list) +{ + m_sched->set_time_for_testing(epoch_2000 + 900'000); + register_job_set(sjob_configs); + list_testers_jobs("testing states"); + EXPECT_EQ(sjob_configs.size(), m_sched->list_registered_jobs().size()); + + m_sched->set_time_for_testing(epoch_2000 + 1'000'000); + auto all_reg_jobs = m_sched->list_registered_jobs(); + debug_print_jobs("registered", all_reg_jobs); + + auto ripe_jobs = m_sched->collect_ripe_jobs(); + EXPECT_EQ(2, ripe_jobs.size()); + debug_print_jobs("ready_list", ripe_jobs); + + m_sched->set_time_for_testing(epoch_2000 + 3'000'000); + // all jobs should be in the ready list + ripe_jobs = m_sched->collect_ripe_jobs(); + EXPECT_EQ(4, ripe_jobs.size()); + debug_print_jobs("ready_list", ripe_jobs); +} diff --git a/src/test/osd/test_scrubber_be.cc b/src/test/osd/test_scrubber_be.cc new file mode 100644 index 000000000..65fd7e730 --- /dev/null +++ b/src/test/osd/test_scrubber_be.cc @@ -0,0 +1,669 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "./scrubber_generators.h" +#include "./scrubber_test_datasets.h" + +#include <gtest/gtest.h> +#include <signal.h> +#include <stdio.h> + +#include <fmt/ranges.h> + +#include "common/async/context_pool.h" +#include "common/ceph_argparse.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "mon/MonClient.h" +#include "msg/Messenger.h" +#include "os/ObjectStore.h" +#include "osd/PG.h" +#include "osd/PGBackend.h" +#include "osd/PrimaryLogPG.h" +#include "osd/osd_types.h" +#include "osd/osd_types_fmt.h" +#include "osd/scrubber/pg_scrubber.h" +#include "osd/scrubber/scrub_backend.h" + +/// \file testing isolated parts of the Scrubber backend + +using namespace std::string_literals; + +int main(int argc, char** argv) +{ + std::map<std::string, std::string> defaults = { + // make sure we have 3 copies, or some tests won't work + {"osd_pool_default_size", "3"}, + // our map is flat, so just try and split across OSDs, not hosts or whatever + {"osd_crush_chooseleaf_type", "0"}, + }; + std::vector<const char*> args(argv, argv + argc); + auto cct = global_init(&defaults, + args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + + +class TestScrubBackend : public ScrubBackend { + public: + TestScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, + pg_shard_t i_am, + bool repair, + scrub_level_t shallow_or_deep, + const std::set<pg_shard_t>& acting) + : ScrubBackend(scrubber, pg, i_am, repair, shallow_or_deep, acting) + {} + + bool get_m_repair() const { return m_repair; } + bool get_is_replicated() const { return m_is_replicated; } + auto get_omap_stats() const { return m_omap_stats; } + + 
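/// the acting set of this PG, excluding this OSD (exposes the backend's m_acting_but_me) +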
const std::vector<pg_shard_t>& all_but_me() const { return m_acting_but_me; } + + /// populate the scrub-maps set for the 'chunk' being scrubbed + void insert_faked_smap(pg_shard_t shard, const ScrubMap& smap); +}; + +// mocking the PG +class TestPg : public PgScrubBeListener { + public: + ~TestPg() = default; + + TestPg(std::shared_ptr<PGPool> pool, pg_info_t& pginfo, pg_shard_t my_osd) + : m_pool{pool} + , m_info{pginfo} + , m_pshard{my_osd} + {} + + const PGPool& get_pgpool() const final { return *(m_pool.get()); } + pg_shard_t get_primary() const final { return m_pshard; } + void force_object_missing(ScrubberPasskey, + const std::set<pg_shard_t>& peer, + const hobject_t& oid, + eversion_t version) final + {} + + const pg_info_t& get_pg_info(ScrubberPasskey) const final { return m_info; } + + uint64_t logical_to_ondisk_size(uint64_t logical_size) const final + { + return logical_size; + } + + bool is_waiting_for_unreadable_object() const final { return false; } + + std::shared_ptr<PGPool> m_pool; + pg_info_t& m_info; + pg_shard_t m_pshard; +}; + + +// /////////////////////////////////////////////////////////////////////////// +// /////////////////////////////////////////////////////////////////////////// + +// and the scrubber +class TestScrubber : public ScrubBeListener, public Scrub::SnapMapReaderI { + using result_t = Scrub::SnapMapReaderI::result_t; + public: + ~TestScrubber() = default; + + TestScrubber(spg_t spg, OSDMapRef osdmap, LoggerSinkSet& logger) + : m_spg{spg} + , m_logger{logger} + , m_osdmap{osdmap} + {} + + std::ostream& gen_prefix(std::ostream& out) const final { return out; } + + CephContext* get_pg_cct() const final { return g_ceph_context; } + + LoggerSinkSet& get_logger() const final { return m_logger; } + + bool is_primary() const final { return m_primary; } + + spg_t get_pgid() const final { return m_info.pgid; } + + const OSDMapRef& get_osdmap() const final { return m_osdmap; } + + void add_to_stats(const object_stat_sum_t& stat) final { m_stats.add(stat); } + + // submit_digest_fixes() mock can be set to expect a specific set of + // fixes to perform. + /// \todo implement the mock. 
+ void submit_digest_fixes(const digests_fixes_t& fixes) final + { + std::cout << fmt::format("{} submit_digest_fixes({})", + __func__, + fmt::join(fixes, ",")) + << std::endl; + } + + int get_snaps(const hobject_t& hoid, + std::set<snapid_t>* snaps_set) const; + + tl::expected<std::set<snapid_t>, result_t> get_snaps( + const hobject_t& oid) const final; + + tl::expected<std::set<snapid_t>, result_t> get_snaps_check_consistency( + const hobject_t& oid) const final + { + /// \todo for now + return get_snaps(oid); + } + + void set_snaps(const hobject_t& hoid, const std::vector<snapid_t>& snaps) + { + std::cout + << fmt::format("{}: ({}) -> #{} {}", __func__, hoid, snaps.size(), snaps) + << std::endl; + std::set<snapid_t> snaps_set(snaps.begin(), snaps.end()); + m_snaps[hoid] = snaps_set; + } + + void set_snaps(const ScrubGenerator::all_clones_snaps_t& clones_snaps) + { + for (const auto& [clone, snaps] : clones_snaps) { + std::cout << fmt::format("{}: ({}) -> #{} {}", + __func__, + clone, + snaps.size(), + snaps) + << std::endl; + std::set<snapid_t> snaps_set(snaps.begin(), snaps.end()); + m_snaps[clone] = snaps_set; + } + } + + bool m_primary{true}; + spg_t m_spg; + LoggerSinkSet& m_logger; + OSDMapRef m_osdmap; + pg_info_t m_info; + object_stat_sum_t m_stats; + + // the "snap-mapper" database (returned by get_snaps()) + std::map<hobject_t, std::set<snapid_t>> m_snaps; +}; + +int TestScrubber::get_snaps(const hobject_t& hoid, + std::set<snapid_t>* snaps_set) const +{ + auto it = m_snaps.find(hoid); + if (it == m_snaps.end()) { + std::cout << fmt::format("{}: ({}) no snaps", __func__, hoid) << std::endl; + return -ENOENT; + } + + *snaps_set = it->second; + std::cout << fmt::format("{}: ({}) -> #{} {}", + __func__, + hoid, + snaps_set->size(), + *snaps_set) + << std::endl; + return 0; +} + +tl::expected<std::set<snapid_t>, Scrub::SnapMapReaderI::result_t> +TestScrubber::get_snaps(const hobject_t& oid) const +{ + std::set<snapid_t> snapset; + auto r = get_snaps(oid, &snapset); + if (r >= 0) { + return snapset; + } + return tl::make_unexpected(Scrub::SnapMapReaderI::result_t{ + Scrub::SnapMapReaderI::result_t::code_t::not_found, + r}); +} + + +// /////////////////////////////////////////////////////////////////////////// +// /////////////////////////////////////////////////////////////////////////// + + +/// parameters for TestTScrubberBe construction +struct TestTScrubberBeParams { + ScrubGenerator::pool_conf_t pool_conf; + ScrubGenerator::RealObjsConf objs_conf; + int num_osds; +}; + + +// /////////////////////////////////////////////////////////////////////////// +// /////////////////////////////////////////////////////////////////////////// + + +// the actual owner of the OSD "objects" that are used by +// the mockers +class TestTScrubberBe : public ::testing::Test { + public: + // the test data source + virtual TestTScrubberBeParams inject_params() = 0; + + // initial test data + ScrubGenerator::MockLog logger; + ScrubGenerator::pool_conf_t pool_conf; + ScrubGenerator::RealObjsConf real_objs; + int num_osds{0}; + + // ctor & initialization + + TestTScrubberBe() = default; + ~TestTScrubberBe() = default; + void SetUp() override; + void TearDown() override; + + /** + * Create the set of scrub-maps supposedly sent by the replica (or + * generated by the Primary). Then - create the snap-sets for all + * the objects in the set. 
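+ * (the faked snap-mapper contents are derived from each head object's clone list)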
+ */ + void fake_a_scrub_set(ScrubGenerator::RealObjsConfList& all_sets); + + std::unique_ptr<TestScrubBackend> sbe; + + spg_t spg; + pg_shard_t i_am; // set to 'my osd and no shard' + std::set<pg_shard_t> acting_shards; + std::vector<int> acting_osds; + int acting_primary; + + std::unique_ptr<TestScrubber> test_scrubber; + + int64_t pool_id; + pg_pool_t pool_info; + + OSDMapRef osdmap; + + std::shared_ptr<PGPool> pool; + pg_info_t info; + + std::unique_ptr<TestPg> test_pg; + + // generated sets of "objects" for the active OSDs + ScrubGenerator::RealObjsConfList real_objs_list; + + protected: + /** + * Create the OSDmap and populate it with one pool, based on + * the pool configuration. + * For now - only replicated pools are supported. + */ + OSDMapRef setup_map(int num_osds, const ScrubGenerator::pool_conf_t& pconf); + + /** + * Create a PG in the one pool we have. Fake the PG info. + * Use the primary of the PG to determine "who we are". + * + * \returns the PG info + */ + pg_info_t setup_pg_in_map(); +}; + + +// /////////////////////////////////////////////////////////////////////////// +// /////////////////////////////////////////////////////////////////////////// + +void TestTScrubberBe::SetUp() +{ + std::cout << "TestTScrubberBe::SetUp()" << std::endl; + logger.err_count = 0; + + // fetch test configuration + auto params = inject_params(); + pool_conf = params.pool_conf; + real_objs = params.objs_conf; + num_osds = params.num_osds; + + // create the OSDMap + + osdmap = setup_map(num_osds, pool_conf); + + std::cout << "osdmap: " << *osdmap << std::endl; + + // extract the pool from the osdmap + + pool_id = osdmap->lookup_pg_pool_name(pool_conf.name); + const pg_pool_t* ext_pool_info = osdmap->get_pg_pool(pool_id); + pool = + std::make_shared<PGPool>(osdmap, pool_id, *ext_pool_info, pool_conf.name); + + std::cout << "pool: " << pool->info << std::endl; + + // a PG in that pool? + info = setup_pg_in_map(); + std::cout << fmt::format("PG info: {}", info) << std::endl; + + real_objs_list = + ScrubGenerator::make_real_objs_conf(pool_id, real_objs, acting_osds); + + // now we can create the main mockers + + // the "PgScrubber" + test_scrubber = std::make_unique<TestScrubber>(spg, osdmap, logger); + + // the "PG" (and its backend) + test_pg = std::make_unique<TestPg>(pool, info, i_am); + std::cout << fmt::format("{}: acting: {}", __func__, acting_shards) + << std::endl; + sbe = std::make_unique<TestScrubBackend>(*test_scrubber, + *test_pg, + i_am, + /* repair? */ false, + scrub_level_t::deep, + acting_shards); + + // create a osd-num only copy of the relevant OSDs + acting_osds.reserve(acting_shards.size()); + for (const auto& shard : acting_shards) { + acting_osds.push_back(shard.osd); + } + + sbe->new_chunk(); + fake_a_scrub_set(real_objs_list); +} + + +// Note: based on TestOSDMap.cc. 
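+// Builds an OSDMap with 'num_osds' OSDs (all existing and 'in'), plus a single replicated pool created from 'pconf'.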
+OSDMapRef TestTScrubberBe::setup_map(int num_osds, + const ScrubGenerator::pool_conf_t& pconf) +{ + auto osdmap = std::make_shared<OSDMap>(); + uuid_d fsid; + osdmap->build_simple(g_ceph_context, 0, fsid, num_osds); + OSDMap::Incremental pending_inc(osdmap->get_epoch() + 1); + pending_inc.fsid = osdmap->get_fsid(); + entity_addrvec_t sample_addrs; + sample_addrs.v.push_back(entity_addr_t()); + uuid_d sample_uuid; + for (int i = 0; i < num_osds; ++i) { + sample_uuid.generate_random(); + sample_addrs.v[0].nonce = i; + pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW; + pending_inc.new_up_client[i] = sample_addrs; + pending_inc.new_up_cluster[i] = sample_addrs; + pending_inc.new_hb_back_up[i] = sample_addrs; + pending_inc.new_hb_front_up[i] = sample_addrs; + pending_inc.new_weight[i] = CEPH_OSD_IN; + pending_inc.new_uuid[i] = sample_uuid; + } + osdmap->apply_incremental(pending_inc); + + // create a replicated pool + OSDMap::Incremental new_pool_inc(osdmap->get_epoch() + 1); + new_pool_inc.new_pool_max = osdmap->get_pool_max(); + new_pool_inc.fsid = osdmap->get_fsid(); + uint64_t pool_id = ++new_pool_inc.new_pool_max; + pg_pool_t empty; + auto p = new_pool_inc.get_new_pool(pool_id, &empty); + p->size = pconf.size; + p->set_pg_num(pconf.pg_num); + p->set_pgp_num(pconf.pgp_num); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = 0; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_id] = pconf.name; + osdmap->apply_incremental(new_pool_inc); + return osdmap; +} + +pg_info_t TestTScrubberBe::setup_pg_in_map() +{ + pg_t rawpg(0, pool_id); + pg_t pgid = osdmap->raw_pg_to_pg(rawpg); + std::vector<int> up_osds; + int up_primary; + + osdmap->pg_to_up_acting_osds(pgid, + &up_osds, + &up_primary, + &acting_osds, + &acting_primary); + + std::cout << fmt::format( + "{}: pg: {} up_osds: {} up_primary: {} acting_osds: {} " + "acting_primary: " + "{}", + __func__, + pgid, + up_osds, + up_primary, + acting_osds, + acting_primary) + << std::endl; + + spg = spg_t{pgid}; + i_am = pg_shard_t{up_primary}; + std::cout << fmt::format("{}: spg: {} and I am {}", __func__, spg, i_am) + << std::endl; + + // the 'acting shards' set - the one actually used by the scrubber + std::for_each(acting_osds.begin(), acting_osds.end(), [&](int osd) { + acting_shards.insert(pg_shard_t{osd}); + }); + std::cout << fmt::format("{}: acting_shards: {}", __func__, acting_shards) + << std::endl; + + pg_info_t info; + info.pgid = spg; + /// \todo: handle the epochs: + // info.last_update = osdmap->get_epoch(); + // info.last_complete = osdmap->get_epoch(); + // info.last_osdmap_epoch = osdmap->get_epoch(); + // info.history.last_epoch_marked_removed = osdmap->get_epoch(); + info.last_user_version = 1; + info.purged_snaps = {}; + info.last_user_version = 1; + info.history.last_epoch_clean = osdmap->get_epoch(); + info.history.last_epoch_split = osdmap->get_epoch(); + info.history.last_epoch_marked_full = osdmap->get_epoch(); + info.last_backfill = hobject_t::get_max(); + return info; +} + +void TestTScrubberBe::TearDown() +{ + EXPECT_EQ(logger.err_count, logger.expected_err_count); +} + +void TestTScrubberBe::fake_a_scrub_set( + ScrubGenerator::RealObjsConfList& all_sets) +{ + for (int osd_num = 0; osd_num < pool_conf.size; ++osd_num) { + ScrubMap smap; + smap.valid_through = eversion_t{1, 1}; + smap.incr_since = eversion_t{1, 1}; + smap.has_omap_keys = true; // to force omap checks + + // fill the map with the objects relevant to this OSD + for (auto& obj : all_sets[osd_num]->objs) { + std::cout 
<< fmt::format("{}: object: {}", __func__, obj.ghobj.hobj) + << std::endl; + ScrubGenerator::add_object(smap, obj, osd_num); + } + + std::cout << fmt::format("{}: {} inserting smap {:D}", + __func__, + osd_num, + smap) + << std::endl; + sbe->insert_faked_smap(pg_shard_t{osd_num}, smap); + } + + // create the snap_mapper state + + for (const auto& robj : all_sets[i_am.osd]->objs) { + + std::cout << fmt::format("{}: object: {}", __func__, robj.ghobj.hobj) + << std::endl; + + if (robj.ghobj.hobj.snap == CEPH_NOSNAP) { + // head object + auto objects_snapset = ScrubGenerator::all_clones(robj); + test_scrubber->set_snaps(objects_snapset); + } + } +} + +void TestScrubBackend::insert_faked_smap(pg_shard_t shard, const ScrubMap& smap) +{ + ASSERT_TRUE(this_chunk.has_value()); + std::cout << fmt::format("{}: inserting faked smap for osd {}", + __func__, + shard.osd) + << std::endl; + this_chunk->received_maps[shard] = smap; +} + + +// /////////////////////////////////////////////////////////////////////////// +// /////////////////////////////////////////////////////////////////////////// + + +using namespace ScrubGenerator; + +class TestTScrubberBe_data_1 : public TestTScrubberBe { + public: + TestTScrubberBe_data_1() : TestTScrubberBe() {} + + // test configuration + pool_conf_t pl{3, 3, 3, 3, "rep_pool"}; + + TestTScrubberBeParams inject_params() override + { + std::cout << fmt::format("{}: injecting params (minimal snaps conf.)", + __func__) + << std::endl; + return TestTScrubberBeParams{ + /* pool_conf */ pl, + /* real_objs_conf */ ScrubDatasets::minimal_snaps_configuration, + /*num_osds */ 3}; + } +}; + +// some basic sanity checks +// (mainly testing the constructor) + +TEST_F(TestTScrubberBe_data_1, creation_1) +{ + /// \todo copy some osdmap tests from TestOSDMap.cc + ASSERT_TRUE(sbe); + ASSERT_TRUE(sbe->get_is_replicated()); + ASSERT_FALSE(sbe->get_m_repair()); + sbe->update_repair_status(true); + ASSERT_TRUE(sbe->get_m_repair()); + + // make sure *I* do not appear in 'all_but_me' set of OSDs + auto others = sbe->all_but_me(); + auto in_others = std::find(others.begin(), others.end(), i_am); + EXPECT_EQ(others.end(), in_others); +} + + +TEST_F(TestTScrubberBe_data_1, smaps_creation_1) +{ + ASSERT_TRUE(sbe); + ASSERT_EQ(sbe->get_omap_stats().omap_bytes, 0); + + // for test data 'minimal_snaps_configuration': + // scrub_compare_maps() should not emmit any error, nor + // return any snap-mapper fix + auto [incons, fix_list] = sbe->scrub_compare_maps(true, *test_scrubber); + + EXPECT_EQ(fix_list.size(), 0); // snap-mapper fix should be empty + + EXPECT_EQ(incons.size(), 0); // no inconsistency + + // make sure the test did execute *something* + EXPECT_TRUE(sbe->get_omap_stats().omap_bytes != 0); +} + + +// whitebox testing (OK if failing after a change to the backend internals) + + +// blackbox testing - testing the published functionality +// (should not depend on internals of the backend) + + +/// corrupt the snap_mapper data +TEST_F(TestTScrubberBe_data_1, snapmapper_1) +{ + using snap_mapper_op_t = Scrub::snap_mapper_op_t; + ASSERT_TRUE(sbe); + + // a bogus version of hobj_ms1_snp30 (a clone) snap_ids + hobject_t hobj_ms1_snp30_inpool = hobject_t{ScrubDatasets::hobj_ms1_snp30}; + hobj_ms1_snp30_inpool.pool = pool_id; + all_clones_snaps_t bogus_30; + bogus_30[hobj_ms1_snp30_inpool] = {0x333, 0x666}; + + test_scrubber->set_snaps(bogus_30); + auto [incons, fix_list] = sbe->scrub_compare_maps(true, *test_scrubber); + + EXPECT_EQ(fix_list.size(), 1); + + // debug - print the fix-list: + for 
(const auto& fix : fix_list) { + std::cout << fmt::format("snapmapper_1: fix {}: {} {}->{}", + fix.hoid, + (fix.op == snap_mapper_op_t::add ? "add" : "upd"), + fix.wrong_snaps, + fix.snaps) + << std::endl; + } + EXPECT_EQ(fix_list[0].hoid, hobj_ms1_snp30_inpool); + EXPECT_EQ(fix_list[0].snaps, std::set<snapid_t>{0x30}); + + EXPECT_EQ(incons.size(), 0); // no inconsistency +} + +// a dataset similar to 'minimal_snaps_configuration', +// but with the hobj_ms1_snp30 clone being modified by a corruption +// function +class TestTScrubberBe_data_2 : public TestTScrubberBe { + public: + TestTScrubberBe_data_2() : TestTScrubberBe() {} + + // basic test configuration - 3 OSDs, all involved in the pool + pool_conf_t pl{3, 3, 3, 3, "rep_pool"}; + + TestTScrubberBeParams inject_params() override + { + std::cout << fmt::format( + "{}: injecting params (minimal-snaps + size change)", + __func__) + << std::endl; + TestTScrubberBeParams params{ + /* pool_conf */ pl, + /* real_objs_conf */ ScrubDatasets::minimal_snaps_configuration, + /*num_osds */ 3}; + + // inject a corruption function that will modify osd.0's version of + // the object + params.objs_conf.objs[0].corrupt_funcs = &ScrubDatasets::crpt_funcs_set1; + return params; + } +}; + +TEST_F(TestTScrubberBe_data_2, smaps_clone_size) +{ + ASSERT_TRUE(sbe); + EXPECT_EQ(sbe->get_omap_stats().omap_bytes, 0); + logger.set_expected_err_count(1); + auto [incons, fix_list] = sbe->scrub_compare_maps(true, *test_scrubber); + + EXPECT_EQ(fix_list.size(), 0); // snap-mapper fix should be empty + + EXPECT_EQ(incons.size(), 1); // one inconsistency +} + +// Local Variables: +// compile-command: "cd ../.. ; make unittest_osdscrub ; ./unittest_osdscrub +// --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " End: diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc new file mode 100644 index 000000000..d7b7862f5 --- /dev/null +++ b/src/test/osd/types.cc @@ -0,0 +1,2204 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include "include/types.h" +#include "osd/osd_types.h" +#include "osd/OSDMap.h" +#include "gtest/gtest.h" +#include "include/coredumpctl.h" +#include "common/Thread.h" +#include "include/stringify.h" +#include "osd/ReplicatedBackend.h" +#include <sstream> + +using namespace std; + +TEST(hobject, prefixes0) +{ + uint32_t mask = 0xE947FA20; + uint32_t bits = 12; + int64_t pool = 0; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000000.02A")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes1) +{ + uint32_t mask = 0x0000000F; + uint32_t bits = 6; + int64_t pool = 20; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000014.F0")); + prefixes_correct.insert(string("0000000000000014.F4")); + prefixes_correct.insert(string("0000000000000014.F8")); + prefixes_correct.insert(string("0000000000000014.FC")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes2) +{ + uint32_t mask = 0xDEADBEAF; + uint32_t bits = 25; + int64_t pool = 0; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000000.FAEBDA0")); + prefixes_correct.insert(string("0000000000000000.FAEBDA2")); + prefixes_correct.insert(string("0000000000000000.FAEBDA4")); + prefixes_correct.insert(string("0000000000000000.FAEBDA6")); + prefixes_correct.insert(string("0000000000000000.FAEBDA8")); + prefixes_correct.insert(string("0000000000000000.FAEBDAA")); + prefixes_correct.insert(string("0000000000000000.FAEBDAC")); + prefixes_correct.insert(string("0000000000000000.FAEBDAE")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes3) +{ + uint32_t mask = 0xE947FA20; + uint32_t bits = 32; + int64_t pool = 0x23; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000023.02AF749E")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes4) +{ + uint32_t mask = 0xE947FA20; + uint32_t bits = 0; + int64_t pool = 0x23; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000023.")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes5) +{ + uint32_t mask = 0xDEADBEAF; + uint32_t bits = 1; + int64_t pool = 0x34AC5D00; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000034AC5D00.1")); + prefixes_correct.insert(string("0000000034AC5D00.3")); + prefixes_correct.insert(string("0000000034AC5D00.5")); + prefixes_correct.insert(string("0000000034AC5D00.7")); + prefixes_correct.insert(string("0000000034AC5D00.9")); + prefixes_correct.insert(string("0000000034AC5D00.B")); + prefixes_correct.insert(string("0000000034AC5D00.D")); + prefixes_correct.insert(string("0000000034AC5D00.F")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(pg_interval_t, check_new_interval) +{ +// iterate through all 4 combinations +for (unsigned i = 0; i < 4; ++i) { + // + // Create a situation where osdmaps are the same so that + // each test case can diverge from it using minimal code. 
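+ // (osdmap and lastmap are built identically; acting = {osd_id, osd_id+1}, up = {osd_id}, primary = osd_id)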
+ // + int osd_id = 1; + epoch_t epoch = 40; + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + std::shared_ptr<OSDMap> lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + epoch_t same_interval_since = epoch; + epoch_t last_epoch_clean = same_interval_since; + int64_t pool_id = 200; + int pg_num = 4; + __u8 min_size = 2; + boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(new ReplicatedBackend::RPCRecPred()); + { + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_pools[pool_id].set_pg_num_pending(pg_num); + inc.new_up_thru[osd_id] = epoch + 1; + osdmap->apply_incremental(inc); + lastmap->apply_incremental(inc); + } + vector<int> new_acting; + new_acting.push_back(osd_id); + new_acting.push_back(osd_id + 1); + vector<int> old_acting = new_acting; + int old_primary = osd_id; + int new_primary = osd_id; + vector<int> new_up; + new_up.push_back(osd_id); + int old_up_primary = osd_id; + int new_up_primary = osd_id; + vector<int> old_up = new_up; + pg_t pgid; + pgid.set_pool(pool_id); + + // + // Do nothing if there are no modifications in + // acting, up or pool size and that the pool is not + // being split + // + { + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_FALSE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + ASSERT_TRUE(past_intervals.empty()); + } + + // + // The acting set has changed + // + { + vector<int> new_acting; + int _new_primary = osd_id + 1; + new_acting.push_back(_new_primary); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + old_primary = new_primary; + } + + // + // The up set has changed + // + { + vector<int> new_up; + int _new_primary = osd_id + 1; + new_up.push_back(_new_primary); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // The up primary has changed + // + { + vector<int> new_up; + int _new_up_primary = osd_id + 1; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + _new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // PG is splitting + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + int new_pg_num = pg_num ^ 2; + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + 
inc.new_pools[pool_id].set_pg_num(new_pg_num); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // PG is pre-merge source + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + cout << "pg_num " << pg_num << std::endl; + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG was pre-merge source + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + + cout << "pg_num " << pg_num << std::endl; + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + lastmap, // reverse order! 
+ osdmap, + pg_t(pg_num - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG is merge source + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG is pre-merge target + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num / 2 - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG was pre-merge target + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + lastmap, // reverse order! 
+ osdmap, + pg_t(pg_num / 2 - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG is merge target + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num / 2 - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG size has changed + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + __u8 new_min_size = min_size + 1; + inc.new_pools[pool_id].min_size = new_min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // The old acting set was empty : the previous interval could not + // have been rw + // + { + vector<int> old_acting; + + PastIntervals past_intervals; + + ostringstream out; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("acting set is too small")); + } + + // + // The old acting set did not have enough osd : it could + // not have been rw + // + { + vector<int> old_acting; + old_acting.push_back(osd_id); + + // + // see http://tracker.ceph.com/issues/5780 + // the size of the old acting set should be compared + // with the min_size of the old osdmap + // + // The new osdmap is created so that it triggers the + // bug. + // + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + __u8 new_min_size = old_acting.size(); + inc.new_pools[pool_id].min_size = new_min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + osdmap->apply_incremental(inc); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("acting set is too small")); + } + + // + // The acting set changes. The old acting set primary was up during the + // previous interval and may have been rw. 
+ // + { + vector<int> new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("includes interval")); + } + // + // The acting set changes. The old acting set primary was not up + // during the old interval but last_epoch_clean is in the + // old interval and it may have been rw. + // + { + vector<int> new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + std::shared_ptr<OSDMap> lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = epoch - 10; + lastmap->apply_incremental(inc); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("presumed to have been rw")); + } + + // + // The acting set changes. The old acting set primary was not up + // during the old interval and last_epoch_clean is before the + // old interval : the previous interval could not possibly have + // been rw. 
+ // + { + vector<int> new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + epoch_t last_epoch_clean = epoch - 10; + + std::shared_ptr<OSDMap> lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = last_epoch_clean; + lastmap->apply_incremental(inc); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("does not include interval")); + } +} // end for, didn't want to reindent +} + +TEST(pg_t, get_ancestor) +{ + ASSERT_EQ(pg_t(0, 0), pg_t(16, 0).get_ancestor(16)); + ASSERT_EQ(pg_t(1, 0), pg_t(17, 0).get_ancestor(16)); + ASSERT_EQ(pg_t(0, 0), pg_t(16, 0).get_ancestor(8)); + ASSERT_EQ(pg_t(16, 0), pg_t(16, 0).get_ancestor(80)); + ASSERT_EQ(pg_t(16, 0), pg_t(16, 0).get_ancestor(83)); + ASSERT_EQ(pg_t(1, 0), pg_t(1321, 0).get_ancestor(123).get_ancestor(8)); + ASSERT_EQ(pg_t(3, 0), pg_t(1323, 0).get_ancestor(123).get_ancestor(8)); + ASSERT_EQ(pg_t(3, 0), pg_t(1323, 0).get_ancestor(8)); +} + +TEST(pg_t, split) +{ + pg_t pgid(0, 0); + set<pg_t> s; + bool b; + + s.clear(); + b = pgid.is_split(1, 1, &s); + ASSERT_TRUE(!b); + + s.clear(); + b = pgid.is_split(2, 4, NULL); + ASSERT_TRUE(b); + b = pgid.is_split(2, 4, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(2, 0))); + + s.clear(); + b = pgid.is_split(2, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(3u, s.size()); + ASSERT_TRUE(s.count(pg_t(2, 0))); + ASSERT_TRUE(s.count(pg_t(4, 0))); + ASSERT_TRUE(s.count(pg_t(6, 0))); + + s.clear(); + b = pgid.is_split(3, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(4, 0))); + + s.clear(); + b = pgid.is_split(6, 8, NULL); + ASSERT_TRUE(!b); + b = pgid.is_split(6, 8, &s); + ASSERT_TRUE(!b); + ASSERT_EQ(0u, s.size()); + + pgid = pg_t(1, 0); + + s.clear(); + b = pgid.is_split(2, 4, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + + s.clear(); + b = pgid.is_split(2, 6, &s); + ASSERT_TRUE(b); + ASSERT_EQ(2u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + ASSERT_TRUE(s.count(pg_t(5, 0))); + + s.clear(); + b = pgid.is_split(2, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(3u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + ASSERT_TRUE(s.count(pg_t(5, 0))); + ASSERT_TRUE(s.count(pg_t(7, 0))); + + s.clear(); + b = pgid.is_split(4, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(5, 0))); + + s.clear(); + b = pgid.is_split(3, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(3u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + ASSERT_TRUE(s.count(pg_t(5, 0))); + ASSERT_TRUE(s.count(pg_t(7, 0))); + + s.clear(); + b = pgid.is_split(6, 8, &s); + ASSERT_TRUE(!b); + ASSERT_EQ(0u, s.size()); + + pgid = pg_t(3, 0); + + s.clear(); + b = pgid.is_split(7, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(7, 0))); + + s.clear(); + b = pgid.is_split(7, 12, &s); + ASSERT_TRUE(b); + ASSERT_EQ(2u, s.size()); + ASSERT_TRUE(s.count(pg_t(7, 0))); + ASSERT_TRUE(s.count(pg_t(11, 0))); + + 
s.clear(); + b = pgid.is_split(7, 11, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(7, 0))); + +} + +TEST(pg_t, merge) +{ + pg_t pgid, parent; + bool b; + + pgid = pg_t(7, 0); + b = pgid.is_merge_source(8, 7, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(3, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 7)); + + b = pgid.is_merge_source(8, 5, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(3, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 5)); + + b = pgid.is_merge_source(8, 4, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(3, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 4)); + + b = pgid.is_merge_source(8, 3, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(1, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 4)); + + b = pgid.is_merge_source(9, 8, &parent); + ASSERT_FALSE(b); + ASSERT_FALSE(parent.is_merge_target(9, 8)); +} + +TEST(ObjectCleanRegions, mark_data_region_dirty) +{ + ObjectCleanRegions clean_regions; + uint64_t offset_1, len_1, offset_2, len_2; + offset_1 = 4096; + len_1 = 8192; + offset_2 = 40960; + len_2 = 4096; + + interval_set<uint64_t> expect_dirty_region; + EXPECT_EQ(expect_dirty_region, clean_regions.get_dirty_regions()); + expect_dirty_region.insert(offset_1, len_1); + expect_dirty_region.insert(offset_2, len_2); + + clean_regions.mark_data_region_dirty(offset_1, len_1); + clean_regions.mark_data_region_dirty(offset_2, len_2); + EXPECT_EQ(expect_dirty_region, clean_regions.get_dirty_regions()); +} + +TEST(ObjectCleanRegions, mark_omap_dirty) +{ + ObjectCleanRegions clean_regions; + + EXPECT_FALSE(clean_regions.omap_is_dirty()); + clean_regions.mark_omap_dirty(); + EXPECT_TRUE(clean_regions.omap_is_dirty()); +} + +TEST(ObjectCleanRegions, merge) +{ + ObjectCleanRegions cr1, cr2; + interval_set<uint64_t> cr1_expect; + interval_set<uint64_t> cr2_expect; + ASSERT_EQ(cr1_expect, cr1.get_dirty_regions()); + ASSERT_EQ(cr2_expect, cr2.get_dirty_regions()); + + cr1.mark_data_region_dirty(4096, 4096); + cr1_expect.insert(4096, 4096); + ASSERT_EQ(cr1_expect, cr1.get_dirty_regions()); + cr1.mark_data_region_dirty(12288, 8192); + cr1_expect.insert(12288, 8192); + ASSERT_TRUE(cr1_expect.subset_of(cr1.get_dirty_regions())); + cr1.mark_data_region_dirty(32768, 10240); + cr1_expect.insert(32768, 10240); + cr1_expect.erase(4096, 4096); + ASSERT_TRUE(cr1_expect.subset_of(cr1.get_dirty_regions())); + + cr2.mark_data_region_dirty(20480, 12288); + cr2_expect.insert(20480, 12288); + ASSERT_EQ(cr2_expect, cr2.get_dirty_regions()); + cr2.mark_data_region_dirty(102400, 4096); + cr2_expect.insert(102400, 4096); + cr2.mark_data_region_dirty(204800, 8192); + cr2_expect.insert(204800, 8192); + cr2.mark_data_region_dirty(409600, 4096); + cr2_expect.insert(409600, 4096); + ASSERT_TRUE(cr2_expect.subset_of(cr2.get_dirty_regions())); + + ASSERT_FALSE(cr2.omap_is_dirty()); + cr2.mark_omap_dirty(); + ASSERT_FALSE(cr1.omap_is_dirty()); + ASSERT_TRUE(cr2.omap_is_dirty()); + + cr1.merge(cr2); + cr1_expect.insert(204800, 8192); + ASSERT_TRUE(cr1_expect.subset_of(cr1.get_dirty_regions())); + ASSERT_TRUE(cr1.omap_is_dirty()); +} + +TEST(pg_missing_t, constructor) +{ + pg_missing_t missing; + EXPECT_EQ((unsigned int)0, missing.num_missing()); + EXPECT_FALSE(missing.have_missing()); +} + +TEST(pg_missing_t, have_missing) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.have_missing()); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.have_missing()); +} + +TEST(pg_missing_t, 
claim) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.have_missing()); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.have_missing()); + + pg_missing_t other; + EXPECT_FALSE(other.have_missing()); + + other.claim(std::move(missing)); + EXPECT_TRUE(other.have_missing()); +} + +TEST(pg_missing_t, is_missing) +{ + // pg_missing_t::is_missing(const hobject_t& oid) const + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + } + + // bool pg_missing_t::is_missing(const hobject_t& oid, eversion_t v) const + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + eversion_t need(10,5); + EXPECT_FALSE(missing.is_missing(oid, eversion_t())); + missing.add(oid, need, eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_FALSE(missing.is_missing(oid, eversion_t())); + EXPECT_TRUE(missing.is_missing(oid, need)); + } +} + +TEST(pg_missing_t, add_next_event) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + hobject_t oid_other(object_t("other"), "key", 9123, 9456, 0, ""); + eversion_t version(10,5); + eversion_t prior_version(3,4); + pg_log_entry_t sample_e(pg_log_entry_t::DELETE, oid, version, prior_version, + 0, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), + utime_t(8,9), 0); + + // new object (MODIFY) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + e.prior_version = eversion_t(); + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + + // adding the same object replaces the previous one + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // new object (CLONE) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::CLONE; + e.prior_version = eversion_t(); + EXPECT_TRUE(e.is_clone()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_FALSE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + + // adding the same object replaces the previous one + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // existing object (MODIFY) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + e.prior_version = eversion_t(); + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), 
missing.get_items().at(oid).have); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + + // adding the same object with a different version + e.prior_version = prior_version; + missing.add_next_event(e); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // object with prior version (MODIFY) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(prior_version, missing.get_items().at(oid).have); + EXPECT_EQ(version, missing.get_items().at(oid).need); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // adding a DELETE matching an existing event + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + + e.op = pg_log_entry_t::DELETE; + EXPECT_TRUE(e.is_delete()); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_TRUE(missing.get_items().at(oid).is_delete()); + EXPECT_EQ(prior_version, missing.get_items().at(oid).have); + EXPECT_EQ(version, missing.get_items().at(oid).need); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // adding a LOST_DELETE after an existing event + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_FALSE(missing.get_items().at(oid).is_delete()); + + e.op = pg_log_entry_t::LOST_DELETE; + e.version.version++; + EXPECT_TRUE(e.is_delete()); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_TRUE(missing.get_items().at(oid).is_delete()); + EXPECT_EQ(prior_version, missing.get_items().at(oid).have); + EXPECT_EQ(e.version, missing.get_items().at(oid).need); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } +} + +TEST(pg_missing_t, revise_need) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + // create a new entry + EXPECT_FALSE(missing.is_missing(oid)); + eversion_t need(10,10); + missing.revise_need(oid, need, false); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(need, missing.get_items().at(oid).need); + // update an existing entry and preserve have + eversion_t have(1,1); + missing.revise_have(oid, have); + eversion_t new_need(10,12); + EXPECT_EQ(have, missing.get_items().at(oid).have); + missing.revise_need(oid, new_need, false); + EXPECT_EQ(have, 
missing.get_items().at(oid).have); + EXPECT_EQ(new_need, missing.get_items().at(oid).need); +} + +TEST(pg_missing_t, revise_have) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + // a non existing entry means noop + EXPECT_FALSE(missing.is_missing(oid)); + eversion_t have(1,1); + missing.revise_have(oid, have); + EXPECT_FALSE(missing.is_missing(oid)); + // update an existing entry + eversion_t need(10,12); + missing.add(oid, need, have, false); + EXPECT_TRUE(missing.is_missing(oid)); + eversion_t new_have(2,2); + EXPECT_EQ(have, missing.get_items().at(oid).have); + missing.revise_have(oid, new_have); + EXPECT_EQ(new_have, missing.get_items().at(oid).have); + EXPECT_EQ(need, missing.get_items().at(oid).need); +} + +TEST(pg_missing_t, add) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + eversion_t have(1,1); + eversion_t need(10,10); + missing.add(oid, need, have, false); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(have, missing.get_items().at(oid).have); + EXPECT_EQ(need, missing.get_items().at(oid).need); +} + +TEST(pg_missing_t, rm) +{ + // void pg_missing_t::rm(const hobject_t& oid, eversion_t v) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + epoch_t epoch = 10; + eversion_t need(epoch,10); + missing.add(oid, need, eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + // rm of an older version is a noop + missing.rm(oid, eversion_t(epoch / 2,20)); + EXPECT_TRUE(missing.is_missing(oid)); + // rm of a later version removes the object + missing.rm(oid, eversion_t(epoch * 2,20)); + EXPECT_FALSE(missing.is_missing(oid)); + } + // void pg_missing_t::rm(const std::map<hobject_t, pg_missing_item>::iterator &m) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + auto m = missing.get_items().find(oid); + missing.rm(m); + EXPECT_FALSE(missing.is_missing(oid)); + } +} + +TEST(pg_missing_t, got) +{ + // void pg_missing_t::got(const hobject_t& oid, eversion_t v) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + // assert if the oid does not exist + { + PrCtl unset_dumpable; + EXPECT_DEATH(missing.got(oid, eversion_t()), ""); + } + EXPECT_FALSE(missing.is_missing(oid)); + epoch_t epoch = 10; + eversion_t need(epoch,10); + missing.add(oid, need, eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + // assert if that the version to be removed is lower than the version of the object + { + PrCtl unset_dumpable; + EXPECT_DEATH(missing.got(oid, eversion_t(epoch / 2,20)), ""); + } + // remove of a later version removes the object + missing.got(oid, eversion_t(epoch * 2,20)); + EXPECT_FALSE(missing.is_missing(oid)); + } + // void pg_missing_t::got(const std::map<hobject_t, pg_missing_item>::iterator &m) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + auto m = missing.get_items().find(oid); + missing.got(m); + EXPECT_FALSE(missing.is_missing(oid)); + } +} + +TEST(pg_missing_t, split_into) +{ + uint32_t hash1 = 1; + hobject_t oid1(object_t("objname"), "key1", 123, hash1, 0, ""); + 
uint32_t hash2 = 2; + hobject_t oid2(object_t("objname"), "key2", 123, hash2, 0, ""); + pg_missing_t missing; + missing.add(oid1, eversion_t(), eversion_t(), false); + missing.add(oid2, eversion_t(), eversion_t(), false); + pg_t child_pgid; + child_pgid.m_seed = 1; + pg_missing_t child; + unsigned split_bits = 1; + missing.split_into(child_pgid, split_bits, &child); + EXPECT_TRUE(child.is_missing(oid1)); + EXPECT_FALSE(child.is_missing(oid2)); + EXPECT_FALSE(missing.is_missing(oid1)); + EXPECT_TRUE(missing.is_missing(oid2)); +} + +TEST(pg_pool_t_test, get_pg_num_divisor) { + pg_pool_t p; + p.set_pg_num(16); + p.set_pgp_num(16); + + for (int i = 0; i < 16; ++i) + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(i, 1))); + + p.set_pg_num(12); + p.set_pgp_num(12); + + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(0, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(1, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(2, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(3, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(4, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(5, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(6, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(7, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(8, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(9, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(10, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(11, 1))); +} + +TEST(pg_pool_t_test, get_random_pg_position) { + srand(getpid()); + for (int i = 0; i < 100; ++i) { + pg_pool_t p; + p.set_pg_num(1 + (rand() % 1000)); + p.set_pgp_num(p.get_pg_num()); + pg_t pgid(rand() % p.get_pg_num(), 1); + uint32_t h = p.get_random_pg_position(pgid, rand()); + uint32_t ps = p.raw_hash_to_pg(h); + cout << p.get_pg_num() << " " << pgid << ": " + << h << " -> " << pg_t(ps, 1) << std::endl; + ASSERT_EQ(pgid.ps(), ps); + } +} + +TEST(shard_id_t, iostream) { + set<shard_id_t> shards; + shards.insert(shard_id_t(0)); + shards.insert(shard_id_t(1)); + shards.insert(shard_id_t(2)); + ostringstream out; + out << shards; + ASSERT_EQ(out.str(), "0,1,2"); + + shard_id_t noshard = shard_id_t::NO_SHARD; + shard_id_t zero(0); + ASSERT_GT(zero, noshard); +} + +TEST(spg_t, parse) { + spg_t a(pg_t(1,2), shard_id_t::NO_SHARD); + spg_t aa, bb; + spg_t b(pg_t(3,2), shard_id_t(2)); + std::string s = stringify(a); + ASSERT_TRUE(aa.parse(s.c_str())); + ASSERT_EQ(a, aa); + + s = stringify(b); + ASSERT_TRUE(bb.parse(s.c_str())); + ASSERT_EQ(b, bb); +} + +TEST(coll_t, parse) { + const char *ok[] = { + "meta", + "1.2_head", + "1.2_TEMP", + "1.2s3_head", + "1.3s2_TEMP", + "1.2s0_head", + 0 + }; + const char *bad[] = { + "foo", + "1.2_food", + "1.2_head ", + //" 1.2_head", // hrm, this parses, which is not ideal.. pg_t's fault? 
+ "1.2_temp", + "1.2_HEAD", + "1.xS3_HEAD", + "1.2s_HEAD", + "1.2sfoo_HEAD", + 0 + }; + coll_t a; + for (int i = 0; ok[i]; ++i) { + cout << "check ok " << ok[i] << std::endl; + ASSERT_TRUE(a.parse(ok[i])); + ASSERT_EQ(string(ok[i]), a.to_str()); + } + for (int i = 0; bad[i]; ++i) { + cout << "check bad " << bad[i] << std::endl; + ASSERT_FALSE(a.parse(bad[i])); + } +} + +TEST(coll_t, temp) { + spg_t pgid; + coll_t foo(pgid); + ASSERT_EQ(foo.to_str(), string("0.0_head")); + + coll_t temp = foo.get_temp(); + ASSERT_EQ(temp.to_str(), string("0.0_TEMP")); + + spg_t pgid2; + ASSERT_TRUE(temp.is_temp()); + ASSERT_TRUE(temp.is_temp(&pgid2)); + ASSERT_EQ(pgid, pgid2); +} + +TEST(coll_t, assigment) { + spg_t pgid; + coll_t right(pgid); + ASSERT_EQ(right.to_str(), string("0.0_head")); + + coll_t left, middle; + + ASSERT_EQ(left.to_str(), string("meta")); + ASSERT_EQ(middle.to_str(), string("meta")); + + left = middle = right; + + ASSERT_EQ(left.to_str(), string("0.0_head")); + ASSERT_EQ(middle.to_str(), string("0.0_head")); + + ASSERT_NE(middle.c_str(), right.c_str()); + ASSERT_NE(left.c_str(), middle.c_str()); +} + +TEST(hobject_t, parse) { + const char *v[] = { + "MIN", + "MAX", + "-1:60c2fa6d:::inc_osdmap.1:0", + "-1:60c2fa6d:::inc_osdmap.1:333", + "0:00000000::::head", + "1:00000000:nspace:key:obj:head", + "-40:00000000:nspace::obj:head", + "20:00000000::key:obj:head", + "20:00000000:::o%fdj:head", + "20:00000000:::o%02fdj:head", + "20:00000000:::_zero_%00_:head", + NULL + }; + + for (unsigned i=0; v[i]; ++i) { + hobject_t o; + bool b = o.parse(v[i]); + if (!b) { + cout << "failed to parse " << v[i] << std::endl; + ASSERT_TRUE(false); + } + string s = stringify(o); + if (s != v[i]) { + cout << v[i] << " -> " << o << " -> " << s << std::endl; + ASSERT_EQ(s, string(v[i])); + } + } +} + +TEST(ghobject_t, cmp) { + ghobject_t min; + ghobject_t sep; + sep.set_shard(shard_id_t(1)); + sep.hobj.pool = -1; + cout << min << " < " << sep << std::endl; + ASSERT_TRUE(min < sep); + + sep.set_shard(shard_id_t::NO_SHARD); + cout << "sep shard " << sep.shard_id << std::endl; + ghobject_t o(hobject_t(object_t(), string(), CEPH_NOSNAP, 0x42, + 1, string())); + cout << "o " << o << std::endl; + ASSERT_TRUE(o > sep); +} + +TEST(ghobject_t, parse) { + const char *v[] = { + "GHMIN", + "GHMAX", + "13#0:00000000::::head#", + "13#0:00000000::::head#deadbeef", + "#-1:60c2fa6d:::inc_osdmap.1:333#deadbeef", + "#-1:60c2fa6d:::inc%02osdmap.1:333#deadbeef", + "#-1:60c2fa6d:::inc_osdmap.1:333#", + "1#MIN#deadbeefff", + "1#MAX#", + "#MAX#123", + "#-40:00000000:nspace::obj:head#", + NULL + }; + + for (unsigned i=0; v[i]; ++i) { + ghobject_t o; + bool b = o.parse(v[i]); + if (!b) { + cout << "failed to parse " << v[i] << std::endl; + ASSERT_TRUE(false); + } + string s = stringify(o); + if (s != v[i]) { + cout << v[i] << " -> " << o << " -> " << s << std::endl; + ASSERT_EQ(s, string(v[i])); + } + } +} + +TEST(pool_opts_t, invalid_opt) { + EXPECT_FALSE(pool_opts_t::is_opt_name("INVALID_OPT")); + PrCtl unset_dumpable; + EXPECT_DEATH(pool_opts_t::get_opt_desc("INVALID_OPT"), ""); +} + +TEST(pool_opts_t, scrub_min_interval) { + EXPECT_TRUE(pool_opts_t::is_opt_name("scrub_min_interval")); + EXPECT_EQ(pool_opts_t::get_opt_desc("scrub_min_interval"), + pool_opts_t::opt_desc_t(pool_opts_t::SCRUB_MIN_INTERVAL, + pool_opts_t::DOUBLE)); + + pool_opts_t opts; + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MIN_INTERVAL)); + { + PrCtl unset_dumpable; + EXPECT_DEATH(opts.get(pool_opts_t::SCRUB_MIN_INTERVAL), ""); + } + double val; + 
EXPECT_FALSE(opts.get(pool_opts_t::SCRUB_MIN_INTERVAL, &val)); + opts.set(pool_opts_t::SCRUB_MIN_INTERVAL, static_cast<double>(2015)); + EXPECT_TRUE(opts.get(pool_opts_t::SCRUB_MIN_INTERVAL, &val)); + EXPECT_EQ(val, 2015); + opts.unset(pool_opts_t::SCRUB_MIN_INTERVAL); + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MIN_INTERVAL)); +} + +TEST(pool_opts_t, scrub_max_interval) { + EXPECT_TRUE(pool_opts_t::is_opt_name("scrub_max_interval")); + EXPECT_EQ(pool_opts_t::get_opt_desc("scrub_max_interval"), + pool_opts_t::opt_desc_t(pool_opts_t::SCRUB_MAX_INTERVAL, + pool_opts_t::DOUBLE)); + + pool_opts_t opts; + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MAX_INTERVAL)); + { + PrCtl unset_dumpable; + EXPECT_DEATH(opts.get(pool_opts_t::SCRUB_MAX_INTERVAL), ""); + } + double val; + EXPECT_FALSE(opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &val)); + opts.set(pool_opts_t::SCRUB_MAX_INTERVAL, static_cast<double>(2015)); + EXPECT_TRUE(opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &val)); + EXPECT_EQ(val, 2015); + opts.unset(pool_opts_t::SCRUB_MAX_INTERVAL); + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MAX_INTERVAL)); +} + +TEST(pool_opts_t, deep_scrub_interval) { + EXPECT_TRUE(pool_opts_t::is_opt_name("deep_scrub_interval")); + EXPECT_EQ(pool_opts_t::get_opt_desc("deep_scrub_interval"), + pool_opts_t::opt_desc_t(pool_opts_t::DEEP_SCRUB_INTERVAL, + pool_opts_t::DOUBLE)); + + pool_opts_t opts; + EXPECT_FALSE(opts.is_set(pool_opts_t::DEEP_SCRUB_INTERVAL)); + { + PrCtl unset_dumpable; + EXPECT_DEATH(opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL), ""); + } + double val; + EXPECT_FALSE(opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &val)); + opts.set(pool_opts_t::DEEP_SCRUB_INTERVAL, static_cast<double>(2015)); + EXPECT_TRUE(opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &val)); + EXPECT_EQ(val, 2015); + opts.unset(pool_opts_t::DEEP_SCRUB_INTERVAL); + EXPECT_FALSE(opts.is_set(pool_opts_t::DEEP_SCRUB_INTERVAL)); +} + +struct RequiredPredicate : IsPGRecoverablePredicate { + unsigned required_size; + explicit RequiredPredicate(unsigned required_size) : required_size(required_size) {} + bool operator()(const set<pg_shard_t> &have) const override { + return have.size() >= required_size; + } +}; + +using namespace std; +struct MapPredicate { + map<int, pair<PastIntervals::osd_state_t, epoch_t>> states; + explicit MapPredicate( + const vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> &_states) + : states(_states.begin(), _states.end()) {} + PastIntervals::osd_state_t operator()(epoch_t start, int osd, epoch_t *lost_at) { + auto val = states.at(osd); + if (lost_at) + *lost_at = val.second; + return val.first; + } +}; + +using sit = shard_id_t; +using PI = PastIntervals; +using pst = pg_shard_t; +using ival = PastIntervals::pg_interval_t; +using ivallst = std::list<ival>; +const int N = 0x7fffffff /* CRUSH_ITEM_NONE, can't import crush.h here */; + +struct PITest : ::testing::Test { + PITest() {} + void run( + bool ec_pool, + ivallst intervals, + epoch_t last_epoch_started, + unsigned min_to_peer, + vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> osd_states, + vector<int> up, + vector<int> acting, + set<pg_shard_t> probe, + set<int> down, + map<int, epoch_t> blocked_by, + bool pg_down) { + RequiredPredicate rec_pred(min_to_peer); + MapPredicate map_pred(osd_states); + + PI::PriorSet correct( + ec_pool, + probe, + down, + blocked_by, + pg_down, + new RequiredPredicate(rec_pred)); + + PastIntervals compact; + for (auto &&i: intervals) { + compact.add_interval(ec_pool, i); + } + PI::PriorSet compact_ps = 
compact.get_prior_set( + ec_pool, + last_epoch_started, + new RequiredPredicate(rec_pred), + map_pred, + up, + acting, + nullptr); + ASSERT_EQ(correct, compact_ps); + } +}; + +TEST_F(PITest, past_intervals_rep) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{ 2}, { 2}, 31, 35, false, 2, 2} + , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::DOWN , 0)) + }, + /* acting */ {0, 1 }, + /* up */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_ec) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::DOWN , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* acting */ {N, 1, 2}, + /* up */ {N, 1, 2}, + /* probe */ {pst(1, sit(1)), pst(2, sit(2))}, + /* down */ {0}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_rep_down) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{ 2}, { 2}, 31, 35, true, 2, 2} + , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::DOWN , 0)) + }, + /* acting */ {0, 1 }, + /* up */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {{2, 0}}, + /* pg_down */ true); +} + +TEST_F(PITest, past_intervals_ec_down) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + , ival{{N, N, 2}, {N, N, 2}, 31, 35, false, 2, 2} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::DOWN , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2))}, + /* down */ {1}, + /* blocked_by */ {{1, 0}}, + /* pg_down */ true); +} + +TEST_F(PITest, past_intervals_rep_no_subsets) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 2}, {0, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{0, 1 }, {0, 1 }, 31, 35, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::DOWN , 0)) + }, + /* acting */ {0, 1 }, + /* up */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_ec_no_subsets) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, N, 2}, {0, N, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + , ival{{0, 1, N}, {0, 1, N}, 31, 35, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::DOWN , 0)) + , make_pair(2, 
make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2))}, + /* down */ {1}, + /* blocked_by */ {{1, 0}}, + /* pg_down */ true); +} + +TEST_F(PITest, past_intervals_ec_no_subsets2) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{N, 1, 2}, {N, 1, 2}, 10, 20, true, 0, 0} + , ival{{0, N, 2}, {0, N, 2}, 21, 30, true, 1, 1} + , ival{{0, 3, N}, {0, 3, N}, 31, 35, true, 0, 0} + }, + /* les */ 31, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::DOWN , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + , make_pair(3, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2)), pst(3, sit(1))}, + /* down */ {1}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_rep_lost) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{ 2}, { 2}, 31, 35, true, 2, 2} + , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::LOST , 55)) + }, + /* acting */ {0, 1 }, + /* up */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_ec_lost) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, N, 2}, {0, N, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + , ival{{0, 1, N}, {0, 1, N}, 31, 35, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::LOST , 36)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2))}, + /* down */ {1}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +void ci_ref_test( + object_manifest_t l, + object_manifest_t to_remove, + object_manifest_t g, + object_ref_delta_t expected_delta) +{ + { + object_ref_delta_t delta; + to_remove.calc_refs_to_drop_on_removal( + &l, + &g, + delta); + ASSERT_EQ( + expected_delta, + delta); + } + + // calc_refs_to_drop specifically handles nullptr identically to empty + // chunk_map + if (l.chunk_map.empty() || g.chunk_map.empty()) { + object_ref_delta_t delta; + to_remove.calc_refs_to_drop_on_removal( + l.chunk_map.empty() ? nullptr : &l, + g.chunk_map.empty() ? 
nullptr : &g, + delta); + ASSERT_EQ( + expected_delta, + delta); + } +} + +void ci_ref_test_on_modify( + object_manifest_t l, + object_manifest_t to_remove, + ObjectCleanRegions clean_regions, + object_ref_delta_t expected_delta) +{ + { + object_ref_delta_t delta; + to_remove.calc_refs_to_drop_on_modify( + &l, + clean_regions, + delta); + ASSERT_EQ( + expected_delta, + delta); + } +} + +void ci_ref_test_inc_on_set( + object_manifest_t l, + object_manifest_t added_set, + object_manifest_t g, + object_ref_delta_t expected_delta) +{ + { + object_ref_delta_t delta; + added_set.calc_refs_to_inc_on_set( + &l, + &g, + delta); + ASSERT_EQ( + expected_delta, + delta); + } +} + +hobject_t mk_hobject(string name) +{ + return hobject_t( + std::move(name), + string(), + CEPH_NOSNAP, + 0x42, + 1, + string()); +} + +object_manifest_t mk_manifest( + std::map<uint64_t, std::tuple<uint64_t, uint64_t, string>> m) +{ + object_manifest_t ret; + ret.type = object_manifest_t::TYPE_CHUNKED; + for (auto &[offset, tgt] : m) { + auto &[tgt_off, length, name] = tgt; + auto &ci = ret.chunk_map[offset]; + ci.offset = tgt_off; + ci.length = length; + ci.oid = mk_hobject(name); + } + return ret; +} + +object_ref_delta_t mk_delta(std::map<string, int> _m) { + std::map<hobject_t, int> m; + for (auto &[name, delta] : _m) { + m.insert( + std::make_pair( + mk_hobject(name), + delta)); + } + return object_ref_delta_t(std::move(m)); +} + +TEST(chunk_info_test, calc_refs_to_drop) { + ci_ref_test( + mk_manifest({}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({}), + mk_delta({{"foo", -1}})); + +} + + +TEST(chunk_info_test, calc_refs_to_drop_match) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_delta({})); + +} + +TEST(chunk_info_test, calc_refs_to_drop_head_match) { + ci_ref_test( + mk_manifest({}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_delta({})); + +} + +TEST(chunk_info_test, calc_refs_to_drop_tail_match) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({}), + mk_delta({})); + +} + +TEST(chunk_info_test, calc_refs_to_drop_second_reference) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}, {4<<10, {0, 1<<10, "foo"}}}), + mk_manifest({}), + mk_delta({{"foo", -1}})); + +} + +TEST(chunk_info_test, calc_refs_offsets_dont_match) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{512, {0, 1024, "foo"}}, {(4<<10) + 512, {0, 1<<10, "foo"}}}), + mk_manifest({}), + mk_delta({{"foo", -2}})); + +} + +TEST(chunk_info_test, calc_refs_g_l_match) { + ci_ref_test( + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}, {4096, {0, 1024, "bar"}}}), + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_delta({{"foo", -2}, {"bar", -1}})); + +} + +TEST(chunk_info_test, calc_refs_g_l_match_no_this) { + ci_ref_test( + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "bar"}}}), + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_delta({{"foo", -1}, {"bar", -1}})); + +} + +TEST(chunk_info_test, calc_refs_modify_mismatch) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 1024); + clean_regions.mark_data_region_dirty(512, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "bar"}}, {512, {2048, 1024, "ttt"}}}), + 
clean_regions, + mk_delta({{"bar", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 1024); + clean_regions.mark_data_region_dirty(512, 1024); + clean_regions.mark_data_region_dirty(4096, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 1024, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_dirty_overlap) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 4096); + ci_ref_test_on_modify( + mk_manifest({}), + mk_manifest({{0, {0, 256, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"foo", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_dirty_overlap2) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 1024); + clean_regions.mark_data_region_dirty(3584, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 256, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_dirty_overlap3) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 4096); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 256, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_clone_overlap) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 1024); + clean_regions.mark_data_region_dirty(3584, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 256, "bar"}}, {256, {2048, 1024, "foo"}}, {3584, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"foo", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_no_snap) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 1024); + clean_regions.mark_data_region_dirty(512, 1024); + ci_ref_test_on_modify( + mk_manifest({}), + mk_manifest({{0, {0, 1024, "bar"}}, {512, {2048, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_inc) { + ci_ref_test_inc_on_set( + mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{1024, {0, 1024, "bar"}}}), + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_delta({{"bar", 1}})); +} + +TEST(chunk_info_test, calc_refs_inc2) { + ci_ref_test_inc_on_set( + mk_manifest({{512, {0, 1024, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{1024, {0, 1024, "bar"}}, {4096, {0, 1024, "bbb"}}}), + mk_manifest({{512, {0, 1024, "foo"}}}), + mk_delta({{"bar", 1}, {"bbb", 1}})); +} + +TEST(chunk_info_test, calc_refs_inc_no_l) { + ci_ref_test_inc_on_set( + mk_manifest({}), + mk_manifest({{1024, {0, 1024, "bar"}}, {4096, {0, 1024, "bbb"}}}), + 
mk_manifest({{512, {0, 1024, "foo"}}}),
+ mk_delta({{"bar", 1}, {"bbb", 1}}));
+}
+
+TEST(chunk_info_test, calc_refs_inc_no_g) {
+ ci_ref_test_inc_on_set(
+ mk_manifest({{512, {0, 1024, "aaa"}}, {4096, {0, 1024, "foo"}}}),
+ mk_manifest({{1024, {0, 1024, "bar"}}, {4096, {0, 1024, "foo"}}}),
+ mk_manifest({}),
+ mk_delta({{"bar", 1}}));
+}
+
+TEST(chunk_info_test, calc_refs_inc_match_g_l) {
+ ci_ref_test_inc_on_set(
+ mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}),
+ mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}),
+ mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}),
+ mk_delta({{"aaa", -1}, {"foo", -1}}));
+}
+
+TEST(chunk_info_test, calc_refs_inc_match) {
+ ci_ref_test_inc_on_set(
+ mk_manifest({{256, {0, 256, "bbb"}}, {4096, {0, 1024, "foo"}}}),
+ mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}),
+ mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "ccc"}}}),
+ mk_delta({}));
+}
+
+/*
+ * Local Variables:
+ * compile-command: "cd ../.. ;
+ * make unittest_osd_types ;
+ * ./unittest_osd_types # --gtest_filter=pg_missing_t.constructor
+ * "
+ * End:
+ */