summaryrefslogtreecommitdiffstats
path: root/src/test/osd
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/test/osd/CMakeLists.txt116
-rw-r--r--src/test/osd/Object.cc205
-rw-r--r--src/test/osd/Object.h522
-rw-r--r--src/test/osd/RadosModel.cc36
-rw-r--r--src/test/osd/RadosModel.h3121
-rw-r--r--src/test/osd/TestECBackend.cc60
-rw-r--r--src/test/osd/TestMClockClientQueue.cc184
-rw-r--r--src/test/osd/TestMClockOpClassQueue.cc184
-rw-r--r--src/test/osd/TestOSDMap.cc1575
-rw-r--r--src/test/osd/TestOSDScrub.cc147
-rw-r--r--src/test/osd/TestOpStat.cc61
-rw-r--r--src/test/osd/TestOpStat.h53
-rw-r--r--src/test/osd/TestPGLog.cc3243
-rw-r--r--src/test/osd/TestRados.cc660
-rw-r--r--src/test/osd/hitset.cc197
-rw-r--r--src/test/osd/osdcap.cc1353
-rwxr-xr-xsrc/test/osd/safe-to-destroy.sh99
-rw-r--r--src/test/osd/test_ec_transaction.cc124
-rw-r--r--src/test/osd/test_extent_cache.cc280
-rw-r--r--src/test/osd/test_pg_transaction.cc129
-rw-r--r--src/test/osd/types.cc1830
-rw-r--r--src/test/osdc/CMakeLists.txt13
-rw-r--r--src/test/osdc/FakeWriteback.cc94
-rw-r--r--src/test/osdc/FakeWriteback.h48
-rw-r--r--src/test/osdc/MemWriteback.cc168
-rw-r--r--src/test/osdc/MemWriteback.h53
-rw-r--r--src/test/osdc/object_cacher_stress.cc426
27 files changed, 14981 insertions, 0 deletions
diff --git a/src/test/osd/CMakeLists.txt b/src/test/osd/CMakeLists.txt
new file mode 100644
index 00000000..64639d5e
--- /dev/null
+++ b/src/test/osd/CMakeLists.txt
@@ -0,0 +1,116 @@
+# test_rados: standalone test binary, installed into ${CMAKE_INSTALL_BINDIR}
+add_executable(ceph_test_rados
+  TestRados.cc
+  TestOpStat.cc
+  Object.cc
+  RadosModel.cc
+  )
+# ${CMAKE_DL_LIBS} is listed once, after the libraries that may depend on it.
+target_link_libraries(ceph_test_rados
+  librados
+  global
+  ${BLKID_LIBRARIES}
+  ${EXTRALIBS}
+  ${CMAKE_DL_LIBS}
+  )
+install(TARGETS
+  ceph_test_rados
+  DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# scripts
+add_ceph_test(safe-to-destroy.sh ${CMAKE_CURRENT_SOURCE_DIR}/safe-to-destroy.sh)
+
+# unittest_osdmap
+add_executable(unittest_osdmap
+  TestOSDMap.cc
+  )
+add_ceph_unittest(unittest_osdmap)
+target_link_libraries(unittest_osdmap global ${BLKID_LIBRARIES})
+
+# unittest_osd_types
+add_executable(unittest_osd_types
+  types.cc
+  )
+add_ceph_unittest(unittest_osd_types)
+target_link_libraries(unittest_osd_types global)
+
+# unittest_ecbackend
+add_executable(unittest_ecbackend
+  TestECBackend.cc
+  )
+add_ceph_unittest(unittest_ecbackend)
+target_link_libraries(unittest_ecbackend osd global)
+
+# unittest_osdscrub (uses the shared unit-main object library for main())
+add_executable(unittest_osdscrub
+  TestOSDScrub.cc
+  $<TARGET_OBJECTS:unit-main>
+  )
+add_ceph_unittest(unittest_osdscrub)
+target_link_libraries(unittest_osdscrub osd os global ${CMAKE_DL_LIBS} mon ${BLKID_LIBRARIES})
+
+# unittest_pglog (uses the unit-main and store_test_fixture object libraries)
+add_executable(unittest_pglog
+  TestPGLog.cc
+  $<TARGET_OBJECTS:unit-main>
+  $<TARGET_OBJECTS:store_test_fixture>
+  )
+add_ceph_unittest(unittest_pglog)
+target_link_libraries(unittest_pglog osd os global ${CMAKE_DL_LIBS} ${BLKID_LIBRARIES})
+
+# unittest_hitset
+add_executable(unittest_hitset
+  hitset.cc
+  )
+add_ceph_unittest(unittest_hitset)
+target_link_libraries(unittest_hitset osd global ${BLKID_LIBRARIES})
+
+# unittest_osd_osdcap
+add_executable(unittest_osd_osdcap
+  osdcap.cc
+  )
+# osdcap.cc is built without var-tracking-assignments when the compiler has that option.
+if(HAS_VTA)
+  set_source_files_properties(osdcap.cc PROPERTIES
+    COMPILE_FLAGS -fno-var-tracking-assignments)
+endif()
+add_ceph_unittest(unittest_osd_osdcap)
+target_link_libraries(unittest_osd_osdcap osd global ${BLKID_LIBRARIES})
+
+# unittest_extent_cache
+add_executable(unittest_extent_cache
+  test_extent_cache.cc
+  )
+add_ceph_unittest(unittest_extent_cache)
+target_link_libraries(unittest_extent_cache osd global ${BLKID_LIBRARIES})
+
+# unittest_pg_transaction
+add_executable(unittest_pg_transaction
+  test_pg_transaction.cc
+  )
+add_ceph_unittest(unittest_pg_transaction)
+target_link_libraries(unittest_pg_transaction osd global ${BLKID_LIBRARIES})
+
+# unittest_ec_transaction
+add_executable(unittest_ec_transaction
+  test_ec_transaction.cc
+  )
+add_ceph_unittest(unittest_ec_transaction)
+target_link_libraries(unittest_ec_transaction osd global ${BLKID_LIBRARIES})
+
+# unittest_mclock_op_class_queue
+add_executable(unittest_mclock_op_class_queue
+  TestMClockOpClassQueue.cc
+  )
+add_ceph_unittest(unittest_mclock_op_class_queue)
+target_link_libraries(unittest_mclock_op_class_queue
+  global osd dmclock os
+  )
+
+# unittest_mclock_client_queue
+add_executable(unittest_mclock_client_queue
+  TestMClockClientQueue.cc
+  )
+add_ceph_unittest(unittest_mclock_client_queue)
+target_link_libraries(unittest_mclock_client_queue
+  global osd dmclock os
+  )
diff --git a/src/test/osd/Object.cc b/src/test/osd/Object.cc
new file mode 100644
index 00000000..12f946f5
--- /dev/null
+++ b/src/test/osd/Object.cc
@@ -0,0 +1,205 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "include/interval_set.h"
+#include "include/buffer.h"
+#include <list>
+#include <map>
+#include <set>
+#include <iostream>
+
+#include "Object.h"
+
+void ContDesc::encode(bufferlist &bl) const // Serialize every ContDesc field into bl.
+{
+ ENCODE_START(1, 1, bl); // presumably (struct version, compat version) per Ceph's encoding macros -- confirm in include/encoding.h
+ encode(objnum, bl); // field order below must stay in step with ContDesc::decode
+ encode(cursnap, bl);
+ encode(seqnum, bl);
+ encode(prefix, bl);
+ encode(oid, bl);
+ ENCODE_FINISH(bl);
+}
+
+void ContDesc::decode(bufferlist::const_iterator &bl) // Read the fields back from bl in the order ContDesc::encode wrote them.
+{
+ DECODE_START(1, bl); // presumably the newest struct version this code understands -- confirm in include/encoding.h
+ decode(objnum, bl); // field order below must stay in step with ContDesc::encode
+ decode(cursnap, bl);
+ decode(seqnum, bl);
+ decode(prefix, bl);
+ decode(oid, bl);
+ DECODE_FINISH(bl);
+}
+
+// Print a short summary of a ContDesc. Only objnum, cursnap and seqnum
+// are written; prefix and oid do not appear in the output.
+std::ostream &operator<<(std::ostream &out, const ContDesc &rhs)
+{
+  out << "(ObjNum " << rhs.objnum;
+  out << " snap " << rhs.cursnap;
+  out << " seq_num " << rhs.seqnum;
+  out << ")";
+  return out;
+}
+
+// Cut the appended region [off, off + get_append_size(cont)) into
+// back-to-back segments and record each one as out[start] = length.
+// Segment lengths come from a PRNG seeded with cont.seqnum, so the same
+// descriptor always yields the same map. The last segment is truncated
+// to end exactly at the limit.
+void AppendGenerator::get_ranges_map(
+  const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
+  RandWrap rand(cont.seqnum);
+  // Width of the random part of a segment. When min and max are equal
+  // this is 0 and "rand() % span" would be a modulo by zero, so that
+  // case uses a fixed random part of 0 instead.
+  const uint64_t span = max_append_size - min_append_size;
+  uint64_t pos = off;
+  const uint64_t limit = off + get_append_size(cont);
+  while (pos < limit) {
+    const uint64_t jitter = span ? rand() % span : 0;
+    uint64_t segment_length = round_up(jitter, alignment) + min_append_size;
+    ceph_assert(segment_length >= min_append_size);
+    if (segment_length + pos > limit) {
+      segment_length = limit - pos;
+    }
+    if (alignment)
+      ceph_assert(segment_length % alignment == 0);
+    out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length));
+    pos += segment_length;
+  }
+}
+
+// Walk [0, get_length(cont)) in pseudo-random strides and record every
+// second stride (the first one is skipped) as out[start] = length.
+// The PRNG is seeded with cont.seqnum, so the map is reproducible.
+void VarLenGenerator::get_ranges_map(
+  const ContDesc &cont, std::map<uint64_t, uint64_t> &out) {
+  RandWrap rng(cont.seqnum);
+  const uint64_t end = get_length(cont);
+  bool emit = false;
+  for (uint64_t cursor = 0; cursor < end; ) {
+    uint64_t stride =
+      (rng() % (max_stride_size - min_stride_size)) + min_stride_size;
+    ceph_assert(stride < max_stride_size);
+    ceph_assert(stride >= min_stride_size);
+    // clip the last stride to the object length
+    if (stride + cursor > end)
+      stride = end - cursor;
+    if (emit)
+      out.insert(std::pair<uint64_t, uint64_t>(cursor, stride));
+    emit = !emit;
+    cursor += stride;
+  }
+}
+
+void ObjectDesc::iterator::adjust_stack() { // Re-pick the layer that supplies the byte at pos and recompute cur_valid_till.
+ while (!stack.empty() && pos >= stack.front().second.next) { // pos reached an offset saved on the stack: go back to the layer saved there
+ ceph_assert(pos == stack.front().second.next); // callers are expected to stop exactly on saved boundaries
+ size = stack.front().second.size; // restore the size that was in effect when the entry was pushed
+ current = stack.front().first;
+ stack.pop_front();
+ }
+
+ if (stack.empty()) {
+ cur_valid_till = std::numeric_limits<uint64_t>::max(); // no saved boundary ahead
+ } else {
+ cur_valid_till = stack.front().second.next; // nearest saved boundary caps the current choice
+ }
+
+ while (current != layers.end() && !current->covers(pos)) { // advance through layers until one covers pos
+ uint64_t next = current->next(pos); // offset at which the skipped layer covers again
+ if (next < cur_valid_till) {
+ stack.push_front( // remember to come back to this layer at next, with the current size
+ make_pair(
+ current,
+ StackState{next, size}
+ )
+ );
+ cur_valid_till = next;
+ }
+
+ ++current;
+ }
+
+ if (current == layers.end()) {
+ size = 0; // no layer covers pos; operator* and end() then treat every position as past the end
+ } else {
+ current->iter.seek(pos); // line the layer's byte iterator up with pos
+ size = std::min(size, current->get_size()); // size only ever shrinks while moving through layers
+ cur_valid_till = std::min(
+ current->valid_till(pos),
+ cur_valid_till);
+ }
+}
+
+// Descriptor of the layer at the front of the list, i.e. the one pushed
+// last. The layer list must not be empty.
+const ContDesc &ObjectDesc::most_recent() {
+  return layers.front().second;
+}
+
+// Push a new content layer in front of the existing ones. gen is
+// wrapped in a shared_ptr, so the object takes ownership of it.
+void ObjectDesc::update(ContentsGenerator *gen, const ContDesc &next) {
+  std::shared_ptr<ContentsGenerator> owned(gen);
+  layers.push_front(
+    std::pair<std::shared_ptr<ContentsGenerator>, ContDesc>(owned, next));
+}
+
+// Compare a full read against the expected contents.
+// Returns false, after printing a diagnostic to stdout, when a byte
+// differs or when to_check is shorter than the newest layer's length.
+bool ObjectDesc::check(bufferlist &to_check) {
+  uint64_t mismatch_at = 0;
+  iterator expected = begin();
+  const bool matches = expected.check_bl_advance(to_check, &mismatch_at);
+  if (!matches) {
+    std::cout << "incorrect buffer at pos " << mismatch_at << std::endl;
+    return false;
+  }
+
+  // length is taken from the front (most recently pushed) layer
+  const uint64_t wanted = layers.front().first->get_length(layers.front().second);
+  if (to_check.length() >= wanted)
+    return true;
+
+  std::cout << "only read " << to_check.length()
+	    << " out of size " << wanted << std::endl;
+  return false;
+}
+
+// Verify the result of a sparse read.
+//
+// extents maps object offset -> length for each returned data extent;
+// to_check holds the data of those extents concatenated in map order.
+// Gaps between extents, and the tail up to the newest layer's length,
+// must be all zeros in the expected contents.
+//
+// Returns true when everything matches. On the first mismatch or on a
+// truncated extent a diagnostic is printed to stdout and false is
+// returned.
+bool ObjectDesc::check_sparse(const std::map<uint64_t, uint64_t>& extents,
+			      bufferlist &to_check)
+{
+  uint64_t off = 0;	// read cursor into to_check
+  uint64_t pos = 0;	// object offset verified so far
+  auto objiter = begin();
+  for (auto &&extiter : extents) {
+    // verify hole
+    {
+      bufferlist bl;
+      bl.append_zero(extiter.first - pos);
+      uint64_t error_at = 0;
+      if (!objiter.check_bl_advance(bl, &error_at)) {
+	std::cout << "sparse read omitted non-zero data at "
+		  << error_at << std::endl;
+	return false;
+      }
+    }
+
+    ceph_assert(off <= to_check.length());
+    pos = extiter.first;
+    objiter.seek(pos);
+
+    {
+      bufferlist bl;
+      bl.substr_of(
+	to_check,
+	off,
+	std::min(to_check.length() - off, extiter.second));
+      uint64_t error_at = 0;
+      if (!objiter.check_bl_advance(bl, &error_at)) {
+	std::cout << "incorrect buffer at pos " << error_at << std::endl;
+	return false;
+      }
+      // Advance by the bytes actually present, not by the declared
+      // extent length, so a short buffer is detected just below rather
+      // than being accepted.
+      off += bl.length();
+      pos += bl.length();
+    }
+
+    if (pos < extiter.first + extiter.second) {
+      std::cout << "reached end of iterator first" << std::endl;
+      return false;
+    }
+  }
+
+  // final hole
+  bufferlist bl;
+  const uint64_t size = layers.begin()->first->get_length(layers.begin()->second);
+  // extents may reach past the expected length; avoid unsigned underflow
+  if (pos < size)
+    bl.append_zero(size - pos);
+  uint64_t error_at = 0;
+  if (!objiter.check_bl_advance(bl, &error_at)) {
+    std::cout << "sparse read omitted non-zero data at "
+	      << error_at << std::endl;
+    return false;
+  }
+  return true;
+}
diff --git a/src/test/osd/Object.h b/src/test/osd/Object.h
new file mode 100644
index 00000000..b0ff84d1
--- /dev/null
+++ b/src/test/osd/Object.h
@@ -0,0 +1,522 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "include/interval_set.h"
+#include "include/buffer.h"
+#include "include/encoding.h"
+#include <list>
+#include <map>
+#include <set>
+#include <random>
+
+#ifndef OBJECT_H
+#define OBJECT_H
+
+// Describes one generation of an object's generated contents.
+// seqnum is the value the generators in this file feed to their PRNG
+// as seed, so two descriptors with the same seqnum produce the same
+// byte stream from the same generator.
+class ContDesc {
+public:
+  int objnum;
+  int cursnap;
+  unsigned seqnum;
+  std::string prefix;
+  std::string oid;
+
+  // All numeric fields zero, strings empty.
+  ContDesc() :
+    objnum(0), cursnap(0),
+    seqnum(0), prefix("") {}
+
+  // oid is left empty; callers assign it separately if needed.
+  ContDesc(int objnum,
+	   int cursnap,
+	   unsigned seqnum,
+	   const std::string &prefix) :
+    objnum(objnum), cursnap(cursnap),
+    seqnum(seqnum), prefix(prefix) {}
+
+  // Field-wise equality over all five members. const-qualified so that
+  // const descriptors can be compared.
+  bool operator==(const ContDesc &rhs) const {
+    return (rhs.objnum == objnum &&
+	    rhs.cursnap == cursnap &&
+	    rhs.seqnum == seqnum &&
+	    rhs.prefix == prefix &&
+	    rhs.oid == oid);
+  }
+
+  // Orders by seqnum only; descriptors that differ in other fields but
+  // share a seqnum compare as equivalent under this ordering.
+  bool operator<(const ContDesc &rhs) const {
+    return seqnum < rhs.seqnum;
+  }
+
+  bool operator!=(const ContDesc &rhs) const {
+    return !((*this) == rhs);
+  }
+
+  // Serialization; both walk the fields in the same order.
+  void encode(bufferlist &bl) const;
+  void decode(bufferlist::const_iterator &bp);
+};
+WRITE_CLASS_ENCODER(ContDesc)
+
+std::ostream &operator<<(std::ostream &out, const ContDesc &rhs);
+
+// Plain record for one chunk entry (see ObjectDesc::chunk_info).
+// offset and length default to 0 so a default-constructed entry never
+// carries indeterminate values.
+// NOTE(review): presumably offset/length are byte positions and oid
+// names the object holding the chunk -- confirm against the callers.
+class ChunkDesc {
+public:
+  uint32_t offset = 0;
+  uint32_t length = 0;
+  std::string oid;
+};
+
+// Abstract producer of object contents. A concrete generator maps a
+// ContDesc to a total length, a set of written ranges, and a byte
+// stream reachable through iterator_impl.
+class ContentsGenerator {
+public:
+
+  // Polymorphic byte cursor over the contents of one ContDesc.
+  class iterator_impl {
+  public:
+    virtual char operator*() = 0;
+    virtual iterator_impl &operator++() = 0;
+    virtual void seek(uint64_t pos) = 0;
+    virtual bool end() = 0;
+    virtual ContDesc get_cont() const = 0;
+    virtual uint64_t get_pos() const = 0;
+
+    // Return the next s bytes as a bufferlist and step past them.
+    virtual bufferlist gen_bl_advance(uint64_t s) {
+      bufferptr chunk = buffer::create(s);
+      uint64_t filled = 0;
+      while (filled < s) {
+	chunk[filled] = **this;
+	++filled;
+	++(*this);
+      }
+      bufferlist out;
+      out.push_back(chunk);
+      return out;
+    }
+
+    // Compare bl byte-by-byte with the stream, advancing while bytes
+    // match. On the first mismatch, store its offset within bl in *off
+    // (when off is non-null) and return false without advancing further.
+    virtual bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) {
+      uint64_t checked = 0;
+      bufferlist::iterator cursor = bl.begin();
+      while (!cursor.end()) {
+	if (*cursor != **this) {
+	  if (off)
+	    *off = checked;
+	  return false;
+	}
+	++cursor;
+	++checked;
+	++(*this);
+      }
+      return true;
+    }
+
+    virtual ~iterator_impl() {}
+  };
+
+  // Value-type handle around an iterator_impl. The impl is obtained
+  // from and handed back to the owning generator (dup_iterator_impl on
+  // copy, put_iterator_impl on destruction).
+  class iterator {
+  public:
+    ContentsGenerator *parent;
+    iterator_impl *impl;
+
+    iterator(ContentsGenerator *parent, iterator_impl *impl) :
+      parent(parent), impl(impl) {}
+    iterator(const iterator &rhs) :
+      parent(rhs.parent),
+      impl(rhs.parent->dup_iterator_impl(rhs.impl)) {}
+    ~iterator() { parent->put_iterator_impl(impl); }
+
+    // copy-and-swap: the old impl is released when tmp goes out of scope
+    iterator &operator=(const iterator &rhs) {
+      iterator tmp(rhs);
+      swap(tmp);
+      return *this;
+    }
+    void swap(iterator &other) {
+      std::swap(parent, other.parent);
+      std::swap(impl, other.impl);
+    }
+
+    char operator *() { return **impl; }
+    iterator &operator++() {
+      ++(*impl);
+      return *this;
+    }
+    void seek(uint64_t pos) { impl->seek(pos); }
+    bool end() { return impl->end(); }
+
+    bufferlist gen_bl_advance(uint64_t s) {
+      return impl->gen_bl_advance(s);
+    }
+    bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) {
+      return impl->check_bl_advance(bl, off);
+    }
+  };
+
+  // Total length of the contents described by in.
+  virtual uint64_t get_length(const ContDesc &in) = 0;
+
+  // Fill out with offset -> length entries for the ranges written.
+  virtual void get_ranges_map(
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out) = 0;
+
+  // Same ranges as get_ranges_map, inserted into an interval_set.
+  void get_ranges(const ContDesc &cont, interval_set<uint64_t> &out) {
+    std::map<uint64_t, uint64_t> ranges;
+    get_ranges_map(cont, ranges);
+    for (const auto &range : ranges) {
+      out.insert(range.first, range.second);
+    }
+  }
+
+
+  // iterator_impl life cycle: create, clone, release.
+  virtual iterator_impl *get_iterator_impl(const ContDesc &in) = 0;
+
+  virtual iterator_impl *dup_iterator_impl(const iterator_impl *in) = 0;
+
+  virtual void put_iterator_impl(iterator_impl *in) = 0;
+
+  virtual ~ContentsGenerator() {}
+
+  iterator get_iterator(const ContDesc &in) {
+    return iterator(this, get_iterator_impl(in));
+  }
+};
+
+// ContentsGenerator whose byte stream is the output of a
+// std::minstd_rand0 engine seeded with ContDesc::seqnum. Length and
+// ranges are left to subclasses.
+class RandGenerator : public ContentsGenerator {
+public:
+  using RandWrap = std::minstd_rand0;
+
+  class iterator_impl : public ContentsGenerator::iterator_impl {
+  public:
+    uint64_t pos;
+    ContDesc cont;
+    RandWrap rand;
+    RandGenerator *cont_gen;
+    char current;
+
+    // Starts at position 0 with the first engine output already drawn.
+    iterator_impl(const ContDesc &cont, RandGenerator *cont_gen) :
+      pos(0), cont(cont), rand(cont.seqnum), cont_gen(cont_gen) {
+      current = rand();
+    }
+
+    ContDesc get_cont() const override { return cont; }
+    uint64_t get_pos() const override { return pos; }
+
+    char operator*() override { return current; }
+
+    // One engine draw per byte.
+    iterator_impl &operator++() override {
+      pos++;
+      current = rand();
+      return *this;
+    }
+
+    // The engine only runs forward, so seeking backwards restarts from
+    // the seed and replays up to _pos.
+    void seek(uint64_t _pos) override {
+      if (_pos < pos) {
+	iterator_impl restarted(cont, cont_gen);
+	restarted.seek(_pos);
+	*this = restarted;
+      }
+      for (; pos < _pos; ++(*this)) {
+      }
+    }
+
+    bool end() override {
+      return pos >= cont_gen->get_length(cont);
+    }
+  };
+
+  // Impls are plain heap objects: created with new, released with delete.
+  ContentsGenerator::iterator_impl *get_iterator_impl(const ContDesc &in) override {
+    return new iterator_impl(in, this);
+  }
+
+  void put_iterator_impl(ContentsGenerator::iterator_impl *in) override {
+    delete in;
+  }
+
+  // Clone by creating a fresh impl for the same descriptor and seeking
+  // it to the source's position.
+  ContentsGenerator::iterator_impl *dup_iterator_impl(
+    const ContentsGenerator::iterator_impl *in) override {
+    ContentsGenerator::iterator_impl *copy = get_iterator_impl(in->get_cont());
+    copy->seek(in->get_pos());
+    return copy;
+  }
+};
+
+// RandGenerator with a seqnum-dependent object length and a write
+// pattern of alternating skipped / written strides (see get_ranges_map).
+class VarLenGenerator : public RandGenerator {
+  uint64_t max_length;
+  uint64_t min_stride_size;
+  uint64_t max_stride_size;
+public:
+  VarLenGenerator(
+    uint64_t length, uint64_t min_stride_size, uint64_t max_stride_size) :
+    max_length(length),
+    min_stride_size(min_stride_size),
+    max_stride_size(max_stride_size) {}
+  void get_ranges_map(
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override;
+
+  // Length for the given descriptor, derived from a PRNG seeded with
+  // in.seqnum. Returns 0 when max_length is 0; otherwise the result is
+  // at least (max_length - 1)/2 + 1.
+  uint64_t get_length(const ContDesc &in) override {
+    if (max_length == 0)
+      return 0;
+    const uint64_t half = max_length / 2;
+    // max_length == 1 gives half == 0, and "rand() % 0" is undefined.
+    // The only value the formula below can mean in that case is 1.
+    if (half == 0)
+      return max_length;
+    RandWrap rand(in.seqnum);
+    return (rand() % half) + ((max_length - 1)/2) + 1;
+  }
+};
+
+// RandGenerator for attribute values: one contiguous range starting at
+// 0 whose length is bounded by max_len, or by big_max_len for seqnums
+// that are multiples of 4.
+class AttrGenerator : public RandGenerator {
+  uint64_t max_len;
+  uint64_t big_max_len;
+public:
+  AttrGenerator(uint64_t max_len, uint64_t big_max_len)
+    : max_len(max_len), big_max_len(big_max_len) {}
+
+  // The whole value is a single range [0, get_length(cont)).
+  void get_ranges_map(
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override {
+    const uint64_t len = get_length(cont);
+    out.insert(std::pair<uint64_t, uint64_t>(0, len));
+  }
+
+  // Length is drawn modulo the bound chosen from the low two seqnum bits.
+  uint64_t get_length(const ContDesc &in) override {
+    RandWrap rng(in.seqnum);
+    // make some attrs big
+    const uint64_t bound = (in.seqnum & 3) ? max_len : big_max_len;
+    return rng() % bound;
+  }
+
+  // Materialize the full value byte by byte.
+  bufferlist gen_bl(const ContDesc &in) {
+    bufferlist bl;
+    iterator it = get_iterator(in);
+    while (!it.end()) {
+      bl.append(*it);
+      ++it;
+    }
+    ceph_assert(bl.length() < big_max_len);
+    return bl;
+  }
+};
+
+// RandGenerator modelling an append: existing contents occupy [0, off)
+// and a seqnum-dependent amount of data is appended after off, split
+// into segments by get_ranges_map.
+class AppendGenerator : public RandGenerator {
+  uint64_t off;
+  uint64_t alignment;
+  uint64_t min_append_size;
+  uint64_t max_append_size;
+  uint64_t max_append_total;
+
+  // Move in up to the next multiple of by (no-op when by == 0).
+  // Note: a value that is already a multiple of by is still bumped by a
+  // full by, so with by != 0 the result is always > in.
+  uint64_t round_up(uint64_t in, uint64_t by) {
+    if (by)
+      in += (by - (in % by));
+    return in;
+  }
+
+public:
+  // min/max append sizes are stored rounded up to alignment.
+  // max_append_total = max_append_multiple * (final) max_append_size.
+  AppendGenerator(
+    uint64_t off,
+    uint64_t alignment,
+    uint64_t min_append_size,
+    uint64_t _max_append_size,
+    uint64_t max_append_multiple) :
+    off(off), alignment(alignment),
+    min_append_size(round_up(min_append_size, alignment)),
+    max_append_size(round_up(_max_append_size, alignment)) {
+    // Compare the rounded members, not the raw arguments (the parameter
+    // min_append_size shadows the member here): two different raw sizes
+    // can round to the same value, which would leave get_ranges_map
+    // with a zero-width random range. Widen max by one alignment unit.
+    if (max_append_size == this->min_append_size)
+      max_append_size += alignment;
+    max_append_total = max_append_multiple * max_append_size;
+  }
+
+  // Number of bytes appended for this descriptor (seeded by in.seqnum).
+  uint64_t get_append_size(const ContDesc &in) {
+    RandWrap rand(in.seqnum);
+    return round_up(rand() % max_append_total, alignment);
+  }
+  uint64_t get_length(const ContDesc &in) override {
+    return off + get_append_size(in);
+  }
+  void get_ranges_map(
+    const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override;
+};
+
+class ObjectDesc { // Expected state of one object: a list of content layers plus attrs, header and status flags.
+public:
+ ObjectDesc()
+ : exists(false), dirty(false),
+ version(0) {}
+ ObjectDesc(const ContDesc &init, ContentsGenerator *cont_gen) // wraps cont_gen in a shared_ptr, i.e. takes ownership; exists still starts false
+ : exists(false), dirty(false),
+ version(0) {
+ layers.push_front(std::pair<std::shared_ptr<ContentsGenerator>, ContDesc>(std::shared_ptr<ContentsGenerator>(cont_gen), init));
+ }
+
+ class iterator { // forward-only cursor that combines all layers into one expected byte stream
+ public:
+ uint64_t pos; // current object offset
+ uint64_t size; // effective length; positions >= size read as '\0'
+ uint64_t cur_valid_till; // offset at which adjust_stack() must run again
+
+ class ContState { // per-layer state: descriptor, generator, byte iterator and the ranges the layer reports
+ interval_set<uint64_t> ranges;
+ const uint64_t size; // gen->get_length(cont) captured at construction
+
+ public:
+ ContDesc cont;
+ std::shared_ptr<ContentsGenerator> gen;
+ ContentsGenerator::iterator iter;
+
+ ContState(
+ const ContDesc &_cont,
+ std::shared_ptr<ContentsGenerator> _gen,
+ ContentsGenerator::iterator _iter)
+ : size(_gen->get_length(_cont)), cont(_cont), gen(_gen), iter(_iter) {
+ gen->get_ranges(cont, ranges);
+ }
+
+ const interval_set<uint64_t> &get_ranges() {
+ return ranges;
+ }
+
+ uint64_t get_size() { // asks the generator again instead of returning the cached size member
+ return gen->get_length(cont);
+ }
+
+ bool covers(uint64_t pos) { // true if pos is inside one of the ranges, or pos >= size and starts_after(pos) is false
+ return ranges.contains(pos) || (!ranges.starts_after(pos) && pos >= size);
+ }
+
+ uint64_t next(uint64_t pos) { // offset where this layer covers again; only valid while !covers(pos)
+ ceph_assert(!covers(pos));
+ return ranges.starts_after(pos) ? ranges.start_after(pos) : size;
+ }
+
+ uint64_t valid_till(uint64_t pos) { // end of the range containing pos, or uint64 max when pos is covered without being in a range
+ ceph_assert(covers(pos));
+ return ranges.contains(pos) ?
+ ranges.end_after(pos) :
+ std::numeric_limits<uint64_t>::max();
+ }
+ };
+ std::list<ContState> layers; // built from obj.layers in the same order (front = most recently pushed)
+
+ struct StackState { // saved resume point: return to a layer at offset next, restoring size
+ const uint64_t next;
+ const uint64_t size;
+ };
+ std::list<std::pair<std::list<ContState>::iterator, StackState> > stack; // layers to return to; front holds the nearest resume offset
+ std::list<ContState>::iterator current; // layer supplying bytes at pos, or layers.end() if none
+
+ explicit iterator(ObjectDesc &obj) : // obj.layers must be non-empty: obj.layers.begin() is dereferenced below
+ pos(0),
+ size(obj.layers.begin()->first->get_length(obj.layers.begin()->second)),
+ cur_valid_till(0) {
+ for (auto &&i : obj.layers) {
+ layers.push_back({i.second, i.first, i.first->get_iterator(i.second)});
+ }
+ current = layers.begin();
+
+ adjust_stack();
+ }
+
+ void adjust_stack(); // re-selects current and recomputes cur_valid_till for pos
+ iterator &operator++() {
+ ceph_assert(cur_valid_till >= pos);
+ ++pos;
+ if (pos >= cur_valid_till) {
+ adjust_stack();
+ }
+ return *this;
+ }
+
+ char operator*() { // '\0' when no layer covers pos or pos is at/after size
+ if (current == layers.end()) {
+ return '\0';
+ } else {
+ return pos >= size ? '\0' : *(current->iter);
+ }
+ }
+
+ bool end() {
+ return pos >= size;
+ }
+
+ void seek(uint64_t _pos) { // forward only: aborts when _pos is behind the current position
+ if (_pos < pos) {
+ ceph_abort();
+ }
+ while (pos < _pos) {
+ ceph_assert(cur_valid_till >= pos);
+ uint64_t next = std::min(_pos - pos, cur_valid_till - pos); // never step across a layer boundary
+ pos += next;
+
+ if (pos >= cur_valid_till) {
+ ceph_assert(pos == cur_valid_till);
+ adjust_stack();
+ }
+ }
+ ceph_assert(pos == _pos);
+ }
+
+ bufferlist gen_bl_advance(uint64_t s) { // produce the next s expected bytes, one layer-homogeneous piece at a time
+ bufferlist ret;
+ while (s > 0) {
+ ceph_assert(cur_valid_till >= pos);
+ uint64_t next = std::min(s, cur_valid_till - pos);
+ if (current != layers.end() && pos < size) {
+ ret.append(current->iter.gen_bl_advance(next));
+ } else {
+ ret.append_zero(next); // uncovered or past size: expected bytes are zeros
+ }
+
+ pos += next;
+ ceph_assert(next <= s);
+ s -= next;
+
+ if (pos >= cur_valid_till) {
+ ceph_assert(cur_valid_till == pos);
+ adjust_stack();
+ }
+ }
+ return ret;
+ }
+
+ bool check_bl_advance(bufferlist &bl, uint64_t *error_at = nullptr) { // compare bl with the expected stream; false on first mismatch
+ uint64_t off = 0;
+ while (off < bl.length()) {
+ ceph_assert(cur_valid_till >= pos);
+ uint64_t next = std::min(bl.length() - off, cur_valid_till - pos);
+
+ bufferlist to_check;
+ to_check.substr_of(bl, off, next);
+ if (current != layers.end() && pos < size) {
+ if (!current->iter.check_bl_advance(to_check, error_at)) {
+ if (error_at)
+ *error_at += off; // reported offset is relative to the start of bl
+ return false;
+ }
+ } else {
+ uint64_t at = pos;
+ for (auto i = to_check.begin(); !i.end(); ++i, ++at) {
+ if (*i) {
+ if (error_at)
+ *error_at = at; // NOTE(review): this branch reports an absolute object offset, unlike the branch above
+ return false;
+ }
+ }
+ }
+
+ pos += next;
+ off += next;
+ ceph_assert(off <= bl.length());
+
+ if (pos >= cur_valid_till) {
+ ceph_assert(cur_valid_till == pos);
+ adjust_stack();
+ }
+ }
+ ceph_assert(off == bl.length());
+ return true;
+ }
+ };
+
+ iterator begin() {
+ return iterator(*this);
+ }
+
+ bool deleted() {
+ return !exists;
+ }
+
+ bool has_contents() { // true when at least one content layer exists
+ return layers.size();
+ }
+
+ // takes ownership of gen
+ void update(ContentsGenerator *gen, const ContDesc &next);
+ bool check(bufferlist &to_check);
+ bool check_sparse(const std::map<uint64_t, uint64_t>& extends,
+ bufferlist &to_check);
+ const ContDesc &most_recent();
+ ContentsGenerator *most_recent_gen() { // non-owning pointer; layers must be non-empty
+ return layers.begin()->first.get();
+ }
+ std::map<std::string, ContDesc> attrs; // Both omap and xattrs
+ bufferlist header;
+ bool exists;
+ bool dirty;
+
+ uint64_t version;
+ std::string redirect_target;
+ std::map<uint64_t, ChunkDesc> chunk_info;
+private:
+ std::list<std::pair<std::shared_ptr<ContentsGenerator>, ContDesc> > layers; // new layers are pushed at the front
+};
+
+#endif
diff --git a/src/test/osd/RadosModel.cc b/src/test/osd/RadosModel.cc
new file mode 100644
index 00000000..501bf3b1
--- /dev/null
+++ b/src/test/osd/RadosModel.cc
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "include/interval_set.h"
+#include "include/buffer.h"
+#include <list>
+#include <map>
+#include <set>
+#include "include/rados/librados.h"
+#include "RadosModel.h"
+#include "TestOpStat.h"
+
+
+// Public entry point for starting an op; forwards to the subclass hook.
+void TestOp::begin()
+{
+  this->_begin();
+}
+
+// Public completion entry point; forwards info (may be NULL) to the
+// subclass hook.
+void TestOp::finish(TestOp::CallbackInfo *info)
+{
+  this->_finish(info);
+}
+
+// Completion callback whose arg is the TestOp itself; the op is
+// finished without any CallbackInfo. comp is unused.
+void read_callback(librados::completion_t comp, void *arg) {
+  static_cast<TestOp*>(arg)->finish(NULL);
+}
+
+// Completion callback whose arg is a heap-allocated
+// pair<TestOp*, CallbackInfo*>. The op is finished with the info, then
+// both the pair and the info are deleted here. comp is unused.
+void write_callback(librados::completion_t comp, void *arg) {
+  typedef std::pair<TestOp*, TestOp::CallbackInfo*> CallbackArgs;
+  CallbackArgs *packed = static_cast<CallbackArgs *>(arg);
+  TestOp::CallbackInfo *info = packed->second;
+  packed->first->finish(info);
+  delete packed;
+  delete info;
+}
diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h
new file mode 100644
index 00000000..f7cac8e1
--- /dev/null
+++ b/src/test/osd/RadosModel.h
@@ -0,0 +1,3121 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "include/int_types.h"
+
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "include/rados/librados.hpp"
+
+#include <iostream>
+#include <sstream>
+#include <map>
+#include <set>
+#include <list>
+#include <string>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <time.h>
+#include "Object.h"
+#include "TestOpStat.h"
+#include "test/librados/test.h"
+#include "common/sharedptr_registry.hpp"
+#include "common/errno.h"
+#include "osd/HitSet.h"
+
+#ifndef RADOSMODEL_H
+#define RADOSMODEL_H
+
+using namespace std;
+
+class RadosTestContext;
+class TestOpStat;
+
+// Pick a pseudo-random element of cont using rand().
+// Returns cont.end() for an empty container (rand() is not called in
+// that case); otherwise an iterator reached by stepping
+// rand() % cont.size() times from begin().
+template <typename T>
+typename T::iterator rand_choose(T &cont) {
+  if (!cont.size())
+    return cont.end();
+
+  typename T::iterator pick = cont.begin();
+  int steps = rand() % cont.size();
+  while (steps > 0) {
+    ++pick;
+    --steps;
+  }
+  return pick;
+}
+
+enum TestOpType { // Operation kinds; values are implicit (TEST_OP_READ == 0), so reordering changes them. Presumably selected by the op generator -- confirm against callers.
+ TEST_OP_READ,
+ TEST_OP_WRITE,
+ TEST_OP_WRITE_EXCL,
+ TEST_OP_WRITESAME,
+ TEST_OP_DELETE,
+ TEST_OP_SNAP_CREATE,
+ TEST_OP_SNAP_REMOVE,
+ TEST_OP_ROLLBACK,
+ TEST_OP_SETATTR,
+ TEST_OP_RMATTR,
+ TEST_OP_WATCH,
+ TEST_OP_COPY_FROM,
+ TEST_OP_HIT_SET_LIST,
+ TEST_OP_UNDIRTY,
+ TEST_OP_IS_DIRTY,
+ TEST_OP_CACHE_FLUSH,
+ TEST_OP_CACHE_TRY_FLUSH,
+ TEST_OP_CACHE_EVICT,
+ TEST_OP_APPEND,
+ TEST_OP_APPEND_EXCL,
+ TEST_OP_SET_REDIRECT,
+ TEST_OP_UNSET_REDIRECT,
+ TEST_OP_CHUNK_READ,
+ TEST_OP_TIER_PROMOTE
+};
+
+// WatchCtx2 implementation with a simple "waiting" latch: start() arms
+// it, handle_notify() clears it and wakes all waiters, wait() blocks
+// until it is clear. All three take `lock`.
+class TestWatchContext : public librados::WatchCtx2 {
+  // Private copy constructor with no definition visible here: instances
+  // are not meant to be copied.
+  TestWatchContext(const TestWatchContext&);
+public:
+  Cond cond;
+  uint64_t handle;
+  bool waiting;
+  Mutex lock;
+
+  TestWatchContext()
+    : handle(0),
+      waiting(false),
+      lock("watch lock")
+  {}
+
+  // A notify arrived: clear the latch and wake everyone in wait().
+  // None of the arguments are inspected.
+  void handle_notify(uint64_t notify_id, uint64_t cookie,
+		     uint64_t notifier_id,
+		     bufferlist &bl) override {
+    Mutex::Locker guard(lock);
+    waiting = false;
+    cond.SignalAll();
+  }
+
+  // Errors are only logged to stdout.
+  void handle_error(uint64_t cookie, int err) override {
+    Mutex::Locker guard(lock);
+    cout << "watch handle_error " << err << std::endl;
+  }
+
+  // Arm the latch before issuing the notify that should clear it.
+  void start() {
+    Mutex::Locker guard(lock);
+    waiting = true;
+  }
+
+  // Block until handle_notify() has cleared the latch.
+  void wait() {
+    Mutex::Locker guard(lock);
+    while (waiting) {
+      cond.Wait(lock);
+    }
+  }
+
+  // Mutable reference so the caller can store the watch handle in place.
+  uint64_t &get_handle() {
+    return handle;
+  }
+};
+
+// Base class for one test operation. Subclasses implement _begin()
+// and getType(), and may override _finish(), finished() and
+// must_quiesce_other_ops().
+class TestOp {
+public:
+  int num;
+  RadosTestContext *context;
+  TestOpStat *stat;
+  bool done;
+
+  TestOp(int n, RadosTestContext *context,
+	 TestOpStat *stat = 0)
+    : num(n), context(context), stat(stat), done(false)
+  {}
+
+  virtual ~TestOp() {}
+
+  /**
+   * Data handed from a completion callback to TestOp::finish.
+   * The virtual destructor allows deletion through a base pointer.
+   */
+  struct CallbackInfo {
+    uint64_t id;
+    explicit CallbackInfo(uint64_t id) : id(id) {}
+    virtual ~CallbackInfo() {}
+  };
+
+  /// Subclass hook that starts the operation; invoked by begin().
+  virtual void _begin() = 0;
+
+  /**
+   * Subclass hook invoked by finish() when the operation (or one of
+   * its sub-operations) completes. The default does nothing;
+   * asynchronous operations should override it.
+   *
+   * @param info information stored by a callback, or NULL -
+   *        useful for multi-operation TestOps
+   */
+  virtual void _finish(CallbackInfo *info) {}
+
+  /// Name of the operation type.
+  virtual string getType() = 0;
+
+  /// Whether the op is complete; the default reports true.
+  virtual bool finished() { return true; }
+
+  void begin();
+  void finish(CallbackInfo *info);
+
+  /// Whether other in-flight ops must drain before this op starts;
+  /// the default is false.
+  virtual bool must_quiesce_other_ops() { return false; }
+};
+
+// Source of test operations: next() hands out the following op to run
+// for the given context. The loop in this file stops requesting work
+// once next() returns NULL and nothing is in flight.
+class TestOpGenerator {
+public:
+  virtual ~TestOpGenerator() {}
+
+  virtual TestOp *next(RadosTestContext &context) = 0;
+};
+
+class RadosTestContext {
+public:
+ Mutex state_lock;
+ Cond wait_cond;
+ map<int, map<string,ObjectDesc> > pool_obj_cont;
+ set<string> oid_in_use;
+ set<string> oid_not_in_use;
+ set<string> oid_flushing;
+ set<string> oid_not_flushing;
+ set<string> oid_redirect_not_in_use;
+ set<string> oid_redirect_in_use;
+ SharedPtrRegistry<int, int> snaps_in_use;
+ int current_snap;
+ string pool_name;
+ librados::IoCtx io_ctx;
+ librados::Rados rados;
+ int next_oid;
+ string prefix;
+ int errors;
+ int max_in_flight;
+ int seq_num;
+ map<int,uint64_t> snaps;
+ uint64_t seq;
+ const char *rados_id;
+ bool initialized;
+ map<string, TestWatchContext*> watches;
+ const uint64_t max_size;
+ const uint64_t min_stride_size;
+ const uint64_t max_stride_size;
+ AttrGenerator attr_gen;
+ const bool no_omap;
+ const bool no_sparse;
+ bool pool_snaps;
+ bool write_fadvise_dontneed;
+ string low_tier_pool_name;
+ librados::IoCtx low_tier_io_ctx;
+ int snapname_num;
+ map<string,string > redirect_objs;
+
+ RadosTestContext(const string &pool_name,
+ int max_in_flight,
+ uint64_t max_size,
+ uint64_t min_stride_size,
+ uint64_t max_stride_size,
+ bool no_omap,
+ bool no_sparse,
+ bool pool_snaps,
+ bool write_fadvise_dontneed,
+ const string &low_tier_pool_name,
+ const char *id = 0) :
+ state_lock("Context Lock"),
+ pool_obj_cont(),
+ current_snap(0),
+ pool_name(pool_name),
+ next_oid(0),
+ errors(0),
+ max_in_flight(max_in_flight),
+ seq_num(0), seq(0),
+ rados_id(id), initialized(false),
+ max_size(max_size),
+ min_stride_size(min_stride_size), max_stride_size(max_stride_size),
+ attr_gen(2000, 20000),
+ no_omap(no_omap),
+ no_sparse(no_sparse),
+ pool_snaps(pool_snaps),
+ write_fadvise_dontneed(write_fadvise_dontneed),
+ low_tier_pool_name(low_tier_pool_name),
+ snapname_num(0)
+ {
+ }
+
+ int init()
+ {
+ int r = rados.init(rados_id);
+ if (r < 0)
+ return r;
+ r = rados.conf_read_file(NULL);
+ if (r < 0)
+ return r;
+ r = rados.conf_parse_env(NULL);
+ if (r < 0)
+ return r;
+ r = rados.connect();
+ if (r < 0)
+ return r;
+ r = rados.ioctx_create(pool_name.c_str(), io_ctx);
+ if (r < 0) {
+ rados.shutdown();
+ return r;
+ }
+ if (!low_tier_pool_name.empty()) {
+ r = rados.ioctx_create(low_tier_pool_name.c_str(), low_tier_io_ctx);
+ if (r < 0) {
+ rados.shutdown();
+ return r;
+ }
+ }
+ bufferlist inbl;
+ r = rados.mon_command(
+ "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name +
+ "\", \"var\": \"write_fadvise_dontneed\", \"val\": \"" + (write_fadvise_dontneed ? "true" : "false") + "\"}",
+ inbl, NULL, NULL);
+ if (r < 0) {
+ rados.shutdown();
+ return r;
+ }
+ char hostname_cstr[100];
+ gethostname(hostname_cstr, 100);
+ stringstream hostpid;
+ hostpid << hostname_cstr << getpid() << "-";
+ prefix = hostpid.str();
+ ceph_assert(!initialized);
+ initialized = true;
+ return 0;
+ }
+
+ void shutdown()
+ {
+ if (initialized) {
+ rados.shutdown();
+ }
+ }
+
+ void loop(TestOpGenerator *gen)
+ {
+ ceph_assert(initialized);
+ list<TestOp*> inflight;
+ state_lock.Lock();
+
+ TestOp *next = gen->next(*this);
+ TestOp *waiting = NULL;
+
+ while (next || !inflight.empty()) {
+ if (next && next->must_quiesce_other_ops() && !inflight.empty()) {
+ waiting = next;
+ next = NULL; // Force to wait for inflight to drain
+ }
+ if (next) {
+ inflight.push_back(next);
+ }
+ state_lock.Unlock();
+ if (next) {
+ (*inflight.rbegin())->begin();
+ }
+ state_lock.Lock();
+ while (1) {
+ for (list<TestOp*>::iterator i = inflight.begin();
+ i != inflight.end();) {
+ if ((*i)->finished()) {
+ cout << (*i)->num << ": done (" << (inflight.size()-1) << " left)" << std::endl;
+ delete *i;
+ inflight.erase(i++);
+ } else {
+ ++i;
+ }
+ }
+
+ if (inflight.size() >= (unsigned) max_in_flight || (!next && !inflight.empty())) {
+ cout << " waiting on " << inflight.size() << std::endl;
+ wait();
+ } else {
+ break;
+ }
+ }
+ if (waiting) {
+ next = waiting;
+ waiting = NULL;
+ } else {
+ next = gen->next(*this);
+ }
+ }
+ state_lock.Unlock();
+ }
+
+ void wait()
+ {
+ wait_cond.Wait(state_lock);
+ }
+
+ void kick()
+ {
+ wait_cond.Signal();
+ }
+
+ TestWatchContext *get_watch_context(const string &oid) {
+ return watches.count(oid) ? watches[oid] : 0;
+ }
+
+ TestWatchContext *watch(const string &oid) {
+ ceph_assert(!watches.count(oid));
+ return (watches[oid] = new TestWatchContext);
+ }
+
+ void unwatch(const string &oid) {
+ ceph_assert(watches.count(oid));
+ delete watches[oid];
+ watches.erase(oid);
+ }
+
+ ObjectDesc get_most_recent(const string &oid) {
+ ObjectDesc new_obj;
+ for (map<int, map<string,ObjectDesc> >::reverse_iterator i =
+ pool_obj_cont.rbegin();
+ i != pool_obj_cont.rend();
+ ++i) {
+ map<string,ObjectDesc>::iterator j = i->second.find(oid);
+ if (j != i->second.end()) {
+ new_obj = j->second;
+ break;
+ }
+ }
+ return new_obj;
+ }
+
+ // Record, under current_snap, a copy of oid's latest state with the named
+ // attrs removed and the dirty flag set.
+ void rm_object_attrs(const string &oid, const set<string> &attrs)
+ {
+   ObjectDesc updated = get_most_recent(oid);
+   for (const string &name : attrs) {
+     updated.attrs.erase(name);
+   }
+   updated.dirty = true;
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, updated));
+ }
+
+ // Record, under current_snap, a copy of oid's latest state with an empty
+ // header and the dirty flag set.
+ void remove_object_header(const string &oid)
+ {
+   ObjectDesc updated = get_most_recent(oid);
+   updated.header = bufferlist();
+   updated.dirty = true;
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, updated));
+ }
+
+
+ // Record, under current_snap, a copy of oid's latest state whose header is
+ // bl; the copy is marked as existing and dirty.
+ void update_object_header(const string &oid, const bufferlist &bl)
+ {
+   ObjectDesc updated = get_most_recent(oid);
+   updated.header = bl;
+   updated.exists = true;
+   updated.dirty = true;
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, updated));
+ }
+
+ // Record, under current_snap, a copy of oid's latest state with every
+ // entry of attrs set (overwriting same-named ones); the copy is marked as
+ // existing and dirty.
+ void update_object_attrs(const string &oid, const map<string, ContDesc> &attrs)
+ {
+   ObjectDesc updated = get_most_recent(oid);
+   for (const auto &attr : attrs) {
+     updated.attrs[attr.first] = attr.second;
+   }
+   updated.exists = true;
+   updated.dirty = true;
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, updated));
+ }
+
+ // Record, under current_snap, a copy of oid's latest state that is marked
+ // as existing and dirty and then updated with (cont_gen, contents) through
+ // ObjectDesc::update().
+ void update_object(ContentsGenerator *cont_gen,
+                    const string &oid, const ContDesc &contents)
+ {
+   ObjectDesc updated = get_most_recent(oid);
+   updated.exists = true;
+   updated.dirty = true;
+   updated.update(cont_gen, contents);
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, updated));
+ }
+
+ // Replace oid's entry under current_snap with a copy of contents and mark
+ // the stored copy dirty.
+ void update_object_full(const string &oid, const ObjectDesc &contents)
+ {
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   auto placed = head.insert(pair<string,ObjectDesc>(oid, contents));
+   placed.first->second.dirty = true;
+ }
+
+ // Record, under current_snap, a copy of oid's latest state with the dirty
+ // flag cleared.
+ void update_object_undirty(const string &oid)
+ {
+   ObjectDesc updated = get_most_recent(oid);
+   updated.dirty = false;
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, updated));
+ }
+
+ // Find the newest recorded entry for oid (ignoring layers whose key is
+ // greater than snap when snap != -1), store version in it unless version
+ // is 0, and print a one-line summary of that entry.  Does nothing when no
+ // entry is found.
+ void update_object_version(const string &oid, uint64_t version,
+                            int snap = -1)
+ {
+   for (auto layer = pool_obj_cont.rbegin();
+        layer != pool_obj_cont.rend();
+        ++layer) {
+     const bool newer_than_snap = (snap != -1 && snap < layer->first);
+     if (newer_than_snap) {
+       continue;
+     }
+     auto hit = layer->second.find(oid);
+     if (hit == layer->second.end()) {
+       continue;
+     }
+     ObjectDesc &desc = hit->second;
+     if (version) {
+       desc.version = version;
+     }
+     cout << __func__ << " oid " << oid
+          << " v " << version << " " << desc.most_recent()
+          << " " << (desc.dirty ? "dirty" : "clean")
+          << " " << (desc.exists ? "exists" : "dne")
+          << std::endl;
+     return;
+   }
+ }
+
+ // Record oid as removed by storing a default-constructed ObjectDesc for it
+ // under current_snap.  Asserts that oid has no watch context.
+ void remove_object(const string &oid)
+ {
+   ceph_assert(!get_watch_context(oid));
+   map<string,ObjectDesc> &head = pool_obj_cont[current_snap];
+   head.erase(oid);
+   head.insert(pair<string,ObjectDesc>(oid, ObjectDesc()));
+ }
+
+ // Copy the newest recorded entry for oid into *contents and return true.
+ // When snap != -1, layers whose key is greater than snap are ignored.
+ // Returns false, leaving *contents untouched, when nothing is found.
+ bool find_object(const string &oid, ObjectDesc *contents, int snap = -1) const
+ {
+   for (auto layer = pool_obj_cont.rbegin();
+        layer != pool_obj_cont.rend();
+        ++layer) {
+     if (snap != -1 && snap < layer->first) {
+       continue;
+     }
+     auto hit = layer->second.find(oid);
+     if (hit != layer->second.end()) {
+       *contents = hit->second;
+       return true;
+     }
+   }
+   return false;
+ }
+
+ // Remember target as the redirect target of oid, replacing any earlier one.
+ void update_object_redirect_target(const string &oid, const string &target) {
+   redirect_objs[oid] = target;
+ }
+
+ // Set chunk_info[offset] = info on a copy of oid's newest recorded entry
+ // and store that copy through update_object_full().  Does nothing when oid
+ // has never been recorded.
+ void update_object_chunk_target(const string &oid, uint64_t offset, const ChunkDesc &info)
+ {
+   for (auto layer = pool_obj_cont.rbegin();
+        layer != pool_obj_cont.rend();
+        ++layer) {
+     auto hit = layer->second.find(oid);
+     if (hit == layer->second.end()) {
+       continue;
+     }
+     // Work on a copy: update_object_full() rewrites the current_snap layer.
+     ObjectDesc obj_desc = hit->second;
+     obj_desc.chunk_info[offset] = info;
+     update_object_full(oid, obj_desc);
+     return;
+   }
+ }
+
+ // True when find_object() locates an entry for oid (subject to the same
+ // snap filter) and that entry has its exists flag set.
+ bool object_existed_at(const string &oid, int snap = -1) const
+ {
+   ObjectDesc contents;
+   if (!find_object(oid, &contents, snap)) {
+     return false;
+   }
+   return contents.exists;
+ }
+
+ /**
+  * Drop snapshot layer @p snap from the model.
+  *
+  * Entries of the removed layer that have no counterpart in the next higher
+  * layer are carried over into it; entries already present there are left
+  * alone.  The layer and its snaps record are then erased.
+  *
+  * Asserts that the layer exists and that it has a successor layer (the
+  * successor is dereferenced below, so it must be checked).
+  */
+ void remove_snap(int snap)
+ {
+   map<int, map<string,ObjectDesc> >::iterator current_iter = pool_obj_cont.find(snap);
+   ceph_assert(current_iter != pool_obj_cont.end());
+   map<int, map<string,ObjectDesc> >::iterator next_iter = current_iter;
+   ++next_iter;
+   ceph_assert(next_iter != pool_obj_cont.end());
+
+   map<string,ObjectDesc> &current = current_iter->second;
+   map<string,ObjectDesc> &next = next_iter->second;
+   // Range insert skips keys that already exist in next.
+   next.insert(current.begin(), current.end());
+
+   pool_obj_cont.erase(current_iter);
+   snaps.erase(snap);
+ }
+
+ // Register snap under the current layer index, advance current_snap,
+ // create the (empty) layer for the new index and remember snap in seq.
+ void add_snap(uint64_t snap)
+ {
+   snaps[current_snap] = snap;
+   ++current_snap;
+   pool_obj_cont[current_snap]; // operator[] creates the empty layer
+   seq = snap;
+ }
+
+ // Store, in the highest-keyed layer, the state find_object() reports for
+ // oid at snap, marked dirty.  When nothing is found a default ObjectDesc
+ // (marked dirty) is stored.  Asserts that oid has no watch context.
+ void roll_back(const string &oid, int snap)
+ {
+   ceph_assert(!get_watch_context(oid));
+   ObjectDesc contents;
+   find_object(oid, &contents, snap);
+   contents.dirty = true;
+   map<string,ObjectDesc> &newest = pool_obj_cont.rbegin()->second;
+   newest.erase(oid);
+   newest.insert(pair<string,ObjectDesc>(oid, contents));
+ }
+};
+
+void read_callback(librados::completion_t comp, void *arg);
+void write_callback(librados::completion_t comp, void *arg);
+
+/**
+ * Test op that removes xattrs from an object (plus the same-named omap keys
+ * unless context->no_omap is set) and mirrors the removal in the model.
+ *
+ * With probability 29/30 only a generator-selected subset of the attrs is
+ * removed; otherwise all attrs, the omap and the omap header are cleared.
+ */
+class RemoveAttrsOp : public TestOp {
+public:
+  string oid;
+  librados::ObjectWriteOperation op;
+  librados::AioCompletion *comp;  // created in _begin(), released in _finish()
+  RemoveAttrsOp(int n, RadosTestContext *context,
+                const string &oid,
+                TestOpStat *stat)
+    : TestOp(n, context, stat), oid(oid), comp(NULL)
+  {}
+
+  // Build the removal op under state_lock, update the model, then submit
+  // the op asynchronously.  Finishes immediately (done = true) when the
+  // object is unknown to the model or nothing was selected for removal.
+  void _begin() override
+  {
+    ContDesc cont;
+    set<string> to_remove;
+    {
+      Mutex::Locker l(context->state_lock);
+      ObjectDesc obj;
+      if (!context->find_object(oid, &obj)) {
+        context->kick();
+        done = true;
+        return;
+      }
+      cont = ContDesc(context->seq_num, context->current_snap,
+                      context->seq_num, "");
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+
+      if (rand() % 30) {
+        // Partial removal: walk the attrs in step with the generator stream
+        // and drop those whose stream byte is a multiple of 3.
+        ContentsGenerator::iterator iter = context->attr_gen.get_iterator(cont);
+        for (map<string, ContDesc>::iterator i = obj.attrs.begin();
+             i != obj.attrs.end();
+             ++i, ++iter) {
+          if (!(*iter % 3)) {
+            to_remove.insert(i->first);
+            op.rmxattr(i->first.c_str());
+          }
+        }
+        if (to_remove.empty()) {
+          // Nothing to do: give the oid back and finish without any I/O.
+          context->kick();
+          context->oid_in_use.erase(oid);
+          context->oid_not_in_use.insert(oid);
+          done = true;
+          return;
+        }
+        if (!context->no_omap) {
+          op.omap_rm_keys(to_remove);
+        }
+      } else {
+        // Full removal: clear the omap and remove every attr.
+        if (!context->no_omap) {
+          op.omap_clear();
+        }
+        for (map<string, ContDesc>::iterator i = obj.attrs.begin();
+             i != obj.attrs.end();
+             ++i) {
+          op.rmxattr(i->first.c_str());
+          to_remove.insert(i->first);
+        }
+        context->remove_object_header(oid);
+      }
+      context->rm_object_attrs(oid, to_remove);
+    }
+
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                &write_callback);
+    context->io_ctx.aio_operate(context->prefix+oid, comp, &op);
+  }
+
+  // Completion handler: record the resulting object version, release the
+  // completion and hand the oid back to the not-in-use set.
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+    done = true;
+    context->update_object_version(oid, comp->get_version64());
+    // The completion is not used past this point; release it so it is not
+    // leaked (same pattern as WriteOp/ReadOp, which release in _finish()).
+    comp->release();
+    comp = NULL;
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->kick();
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "RemoveAttrsOp";
+  }
+};
+
+/**
+ * Test op that creates the object if needed and sets 20 generated xattrs on
+ * it (plus, unless context->no_omap is set, the same key/value pairs as omap
+ * entries and a generated omap header), mirroring the change in the model.
+ */
+class SetAttrsOp : public TestOp {
+public:
+  string oid;
+  librados::ObjectWriteOperation op;
+  librados::AioCompletion *comp;  // created in _begin(), released in _finish()
+  SetAttrsOp(int n,
+             RadosTestContext *context,
+             const string &oid,
+             TestOpStat *stat)
+    : TestOp(n, context, stat),
+      oid(oid), comp(NULL)
+  {}
+
+  // Generate header, keys and values from attr_gen, record them in the
+  // model and submit the write asynchronously.
+  void _begin() override
+  {
+    ContDesc cont;
+    {
+      Mutex::Locker l(context->state_lock);
+      cont = ContDesc(context->seq_num, context->current_snap,
+                      context->seq_num, "");
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+    }
+
+    map<string, bufferlist> omap_contents;
+    map<string, ContDesc> omap;
+    bufferlist header;
+    ContentsGenerator::iterator keygen = context->attr_gen.get_iterator(cont);
+    op.create(false);
+    // Header: skip zero bytes, then take the following run of non-zero
+    // bytes, leaving out '_'.
+    while (!*keygen) ++keygen;
+    while (*keygen) {
+      if (*keygen != '_')
+        header.append(*keygen);
+      ++keygen;
+    }
+    for (int i = 0; i < 20; ++i) {
+      // Key: the next run of non-zero generator bytes, capped at 40 chars.
+      string key;
+      while (!*keygen) ++keygen;
+      while (*keygen && key.size() < 40) {
+        key.push_back((*keygen % 20) + 'a');
+        ++keygen;
+      }
+      ContDesc val(cont);
+      val.seqnum += (unsigned)(*keygen);
+      val.prefix = ("oid: " + oid);
+      omap[key] = val;
+      bufferlist val_buffer = context->attr_gen.gen_bl(val);
+      omap_contents[key] = val_buffer;
+      op.setxattr(key.c_str(), val_buffer);
+    }
+    if (!context->no_omap) {
+      op.omap_set_header(header);
+      op.omap_set(omap_contents);
+    }
+
+    {
+      Mutex::Locker l(context->state_lock);
+      context->update_object_header(oid, header);
+      context->update_object_attrs(oid, omap);
+    }
+
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                &write_callback);
+    context->io_ctx.aio_operate(context->prefix+oid, comp, &op);
+  }
+
+  // Completion handler: abort on any error, otherwise record the resulting
+  // version, release the completion and hand the oid back.
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+    int r;
+    if ((r = comp->get_return_value())) {
+      cerr << "err " << r << std::endl;
+      ceph_abort();
+    }
+    done = true;
+    context->update_object_version(oid, comp->get_version64());
+    // The completion is not used past this point; release it so it is not
+    // leaked (same pattern as WriteOp/ReadOp, which release in _finish()).
+    comp->release();
+    comp = NULL;
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->kick();
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "SetAttrsOp";
+  }
+};
+
+/**
+ * Test op that writes (or appends) generated content to an object.
+ *
+ * One async write is issued per generated range, followed by a write that
+ * sets the "_header" xattr (and truncates, unless appending) and a 1-byte
+ * read ordered after the writes.  When the last of them completes, the
+ * highest write version is recorded in the model and compared with the
+ * version seen by the read.
+ */
+class WriteOp : public TestOp {
+public:
+  string oid;
+  ContDesc cont;
+  set<librados::AioCompletion *> waiting;  // outstanding write completions
+  librados::AioCompletion *rcompletion;    // completion of the trailing read
+  uint64_t waiting_on;                     // callbacks still expected
+  uint64_t last_acked_tid;                 // highest tid seen by _finish()
+
+  librados::ObjectReadOperation read_op;
+  librados::ObjectWriteOperation write_op;
+  bufferlist rbuffer;
+
+  bool do_append;
+  bool do_excl;
+
+  WriteOp(int n,
+          RadosTestContext *context,
+          const string &oid,
+          bool do_append,
+          bool do_excl,
+          TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(oid), rcompletion(NULL), waiting_on(0),
+      last_acked_tid(0), do_append(do_append),
+      do_excl(do_excl)
+  {}
+
+  // Update the model and submit every write plus the trailing read, all
+  // while holding state_lock.
+  void _begin() override
+  {
+    context->state_lock.Lock();
+    done = false;
+    stringstream label;
+    label << context->prefix << "OID: " << oid << " snap " << context->current_snap << std::endl;
+    const string prefix = label.str();
+
+    cont = ContDesc(context->seq_num, context->current_snap, context->seq_num, prefix);
+
+    ContentsGenerator *cont_gen;
+    if (!do_append) {
+      cont_gen = new VarLenGenerator(
+        context->max_size, context->min_stride_size, context->max_stride_size);
+    } else {
+      // Appends start from the current length of the object, if it has any.
+      ObjectDesc old_value;
+      uint64_t prev_length = 0;
+      if (context->find_object(oid, &old_value) && old_value.has_contents()) {
+        prev_length =
+          old_value.most_recent_gen()->get_length(old_value.most_recent());
+      }
+      bool needs_alignment;
+      int r = context->io_ctx.pool_requires_alignment2(&needs_alignment);
+      ceph_assert(r == 0);
+      uint64_t alignment = 0;
+      if (needs_alignment) {
+        r = context->io_ctx.pool_required_alignment2(&alignment);
+        ceph_assert(r == 0);
+        ceph_assert(alignment != 0);
+      }
+      cont_gen = new AppendGenerator(
+        prev_length,
+        alignment,
+        context->min_stride_size,
+        context->max_stride_size,
+        3);
+    }
+    context->update_object(cont_gen, oid, cont);
+
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+
+    map<uint64_t, uint64_t> ranges;
+    cont_gen->get_ranges_map(cont, ranges);
+    std::cout << num << ": seq_num " << context->seq_num << " ranges " << ranges << std::endl;
+    context->seq_num++;
+
+    // Callback argument consumed by write_callback for the given tid.
+    auto make_cb_arg = [this](uint64_t id) {
+      return new pair<TestOp*, TestOp::CallbackInfo*>(
+        this, new TestOp::CallbackInfo(id));
+    };
+
+    waiting_on = ranges.size();
+    ContentsGenerator::iterator gen_pos = cont_gen->get_iterator(cont);
+    uint64_t tid = 1;
+    for (auto extent = ranges.begin(); extent != ranges.end(); ++extent, ++tid) {
+      const uint64_t off = extent->first;
+      const uint64_t len = extent->second;
+      gen_pos.seek(off);
+      bufferlist to_write = gen_pos.gen_bl_advance(len);
+      ceph_assert(to_write.length() == len);
+      ceph_assert(to_write.length() > 0);
+      std::cout << num << ": writing " << context->prefix+oid
+                << " from " << off
+                << " to " << off + len << " tid " << tid << std::endl;
+      librados::AioCompletion *completion =
+        context->rados.aio_create_completion((void*) make_cb_arg(tid), NULL,
+                                             &write_callback);
+      waiting.insert(completion);
+      librados::ObjectWriteOperation op;
+      if (do_append) {
+        op.append(to_write);
+      } else {
+        op.write(off, to_write);
+      }
+      if (do_excl && tid == 1)
+        op.assert_exists();
+      context->io_ctx.aio_operate(
+        context->prefix+oid, completion,
+        &op);
+    }
+
+    // Final write: store the content descriptor in the "_header" xattr.
+    bufferlist contbl;
+    encode(cont, contbl);
+    librados::AioCompletion *header_completion =
+      context->rados.aio_create_completion(
+        (void*) make_cb_arg(++tid), NULL, &write_callback);
+    waiting.insert(header_completion);
+    waiting_on++;
+    write_op.setxattr("_header", contbl);
+    if (!do_append) {
+      write_op.truncate(cont_gen->get_length(cont));
+    }
+    context->io_ctx.aio_operate(
+      context->prefix+oid, header_completion, &write_op);
+
+    // Trailing 1-byte read whose version is checked in _finish().
+    rcompletion = context->rados.aio_create_completion(
+      (void*) make_cb_arg(++tid), NULL, &write_callback);
+    waiting_on++;
+    read_op.read(0, 1, &rbuffer, 0);
+    context->io_ctx.aio_operate(
+      context->prefix+oid, rcompletion,
+      &read_op,
+      librados::OPERATION_ORDER_READS_WRITES,  // order wrt previous write/update
+      0);
+    context->state_lock.Unlock();
+  }
+
+  // Called once per completed sub-operation.  Aborts unless tids arrive in
+  // strictly increasing order; the final call does the version bookkeeping.
+  void _finish(CallbackInfo *info) override
+  {
+    ceph_assert(info);
+    Mutex::Locker l(context->state_lock);
+    const uint64_t tid = info->id;
+
+    cout << num << ": finishing write tid " << tid << " to " << context->prefix + oid << std::endl;
+
+    if (tid <= last_acked_tid) {
+      cerr << "Error: finished tid " << tid
+           << " when last_acked_tid was " << last_acked_tid << std::endl;
+      ceph_abort();
+    }
+    last_acked_tid = tid;
+
+    ceph_assert(!done);
+    waiting_on--;
+    if (waiting_on != 0) {
+      return;
+    }
+
+    // Everything has completed: collect the highest version among the
+    // writes and release their completions.
+    uint64_t version = 0;
+    for (librados::AioCompletion *c : waiting) {
+      ceph_assert(c->is_complete());
+      if (int err = c->get_return_value()) {
+        cerr << "Error: oid " << oid << " write returned error code "
+             << err << std::endl;
+      }
+      if (c->get_version64() > version)
+        version = c->get_version64();
+      c->release();
+    }
+    waiting.clear();
+
+    context->update_object_version(oid, version);
+    if (rcompletion->get_version64() != version) {
+      cerr << "Error: racing read on " << oid << " returned version "
+           << rcompletion->get_version64() << " rather than version "
+           << version << std::endl;
+      ceph_abort_msg("racing read got wrong version");
+    }
+
+    {
+      ObjectDesc old_value;
+      ceph_assert(context->find_object(oid, &old_value, -1));
+      if (old_value.deleted())
+        std::cout << num << ": left oid " << oid << " deleted" << std::endl;
+      else
+        std::cout << num << ": left oid " << oid << " "
+                  << old_value.most_recent() << std::endl;
+    }
+
+    rcompletion->release();
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "WriteOp";
+  }
+};
+
+/**
+ * Test op that writes generated content to an object using writesame.
+ *
+ * One async writesame is issued per generated range, followed by a write
+ * that sets the "_header" xattr and truncates the object, and a 1-byte read
+ * ordered after the writes.  When the last of them completes, the highest
+ * write version is recorded in the model and compared with the version seen
+ * by the read.
+ */
+class WriteSameOp : public TestOp {
+public:
+  string oid;
+  ContDesc cont;
+  set<librados::AioCompletion *> waiting;  // outstanding write completions
+  librados::AioCompletion *rcompletion;    // completion of the trailing read
+  uint64_t waiting_on;                     // callbacks still expected
+  uint64_t last_acked_tid;                 // highest tid seen by _finish()
+
+  librados::ObjectReadOperation read_op;
+  librados::ObjectWriteOperation write_op;
+  bufferlist rbuffer;
+
+  WriteSameOp(int n,
+              RadosTestContext *context,
+              const string &oid,
+              TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(oid), rcompletion(NULL), waiting_on(0),
+      last_acked_tid(0)
+  {}
+
+  // Update the model and submit every writesame plus the trailing read,
+  // all while holding state_lock.
+  void _begin() override
+  {
+    context->state_lock.Lock();
+    done = false;
+    stringstream label;
+    label << context->prefix << "OID: " << oid << " snap " << context->current_snap << std::endl;
+    const string prefix = label.str();
+
+    cont = ContDesc(context->seq_num, context->current_snap, context->seq_num, prefix);
+
+    ContentsGenerator *cont_gen = new VarLenGenerator(
+      context->max_size, context->min_stride_size, context->max_stride_size);
+    context->update_object(cont_gen, oid, cont);
+
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+
+    map<uint64_t, uint64_t> ranges;
+    cont_gen->get_ranges_map(cont, ranges);
+    std::cout << num << ": seq_num " << context->seq_num << " ranges " << ranges << std::endl;
+    context->seq_num++;
+
+    // Callback argument consumed by write_callback for the given tid.
+    auto make_cb_arg = [this](uint64_t id) {
+      return new pair<TestOp*, TestOp::CallbackInfo*>(
+        this, new TestOp::CallbackInfo(id));
+    };
+
+    waiting_on = ranges.size();
+    ContentsGenerator::iterator gen_pos = cont_gen->get_iterator(cont);
+    uint64_t tid = 1;
+    for (auto extent = ranges.begin(); extent != ranges.end(); ++extent, ++tid) {
+      const uint64_t off = extent->first;
+      const uint64_t len = extent->second;
+      gen_pos.seek(off);
+      bufferlist to_write = gen_pos.gen_bl_advance(len);
+      ceph_assert(to_write.length() == len);
+      ceph_assert(to_write.length() > 0);
+      std::cout << num << ": writing " << context->prefix+oid
+                << " from " << off
+                << " to " << off + len << " tid " << tid << std::endl;
+      librados::AioCompletion *completion =
+        context->rados.aio_create_completion((void*) make_cb_arg(tid), NULL,
+                                             &write_callback);
+      waiting.insert(completion);
+      librados::ObjectWriteOperation op;
+      /* no writesame multiplication factor for now */
+      op.writesame(off, to_write.length(), to_write);
+
+      context->io_ctx.aio_operate(
+        context->prefix+oid, completion,
+        &op);
+    }
+
+    // Final write: store the content descriptor in "_header" and truncate.
+    bufferlist contbl;
+    encode(cont, contbl);
+    librados::AioCompletion *header_completion =
+      context->rados.aio_create_completion(
+        (void*) make_cb_arg(++tid), NULL, &write_callback);
+    waiting.insert(header_completion);
+    waiting_on++;
+    write_op.setxattr("_header", contbl);
+    write_op.truncate(cont_gen->get_length(cont));
+    context->io_ctx.aio_operate(
+      context->prefix+oid, header_completion, &write_op);
+
+    // Trailing 1-byte read whose version is checked in _finish().
+    rcompletion = context->rados.aio_create_completion(
+      (void*) make_cb_arg(++tid), NULL, &write_callback);
+    waiting_on++;
+    read_op.read(0, 1, &rbuffer, 0);
+    context->io_ctx.aio_operate(
+      context->prefix+oid, rcompletion,
+      &read_op,
+      librados::OPERATION_ORDER_READS_WRITES,  // order wrt previous write/update
+      0);
+    context->state_lock.Unlock();
+  }
+
+  // Called once per completed sub-operation.  Aborts unless tids arrive in
+  // strictly increasing order; the final call does the version bookkeeping.
+  void _finish(CallbackInfo *info) override
+  {
+    ceph_assert(info);
+    Mutex::Locker l(context->state_lock);
+    const uint64_t tid = info->id;
+
+    cout << num << ": finishing writesame tid " << tid << " to " << context->prefix + oid << std::endl;
+
+    if (tid <= last_acked_tid) {
+      cerr << "Error: finished tid " << tid
+           << " when last_acked_tid was " << last_acked_tid << std::endl;
+      ceph_abort();
+    }
+    last_acked_tid = tid;
+
+    ceph_assert(!done);
+    waiting_on--;
+    if (waiting_on != 0) {
+      return;
+    }
+
+    // Everything has completed: collect the highest version among the
+    // writes and release their completions.
+    uint64_t version = 0;
+    for (librados::AioCompletion *c : waiting) {
+      ceph_assert(c->is_complete());
+      if (int err = c->get_return_value()) {
+        cerr << "Error: oid " << oid << " writesame returned error code "
+             << err << std::endl;
+      }
+      if (c->get_version64() > version)
+        version = c->get_version64();
+      c->release();
+    }
+    waiting.clear();
+
+    context->update_object_version(oid, version);
+    if (rcompletion->get_version64() != version) {
+      cerr << "Error: racing read on " << oid << " returned version "
+           << rcompletion->get_version64() << " rather than version "
+           << version << std::endl;
+      ceph_abort_msg("racing read got wrong version");
+    }
+
+    {
+      ObjectDesc old_value;
+      ceph_assert(context->find_object(oid, &old_value, -1));
+      if (old_value.deleted())
+        std::cout << num << ": left oid " << oid << " deleted" << std::endl;
+      else
+        std::cout << num << ": left oid " << oid << " "
+                  << old_value.most_recent() << std::endl;
+    }
+
+    rcompletion->release();
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "WriteSameOp";
+  }
+};
+
+/**
+ * Test op that synchronously removes an object and records the removal in
+ * the model.  Objects that currently have a watch context are left alone.
+ */
+class DeleteOp : public TestOp {
+public:
+  string oid;
+
+  DeleteOp(int n,
+           RadosTestContext *context,
+           const string &oid,
+           TestOpStat *stat = 0)
+    : TestOp(n, context, stat), oid(oid)
+  {}
+
+  // Mark the object removed in the model, then delete it on the cluster,
+  // randomly choosing between an assert_exists()+remove() write op and a
+  // plain IoCtx::remove().  Any error aborts, except -ENOENT for an object
+  // the model already considered deleted.
+  void _begin() override
+  {
+    bool present;
+    {
+      Mutex::Locker l(context->state_lock);
+      if (context->get_watch_context(oid)) {
+        context->kick();
+        return;
+      }
+
+      ObjectDesc contents;
+      context->find_object(oid, &contents);
+      present = !contents.deleted();
+
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+      context->seq_num++;
+
+      context->remove_object(oid);
+    }
+
+    // The removal itself runs without state_lock held.
+    int r = 0;
+    if (rand() % 2) {
+      librados::ObjectWriteOperation op;
+      op.assert_exists();
+      op.remove();
+      r = context->io_ctx.operate(context->prefix+oid, &op);
+    } else {
+      r = context->io_ctx.remove(context->prefix+oid);
+    }
+    if (r && !(r == -ENOENT && !present)) {
+      cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl;
+      ceph_abort();
+    }
+
+    Mutex::Locker l(context->state_lock);
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->kick();
+  }
+
+  string getType() override
+  {
+    return "DeleteOp";
+  }
+};
+
+class ReadOp : public TestOp {
+public:
+ vector<librados::AioCompletion *> completions;
+ librados::ObjectReadOperation op;
+ string oid;
+ ObjectDesc old_value;
+ int snap;
+ bool balance_reads;
+
+ std::shared_ptr<int> in_use;
+
+ vector<bufferlist> results;
+ vector<int> retvals;
+ vector<std::map<uint64_t, uint64_t>> extent_results;
+ vector<bool> is_sparse_read;
+ uint64_t waiting_on;
+
+ vector<bufferlist> checksums;
+ vector<int> checksum_retvals;
+
+ map<string, bufferlist> attrs;
+ int attrretval;
+
+ set<string> omap_requested_keys;
+ map<string, bufferlist> omap_returned_values;
+ set<string> omap_keys;
+ map<string, bufferlist> omap;
+ bufferlist header;
+
+ map<string, bufferlist> xattrs;
+ ReadOp(int n,
+ RadosTestContext *context,
+ const string &oid,
+ bool balance_reads,
+ TestOpStat *stat = 0)
+ : TestOp(n, context, stat),
+ completions(3),
+ oid(oid),
+ snap(0),
+ balance_reads(balance_reads),
+ results(3),
+ retvals(3),
+ extent_results(3),
+ is_sparse_read(3, false),
+ waiting_on(0),
+ checksums(3),
+ checksum_retvals(3),
+ attrretval(0)
+ {}
+
+ void _do_read(librados::ObjectReadOperation& read_op, int index) {
+ uint64_t len = 0;
+ if (old_value.has_contents())
+ len = old_value.most_recent_gen()->get_length(old_value.most_recent());
+ if (context->no_sparse || rand() % 2) {
+ is_sparse_read[index] = false;
+ read_op.read(0,
+ len,
+ &results[index],
+ &retvals[index]);
+ bufferlist init_value_bl;
+ encode(static_cast<uint32_t>(-1), init_value_bl);
+ read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, 0, len,
+ 0, &checksums[index], &checksum_retvals[index]);
+ } else {
+ is_sparse_read[index] = true;
+ read_op.sparse_read(0,
+ len,
+ &extent_results[index],
+ &results[index],
+ &retvals[index]);
+ }
+ }
+
+ void _begin() override
+ {
+ context->state_lock.Lock();
+ if (!(rand() % 4) && !context->snaps.empty()) {
+ snap = rand_choose(context->snaps)->first;
+ in_use = context->snaps_in_use.lookup_or_create(snap, snap);
+ } else {
+ snap = -1;
+ }
+ std::cout << num << ": read oid " << oid << " snap " << snap << std::endl;
+ done = 0;
+ for (uint32_t i = 0; i < 3; i++) {
+ completions[i] = context->rados.aio_create_completion((void *) this, &read_callback, 0);
+ }
+
+ context->oid_in_use.insert(oid);
+ context->oid_not_in_use.erase(oid);
+ ceph_assert(context->find_object(oid, &old_value, snap));
+ if (old_value.deleted())
+ std::cout << num << ": expect deleted" << std::endl;
+ else
+ std::cout << num << ": expect " << old_value.most_recent() << std::endl;
+
+ TestWatchContext *ctx = context->get_watch_context(oid);
+ context->state_lock.Unlock();
+ if (ctx) {
+ ceph_assert(old_value.exists);
+ TestAlarm alarm;
+ std::cerr << num << ": about to start" << std::endl;
+ ctx->start();
+ std::cerr << num << ": started" << std::endl;
+ bufferlist bl;
+ context->io_ctx.set_notify_timeout(600);
+ int r = context->io_ctx.notify2(context->prefix+oid, bl, 0, NULL);
+ if (r < 0) {
+ std::cerr << "r is " << r << std::endl;
+ ceph_abort();
+ }
+ std::cerr << num << ": notified, waiting" << std::endl;
+ ctx->wait();
+ }
+ context->state_lock.Lock();
+ if (snap >= 0) {
+ context->io_ctx.snap_set_read(context->snaps[snap]);
+ }
+ _do_read(op, 0);
+ for (map<string, ContDesc>::iterator i = old_value.attrs.begin();
+ i != old_value.attrs.end();
+ ++i) {
+ if (rand() % 2) {
+ string key = i->first;
+ if (rand() % 2)
+ key.push_back((rand() % 26) + 'a');
+ omap_requested_keys.insert(key);
+ }
+ }
+ if (!context->no_omap) {
+ op.omap_get_vals_by_keys(omap_requested_keys, &omap_returned_values, 0);
+ // NOTE: we're ignore pmore here, which assumes the OSD limit is high
+ // enough for us.
+ op.omap_get_keys2("", -1, &omap_keys, nullptr, nullptr);
+ op.omap_get_vals2("", -1, &omap, nullptr, nullptr);
+ op.omap_get_header(&header, 0);
+ }
+ op.getxattrs(&xattrs, 0);
+
+ unsigned flags = 0;
+ if (balance_reads)
+ flags |= librados::OPERATION_BALANCE_READS;
+
+ ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[0], &op,
+ flags, NULL));
+ waiting_on++;
+
+ // send 2 pipelined reads on the same object/snap. This can help testing
+ // OSD's read behavior in some scenarios
+ for (uint32_t i = 1; i < 3; ++i) {
+ librados::ObjectReadOperation pipeline_op;
+ _do_read(pipeline_op, i);
+ ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[i], &pipeline_op, 0));
+ waiting_on++;
+ }
+
+ if (snap >= 0) {
+ context->io_ctx.snap_set_read(0);
+ }
+ context->state_lock.Unlock();
+ }
+
+ void _finish(CallbackInfo *info) override
+ {
+ Mutex::Locker l(context->state_lock);
+ ceph_assert(!done);
+ ceph_assert(waiting_on > 0);
+ if (--waiting_on) {
+ return;
+ }
+
+ context->oid_in_use.erase(oid);
+ context->oid_not_in_use.insert(oid);
+ int retval = completions[0]->get_return_value();
+ for (vector<librados::AioCompletion *>::iterator it = completions.begin();
+ it != completions.end(); ++it) {
+ ceph_assert((*it)->is_complete());
+ uint64_t version = (*it)->get_version64();
+ int err = (*it)->get_return_value();
+ if (err != retval) {
+ cerr << num << ": Error: oid " << oid << " read returned different error codes: "
+ << retval << " and " << err << std::endl;
+ ceph_abort();
+ }
+ if (err) {
+ if (!(err == -ENOENT && old_value.deleted())) {
+ cerr << num << ": Error: oid " << oid << " read returned error code "
+ << err << std::endl;
+ ceph_abort();
+ }
+ } else if (version != old_value.version) {
+ cerr << num << ": oid " << oid << " version is " << version
+ << " and expected " << old_value.version << std::endl;
+ ceph_assert(version == old_value.version);
+ }
+ }
+ if (!retval) {
+ map<string, bufferlist>::iterator iter = xattrs.find("_header");
+ bufferlist headerbl;
+ if (iter == xattrs.end()) {
+ if (old_value.has_contents()) {
+ cerr << num << ": Error: did not find header attr, has_contents: "
+ << old_value.has_contents()
+ << std::endl;
+ ceph_assert(!old_value.has_contents());
+ }
+ } else {
+ headerbl = iter->second;
+ xattrs.erase(iter);
+ }
+ if (old_value.deleted()) {
+ std::cout << num << ": expect deleted" << std::endl;
+ ceph_abort_msg("expected deleted");
+ } else {
+ std::cout << num << ": expect " << old_value.most_recent() << std::endl;
+ }
+ if (old_value.has_contents()) {
+ ContDesc to_check;
+ auto p = headerbl.cbegin();
+ decode(to_check, p);
+ if (to_check != old_value.most_recent()) {
+ cerr << num << ": oid " << oid << " found incorrect object contents " << to_check
+ << ", expected " << old_value.most_recent() << std::endl;
+ context->errors++;
+ }
+ for (unsigned i = 0; i < results.size(); i++) {
+ if (is_sparse_read[i]) {
+ if (!old_value.check_sparse(extent_results[i], results[i])) {
+ cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl;
+ context->errors++;
+ }
+ } else {
+ if (!old_value.check(results[i])) {
+ cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl;
+ context->errors++;
+ }
+
+ uint32_t checksum = 0;
+ if (checksum_retvals[i] == 0) {
+ try {
+ auto bl_it = checksums[i].cbegin();
+ uint32_t csum_count;
+ decode(csum_count, bl_it);
+ decode(checksum, bl_it);
+ } catch (const buffer::error &err) {
+ checksum_retvals[i] = -EBADMSG;
+ }
+ }
+ if (checksum_retvals[i] != 0 || checksum != results[i].crc32c(-1)) {
+ cerr << num << ": oid " << oid << " checksum " << checksums[i]
+ << " incorrect, expecting " << results[i].crc32c(-1)
+ << std::endl;
+ context->errors++;
+ }
+ }
+ }
+ if (context->errors) ceph_abort();
+ }
+
+ // Attributes
+ if (!context->no_omap) {
+ if (!(old_value.header == header)) {
+ cerr << num << ": oid " << oid << " header does not match, old size: "
+ << old_value.header.length() << " new size " << header.length()
+ << std::endl;
+ ceph_assert(old_value.header == header);
+ }
+ if (omap.size() != old_value.attrs.size()) {
+ cerr << num << ": oid " << oid << " omap.size() is " << omap.size()
+ << " and old is " << old_value.attrs.size() << std::endl;
+ ceph_assert(omap.size() == old_value.attrs.size());
+ }
+ if (omap_keys.size() != old_value.attrs.size()) {
+ cerr << num << ": oid " << oid << " omap.size() is " << omap_keys.size()
+ << " and old is " << old_value.attrs.size() << std::endl;
+ ceph_assert(omap_keys.size() == old_value.attrs.size());
+ }
+ }
+ if (xattrs.size() != old_value.attrs.size()) {
+ cerr << num << ": oid " << oid << " xattrs.size() is " << xattrs.size()
+ << " and old is " << old_value.attrs.size() << std::endl;
+ ceph_assert(xattrs.size() == old_value.attrs.size());
+ }
+ for (map<string, ContDesc>::iterator iter = old_value.attrs.begin();
+ iter != old_value.attrs.end();
+ ++iter) {
+ bufferlist bl = context->attr_gen.gen_bl(
+ iter->second);
+ if (!context->no_omap) {
+ map<string, bufferlist>::iterator omap_iter = omap.find(iter->first);
+ ceph_assert(omap_iter != omap.end());
+ ceph_assert(bl.length() == omap_iter->second.length());
+ bufferlist::iterator k = bl.begin();
+ for(bufferlist::iterator l = omap_iter->second.begin();
+ !k.end() && !l.end();
+ ++k, ++l) {
+ ceph_assert(*l == *k);
+ }
+ }
+ map<string, bufferlist>::iterator xattr_iter = xattrs.find(iter->first);
+ ceph_assert(xattr_iter != xattrs.end());
+ ceph_assert(bl.length() == xattr_iter->second.length());
+ bufferlist::iterator k = bl.begin();
+ for (bufferlist::iterator j = xattr_iter->second.begin();
+ !k.end() && !j.end();
+ ++j, ++k) {
+ ceph_assert(*j == *k);
+ }
+ }
+ if (!context->no_omap) {
+ for (set<string>::iterator i = omap_requested_keys.begin();
+ i != omap_requested_keys.end();
+ ++i) {
+ if (!omap_returned_values.count(*i))
+ ceph_assert(!old_value.attrs.count(*i));
+ if (!old_value.attrs.count(*i))
+ ceph_assert(!omap_returned_values.count(*i));
+ }
+ for (map<string, bufferlist>::iterator i = omap_returned_values.begin();
+ i != omap_returned_values.end();
+ ++i) {
+ ceph_assert(omap_requested_keys.count(i->first));
+ ceph_assert(omap.count(i->first));
+ ceph_assert(old_value.attrs.count(i->first));
+ ceph_assert(i->second == omap[i->first]);
+ }
+ }
+ }
+ for (vector<librados::AioCompletion *>::iterator it = completions.begin();
+ it != completions.end(); ++it) {
+ (*it)->release();
+ }
+ context->kick();
+ done = true;
+ }
+
+  /// Reports whether this op has completed, i.e. returns the `done` flag.
+  bool finished() override { return done; }
+
+  /// Returns the name of this operation type.
+  string getType() override { return "ReadOp"; }
+};
+
+/// Test op that creates a new snapshot and records it in the test context.
+///
+/// With context->pool_snaps set, a named pool snapshot is created and looked
+/// up; otherwise a self-managed snapshot is created and the io_ctx write
+/// context is refreshed with the current snapshot set.
+class SnapCreateOp : public TestOp {
+public:
+  SnapCreateOp(int n,
+               RadosTestContext *context,
+               TestOpStat *stat = 0)
+    : TestOp(n, context, stat)
+  {}
+
+  void _begin() override
+  {
+    uint64_t snap;
+
+    if (!context->pool_snaps) {
+      ceph_assert(!context->io_ctx.selfmanaged_snap_create(&snap));
+    } else {
+      // Pool snapshot names are "<prefix>snap<counter>".
+      stringstream name_builder;
+      name_builder << context->prefix << "snap" << ++context->snapname_num;
+      const string snapname = name_builder.str();
+
+      const int ret = context->io_ctx.snap_create(snapname.c_str());
+      if (ret) {
+        cerr << "snap_create returned " << ret << std::endl;
+        ceph_abort();
+      }
+      ceph_assert(!context->io_ctx.snap_lookup(snapname.c_str(), &snap));
+    }
+
+    context->state_lock.Lock();
+    context->add_snap(snap);
+
+    if (context->pool_snaps) {
+      context->state_lock.Unlock();
+      return;
+    }
+
+    // Self-managed snaps: collect the snap ids in reverse key order while the
+    // lock is still held, then update the write context without it.
+    vector<uint64_t> snapset;
+    snapset.reserve(context->snaps.size());
+    for (map<int,uint64_t>::reverse_iterator p = context->snaps.rbegin();
+         p != context->snaps.rend();
+         ++p) {
+      snapset.push_back(p->second);
+    }
+    context->state_lock.Unlock();
+
+    const int r =
+      context->io_ctx.selfmanaged_snap_set_write_ctx(context->seq, snapset);
+    if (r) {
+      cerr << "r is " << r << " snapset is " << snapset << " seq is " << context->seq << std::endl;
+      ceph_abort();
+    }
+  }
+
+  string getType() override { return "SnapCreateOp"; }
+
+  /// Pool snapshot creation asks for other in-flight ops to be quiesced.
+  bool must_quiesce_other_ops() override { return context->pool_snaps; }
+};
+
+/// Test op that removes the snapshot stored under key `to_remove` in
+/// context->snaps, both from the model and from the cluster.
+class SnapRemoveOp : public TestOp {
+public:
+  int to_remove;
+
+  SnapRemoveOp(int n, RadosTestContext *context,
+               int snap,
+               TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      to_remove(snap)
+  {}
+
+  void _begin() override
+  {
+    // state_lock is held for the whole operation, including the rados calls.
+    Mutex::Locker l(context->state_lock);
+
+    // Read the snap id before the model forgets about it.
+    uint64_t snap = context->snaps[to_remove];
+    context->remove_snap(to_remove);
+
+    if (context->pool_snaps) {
+      string snapname;
+
+      ceph_assert(!context->io_ctx.snap_get_name(snap, &snapname));
+      ceph_assert(!context->io_ctx.snap_remove(snapname.c_str()));
+      return;
+    }
+
+    ceph_assert(!context->io_ctx.selfmanaged_snap_remove(snap));
+
+    // Rebuild the write context from the remaining snaps, in reverse key order.
+    vector<uint64_t> snapset;
+    snapset.reserve(context->snaps.size());
+    for (map<int,uint64_t>::reverse_iterator p = context->snaps.rbegin();
+         p != context->snaps.rend();
+         ++p) {
+      snapset.push_back(p->second);
+    }
+
+    const int r =
+      context->io_ctx.selfmanaged_snap_set_write_ctx(context->seq, snapset);
+    if (r) {
+      cerr << "r is " << r << " snapset is " << snapset << " seq is " << context->seq << std::endl;
+      ceph_abort();
+    }
+  }
+
+  string getType() override { return "SnapRemoveOp"; }
+};
+
+/// Test op that toggles a watch on an object: registers one if the context
+/// has no watch context for the oid, otherwise removes the existing one.
+/// Does nothing (beyond kicking the context) if the model says the object is
+/// deleted.
+class WatchOp : public TestOp {
+  string oid;
+public:
+  WatchOp(int n,
+          RadosTestContext *context,
+          const string &_oid,
+          TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(_oid)
+  {}
+
+  void _begin() override
+  {
+    TestWatchContext *ctx = nullptr;
+    {
+      Mutex::Locker l(context->state_lock);
+      ObjectDesc contents;
+      context->find_object(oid, &contents);
+      if (contents.deleted()) {
+        context->kick();
+        return;
+      }
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+
+      ctx = context->get_watch_context(oid);
+    }
+
+    // The rados watch/unwatch calls are made without state_lock held; only
+    // the bookkeeping in the context is done under the lock.
+    int r;
+    if (ctx) {
+      r = context->io_ctx.unwatch2(ctx->get_handle());
+      {
+        Mutex::Locker l(context->state_lock);
+        context->unwatch(oid);
+      }
+    } else {
+      {
+        Mutex::Locker l(context->state_lock);
+        ctx = context->watch(oid);
+      }
+
+      r = context->io_ctx.watch2(context->prefix+oid,
+                                 &ctx->get_handle(),
+                                 ctx);
+    }
+
+    if (r) {
+      cerr << "r is " << r << std::endl;
+      ceph_abort();
+    }
+
+    Mutex::Locker l(context->state_lock);
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+  }
+
+  string getType() override { return "WatchOp"; }
+};
+
+/// Test op that rolls an object back to a randomly chosen snapshot.
+///
+/// Up to three writes are submitted against the object, each with its own
+/// completion in `comps`:
+///   - slot 0: an empty append, only if the model says the object existed
+///     before the rollback;
+///   - slot 1: the rollback itself (pool or self-managed, per
+///     context->pool_snaps);
+///   - slot 2: an empty append, only if the model says the object existed at
+///     the chosen snapshot.
+/// `outstanding` is the number of callbacks still expected; the op is done
+/// when it reaches zero.
+class RollbackOp : public TestOp {
+public:
+  string oid;
+  int roll_back_to;
+  librados::ObjectWriteOperation zero_write_op1;
+  librados::ObjectWriteOperation zero_write_op2;
+  librados::ObjectWriteOperation op;
+  vector<librados::AioCompletion *> comps;
+  std::shared_ptr<int> in_use;
+  int last_finished;
+  int outstanding;
+
+  RollbackOp(int n,
+             RadosTestContext *context,
+             const string &_oid,
+             TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(_oid), roll_back_to(-1),
+      comps(3, NULL),
+      last_finished(-1), outstanding(3)
+  {}
+
+  void _begin() override
+  {
+    context->state_lock.Lock();
+    if (context->get_watch_context(oid)) {
+      // Nothing is submitted on this path, so _finish() will never run.
+      // finished() returns `done`, so it must be set here or the op can
+      // never be reported as finished.
+      done = true;
+      context->kick();
+      context->state_lock.Unlock();
+      return;
+    }
+
+    if (context->snaps.empty()) {
+      context->kick();
+      context->state_lock.Unlock();
+      done = true;
+      return;
+    }
+
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+
+    roll_back_to = rand_choose(context->snaps)->first;
+    in_use = context->snaps_in_use.lookup_or_create(
+      roll_back_to,
+      roll_back_to);
+
+
+    cout << "rollback oid " << oid << " to " << roll_back_to << std::endl;
+
+    bool existed_before = context->object_existed_at(oid);
+    bool existed_after = context->object_existed_at(oid, roll_back_to);
+
+    context->roll_back(oid, roll_back_to);
+    uint64_t snap = context->snaps[roll_back_to];
+
+    // One fewer callback to wait for per skipped empty append.
+    outstanding -= (!existed_before) + (!existed_after);
+
+    context->state_lock.Unlock();
+
+    bufferlist bl, bl2;
+    zero_write_op1.append(bl);
+    zero_write_op2.append(bl2);
+
+    if (context->pool_snaps) {
+      op.snap_rollback(snap);
+    } else {
+      op.selfmanaged_snap_rollback(snap);
+    }
+
+    if (existed_before) {
+      _submit(0, &zero_write_op1);
+    }
+    _submit(1, &op);
+    if (existed_after) {
+      _submit(2, &zero_write_op2);
+    }
+  }
+
+  /// Creates a completion whose callback info carries id `slot`, stores it
+  /// in comps[slot] and submits `wop` against this op's object.
+  void _submit(int slot, librados::ObjectWriteOperation *wop)
+  {
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(slot));
+    comps[slot] =
+      context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                           &write_callback);
+    context->io_ctx.aio_operate(
+      context->prefix+oid, comps[slot], wop);
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+    uint64_t tid = info->id;
+    cout << num << ": finishing rollback tid " << tid
+         << " to " << context->prefix + oid << std::endl;
+    // Callbacks are expected in increasing slot order.
+    ceph_assert((int)(info->id) > last_finished);
+    last_finished = info->id;
+
+    int r;
+    if ((r = comps[last_finished]->get_return_value()) != 0) {
+      cerr << "err " << r << std::endl;
+      ceph_abort();
+    }
+    if (--outstanding == 0) {
+      // Last expected callback: record its version and release the oid.
+      done = true;
+      context->update_object_version(oid, comps[tid]->get_version64());
+      context->oid_in_use.erase(oid);
+      context->oid_not_in_use.insert(oid);
+      in_use = std::shared_ptr<int>();
+      context->kick();
+    }
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "RollBackOp";
+  }
+};
+
+/// Test op that copies `oid_src` onto `oid` with copy_from and, at the same
+/// time, queues an ordered stat read against the destination.
+///
+/// Two callbacks are expected (id 0: the copy, id 1: the racing read); the
+/// op is finished once both have been seen. Note that `done` here is an int
+/// counter declared by this class.
+class CopyFromOp : public TestOp {
+public:
+  string oid, oid_src;
+  ObjectDesc src_value;
+  librados::ObjectWriteOperation op;
+  librados::ObjectReadOperation rd_op;
+  librados::AioCompletion *comp;
+  librados::AioCompletion *comp_racing_read = nullptr;
+  std::shared_ptr<int> in_use;
+  int snap;
+  int done;
+  uint64_t version;
+  int r;
+
+  CopyFromOp(int n,
+             RadosTestContext *context,
+             const string &oid,
+             const string &oid_src,
+             TestOpStat *stat)
+    : TestOp(n, context, stat),
+      oid(oid), oid_src(oid_src),
+      comp(NULL), snap(-1), done(0),
+      version(0), r(0)
+  {}
+
+  void _begin() override
+  {
+    ContDesc cont;
+    {
+      Mutex::Locker l(context->state_lock);
+      cont = ContDesc(context->seq_num, context->current_snap,
+                      context->seq_num, "");
+
+      // Both the destination and the source are marked busy.
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+      context->oid_in_use.insert(oid_src);
+      context->oid_not_in_use.erase(oid_src);
+
+      // choose source snap (the leading 0 disables the random-snap branch)
+      if (0 && !(rand() % 4) && !context->snaps.empty()) {
+        snap = rand_choose(context->snaps)->first;
+        in_use = context->snaps_in_use.lookup_or_create(snap, snap);
+      } else {
+        snap = -1;
+      }
+
+      context->find_object(oid_src, &src_value, snap);
+      if (!src_value.deleted())
+        context->update_object_full(oid, src_value);
+    }
+
+    const string dst = context->prefix+oid;
+    const string src = context->prefix+oid_src;
+    op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0);
+
+    pair<TestOp*, TestOp::CallbackInfo*> *copy_cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp = context->rados.aio_create_completion((void*) copy_cb_arg, NULL,
+                                                &write_callback);
+    context->io_ctx.aio_operate(dst, comp, &op);
+
+    // queue up a racing read, too.
+    pair<TestOp*, TestOp::CallbackInfo*> *read_cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(1));
+    comp_racing_read = context->rados.aio_create_completion((void*) read_cb_arg, NULL, &write_callback);
+    rd_op.stat(NULL, NULL, NULL);
+    context->io_ctx.aio_operate(dst, comp_racing_read, &rd_op,
+                                librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update
+                                NULL);
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+
+    // note that the read can (and atm will) come back before the
+    // write reply, but will reflect the update and the versions will
+    // match.
+
+    switch (info->id) {
+    case 0:
+      // copy_from
+      ceph_assert(comp->is_complete());
+      cout << num << ": finishing copy_from to " << context->prefix + oid << std::endl;
+      r = comp->get_return_value();
+      if (r == 0) {
+        ceph_assert(!version || comp->get_version64() == version);
+        version = comp->get_version64();
+        context->update_object_version(oid, comp->get_version64());
+      } else if (r == -ENOENT && src_value.deleted()) {
+        cout << num << ": got expected ENOENT (src dne)" << std::endl;
+      } else {
+        cerr << "Error: oid " << oid << " copy_from " << oid_src << " returned error code "
+             << r << std::endl;
+        ceph_abort();
+      }
+      break;
+
+    case 1:
+      // racing read; an unexpected error is only logged here, not fatal
+      ceph_assert(comp_racing_read->is_complete());
+      cout << num << ": finishing copy_from racing read to " << context->prefix + oid << std::endl;
+      r = comp_racing_read->get_return_value();
+      if (r == 0) {
+        ceph_assert(comp_racing_read->get_return_value() == 0);
+        ceph_assert(!version || comp_racing_read->get_version64() == version);
+        version = comp_racing_read->get_version64();
+      } else if (!(r == -ENOENT && src_value.deleted())) {
+        cerr << "Error: oid " << oid << " copy_from " << oid_src << " returned error code "
+             << r << std::endl;
+      }
+      break;
+
+    default:
+      break;
+    }
+
+    if (++done == 2) {
+      context->oid_in_use.erase(oid);
+      context->oid_not_in_use.insert(oid);
+      context->oid_in_use.erase(oid_src);
+      context->oid_not_in_use.insert(oid_src);
+      context->kick();
+    }
+  }
+
+  bool finished() override { return done == 2; }
+
+  string getType() override { return "CopyFromOp"; }
+};
+
+/// Test op that reads a random sub-range of one chunk of a chunked object and
+/// compares it with the matching range of the chunk's target object.
+///
+/// Index 0 of the per-read vectors (completions, results, retvals, ...)
+/// belongs to the read of `oid`; index 1 belongs to the read of `tgt_oid`,
+/// which additionally requests a CRC32C checksum. In _finish() the crc32c of
+/// the data read at index 0 is compared against that checksum.
+class ChunkReadOp : public TestOp {
+public:
+  vector<librados::AioCompletion *> completions;
+  librados::ObjectReadOperation op;
+  string oid;
+  ObjectDesc old_value;
+  ObjectDesc tgt_value;
+  int snap;
+  bool balance_reads;
+
+  std::shared_ptr<int> in_use;
+
+  vector<bufferlist> results;
+  vector<int> retvals;
+  vector<bool> is_sparse_read;
+  uint64_t waiting_on;
+
+  vector<bufferlist> checksums;
+  vector<int> checksum_retvals;
+  uint32_t offset = 0;
+  uint32_t length = 0;
+  string tgt_oid;
+  string tgt_pool_name;
+  uint32_t tgt_offset = 0;
+
+  ChunkReadOp(int n,
+              RadosTestContext *context,
+              const string &oid,
+              const string &tgt_pool_name,
+              bool balance_reads,
+              TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      completions(2),
+      oid(oid),
+      snap(0),
+      balance_reads(balance_reads),
+      results(2),
+      retvals(2),
+      waiting_on(0),
+      checksums(2),
+      checksum_retvals(2),
+      tgt_pool_name(tgt_pool_name)
+  {}
+
+  /// Appends a read of [offset, offset+length) to `read_op`, storing data and
+  /// return value in slot `index`. For any slot other than 0 a CRC32C
+  /// checksum over the same range (seeded with -1) is requested as well.
+  void _do_read(librados::ObjectReadOperation& read_op, uint32_t offset, uint32_t length, int index) {
+    read_op.read(offset,
+                 length,
+                 &results[index],
+                 &retvals[index]);
+    if (index != 0) {
+      bufferlist init_value_bl;
+      encode(static_cast<uint32_t>(-1), init_value_bl);
+      read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, offset, length,
+                       0, &checksums[index], &checksum_retvals[index]);
+    }
+
+  }
+
+  void _begin() override
+  {
+    context->state_lock.Lock();
+    std::cout << num << ": chunk read oid " << oid << " snap " << snap << std::endl;
+    done = 0;
+
+    context->find_object(oid, &old_value);
+
+    if (old_value.chunk_info.size() == 0) {
+      std::cout << ": no chunks" << std::endl;
+      context->kick();
+      context->state_lock.Unlock();
+      done = true;
+      return;
+    }
+
+    // Create the completions only once we know reads will be submitted;
+    // creating them before the early return above would leak them, because
+    // they are released in _finish(), which never runs on that path.
+    for (uint32_t i = 0; i < 2; i++) {
+      completions[i] = context->rados.aio_create_completion((void *) this, &read_callback, 0);
+    }
+
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+    if (old_value.deleted()) {
+      std::cout << num << ": expect deleted" << std::endl;
+    } else {
+      std::cout << num << ": expect " << old_value.most_recent() << std::endl;
+    }
+
+    // Pick a random chunk ...
+    int rand_index = rand() % old_value.chunk_info.size();
+    auto iter = old_value.chunk_info.begin();
+    for (int i = 0; i < rand_index; i++) {
+      iter++;
+    }
+    // ... then a random start within its first half and a random length that
+    // keeps the request inside the chunk.
+    offset = iter->first;
+    offset += (rand() % iter->second.length)/2;
+    uint32_t t_length = rand() % iter->second.length;
+    while (t_length + offset > iter->first + iter->second.length) {
+      t_length = rand() % iter->second.length;
+    }
+    length = t_length;
+    // Same relative position inside the target object.
+    tgt_offset = iter->second.offset + offset - iter->first;
+    tgt_oid = iter->second.oid;
+
+    std::cout << num << ": ori offset " << iter->first << " req offset " << offset
+              << " ori length " << iter->second.length << " req length " << length
+              << " ori tgt_offset " << iter->second.offset << " req tgt_offset " << tgt_offset
+              << " tgt_oid " << tgt_oid << std::endl;
+
+    TestWatchContext *ctx = context->get_watch_context(oid);
+    context->state_lock.Unlock();
+    if (ctx) {
+      // The object is watched: send a notify and wait on the watch context
+      // before reading (done without state_lock held).
+      ceph_assert(old_value.exists);
+      TestAlarm alarm;
+      std::cerr << num << ": about to start" << std::endl;
+      ctx->start();
+      std::cerr << num << ": started" << std::endl;
+      bufferlist bl;
+      context->io_ctx.set_notify_timeout(600);
+      int r = context->io_ctx.notify2(context->prefix+oid, bl, 0, NULL);
+      if (r < 0) {
+        std::cerr << "r is " << r << std::endl;
+        ceph_abort();
+      }
+      std::cerr << num << ": notified, waiting" << std::endl;
+      ctx->wait();
+    }
+    context->state_lock.Lock();
+
+    _do_read(op, offset, length, 0);
+
+    unsigned flags = 0;
+    if (balance_reads)
+      flags |= librados::OPERATION_BALANCE_READS;
+
+    ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[0], &op,
+                                             flags, NULL));
+    waiting_on++;
+
+    _do_read(op, tgt_offset, length, 1);
+    ceph_assert(!context->io_ctx.aio_operate(context->prefix+tgt_oid, completions[1], &op,
+                                             flags, NULL));
+
+    waiting_on++;
+    context->state_lock.Unlock();
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+    ceph_assert(!done);
+    ceph_assert(waiting_on > 0);
+    // Only the last of the expected callbacks does the verification.
+    if (--waiting_on) {
+      return;
+    }
+
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    int retval = completions[0]->get_return_value();
+    std::cout << ": finish!! ret: " << retval << std::endl;
+    context->find_object(tgt_oid, &tgt_value);
+
+    // Both reads must agree on the return code, and the only tolerated error
+    // is ENOENT for an object the model considers deleted.
+    for (int i = 0; i < 2; i++) {
+      ceph_assert(completions[i]->is_complete());
+      int err = completions[i]->get_return_value();
+      if (err != retval) {
+        cerr << num << ": Error: oid " << oid << " read returned different error codes: "
+             << retval << " and " << err << std::endl;
+        ceph_abort();
+      }
+      if (err) {
+        if (!(err == -ENOENT && old_value.deleted())) {
+          cerr << num << ": Error: oid " << oid << " read returned error code "
+               << err << std::endl;
+          ceph_abort();
+        }
+      }
+    }
+
+    if (!retval) {
+      if (old_value.deleted()) {
+        std::cout << num << ": expect deleted" << std::endl;
+        ceph_abort_msg("expected deleted");
+      } else {
+        std::cout << num << ": expect " << old_value.most_recent() << std::endl;
+      }
+      if (tgt_value.has_contents()) {
+        uint32_t checksum[2] = {0};
+        if (checksum_retvals[1] == 0) {
+          try {
+            // Checksum reply: a count followed by the checksum value.
+            auto bl_it = checksums[1].cbegin();
+            uint32_t csum_count;
+            decode(csum_count, bl_it);
+            decode(checksum[1], bl_it);
+          } catch (const buffer::error &err) {
+            checksum_retvals[1] = -EBADMSG;
+          }
+        }
+
+        if (checksum_retvals[1] != 0) {
+          cerr << num << ": oid " << oid << " checksum retvals " << checksums[0]
+               << " error " << std::endl;
+          context->errors++;
+        }
+
+        // Locally computed crc of the data read from `oid`.
+        checksum[0] = results[0].crc32c(-1);
+
+        if (checksum[0] != checksum[1]) {
+          cerr << num << ": oid " << oid << " checksum src " << checksum[0]
+               << " chunksum tgt " << checksum[1] << " incorrect, expecting "
+               << results[0].crc32c(-1)
+               << std::endl;
+          context->errors++;
+        }
+        if (context->errors) ceph_abort();
+      }
+    }
+    for (vector<librados::AioCompletion *>::iterator it = completions.begin();
+         it != completions.end(); ++it) {
+      (*it)->release();
+    }
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "ChunkReadOp";
+  }
+};
+
+/// Test op that copies `oid_src` (read through context->io_ctx) onto `oid`
+/// with copy_from. The write goes to the low tier io_ctx when
+/// `tgt_pool_name` equals context->low_tier_pool_name, otherwise to the
+/// regular io_ctx. `done` is an int counter declared by this class; a single
+/// callback finishes the op.
+class CopyOp : public TestOp {
+public:
+  string oid, oid_src, tgt_pool_name;
+  librados::ObjectWriteOperation op;
+  librados::ObjectReadOperation rd_op;
+  librados::AioCompletion *comp;
+  ObjectDesc src_value, tgt_value;
+  int done;
+  int r;
+
+  CopyOp(int n,
+         RadosTestContext *context,
+         const string &oid_src,
+         const string &oid,
+         const string &tgt_pool_name,
+         TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(oid), oid_src(oid_src), tgt_pool_name(tgt_pool_name),
+      comp(NULL), done(0), r(0)
+  {}
+
+  void _begin() override
+  {
+    Mutex::Locker l(context->state_lock);
+
+    // Only the source is marked busy.
+    context->oid_in_use.insert(oid_src);
+    context->oid_not_in_use.erase(oid_src);
+
+    const string src = context->prefix+oid_src;
+    context->find_object(oid_src, &src_value);
+    op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0);
+
+    cout << "copy op oid " << oid_src << " to " << oid << " tgt_pool_name " << tgt_pool_name << std::endl;
+
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                &write_callback);
+
+    const bool to_low_tier = (tgt_pool_name == context->low_tier_pool_name);
+    if (!to_low_tier) {
+      context->io_ctx.aio_operate(context->prefix+oid, comp, &op);
+    } else {
+      context->low_tier_io_ctx.aio_operate(context->prefix+oid, comp, &op);
+    }
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+
+    if (info->id == 0) {
+      ceph_assert(comp->is_complete());
+      cout << num << ": finishing copy op to oid " << oid << std::endl;
+      r = comp->get_return_value();
+      if (r) {
+        cerr << "Error: oid " << oid << " write returned error code "
+             << r << std::endl;
+        ceph_abort();
+      }
+    }
+
+    if (++done == 1) {
+      context->oid_in_use.erase(oid_src);
+      context->oid_not_in_use.insert(oid_src);
+      context->kick();
+    }
+  }
+
+  bool finished() override { return done == 1; }
+
+  string getType() override { return "CopyOp"; }
+};
+
+/// Test op that issues set_chunk on `oid`, pointing the range
+/// [offset, offset+length) at `oid_tgt` (at `tgt_offset`) in the low tier
+/// io_ctx, and records the new chunk in the model on success.
+/// `done` is an int counter declared by this class; a single callback
+/// finishes the op.
+class SetChunkOp : public TestOp {
+public:
+  string oid, oid_tgt, tgt_pool_name;
+  ObjectDesc src_value, tgt_value;
+  librados::ObjectWriteOperation op;
+  librados::ObjectReadOperation rd_op;
+  librados::AioCompletion *comp;
+  std::shared_ptr<int> in_use;
+  int done;
+  int r;
+  uint64_t offset;
+  uint32_t length;
+  uint64_t tgt_offset;
+
+  SetChunkOp(int n,
+             RadosTestContext *context,
+             const string &oid,
+             uint64_t offset,
+             uint32_t length,
+             const string &oid_tgt,
+             const string &tgt_pool_name,
+             uint64_t tgt_offset,
+             TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(oid), oid_tgt(oid_tgt), tgt_pool_name(tgt_pool_name),
+      comp(NULL), done(0),
+      r(0), offset(offset), length(length),
+      tgt_offset(tgt_offset)
+  {}
+
+  void _begin() override
+  {
+    Mutex::Locker l(context->state_lock);
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+
+    if (tgt_pool_name.empty()) ceph_abort();
+
+    context->find_object(oid, &src_value);
+    context->find_object(oid_tgt, &tgt_value);
+
+    // Guard the op with the modelled version when the source is known to
+    // exist with a non-zero version.
+    const bool have_version = src_value.version != 0 && !src_value.deleted();
+    if (have_version)
+      op.assert_version(src_value.version);
+    op.set_chunk(offset, length, context->low_tier_io_ctx,
+                 context->prefix+oid_tgt, tgt_offset);
+
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                &write_callback);
+    context->io_ctx.aio_operate(context->prefix+oid, comp, &op,
+                                librados::OPERATION_ORDER_READS_WRITES);
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+
+    if (info->id == 0) {
+      ceph_assert(comp->is_complete());
+      cout << num << ": finishing set_chunk to oid " << oid << std::endl;
+      r = comp->get_return_value();
+      if (r == 0) {
+        // Success: remember the new chunk mapping and the object version.
+        ChunkDesc chunk;
+        chunk.offset = tgt_offset;
+        chunk.length = length;
+        chunk.oid = oid_tgt;
+        context->update_object_chunk_target(oid, offset, chunk);
+        context->update_object_version(oid, comp->get_version64());
+      } else if (r == -ENOENT && src_value.deleted()) {
+        cout << num << ": got expected ENOENT (src dne)" << std::endl;
+      } else if (r == -EOPNOTSUPP) {
+        // EOPNOTSUPP is accepted only if the requested range overlaps a chunk
+        // already recorded in the model.
+        bool is_overlapped = false;
+        for (auto &p : src_value.chunk_info) {
+          const bool starts_at_or_before =
+            p.first <= offset && p.first + p.second.length > offset;
+          const bool starts_inside =
+            p.first > offset && p.first <= offset + length;
+          if (starts_at_or_before || starts_inside) {
+            cout << " range is overlapped  offset: " << offset << " length: " << length
+                 << " chunk_info offset: " << p.second.offset << " length "
+                 << p.second.length << std::endl;
+            is_overlapped = true;
+            context->update_object_version(oid, comp->get_version64());
+          }
+        }
+        if (!is_overlapped) {
+          cerr << "Error: oid " << oid << " set_chunk " << oid_tgt << " returned error code "
+               << r << " offset: " << offset << " length: " << length <<  std::endl;
+          ceph_abort();
+        }
+      } else {
+        cerr << "Error: oid " << oid << " set_chunk " << oid_tgt << " returned error code "
+             << r << std::endl;
+        ceph_abort();
+      }
+    }
+
+    if (++done == 1) {
+      context->oid_in_use.erase(oid);
+      context->oid_not_in_use.insert(oid);
+      context->kick();
+    }
+  }
+
+  bool finished() override { return done == 1; }
+
+  string getType() override { return "SetChunkOp"; }
+};
+
+/// Test op that makes `oid` a redirect to `oid_tgt` in the low tier io_ctx.
+///
+/// _begin() runs entirely under state_lock and performs several synchronous
+/// steps before the final asynchronous set_redirect:
+///   1. if the model already has a redirect target for `oid`: copy `oid` to
+///      `oid_tgt`, then remove `oid` (ignoring redirects) and free the old
+///      target in the model;
+///   2. stat `oid` (ignoring redirects) to refresh its modelled version;
+///   3. stat `oid_tgt` to obtain the target version;
+///   4. submit set_redirect with a callback (id 0).
+/// `done` is an int counter declared by this class; a single callback
+/// finishes the op.
+class SetRedirectOp : public TestOp {
+public:
+  string oid, oid_tgt, tgt_pool_name;
+  ObjectDesc src_value, tgt_value;
+  librados::ObjectWriteOperation op;
+  librados::ObjectReadOperation rd_op;
+  librados::AioCompletion *comp;
+  std::shared_ptr<int> in_use;
+  int done;
+  int r;
+
+  SetRedirectOp(int n,
+                RadosTestContext *context,
+                const string &oid,
+                const string &oid_tgt,
+                const string &tgt_pool_name,
+                TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      oid(oid), oid_tgt(oid_tgt), tgt_pool_name(tgt_pool_name),
+      comp(NULL), done(0),
+      r(0)
+  {}
+
+  void _begin() override
+  {
+    Mutex::Locker l(context->state_lock);
+
+    // Flag combinations used by the submissions below.
+    const int ordered = librados::OPERATION_ORDER_READS_WRITES;
+    const int ordered_no_redirect =
+      librados::OPERATION_ORDER_READS_WRITES |
+      librados::OPERATION_IGNORE_REDIRECT;
+
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+    context->oid_redirect_in_use.insert(oid_tgt);
+    context->oid_redirect_not_in_use.erase(oid_tgt);
+
+    if (tgt_pool_name.empty()) ceph_abort();
+
+    context->find_object(oid, &src_value);
+    if (!context->redirect_objs[oid].empty()) {
+      /* copy_from oid --> oid_tgt */
+      comp = context->rados.aio_create_completion();
+      const string src = context->prefix+oid;
+      op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0);
+      context->low_tier_io_ctx.aio_operate(context->prefix+oid_tgt, comp, &op,
+                                           ordered);
+      comp->wait_for_safe();
+      r = comp->get_return_value();
+      if (r) {
+        cerr << "Error: oid " << oid << " copy_from " << oid_tgt << " returned error code "
+             << r << std::endl;
+        ceph_abort();
+      }
+      comp->release();
+
+      /* unset redirect target */
+      comp = context->rados.aio_create_completion();
+      const bool present = !src_value.deleted();
+      context->remove_object(oid);
+      op.remove();
+      context->io_ctx.aio_operate(context->prefix+oid, comp, &op,
+                                  ordered_no_redirect);
+      comp->wait_for_safe();
+      r = comp->get_return_value();
+      // ENOENT is fine when the model did not expect the object to exist.
+      if (r && !(r == -ENOENT && !present)) {
+        cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl;
+        ceph_abort();
+      }
+      comp->release();
+
+      context->oid_redirect_not_in_use.insert(context->redirect_objs[oid]);
+      context->oid_redirect_in_use.erase(context->redirect_objs[oid]);
+    }
+
+    // Stat the source (ignoring any redirect) to pick up its version.
+    comp = context->rados.aio_create_completion();
+    rd_op.stat(NULL, NULL, NULL);
+    context->io_ctx.aio_operate(context->prefix+oid, comp, &rd_op,
+                                ordered_no_redirect,
+                                NULL);
+    comp->wait_for_safe();
+    r = comp->get_return_value();
+    if (r && !src_value.deleted()) {
+      cerr << "Error: oid " << oid << " stat returned error code "
+           << r << std::endl;
+      ceph_abort();
+    }
+    context->update_object_version(oid, comp->get_version64());
+    comp->release();
+
+    // Stat the target; its version is passed to set_redirect below.
+    comp = context->rados.aio_create_completion();
+    rd_op.stat(NULL, NULL, NULL);
+    context->low_tier_io_ctx.aio_operate(context->prefix+oid_tgt, comp, &rd_op,
+                                         ordered_no_redirect,
+                                         NULL);
+    comp->wait_for_safe();
+    r = comp->get_return_value();
+    if (r) {
+      cerr << "Error: oid " << oid_tgt << " stat returned error code "
+           << r << std::endl;
+      ceph_abort();
+    }
+    const uint64_t tgt_version = comp->get_version64();
+    comp->release();
+
+
+    context->find_object(oid, &src_value);
+
+    const bool have_version = src_value.version != 0 && !src_value.deleted();
+    if (have_version)
+      op.assert_version(src_value.version);
+    op.set_redirect(context->prefix+oid_tgt, context->low_tier_io_ctx, tgt_version);
+
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                &write_callback);
+    context->io_ctx.aio_operate(context->prefix+oid, comp, &op,
+                                ordered);
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+
+    if (info->id == 0) {
+      ceph_assert(comp->is_complete());
+      cout << num << ": finishing set_redirect to oid " << oid << std::endl;
+      r = comp->get_return_value();
+      if (r == 0) {
+        context->update_object_redirect_target(oid, oid_tgt);
+        context->update_object_version(oid, comp->get_version64());
+      } else if (r == -ENOENT && src_value.deleted()) {
+        cout << num << ": got expected ENOENT (src dne)" << std::endl;
+      } else {
+        cerr << "Error: oid " << oid << " set_redirect " << oid_tgt << " returned error code "
+             << r << std::endl;
+        ceph_abort();
+      }
+    }
+
+    if (++done == 1) {
+      context->oid_in_use.erase(oid);
+      context->oid_not_in_use.insert(oid);
+      context->kick();
+    }
+  }
+
+  bool finished() override { return done == 1; }
+
+  string getType() override { return "SetRedirectOp"; }
+};
+
+/// Test op that removes an object synchronously while ignoring any redirect
+/// on it, and then clears the object's redirect target in the model.
+///
+/// Does nothing (beyond kicking the context) if the object has a watch
+/// context. The whole operation completes inside _begin(); no callback is
+/// registered.
+class UnsetRedirectOp : public TestOp {
+public:
+  string oid;
+  librados::ObjectWriteOperation op;
+  librados::AioCompletion *comp = nullptr;
+
+  UnsetRedirectOp(int n,
+                  RadosTestContext *context,
+                  const string &oid,
+                  TestOpStat *stat = 0)
+    : TestOp(n, context, stat), oid(oid)
+  {}
+
+  void _begin() override
+  {
+    context->state_lock.Lock();
+    if (context->get_watch_context(oid)) {
+      context->kick();
+      context->state_lock.Unlock();
+      return;
+    }
+
+    ObjectDesc contents;
+    context->find_object(oid, &contents);
+    // Remember whether the model expected the object to exist, so ENOENT
+    // from the remove can be judged below.
+    bool present = !contents.deleted();
+
+    context->oid_in_use.insert(oid);
+    context->oid_not_in_use.erase(oid);
+    context->seq_num++;
+
+    context->remove_object(oid);
+
+    context->state_lock.Unlock();
+
+    // Synchronous remove, performed without state_lock held.
+    comp = context->rados.aio_create_completion();
+    op.remove();
+    context->io_ctx.aio_operate(context->prefix+oid, comp, &op,
+                                librados::OPERATION_ORDER_READS_WRITES |
+                                librados::OPERATION_IGNORE_REDIRECT);
+    comp->wait_for_safe();
+    int r = comp->get_return_value();
+    if (r && !(r == -ENOENT && !present)) {
+      cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl;
+      ceph_abort();
+    }
+    // The completion is no longer needed once its result has been read;
+    // release it so it is not leaked.
+    comp->release();
+    comp = nullptr;
+
+    context->state_lock.Lock();
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    if(!context->redirect_objs[oid].empty()) {
+      // Free the former redirect target and clear the mapping.
+      context->oid_redirect_not_in_use.insert(context->redirect_objs[oid]);
+      context->oid_redirect_in_use.erase(context->redirect_objs[oid]);
+      context->update_object_redirect_target(oid, string());
+    }
+    context->kick();
+    context->state_lock.Unlock();
+  }
+
+  string getType() override
+  {
+    return "UnsetRedirectOp";
+  }
+};
+
+/// Test op that submits tier_promote on an object and, in the callback,
+/// requires it to have succeeded before recording the resulting version.
+class TierPromoteOp : public TestOp {
+public:
+  librados::AioCompletion *completion;
+  librados::ObjectWriteOperation op;
+  string oid;
+  std::shared_ptr<int> in_use;
+
+  TierPromoteOp(int n,
+                RadosTestContext *context,
+                const string &oid,
+                TestOpStat *stat)
+    : TestOp(n, context, stat),
+      completion(NULL),
+      oid(oid)
+  {}
+
+  void _begin() override
+  {
+    {
+      // Bookkeeping and completion setup happen under state_lock.
+      Mutex::Locker l(context->state_lock);
+
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+
+      pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+        new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                                 new TestOp::CallbackInfo(0));
+      completion = context->rados.aio_create_completion((void *) cb_arg, NULL,
+                                                        &write_callback);
+    }
+
+    op.tier_promote();
+    int r = context->io_ctx.aio_operate(context->prefix+oid, completion,
+                                        &op);
+    ceph_assert(!r);
+  }
+
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker l(context->state_lock);
+    ceph_assert(!done);
+    ceph_assert(completion->is_complete());
+
+    ObjectDesc oid_value;
+    context->find_object(oid, &oid_value);
+    int r = completion->get_return_value();
+    cout << num << ":  got " << cpp_strerror(r) << std::endl;
+    // Anything other than success is unexpected.
+    if (r != 0) {
+      ceph_abort_msg("shouldn't happen");
+    }
+    context->update_object_version(oid, completion->get_version64());
+    context->find_object(oid, &oid_value);
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override { return done; }
+
+  string getType() override { return "TierPromoteOp"; }
+};
+
+/// Test op that lists the hit sets for `hash` and then fetches one of them at
+/// random.
+///
+/// _finish() is entered twice at most: the first time (comp2 still NULL) it
+/// handles the list result and, if the list is non-empty, requests one hit
+/// set; the second time it decodes the fetched hit set. ENOENT on the fetch
+/// is tolerated.
+class HitSetListOp : public TestOp {
+  librados::AioCompletion *comp1, *comp2;
+  uint32_t hash;
+  std::list< std::pair<time_t, time_t> > ls;
+  bufferlist bl;
+
+public:
+  HitSetListOp(int n,
+               RadosTestContext *context,
+               uint32_t hash,
+               TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      comp1(NULL), comp2(NULL),
+      hash(hash)
+  {}
+
+  void _begin() override
+  {
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    comp1 = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                 &write_callback);
+    int r = context->io_ctx.hit_set_list(hash, comp1, &ls);
+    ceph_assert(r == 0);
+  }
+
+  void _finish(CallbackInfo *info) override {
+    Mutex::Locker l(context->state_lock);
+
+    if (comp2) {
+      // Second stage: result of hit_set_get.
+      int r = comp2->get_return_value();
+      if (r == 0) {
+        HitSet hitset;
+        auto p = bl.cbegin();
+        decode(hitset, p);
+        cout << num << ": got hitset of type " << hitset.get_type_name()
+             << " size " << bl.length()
+             << std::endl;
+      } else {
+        // FIXME: we could verify that we did in fact race with a trim...
+        ceph_assert(r == -ENOENT);
+      }
+      done = true;
+    } else if (ls.empty()) {
+      cerr << num << ": no hitsets" << std::endl;
+      done = true;
+    } else {
+      // First stage with a non-empty list: pick a random entry and fetch it.
+      cerr << num << ": hitsets are " << ls << std::endl;
+      std::list<pair<time_t,time_t> >::iterator p = ls.begin();
+      for (int skip = rand() % ls.size(); skip > 0; --skip)
+        ++p;
+      pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+        new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                                 new TestOp::CallbackInfo(0));
+      comp2 = context->rados.aio_create_completion((void*) cb_arg, NULL,
+                                                   &write_callback);
+      int r = context->io_ctx.hit_set_get(hash, comp2, p->second, &bl);
+      ceph_assert(r == 0);
+    }
+
+    context->kick();
+  }
+
+  bool finished() override { return done; }
+
+  string getType() override { return "HitSetListOp"; }
+};
+
+/// Test operation that sends an asynchronous "undirty" write op for one
+/// object and, on completion, records the resulting object version in the
+/// test context.
+class UndirtyOp : public TestOp {
+public:
+  librados::AioCompletion *completion;
+  librados::ObjectWriteOperation op;
+  string oid;
+
+  UndirtyOp(int n,
+            RadosTestContext *context,
+            const string &oid,
+            TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      completion(NULL),
+      oid(oid)
+  {}
+
+  /// Update the model under state_lock, then submit the request unlocked.
+  void _begin() override
+  {
+    {
+      Mutex::Locker guard(context->state_lock);
+      pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+        new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                                 new TestOp::CallbackInfo(0));
+      completion = context->rados.aio_create_completion((void *) cb_arg, NULL,
+                                                        &write_callback);
+
+      // Move the object to the "in use" set and mark it undirty in the model.
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+      context->update_object_undirty(oid);
+    }
+
+    op.undirty();
+    const int rc = context->io_ctx.aio_operate(context->prefix+oid, completion,
+                                               &op, 0);
+    ceph_assert(!rc);
+  }
+
+  /// Release the object in the model and store the version that was written.
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker guard(context->state_lock);
+    ceph_assert(!done);
+    ceph_assert(completion->is_complete());
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+    context->update_object_version(oid, completion->get_version64());
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "UndirtyOp";
+  }
+};
+
+/// Test operation that asks whether an object is dirty, either at head or
+/// at a randomly chosen snapshot, and checks the answer against the model
+/// kept in the test context.
+class IsDirtyOp : public TestOp {
+public:
+  librados::AioCompletion *completion;
+  librados::ObjectReadOperation op;
+  string oid;
+  bool dirty;                  // filled in by the is_dirty read op
+  ObjectDesc old_value;        // model state looked up in _finish()
+  int snap = 0;                // model snap id, or -1 when no snap was chosen
+  std::shared_ptr<int> in_use; // handle from snaps_in_use for the chosen snap
+
+  IsDirtyOp(int n,
+            RadosTestContext *context,
+            const string &oid,
+            TestOpStat *stat = 0)
+    : TestOp(n, context, stat),
+      completion(NULL),
+      oid(oid),
+      dirty(false)
+  {}
+
+  /// Choose the read target, mark the object busy and submit the query.
+  void _begin() override
+  {
+    {
+      Mutex::Locker guard(context->state_lock);
+
+      // One time in four (when any snap exists) query a snapshot.
+      const bool one_in_four = !(rand() % 4);
+      if (one_in_four && !context->snaps.empty()) {
+        snap = rand_choose(context->snaps)->first;
+        in_use = context->snaps_in_use.lookup_or_create(snap, snap);
+      } else {
+        snap = -1;
+      }
+      std::cout << num << ": is_dirty oid " << oid << " snap " << snap
+                << std::endl;
+
+      pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+        new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                                 new TestOp::CallbackInfo(0));
+      completion = context->rados.aio_create_completion((void *) cb_arg, NULL,
+                                                        &write_callback);
+
+      context->oid_in_use.insert(oid);
+      context->oid_not_in_use.erase(oid);
+    }
+
+    const bool at_snap = snap >= 0;
+    if (at_snap) {
+      context->io_ctx.snap_set_read(context->snaps[snap]);
+    }
+
+    op.is_dirty(&dirty, NULL);
+    const int rc = context->io_ctx.aio_operate(context->prefix+oid, completion,
+                                               &op, 0);
+    ceph_assert(!rc);
+
+    // Reset the read snap that was set for this request.
+    if (at_snap) {
+      context->io_ctx.snap_set_read(0);
+    }
+  }
+
+  /// Compare the reply with the model: success must match the recorded dirty
+  /// flag, and the only accepted failure is -ENOENT for a deleted object.
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker guard(context->state_lock);
+    ceph_assert(!done);
+    ceph_assert(completion->is_complete());
+    context->oid_in_use.erase(oid);
+    context->oid_not_in_use.insert(oid);
+
+    ceph_assert(context->find_object(oid, &old_value, snap));
+
+    const int rc = completion->get_return_value();
+    if (rc != 0) {
+      cout << num << ": got " << rc << std::endl;
+      ceph_assert(rc == -ENOENT);
+      ceph_assert(old_value.deleted());
+    } else {
+      cout << num << ": " << (dirty ? "dirty" : "clean") << std::endl;
+      ceph_assert(!old_value.deleted());
+      ceph_assert(dirty == old_value.dirty);
+    }
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "IsDirtyOp";
+  }
+};
+
+
+
+/// Test operation that issues cache_flush (blocking) or cache_try_flush
+/// (non-blocking) on an object, at head or at a randomly chosen snapshot.
+class CacheFlushOp : public TestOp {
+public:
+  librados::AioCompletion *completion;
+  librados::ObjectReadOperation op;
+  string oid;
+  bool blocking;               // true: cache_flush, false: cache_try_flush
+  int snap;                    // model snap id, or -1 when no snap was chosen
+  bool can_fail;               // whether -EBUSY is an acceptable result
+  std::shared_ptr<int> in_use; // handle from snaps_in_use for the chosen snap
+
+  CacheFlushOp(int n,
+               RadosTestContext *context,
+               const string &oid,
+               TestOpStat *stat,
+               bool b)
+    : TestOp(n, context, stat),
+      completion(NULL),
+      oid(oid),
+      blocking(b),
+      snap(0),
+      can_fail(false)
+  {}
+
+  /// Choose the target, mark the object as flushing and submit the request.
+  void _begin() override
+  {
+    context->state_lock.Lock();
+
+    // One time in four (when any snap exists) flush a snapshot.
+    const bool one_in_four = !(rand() % 4);
+    if (one_in_four && !context->snaps.empty()) {
+      snap = rand_choose(context->snaps)->first;
+      in_use = context->snaps_in_use.lookup_or_create(snap, snap);
+    } else {
+      snap = -1;
+    }
+    // not being particularly specific here about knowing which
+    // flushes are on the oldest clean snap and which ones are not.
+    can_fail = !blocking || !context->snaps.empty();
+    // FIXME: we could fail if we've ever removed a snap due to
+    // the async snap trimming.
+    can_fail = true;
+    cout << num << ": " << (blocking ? "cache_flush" : "cache_try_flush")
+         << " oid " << oid << " snap " << snap << std::endl;
+
+    const bool at_snap = snap >= 0;
+    if (at_snap) {
+      context->io_ctx.snap_set_read(context->snaps[snap]);
+    }
+
+    pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+      new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                               new TestOp::CallbackInfo(0));
+    completion = context->rados.aio_create_completion((void *) cb_arg, NULL,
+                                                      &write_callback);
+    context->oid_flushing.insert(oid);
+    context->oid_not_flushing.erase(oid);
+    context->state_lock.Unlock();
+
+    // The two variants are sent with different operation flags.
+    unsigned flags;
+    if (blocking) {
+      op.cache_flush();
+      flags = librados::OPERATION_IGNORE_CACHE;
+    } else {
+      op.cache_try_flush();
+      flags = librados::OPERATION_SKIPRWLOCKS;
+    }
+    const int rc = context->io_ctx.aio_operate(context->prefix+oid, completion,
+                                               &op, flags, NULL);
+    ceph_assert(!rc);
+
+    // Reset the read snap that was set for this request.
+    if (at_snap) {
+      context->io_ctx.snap_set_read(0);
+    }
+  }
+
+  /// Accept 0, -EBUSY (only if can_fail), -EINVAL and -ENOENT; abort on
+  /// anything else.
+  void _finish(CallbackInfo *info) override
+  {
+    context->state_lock.Lock();
+    ceph_assert(!done);
+    ceph_assert(completion->is_complete());
+    context->oid_flushing.erase(oid);
+    context->oid_not_flushing.insert(oid);
+    const int rc = completion->get_return_value();
+    cout << num << ": got " << cpp_strerror(rc) << std::endl;
+    switch (rc) {
+    case 0:
+      context->update_object_version(oid, 0, snap);
+      break;
+    case -EBUSY:
+      ceph_assert(can_fail);
+      break;
+    case -EINVAL:
+      // caching not enabled?
+      break;
+    case -ENOENT:
+      // may have raced with a remove?
+      break;
+    default:
+      ceph_abort_msg("shouldn't happen");
+    }
+    context->kick();
+    done = true;
+    context->state_lock.Unlock();
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "CacheFlushOp";
+  }
+};
+
+/// Test operation that issues cache_evict on an object, at head or at a
+/// randomly chosen snapshot, and tolerates the failure codes listed in
+/// _finish().
+class CacheEvictOp : public TestOp {
+public:
+  librados::AioCompletion *completion;
+  librados::ObjectReadOperation op;
+  string oid;
+  std::shared_ptr<int> in_use; // handle from snaps_in_use for the chosen snap
+
+  CacheEvictOp(int n,
+               RadosTestContext *context,
+               const string &oid,
+               TestOpStat *stat)
+    : TestOp(n, context, stat),
+      completion(NULL),
+      oid(oid)
+  {}
+
+  /// Choose the target under state_lock, then submit the evict unlocked.
+  void _begin() override
+  {
+    int snap = -1;  // -1 means "no snapshot selected"
+    {
+      Mutex::Locker guard(context->state_lock);
+
+      // One time in four (when any snap exists) evict a snapshot.
+      const bool one_in_four = !(rand() % 4);
+      if (one_in_four && !context->snaps.empty()) {
+        snap = rand_choose(context->snaps)->first;
+        in_use = context->snaps_in_use.lookup_or_create(snap, snap);
+      }
+      cout << num << ": cache_evict oid " << oid << " snap " << snap << std::endl;
+
+      if (snap >= 0) {
+        context->io_ctx.snap_set_read(context->snaps[snap]);
+      }
+
+      pair<TestOp*, TestOp::CallbackInfo*> *cb_arg =
+        new pair<TestOp*, TestOp::CallbackInfo*>(this,
+                                                 new TestOp::CallbackInfo(0));
+      completion = context->rados.aio_create_completion((void *) cb_arg, NULL,
+                                                        &write_callback);
+    }
+
+    op.cache_evict();
+    const int rc = context->io_ctx.aio_operate(context->prefix+oid, completion,
+                                               &op, librados::OPERATION_IGNORE_CACHE,
+                                               NULL);
+    ceph_assert(!rc);
+
+    // Reset the read snap that was set for this request.
+    if (snap >= 0) {
+      context->io_ctx.snap_set_read(0);
+    }
+  }
+
+  /// Accept 0, -EBUSY, -EINVAL and -ENOENT; abort on anything else.
+  void _finish(CallbackInfo *info) override
+  {
+    Mutex::Locker guard(context->state_lock);
+    ceph_assert(!done);
+    ceph_assert(completion->is_complete());
+
+    const int rc = completion->get_return_value();
+    cout << num << ": got " << cpp_strerror(rc) << std::endl;
+    switch (rc) {
+    case 0:
+      // yay!
+      break;
+    case -EBUSY:
+      // raced with something that dirtied the object
+      break;
+    case -EINVAL:
+      // caching not enabled?
+      break;
+    case -ENOENT:
+      // may have raced with a remove?
+      break;
+    default:
+      ceph_abort_msg("shouldn't happen");
+    }
+    context->kick();
+    done = true;
+  }
+
+  bool finished() override
+  {
+    return done;
+  }
+
+  string getType() override
+  {
+    return "CacheEvictOp";
+  }
+};
+
+
+#endif
diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc
new file mode 100644
index 00000000..affff369
--- /dev/null
+++ b/src/test/osd/TestECBackend.cc
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <iostream>
+#include <sstream>
+#include <errno.h>
+#include <signal.h>
+#include "osd/ECBackend.h"
+#include "gtest/gtest.h"
+
+// Checks the offset arithmetic of ECUtil::stripe_info_t for a stripe width
+// of 4096 and a stripe size of 4: rounding logical offsets to chunk and
+// stripe boundaries, and converting aligned offsets and extents between
+// logical and chunk space.
+TEST(ECUtil, stripe_info_t)
+{
+  const uint64_t swidth = 4096;
+  const uint64_t ssize = 4;
+
+  ECUtil::stripe_info_t s(ssize, swidth);
+  ASSERT_EQ(s.get_stripe_width(), swidth);
+
+  // Rounding a logical offset up to the next chunk boundary.
+  ASSERT_EQ(s.logical_to_next_chunk_offset(0), 0u);
+  ASSERT_EQ(s.logical_to_next_chunk_offset(1), s.get_chunk_size());
+  ASSERT_EQ(s.logical_to_next_chunk_offset(swidth - 1),
+            s.get_chunk_size());
+
+  // Rounding a logical offset down to the previous chunk boundary.
+  ASSERT_EQ(s.logical_to_prev_chunk_offset(0), 0u);
+  ASSERT_EQ(s.logical_to_prev_chunk_offset(swidth), s.get_chunk_size());
+  ASSERT_EQ(s.logical_to_prev_chunk_offset((swidth * 2) - 1),
+            s.get_chunk_size());
+
+  // Rounding a logical offset up to the next stripe boundary.
+  ASSERT_EQ(s.logical_to_next_stripe_offset(0), 0u);
+  ASSERT_EQ(s.logical_to_next_stripe_offset(swidth - 1),
+            s.get_stripe_width());
+
+  // Rounding a logical offset down to the previous stripe boundary.  The
+  // zero-offset case replaces an assertion that was an exact duplicate of
+  // the one following it, mirroring the chunk-offset checks above.
+  ASSERT_EQ(s.logical_to_prev_stripe_offset(0), 0u);
+  ASSERT_EQ(s.logical_to_prev_stripe_offset(swidth), s.get_stripe_width());
+  ASSERT_EQ(s.logical_to_prev_stripe_offset((swidth * 2) - 1),
+            s.get_stripe_width());
+
+  // Conversions between stripe-aligned logical offsets and chunk offsets.
+  ASSERT_EQ(s.aligned_logical_offset_to_chunk_offset(2*swidth),
+            2*s.get_chunk_size());
+  ASSERT_EQ(s.aligned_chunk_offset_to_logical_offset(2*s.get_chunk_size()),
+            2*s.get_stripe_width());
+
+  // An aligned (offset, length) extent maps to the matching chunk extent.
+  ASSERT_EQ(s.aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)),
+            make_pair(s.get_chunk_size(), 10*s.get_chunk_size()));
+
+  // An unaligned extent that straddles a stripe boundary widens to the two
+  // stripes it touches.
+  ASSERT_EQ(s.offset_len_to_stripe_bounds(make_pair(swidth-10, (uint64_t)20)),
+            make_pair((uint64_t)0, 2*swidth));
+}
+
diff --git a/src/test/osd/TestMClockClientQueue.cc b/src/test/osd/TestMClockClientQueue.cc
new file mode 100644
index 00000000..70e054c7
--- /dev/null
+++ b/src/test/osd/TestMClockClientQueue.cc
@@ -0,0 +1,184 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "global/global_init.h"
+#include "common/common_init.h"
+
+#include "osd/mClockClientQueue.h"
+
+
+// Test entry point: set up a Ceph context (OSD entity, utility environment,
+// no default config file) and then run every registered gtest case.
+int main(int argc, char **argv) {
+  std::vector<const char*> args(argv, argv+argc);
+
+  // cct is not used directly; it is held until main returns.
+  auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD,
+                         CODE_ENVIRONMENT_UTILITY,
+                         CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+  common_init_finish(g_ceph_context);
+
+  ::testing::InitGoogleTest(&argc, argv);
+  const int result = RUN_ALL_TESTS();
+  return result;
+}
+
+
+// Fixture owning an mClockClientQueue plus three client ids, with factory
+// helpers that wrap a PG work item of a given kind into a Request.
+class MClockClientQueueTest : public testing::Test {
+public:
+  mClockClientQueue q;
+
+  uint64_t client1;
+  uint64_t client2;
+  uint64_t client3;
+
+  MClockClientQueueTest() :
+    q(g_ceph_context),
+    client1(1001),
+    client2(9999),
+    client3(100000001)
+  {}
+
+#if 0 // more work needed here
+  Request create_client_op(epoch_t e, uint64_t owner) {
+    return Request(spg_t(), OpQueueItem(OpRequestRef(), e));
+  }
+#endif
+
+  // Request wrapping a PGSnapTrim item for epoch e, owned by `owner`.
+  Request create_snaptrim(epoch_t e, uint64_t owner) {
+    unique_ptr<OpQueueItem::OpQueueable> work(new PGSnapTrim(spg_t(), e));
+    return Request(OpQueueItem(std::move(work),
+                               12, 12,
+                               utime_t(), owner, e));
+  }
+
+  // Request wrapping a PGScrub item for epoch e, owned by `owner`.
+  Request create_scrub(epoch_t e, uint64_t owner) {
+    unique_ptr<OpQueueItem::OpQueueable> work(new PGScrub(spg_t(), e));
+    return Request(OpQueueItem(std::move(work),
+                               12, 12,
+                               utime_t(), owner, e));
+  }
+
+  // Request wrapping a PGRecovery item (third argument 64) for epoch e.
+  Request create_recovery(epoch_t e, uint64_t owner) {
+    unique_ptr<OpQueueItem::OpQueueable> work(new PGRecovery(spg_t(), e, 64));
+    return Request(OpQueueItem(std::move(work),
+                               12, 12,
+                               utime_t(), owner, e));
+  }
+};
+
+
+// empty() and get_size_slow() must track enqueues, dequeues and front
+// re-insertions, for both the regular and the strict variants.
+TEST_F(MClockClientQueueTest, TestSize) {
+  ASSERT_TRUE(q.empty());
+  ASSERT_EQ(0u, q.get_size_slow());
+
+  // Five items: three regular, two strict.
+  q.enqueue(client1, 12, 1u, create_snaptrim(100, client1));
+  q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
+  q.enqueue(client2, 12, 1u, create_snaptrim(102, client2));
+  q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
+  q.enqueue(client1, 12, 1u, create_snaptrim(104, client1));
+
+  ASSERT_FALSE(q.empty());
+  ASSERT_EQ(5u, q.get_size_slow());
+
+  // Pull three items out and keep them.
+  std::list<Request> reqs;
+  for (int taken = 0; taken < 3; ++taken) {
+    reqs.push_back(q.dequeue());
+  }
+
+  ASSERT_FALSE(q.empty());
+  ASSERT_EQ(2u, q.get_size_slow());
+
+  // Put them back at the front, last one taken first.
+  q.enqueue_front(client2, 12, 1u, std::move(reqs.back()));
+  reqs.pop_back();
+
+  q.enqueue_strict_front(client3, 12, std::move(reqs.back()));
+  reqs.pop_back();
+
+  q.enqueue_strict_front(client2, 12, std::move(reqs.back()));
+  reqs.pop_back();
+
+  ASSERT_FALSE(q.empty());
+  ASSERT_EQ(5u, q.get_size_slow());
+
+  // Drain everything.
+  for (int remaining = 5; remaining > 0; --remaining) {
+    (void) q.dequeue();
+  }
+
+  ASSERT_TRUE(q.empty());
+  ASSERT_EQ(0u, q.get_size_slow());
+}
+
+
+// Regular enqueues from three clients: the first dequeue per client comes
+// out in enqueue order, the two remaining items may come out in either order.
+TEST_F(MClockClientQueueTest, TestEnqueue) {
+  q.enqueue(client1, 12, 1u, create_snaptrim(100, client1));
+  q.enqueue(client2, 12, 1u, create_snaptrim(101, client2));
+  q.enqueue(client2, 12, 1u, create_snaptrim(102, client2));
+  q.enqueue(client3, 12, 1u, create_snaptrim(103, client3));
+  q.enqueue(client1, 12, 1u, create_snaptrim(104, client1));
+
+  // Deterministic prefix.
+  const unsigned ordered[] = {100u, 101u, 103u};
+  for (unsigned want : ordered) {
+    Request r = q.dequeue();
+    ASSERT_EQ(want, r.get_map_epoch());
+  }
+
+  // The remaining two items are accepted in either order.
+  for (int i = 0; i < 2; ++i) {
+    Request r = q.dequeue();
+    ASSERT_TRUE(r.get_map_epoch() == 102u ||
+                r.get_map_epoch() == 104u);
+  }
+}
+
+
+// Strict enqueues: the expected dequeue order follows the second
+// enqueue_strict argument in descending order (16, 15, 14, 13, 12).
+TEST_F(MClockClientQueueTest, TestEnqueueStrict) {
+  q.enqueue_strict(client1, 12, create_snaptrim(100, client1));
+  q.enqueue_strict(client2, 13, create_snaptrim(101, client2));
+  q.enqueue_strict(client2, 16, create_snaptrim(102, client2));
+  q.enqueue_strict(client3, 14, create_snaptrim(103, client3));
+  q.enqueue_strict(client1, 15, create_snaptrim(104, client1));
+
+  const unsigned expected[] = {102u, 104u, 103u, 101u, 100u};
+  for (unsigned want : expected) {
+    Request r = q.dequeue();
+    ASSERT_EQ(want, r.get_map_epoch());
+  }
+}
+
+
+// remove_by_class(client2) must hand back exactly client2's two items and
+// leave the other three in the queue.
+TEST_F(MClockClientQueueTest, TestRemoveByClass) {
+  q.enqueue(client1, 12, 1u, create_snaptrim(100, client1));
+  q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
+  q.enqueue(client2, 12, 1u, create_snaptrim(102, client2));
+  q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
+  q.enqueue(client1, 12, 1u, create_snaptrim(104, client1));
+
+  std::list<Request> filtered_out;
+  q.remove_by_class(client2, &filtered_out);
+
+  // Both removed items belong to client2 (epochs 101 and 102).
+  ASSERT_EQ(2u, filtered_out.size());
+  for (; !filtered_out.empty(); filtered_out.pop_front()) {
+    auto e = filtered_out.front().get_map_epoch();
+    ASSERT_TRUE(e == 101 || e == 102);
+  }
+
+  // What is left: the strict item first, then client1's two regular items.
+  ASSERT_EQ(3u, q.get_size_slow());
+  const unsigned expected[] = {103u, 100u, 104u};
+  for (unsigned want : expected) {
+    Request r = q.dequeue();
+    ASSERT_EQ(want, r.get_map_epoch());
+  }
+}
diff --git a/src/test/osd/TestMClockOpClassQueue.cc b/src/test/osd/TestMClockOpClassQueue.cc
new file mode 100644
index 00000000..0f6b564a
--- /dev/null
+++ b/src/test/osd/TestMClockOpClassQueue.cc
@@ -0,0 +1,184 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "common/common_init.h"
+
+#include "osd/mClockOpClassQueue.h"
+
+
+// Test entry point: set up a Ceph context (OSD entity, utility environment,
+// no default config file) and then run every registered gtest case.
+int main(int argc, char **argv) {
+  std::vector<const char*> args(argv, argv+argc);
+
+  // cct is not used directly; it is held until main returns.
+  auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD,
+                         CODE_ENVIRONMENT_UTILITY,
+                         CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+  common_init_finish(g_ceph_context);
+
+  ::testing::InitGoogleTest(&argc, argv);
+  const int result = RUN_ALL_TESTS();
+  return result;
+}
+
+
+// Fixture owning an mClockOpClassQueue plus three client ids, with factory
+// helpers that wrap a PG work item of a given kind into a Request.
+class MClockOpClassQueueTest : public testing::Test {
+public:
+  mClockOpClassQueue q;
+
+  uint64_t client1;
+  uint64_t client2;
+  uint64_t client3;
+
+  MClockOpClassQueueTest() :
+    q(g_ceph_context),
+    client1(1001),
+    client2(9999),
+    client3(100000001)
+  {}
+
+#if 0 // more work needed here
+  Request create_client_op(epoch_t e, uint64_t owner) {
+    return Request(spg_t(), OpQueueItem(OpRequestRef(), e));
+  }
+#endif
+
+  // Request wrapping a PGSnapTrim item for epoch e, owned by `owner`.
+  Request create_snaptrim(epoch_t e, uint64_t owner) {
+    unique_ptr<OpQueueItem::OpQueueable> work(new PGSnapTrim(spg_t(), e));
+    return Request(OpQueueItem(std::move(work),
+                               12, 12,
+                               utime_t(), owner, e));
+  }
+
+  // Request wrapping a PGScrub item for epoch e, owned by `owner`.
+  Request create_scrub(epoch_t e, uint64_t owner) {
+    unique_ptr<OpQueueItem::OpQueueable> work(new PGScrub(spg_t(), e));
+    return Request(OpQueueItem(std::move(work),
+                               12, 12,
+                               utime_t(), owner, e));
+  }
+
+  // Request wrapping a PGRecovery item (third argument 64) for epoch e.
+  Request create_recovery(epoch_t e, uint64_t owner) {
+    unique_ptr<OpQueueItem::OpQueueable> work(new PGRecovery(spg_t(), e, 64));
+    return Request(OpQueueItem(std::move(work),
+                               12, 12,
+                               utime_t(), owner, e));
+  }
+};
+
+
+// empty() and get_size_slow() must track enqueues, dequeues and front
+// re-insertions, for both the regular and the strict variants.
+TEST_F(MClockOpClassQueueTest, TestSize) {
+  ASSERT_TRUE(q.empty());
+  ASSERT_EQ(0u, q.get_size_slow());
+
+  // Five items: three regular, two strict.
+  q.enqueue(client1, 12, 1, create_snaptrim(100, client1));
+  q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
+  q.enqueue(client2, 12, 1, create_snaptrim(102, client2));
+  q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
+  q.enqueue(client1, 12, 1, create_snaptrim(104, client1));
+
+  ASSERT_FALSE(q.empty());
+  ASSERT_EQ(5u, q.get_size_slow());
+
+  // Pull three items out and keep them.
+  std::list<Request> reqs;
+  for (int taken = 0; taken < 3; ++taken) {
+    reqs.push_back(q.dequeue());
+  }
+
+  ASSERT_FALSE(q.empty());
+  ASSERT_EQ(2u, q.get_size_slow());
+
+  // Put them back at the front, last one taken first.
+  q.enqueue_front(client2, 12, 1, std::move(reqs.back()));
+  reqs.pop_back();
+
+  q.enqueue_strict_front(client3, 12, std::move(reqs.back()));
+  reqs.pop_back();
+
+  q.enqueue_strict_front(client2, 12, std::move(reqs.back()));
+  reqs.pop_back();
+
+  ASSERT_FALSE(q.empty());
+  ASSERT_EQ(5u, q.get_size_slow());
+
+  // Drain everything.
+  for (int remaining = 5; remaining > 0; --remaining) {
+    (void) q.dequeue();
+  }
+
+  ASSERT_TRUE(q.empty());
+  ASSERT_EQ(0u, q.get_size_slow());
+}
+
+
+// Regular enqueues of same-kind items are expected back in exactly the
+// order they were enqueued, regardless of the client id passed in.
+TEST_F(MClockOpClassQueueTest, TestEnqueue) {
+  q.enqueue(client1, 12, 1, create_snaptrim(100, client1));
+  q.enqueue(client2, 12, 1, create_snaptrim(101, client2));
+  q.enqueue(client2, 12, 1, create_snaptrim(102, client2));
+  q.enqueue(client3, 12, 1, create_snaptrim(103, client3));
+  q.enqueue(client1, 12, 1, create_snaptrim(104, client1));
+
+  const unsigned expected[] = {100u, 101u, 102u, 103u, 104u};
+  for (unsigned want : expected) {
+    Request r = q.dequeue();
+    ASSERT_EQ(want, r.get_map_epoch());
+  }
+}
+
+
+// Strict enqueues: the expected dequeue order follows the second
+// enqueue_strict argument in descending order (16, 15, 14, 13, 12).
+TEST_F(MClockOpClassQueueTest, TestEnqueueStrict) {
+  q.enqueue_strict(client1, 12, create_snaptrim(100, client1));
+  q.enqueue_strict(client2, 13, create_snaptrim(101, client2));
+  q.enqueue_strict(client2, 16, create_snaptrim(102, client2));
+  q.enqueue_strict(client3, 14, create_snaptrim(103, client3));
+  q.enqueue_strict(client1, 15, create_snaptrim(104, client1));
+
+  const unsigned expected[] = {102u, 104u, 103u, 101u, 100u};
+  for (unsigned want : expected) {
+    Request r = q.dequeue();
+    ASSERT_EQ(want, r.get_map_epoch());
+  }
+}
+
+
+// remove_by_class(client2) must hand back exactly the two items enqueued
+// for client2 and leave the other three in the queue.
+TEST_F(MClockOpClassQueueTest, TestRemoveByClass) {
+  q.enqueue(client1, 12, 1, create_snaptrim(100, client1));
+  q.enqueue_strict(client2, 12, create_snaptrim(101, client2));
+  q.enqueue(client2, 12, 1, create_snaptrim(102, client2));
+  q.enqueue_strict(client3, 12, create_snaptrim(103, client3));
+  q.enqueue(client1, 12, 1, create_snaptrim(104, client1));
+
+  std::list<Request> filtered_out;
+  q.remove_by_class(client2, &filtered_out);
+
+  // Both removed items are the ones enqueued for client2 (epochs 101, 102).
+  ASSERT_EQ(2u, filtered_out.size());
+  for (; !filtered_out.empty(); filtered_out.pop_front()) {
+    auto e = filtered_out.front().get_map_epoch();
+    ASSERT_TRUE(e == 101 || e == 102);
+  }
+
+  // What is left: the strict item first, then the two regular items.
+  ASSERT_EQ(3u, q.get_size_slow());
+  const unsigned expected[] = {103u, 100u, 104u};
+  for (unsigned want : expected) {
+    Request r = q.dequeue();
+    ASSERT_EQ(want, r.get_map_epoch());
+  }
+}
diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc
new file mode 100644
index 00000000..d76341eb
--- /dev/null
+++ b/src/test/osd/TestOSDMap.cc
@@ -0,0 +1,1575 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "gtest/gtest.h"
+#include "osd/OSDMap.h"
+#include "osd/OSDMapMapping.h"
+#include "mon/OSDMonitor.h"
+#include "mon/PGMap.h"
+
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "common/common_init.h"
+#include "common/ceph_argparse.h"
+#include "common/ceph_json.h"
+
+#include <iostream>
+
+using namespace std;
+
+// Test entry point: initialise a client-type Ceph context with the config
+// defaults these tests rely on, then run every registered gtest case.
+int main(int argc, char **argv) {
+  map<string,string> defaults;
+  // make sure we have 3 copies, or some tests won't work
+  defaults["osd_pool_default_size"] = "3";
+  // our map is flat, so just try and split across OSDs, not hosts or whatever
+  defaults["osd_crush_chooseleaf_type"] = "0";
+
+  std::vector<const char*> args(argv, argv+argc);
+  // cct is not used directly; it is held until main returns.
+  auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_UTILITY,
+                         CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+  common_init_finish(g_ceph_context);
+  ::testing::InitGoogleTest(&argc, argv);
+  const int result = RUN_ALL_TESTS();
+  return result;
+}
+
+// Fixture that builds a small in-memory OSDMap and, unless told otherwise,
+// one erasure-coded pool and one replicated pool.  It also offers helpers
+// for editing the CRUSH map, tallying PG mappings and running the upmap
+// clean-up job.
+class OSDMapTest : public testing::Test {
+  int num_osds = 6;
+public:
+  OSDMap osdmap;
+  OSDMapMapping mapping;
+  const uint64_t my_ec_pool = 1;   // id asserted for the "ec" pool
+  const uint64_t my_rep_pool = 2;  // id asserted for the "reppool" pool
+
+
+  OSDMapTest() {}
+
+  // Build a simple map with new_num_osds OSDs, mark each one as existing
+  // with sample addresses, a random uuid and an "in" weight, and (unless
+  // no_default_pools is set) add the "ec" and "reppool" pools.
+  void set_up_map(int new_num_osds = 6, bool no_default_pools = false) {
+    num_osds = new_num_osds;
+    uuid_d fsid;
+    osdmap.build_simple(g_ceph_context, 0, fsid, num_osds);
+
+    OSDMap::Incremental osd_inc(osdmap.get_epoch() + 1);
+    osd_inc.fsid = osdmap.get_fsid();
+    entity_addrvec_t addrs;
+    addrs.v.push_back(entity_addr_t());
+    uuid_d osd_uuid;
+    for (int osd = 0; osd < num_osds; ++osd) {
+      osd_uuid.generate_random();
+      addrs.v[0].nonce = osd;  // the nonce is the only per-OSD difference
+      osd_inc.new_state[osd] = CEPH_OSD_EXISTS | CEPH_OSD_NEW;
+      osd_inc.new_up_client[osd] = addrs;
+      osd_inc.new_up_cluster[osd] = addrs;
+      osd_inc.new_hb_back_up[osd] = addrs;
+      osd_inc.new_hb_front_up[osd] = addrs;
+      osd_inc.new_weight[osd] = CEPH_OSD_IN;
+      osd_inc.new_uuid[osd] = osd_uuid;
+    }
+    osdmap.apply_incremental(osd_inc);
+    if (no_default_pools) // do not create any default pool(s)
+      return;
+
+    // Create an EC ruleset and a pool using it
+    int ec_rule = osdmap.crush->add_simple_rule(
+      "erasure", "default", "osd", "",
+      "indep", pg_pool_t::TYPE_ERASURE,
+      &cerr);
+
+    OSDMap::Incremental pool_inc(osdmap.get_epoch() + 1);
+    pool_inc.new_pool_max = osdmap.get_pool_max();
+    pool_inc.fsid = osdmap.get_fsid();
+    pg_pool_t blank;
+
+    // make an ec pool
+    uint64_t pool_id = ++pool_inc.new_pool_max;
+    ceph_assert(pool_id == my_ec_pool);
+    pg_pool_t *pool = pool_inc.get_new_pool(pool_id, &blank);
+    pool->size = 3;
+    pool->set_pg_num(64);
+    pool->set_pgp_num(64);
+    pool->type = pg_pool_t::TYPE_ERASURE;
+    pool->crush_rule = ec_rule;
+    pool_inc.new_pool_names[pool_id] = "ec";
+
+    // and a replicated pool
+    pool_id = ++pool_inc.new_pool_max;
+    ceph_assert(pool_id == my_rep_pool);
+    pool = pool_inc.get_new_pool(pool_id, &blank);
+    pool->size = 3;
+    pool->set_pg_num(64);
+    pool->set_pgp_num(64);
+    pool->type = pg_pool_t::TYPE_REPLICATED;
+    pool->crush_rule = 0;
+    pool->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+    pool_inc.new_pool_names[pool_id] = "reppool";
+
+    osdmap.apply_incremental(pool_inc);
+  }
+
+  unsigned int get_num_osds() { return num_osds; }
+
+  // Copy tmap's CRUSH map into newcrush via an encode/decode round trip.
+  void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) {
+    bufferlist encoded;
+    tmap.crush->encode(encoded, CEPH_FEATURES_SUPPORTED_DEFAULT);
+    auto it = encoded.cbegin();
+    newcrush.decode(it);
+  }
+
+  // Move the CRUSH item `name` to the location described by argvec and
+  // apply the result to tmap.  Returns 0 on success or when the item is
+  // already there, -ENOENT for an unknown name, or the negative error
+  // returned by the move.
+  int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) {
+    map<string,string> loc;
+    CrushWrapper::parse_loc_map(argvec, &loc);
+    CrushWrapper newcrush;
+    get_crush(tmap, newcrush);
+    if (!newcrush.name_exists(name)) {
+      return -ENOENT;
+    }
+    int id = newcrush.get_item_id(name);
+    if (newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) {
+      // already there
+      return 0;
+    }
+
+    // Non-negative ids take the item path, negative ids the bucket path.
+    int err;
+    if (id >= 0) {
+      err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc);
+    } else {
+      err = newcrush.move_bucket(g_ceph_context, id, loc);
+    }
+    if (err < 0) {
+      return err;
+    }
+
+    OSDMap::Incremental pending_inc(tmap.get_epoch() + 1);
+    pending_inc.crush.clear();
+    newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+    tmap.apply_incremental(pending_inc);
+    return 0;
+  }
+
+  // Return the id of the replicated rule `name`, creating it (firstn, no
+  // device class) and applying it to osdmap if it does not exist yet.  A
+  // negative value from add_simple_rule is returned unchanged.
+  int crush_rule_create_replicated(const string &name,
+                                   const string &root,
+                                   const string &type) {
+    if (osdmap.crush->rule_exists(name)) {
+      return osdmap.crush->get_rule_id(name);
+    }
+    CrushWrapper newcrush;
+    get_crush(osdmap, newcrush);
+    string device_class;
+    stringstream ss;
+    int ruleno = newcrush.add_simple_rule(
+      name, root, type, device_class,
+      "firstn", pg_pool_t::TYPE_REPLICATED, &ss);
+    if (ruleno < 0) {
+      return ruleno;
+    }
+    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+    pending_inc.crush.clear();
+    newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+    osdmap.apply_incremental(pending_inc);
+    return ruleno;
+  }
+
+  // For PG seeds 0..num-1 of `pool`, count per OSD how often it appears in
+  // the acting set (*any), as its first entry (*first) and as the acting
+  // primary (*primary), and check every result against the precalculated
+  // OSDMapMapping.
+  void test_mappings(int pool,
+                     int num,
+                     vector<int> *any,
+                     vector<int> *first,
+                     vector<int> *primary) {
+    mapping.update(osdmap);
+    for (int seed = 0; seed < num; ++seed) {
+      vector<int> up, acting;
+      int up_primary, acting_primary;
+      pg_t pgid(seed, pool);
+      osdmap.pg_to_up_acting_osds(pgid,
+                                  &up, &up_primary, &acting, &acting_primary);
+      for (int osd : acting)
+        (*any)[osd]++;
+      if (!acting.empty())
+        (*first)[acting[0]]++;
+      if (acting_primary >= 0)
+        (*primary)[acting_primary]++;
+
+      // compare to precalc mapping
+      vector<int> up2, acting2;
+      int up_primary2, acting_primary2;
+      pgid = osdmap.raw_pg_to_pg(pgid);
+      mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2);
+      ASSERT_EQ(up, up2);
+      ASSERT_EQ(up_primary, up_primary2);
+      ASSERT_EQ(acting, acting2);
+      ASSERT_EQ(acting_primary, acting_primary2);
+    }
+    cout << "any: " << *any << std::endl;
+    cout << "first: " << *first << std::endl;
+    cout << "primary: " << *primary << std::endl;
+  }
+
+  // Run OSDMonitor::CleanUpmapJob over all upmap PGs of `om` on a temporary
+  // 8-thread pool, in chunks of 256 PGs, and wait for it to finish.
+  void clean_pg_upmaps(CephContext *cct,
+                       const OSDMap& om,
+                       OSDMap::Incremental& pending_inc) {
+    const int cpu_num = 8;
+    const int pgs_per_chunk = 256;
+    ThreadPool tp(cct, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num);
+    tp.start();
+    ParallelPGMapper mapper(cct, &tp);
+    vector<pg_t> pgs_to_check;
+    om.get_upmap_pgs(&pgs_to_check);
+    OSDMonitor::CleanUpmapJob job(cct, om, pending_inc);
+    mapper.queue(&job, pgs_per_chunk, pgs_to_check);
+    job.wait();
+    tp.stop();
+  }
+};
+
+// A freshly built map reports the fixture's OSD count both as max_osd and
+// as the number of "in" OSDs.
+TEST_F(OSDMapTest, Create) {
+  set_up_map();
+  const unsigned expected = get_num_osds();
+  ASSERT_EQ(expected, (unsigned)osdmap.get_max_osd());
+  ASSERT_EQ(expected, osdmap.get_num_in_osds());
+}
+
+// Checks the feature bits the map reports for OSD, client and mon entity
+// types, before and after removing the EC pool.
+TEST_F(OSDMapTest, Features) {
+  // with EC pool
+  set_up_map();
+  uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
+  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
+  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
+
+  // clients have a slightly different view
+  features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2);
+  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
+  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
+
+  // remove the EC pool, but leave the rule. add primary affinity.
+  {
+    OSDMap::Incremental drop_ec_inc(osdmap.get_epoch() + 1);
+    drop_ec_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec"));
+    drop_ec_inc.new_primary_affinity[0] = 0x8000;
+    osdmap.apply_incremental(drop_ec_inc);
+  }
+
+  // With the EC pool gone, CRUSH_V2 is the only bit expected to drop out.
+  features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2);
+  ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity
+  ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2);
+  ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL);
+  ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY);
+
+  // FIXME: test tiering feature bits
+}
+
+// Both pg_to_up_acting_osds overloads must agree for a PG of the replicated
+// pool, and the up set must have as many entries as the pool's size.
+TEST_F(OSDMapTest, MapPG) {
+  set_up_map();
+
+  std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl;
+  pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+
+  // Pointer-style overload, which also reports the primaries.
+  vector<int> up_osds, acting_osds;
+  int up_primary, acting_primary;
+  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
+                              &acting_osds, &acting_primary);
+
+  // Reference-style overload.
+  vector<int> old_up_osds, old_acting_osds;
+  osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds);
+
+  ASSERT_EQ(old_up_osds, up_osds);
+  ASSERT_EQ(old_acting_osds, acting_osds);
+
+  ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size());
+}
+
+// The different mapping entry points (pg_to_up_acting_osds and
+// pg_to_acting_osds, pointer and reference overloads) must return the same
+// sets and primary for one PG of the replicated pool.
+TEST_F(OSDMapTest, MapFunctionsMatch) {
+  // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match
+  set_up_map();
+  pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+
+  // Reference result: pointer-style up/acting lookup.
+  vector<int> up_osds, acting_osds;
+  int up_primary, acting_primary;
+  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
+                              &acting_osds, &acting_primary);
+
+  // Reference-style up/acting lookup.
+  vector<int> up_osds_two, acting_osds_two;
+  osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two);
+
+  ASSERT_EQ(up_osds, up_osds_two);
+  ASSERT_EQ(acting_osds, acting_osds_two);
+
+  // Acting-only lookup, pointer style.
+  int acting_primary_two;
+  osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two);
+  EXPECT_EQ(acting_osds, acting_osds_two);
+  EXPECT_EQ(acting_primary, acting_primary_two);
+
+  // Acting-only lookup, reference style.
+  osdmap.pg_to_acting_osds(pgid, acting_osds_two);
+  EXPECT_EQ(acting_osds, acting_osds_two);
+}
+
+/** Without any overrides, the primary of both the up set and the acting set
+ * is their first entry.
+ *
+ * This test must be removed or modified appropriately when we allow
+ * other ways to specify a primary. */
+TEST_F(OSDMapTest, PrimaryIsFirst) {
+  set_up_map();
+
+  pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+
+  vector<int> up_osds, acting_osds;
+  int up_primary, acting_primary;
+  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
+                              &acting_osds, &acting_primary);
+
+  EXPECT_EQ(up_osds[0], up_primary);
+  EXPECT_EQ(acting_osds[0], acting_primary);
+}
+
+// A pg_temp entry applied through an incremental must become the acting
+// set reported for that PG.
+TEST_F(OSDMapTest, PGTempRespected) {
+  set_up_map();
+
+  pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+
+  vector<int> up_osds, acting_osds;
+  int up_primary, acting_primary;
+  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
+                              &acting_osds, &acting_primary);
+
+  // copy and swap first and last element in acting_osds
+  vector<int> new_acting_osds(acting_osds);
+  std::swap(new_acting_osds.front(), new_acting_osds.back());
+
+  // apply pg_temp to osdmap
+  OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1);
+  pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
+    new_acting_osds.begin(), new_acting_osds.end());
+  osdmap.apply_incremental(pgtemp_map);
+
+  // The acting set now has to be the reordered one.
+  osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary,
+                              &acting_osds, &acting_primary);
+  EXPECT_EQ(new_acting_osds, acting_osds);
+}
+
+/**
+ * A primary_temp entry installed through an incremental must be reported
+ * as the acting primary by pg_to_up_acting_osds().
+ */
+TEST_F(OSDMapTest, PrimaryTempRespected) {
+  set_up_map();
+
+  const pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+
+  vector<int> up_set, acting_set;
+  int up_head;
+  int acting_head;
+  osdmap.pg_to_up_acting_osds(pgid, &up_set, &up_head,
+                              &acting_set, &acting_head);
+
+  // promote the second acting OSD to primary via primary_temp
+  const int wanted_primary = acting_set[1];
+  OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+  inc.new_primary_temp[pgid] = wanted_primary;
+  osdmap.apply_incremental(inc);
+
+  // map again and compare against the second entry of the fresh acting set
+  osdmap.pg_to_up_acting_osds(pgid, &up_set, &up_head,
+                              &acting_set, &acting_head);
+  EXPECT_EQ(acting_head, acting_set[1]);
+}
+
+/**
+ * OSDMap::clean_temps() on temp mappings that merely repeat the up set:
+ *  - pga: the redundant pg_temp/primary_temp is already applied to the
+ *    map, so the pending incremental must end up with removal entries
+ *    (empty pg_temp, primary_temp of -1);
+ *  - pgb: the redundant entries only exist in the pending incremental,
+ *    so they must be dropped from it.
+ */
+TEST_F(OSDMapTest, CleanTemps) {
+  set_up_map();
+
+  OSDMap::Incremental applied_inc(osdmap.get_epoch() + 1);
+  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2);
+
+  // pga: temp mappings equal to the up set, queued in applied_inc
+  const pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+  {
+    vector<int> up_set, acting_set;
+    int up_head, acting_head;
+    osdmap.pg_to_up_acting_osds(pga, &up_set, &up_head,
+                                &acting_set, &acting_head);
+    applied_inc.new_primary_temp[pga] = up_head;
+    applied_inc.new_pg_temp[pga] = mempool::osdmap::vector<int>(
+      up_set.begin(), up_set.end());
+  }
+
+  // pgb: temp mappings equal to the up set, queued in pending_inc
+  const pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool));
+  {
+    vector<int> up_set, acting_set;
+    int up_head, acting_head;
+    osdmap.pg_to_up_acting_osds(pgb, &up_set, &up_head,
+                                &acting_set, &acting_head);
+    pending_inc.new_primary_temp[pgb] = up_head;
+    pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>(
+      up_set.begin(), up_set.end());
+  }
+
+  osdmap.apply_incremental(applied_inc);
+
+  // clean_temps() is handed the current map, a copy with the pending
+  // incremental applied, and the pending incremental to adjust
+  OSDMap nextmap;
+  nextmap.deepish_copy_from(osdmap);
+  nextmap.apply_incremental(pending_inc);
+  OSDMap::clean_temps(g_ceph_context, osdmap, nextmap, &pending_inc);
+
+  // pga is present in the pending incremental as a removal
+  EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) &&
+              pending_inc.new_pg_temp[pga].empty());
+  EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]);
+
+  // pgb is no longer mentioned at all
+  EXPECT_FALSE(pending_inc.new_pg_temp.count(pgb) ||
+               pending_inc.new_primary_temp.count(pgb));
+}
+
+/**
+ * OSDMap::clean_temps() must leave alone temp mappings that differ from
+ * the up set: after applying a pg_temp/primary_temp that brings in an OSD
+ * which is not part of the up set, an empty pending incremental must stay
+ * free of entries for that pg.
+ */
+TEST_F(OSDMapTest, KeepsNecessaryTemps) {
+  set_up_map();
+
+  const pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool));
+  vector<int> up_set, acting_set;
+  int up_head, acting_head;
+  osdmap.pg_to_up_acting_osds(pgid, &up_set, &up_head,
+                              &acting_set, &acting_head);
+
+  OSDMap::Incremental temp_inc(osdmap.get_epoch() + 1);
+
+  // lowest-numbered OSD that is not part of the up set
+  const int num_osds = static_cast<int>(get_num_osds());
+  int spare = 0;
+  while (spare != num_osds &&
+         std::find(up_set.begin(), up_set.end(), spare) != up_set.end()) {
+    ++spare;
+  }
+  if (spare == num_osds)
+    FAIL() << "did not find unused OSD for temp mapping";
+
+  // put the spare OSD into the second slot and make it the temp primary
+  up_set[1] = spare;
+  temp_inc.new_pg_temp[pgid] = mempool::osdmap::vector<int>(
+    up_set.begin(), up_set.end());
+  temp_inc.new_primary_temp[pgid] = up_set[1];
+  osdmap.apply_incremental(temp_inc);
+
+  // run clean_temps() with an otherwise empty pending incremental
+  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+  OSDMap nextmap;
+  nextmap.deepish_copy_from(osdmap);
+  nextmap.apply_incremental(pending_inc);
+  OSDMap::clean_temps(g_ceph_context, osdmap, nextmap, &pending_inc);
+
+  EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid));
+  EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid));
+}
+
+/**
+ * For every pool of the map, samples 10000 mappings through
+ * test_mappings() under three primary-affinity settings and checks the
+ * per-OSD counters it fills in:
+ *  1. default affinity: every OSD shows up in all three counters;
+ *  2. osd.0 and osd.1 at affinity 0: neither is ever primary (nor first,
+ *     for replicated pools);
+ *  3. osd.0 at 0x8000, osd.1 at 0: osd.1 is never primary and osd.0 is
+ *     primary within one third of the expected half-weight share.
+ * The affinities of osd.0 and osd.1 are set back to 0x10000 before moving
+ * on to the next pool.
+ */
+TEST_F(OSDMapTest, PrimaryAffinity) {
+  set_up_map();
+
+  const int num_osds = get_num_osds();
+  for (const auto& entry : osdmap.get_pools()) {
+    const int pool = entry.first;
+    const pg_pool_t& info = entry.second;
+    const int expect_primary = 10000 / num_osds;
+    cout << "pool " << pool << " size " << (int)info.size
+         << " expect_primary " << expect_primary << std::endl;
+
+    // 1. untouched affinities
+    {
+      vector<int> any(num_osds, 0);
+      vector<int> first(num_osds, 0);
+      vector<int> primary(num_osds, 0);
+      test_mappings(pool, 10000, &any, &first, &primary);
+      for (int osd = 0; osd < num_osds; ++osd) {
+        ASSERT_LT(0, any[osd]);
+        ASSERT_LT(0, first[osd]);
+        ASSERT_LT(0, primary[osd]);
+      }
+    }
+
+    // 2. osd.0 and osd.1 must never be primary
+    osdmap.set_primary_affinity(0, 0);
+    osdmap.set_primary_affinity(1, 0);
+    {
+      vector<int> any(num_osds, 0);
+      vector<int> first(num_osds, 0);
+      vector<int> primary(num_osds, 0);
+      test_mappings(pool, 10000, &any, &first, &primary);
+      for (int osd = 0; osd < num_osds; ++osd) {
+        ASSERT_LT(0, any[osd]);
+        const bool zero_affinity = osd < 2;
+        if (zero_affinity) {
+          if (info.is_replicated()) {
+            ASSERT_EQ(0, first[osd]);
+          }
+          ASSERT_EQ(0, primary[osd]);
+        } else {
+          ASSERT_LT(0, first[osd]);
+          ASSERT_LT(0, primary[osd]);
+        }
+      }
+    }
+
+    // 3. osd.0 at half affinity, osd.1 still at zero
+    osdmap.set_primary_affinity(0, 0x8000);
+    osdmap.set_primary_affinity(1, 0);
+    {
+      vector<int> any(num_osds, 0);
+      vector<int> first(num_osds, 0);
+      vector<int> primary(num_osds, 0);
+      test_mappings(pool, 10000, &any, &first, &primary);
+      const int expect = (10000 / (num_osds - 2)) / 2; // half weight
+      cout << "expect " << expect << std::endl;
+      for (int osd = 0; osd < num_osds; ++osd) {
+        ASSERT_LT(0, any[osd]);
+        switch (osd) {
+        case 0: {
+          // half-affinity OSD: within one third of the expected count
+          ASSERT_LT(expect *2/3, primary[0]);
+          ASSERT_GT(expect *4/3, primary[0]);
+          break;
+        }
+        case 1: {
+          // zero-affinity OSD
+          if (info.is_replicated()) {
+            ASSERT_EQ(0, first[osd]);
+          }
+          ASSERT_EQ(0, primary[osd]);
+          break;
+        }
+        default: {
+          ASSERT_LT(0, first[osd]);
+          ASSERT_LT(0, primary[osd]);
+          break;
+        }
+        }
+      }
+    }
+
+    // put both affinities back before looking at the next pool
+    osdmap.set_primary_affinity(0, 0x10000);
+    osdmap.set_primary_affinity(1, 0x10000);
+  }
+}
+
+/**
+ * Checks OSDMap::get_osd_crush_node_flags():
+ *  - a fresh map reports no flags for any OSD;
+ *  - flags set on crush node -1 through an incremental are reported for
+ *    every OSD of the map, and a later incremental overwrites them;
+ *  - an OSD id outside the map (1000) reports 0;
+ *  - setting the flags of the node back to 0 makes every OSD report 0.
+ */
+TEST_F(OSDMapTest, get_osd_crush_node_flags) {
+  set_up_map();
+
+  // nothing is flagged initially
+  for (unsigned i=0; i<get_num_osds(); ++i) {
+    ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i));
+  }
+
+  // set flags on crush node -1
+  OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+  inc.new_crush_node_flags[-1] = 123u;
+  osdmap.apply_incremental(inc);
+  for (unsigned i=0; i<get_num_osds(); ++i) {
+    ASSERT_EQ(123u, osdmap.get_osd_crush_node_flags(i));
+  }
+  ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000));
+
+  // overwrite them with a different value
+  OSDMap::Incremental inc3(osdmap.get_epoch() + 1);
+  inc3.new_crush_node_flags[-1] = 456u;
+  osdmap.apply_incremental(inc3);
+  for (unsigned i=0; i<get_num_osds(); ++i) {
+    ASSERT_EQ(456u, osdmap.get_osd_crush_node_flags(i));
+  }
+  ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000));
+
+  // reset the node flags to 0
+  OSDMap::Incremental inc2(osdmap.get_epoch() + 1);
+  inc2.new_crush_node_flags[-1] = 0;
+  osdmap.apply_incremental(inc2);
+  for (unsigned i=0; i<get_num_osds(); ++i) {
+    // Query through the per-OSD accessor like every other check in this
+    // test; the loop index is an OSD id, so get_crush_node_flags(i) would
+    // not exercise the accessor under test here.
+    ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i));
+  }
+}
+
+/**
+ * Checks OSDMap::parse_osd_id_list() with "osd.N" names, bare numeric
+ * ids, the wildcards "*", "all" and "any", and two inputs that must be
+ * rejected with -EINVAL.
+ */
+TEST_F(OSDMapTest, parse_osd_id_list) {
+  set_up_map();
+  set<int> parsed;
+  set<int> every_osd;
+  osdmap.get_all_osds(every_osd);
+
+  const set<int> just_zero{0};
+  const set<int> just_one{1};
+  const set<int> zero_and_one{0, 1};
+
+  // a single entry, as "osd.N" and as a bare number
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &parsed, &cout));
+  ASSERT_EQ(just_zero, parsed);
+
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &parsed, &cout));
+  ASSERT_EQ(just_one, parsed);
+
+  // two entries, same notation and mixed notation
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &parsed, &cout));
+  ASSERT_EQ(zero_and_one, parsed);
+
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &parsed, &cout));
+  ASSERT_EQ(zero_and_one, parsed);
+
+  // wildcards yield the same set as get_all_osds()
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &parsed, &cout));
+  ASSERT_EQ(every_osd, parsed);
+
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &parsed, &cout));
+  ASSERT_EQ(every_osd, parsed);
+
+  ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &parsed, &cout));
+  ASSERT_EQ(every_osd, parsed);
+
+  // garbage and negative ids are rejected
+  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &parsed, &cout));
+  ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &parsed, &cout));
+}
+
+/**
+ * Exercises clean_pg_upmaps() in a series of scenarios that share the
+ * fixture's osdmap:
+ *  - setup: OSDs are moved under "host-N" buckets, a replicated rule of
+ *    failure domain "host" is created and a size-2 pool referencing it;
+ *  - a pg_upmap_items entry whose source OSD is not in the up set is
+ *    removed;
+ *  - tracker 37493: an entry whose target OSD is down is kept;
+ *  - tracker 37501: an entry whose target OSD is out is removed;
+ *  - tracker 37968: with a two-step "2 hosts x 2 osds" erasure rule, an
+ *    entry remapping within the same host is kept;
+ *  - pg_upmap: negative values are tolerated, and an entry placing both
+ *    replicas on one host is reverted;
+ *  - pg_upmap_items: negative values are tolerated, and an entry that
+ *    ends up placing both replicas on one host after OSDs are marked out
+ *    is reverted.
+ * The later scenarios mutate osdmap and rely on `up`/`up_primary`
+ * captured during setup, so their order matters.
+ */
+TEST_F(OSDMapTest, CleanPGUpmaps) {
+  set_up_map();
+
+  // build a crush rule of type host
+  const int expected_host_num = 3;
+  int osd_per_host = get_num_osds() / expected_host_num;
+  ASSERT_GE(2, osd_per_host);
+  int index = 0;
+  for (int i = 0; i < (int)get_num_osds(); i++) {
+    if (i && i % osd_per_host == 0) {
+      ++index;
+    }
+    stringstream osd_name;
+    stringstream host_name;
+    vector<string> move_to;
+    osd_name << "osd." << i;
+    host_name << "host-" << index;
+    move_to.push_back("root=default");
+    string host_loc = "host=" + host_name.str();
+    move_to.push_back(host_loc);
+    int r = crush_move(osdmap, osd_name.str(), move_to);
+    ASSERT_EQ(0, r);
+  }
+  const string upmap_rule = "upmap";
+  int upmap_rule_no = crush_rule_create_replicated(
+    upmap_rule, "default", "host");
+  ASSERT_LT(0, upmap_rule_no);
+
+  // create a replicated pool which references the above rule
+  OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1);
+  new_pool_inc.new_pool_max = osdmap.get_pool_max();
+  new_pool_inc.fsid = osdmap.get_fsid();
+  pg_pool_t empty;
+  uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max;
+  pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty);
+  p->size = 2;
+  p->set_pg_num(64);
+  p->set_pgp_num(64);
+  p->type = pg_pool_t::TYPE_REPLICATED;
+  p->crush_rule = upmap_rule_no;
+  p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+  new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool";
+  osdmap.apply_incremental(new_pool_inc);
+
+  // reference mapping of pg 0 of the new pool; later scenarios compare
+  // against `up` and `up_primary`
+  pg_t rawpg(0, upmap_pool_id);
+  pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
+  vector<int> up;
+  int up_primary;
+  osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+  ASSERT_LT(1U, up.size());
+  {
+    // validate we won't have two OSDs from a same host
+    int parent_0 = osdmap.crush->get_parent_of_type(up[0],
+      osdmap.crush->get_type_id("host"));
+    int parent_1 = osdmap.crush->get_parent_of_type(up[1],
+      osdmap.crush->get_type_id("host"));
+    ASSERT_TRUE(parent_0 != parent_1);
+  }
+
+  {
+    // cancel stale upmaps
+    osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+    // `from` and `to` are two distinct OSDs that are both outside the up
+    // set, so the resulting pg_upmap_items entry cannot apply
+    int from = -1;
+    for (int i = 0; i < (int)get_num_osds(); i++) {
+      if (std::find(up.begin(), up.end(), i) == up.end()) {
+        from = i;
+        break;
+      }
+    }
+    ASSERT_TRUE(from >= 0);
+    int to = -1;
+    for (int i = 0; i < (int)get_num_osds(); i++) {
+      if (std::find(up.begin(), up.end(), i) == up.end() && i != from) {
+        to = i;
+        break;
+      }
+    }
+    ASSERT_TRUE(to >= 0);
+    vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+    new_pg_upmap_items.push_back(make_pair(from, to));
+    OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+    pending_inc.new_pg_upmap_items[pgid] =
+      mempool::osdmap::vector<pair<int32_t,int32_t>>(
+        new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+    OSDMap nextmap;
+    nextmap.deepish_copy_from(osdmap);
+    nextmap.apply_incremental(pending_inc);
+    ASSERT_TRUE(nextmap.have_pg_upmaps(pgid));
+    OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1);
+    clean_pg_upmaps(g_ceph_context, nextmap, new_pending_inc);
+    nextmap.apply_incremental(new_pending_inc);
+    ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid));
+  }
+
+  {
+    // https://tracker.ceph.com/issues/37493
+    pg_t ec_pg(0, my_ec_pool);
+    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
+    OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
+    int from = -1;
+    int to = -1;
+    {
+      // insert a valid pg_upmap_item
+      vector<int> ec_up;
+      int ec_up_primary;
+      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
+      ASSERT_TRUE(!ec_up.empty());
+      from = *(ec_up.begin());
+      ASSERT_TRUE(from >= 0);
+      for (int i = 0; i < (int)get_num_osds(); i++) {
+        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
+          to = i;
+          break;
+        }
+      }
+      ASSERT_TRUE(to >= 0);
+      ASSERT_TRUE(from != to);
+      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      new_pg_upmap_items.push_back(make_pair(from, to));
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      pending_inc.new_pg_upmap_items[ec_pgid] =
+        mempool::osdmap::vector<pair<int32_t,int32_t>>(
+          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+      tmpmap.deepish_copy_from(osdmap);
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // mark one of the target OSDs of the above pg_upmap_item as down
+      // (the is_up() assertion below confirms that putting CEPH_OSD_UP
+      // into new_state takes the OSD down here)
+      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
+      pending_inc.new_state[to] = CEPH_OSD_UP;
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(!tmpmap.is_up(to));
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // confirm *clean_pg_upmaps* won't do anything bad
+      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
+      clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc);
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+  }
+
+  {
+    // http://tracker.ceph.com/issues/37501
+    pg_t ec_pg(0, my_ec_pool);
+    pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg);
+    OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map..
+    int from = -1;
+    int to = -1;
+    {
+      // insert a valid pg_upmap_item
+      vector<int> ec_up;
+      int ec_up_primary;
+      osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
+      ASSERT_TRUE(!ec_up.empty());
+      from = *(ec_up.begin());
+      ASSERT_TRUE(from >= 0);
+      for (int i = 0; i < (int)get_num_osds(); i++) {
+        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
+          to = i;
+          break;
+        }
+      }
+      ASSERT_TRUE(to >= 0);
+      ASSERT_TRUE(from != to);
+      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      new_pg_upmap_items.push_back(make_pair(from, to));
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      pending_inc.new_pg_upmap_items[ec_pgid] =
+        mempool::osdmap::vector<pair<int32_t,int32_t>>(
+          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+      tmpmap.deepish_copy_from(osdmap);
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // mark one of the target OSDs of the above pg_upmap_item as out
+      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
+      pending_inc.new_weight[to] = CEPH_OSD_OUT;
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmpmap.is_out(to));
+      ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // *clean_pg_upmaps* should be able to remove the above *bad* mapping
+      OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1);
+      clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc);
+      tmpmap.apply_incremental(pending_inc);
+      ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid));
+    }
+  }
+
+  {
+    // http://tracker.ceph.com/issues/37968
+
+    // build a temporary crush topology of 2 hosts, 3 osds per host
+    OSDMap tmp; // use a tmpmap here, so we do not dirty origin map..
+    tmp.deepish_copy_from(osdmap);
+    const int expected_host_num = 2;
+    int osd_per_host = get_num_osds() / expected_host_num;
+    ASSERT_GE(osd_per_host, 3);
+    int index = 0;
+    for (int i = 0; i < (int)get_num_osds(); i++) {
+      if (i && i % osd_per_host == 0) {
+        ++index;
+      }
+      stringstream osd_name;
+      stringstream host_name;
+      vector<string> move_to;
+      osd_name << "osd." << i;
+      host_name << "host-" << index;
+      move_to.push_back("root=default");
+      string host_loc = "host=" + host_name.str();
+      move_to.push_back(host_loc);
+      auto r = crush_move(tmp, osd_name.str(), move_to);
+      ASSERT_EQ(0, r);
+    }
+
+    // build crush rule
+    CrushWrapper crush;
+    get_crush(tmp, crush);
+    string rule_name = "rule_37968";
+    int rule_type = pg_pool_t::TYPE_ERASURE;
+    ASSERT_TRUE(!crush.rule_exists(rule_name));
+    // first rule number that is used neither as a rule nor as a ruleset
+    int rno;
+    for (rno = 0; rno < crush.get_max_rules(); rno++) {
+      if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
+        break;
+    }
+    string root_name = "default";
+    int root = crush.get_item_id(root_name);
+    int min_size = 3;
+    int max_size = 4;
+    int steps = 6;
+    crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
+    int step = 0;
+    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
+    crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
+    crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0);
+    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host*/);
+    crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */);
+    crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+    ASSERT_TRUE(step == steps);
+    auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
+    ASSERT_TRUE(r >= 0);
+    crush.set_rule_name(rno, rule_name);
+    {
+      // push the modified crush map into tmp
+      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
+      pending_inc.crush.clear();
+      crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+      tmp.apply_incremental(pending_inc);
+    }
+
+    // create an erasure-coded pool referencing the above rule
+    int64_t pool_37968;
+    {
+      OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1);
+      new_pool_inc.new_pool_max = tmp.get_pool_max();
+      new_pool_inc.fsid = tmp.get_fsid();
+      pg_pool_t empty;
+      pool_37968 = ++new_pool_inc.new_pool_max;
+      pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty);
+      p->size = 4;
+      p->set_pg_num(8);
+      p->set_pgp_num(8);
+      p->type = pg_pool_t::TYPE_ERASURE;
+      p->crush_rule = rno;
+      p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+      new_pool_inc.new_pool_names[pool_37968] = "pool_37968";
+      tmp.apply_incremental(new_pool_inc);
+    }
+
+    pg_t ec_pg(0, pool_37968);
+    pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg);
+    int from = -1;
+    int to = -1;
+    {
+      // insert a valid pg_upmap_item
+      vector<int> ec_up;
+      int ec_up_primary;
+      tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary);
+      ASSERT_TRUE(ec_up.size() == 4);
+      from = *(ec_up.begin());
+      ASSERT_TRUE(from >= 0);
+      auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno);
+      ASSERT_TRUE(parent < 0);
+      // pick an osd of the same parent with *from*
+      for (int i = 0; i < (int)get_num_osds(); i++) {
+        if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) {
+          auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno);
+          if (p == parent) {
+            to = i;
+            break;
+          }
+        }
+      }
+      ASSERT_TRUE(to >= 0);
+      ASSERT_TRUE(from != to);
+      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      new_pg_upmap_items.push_back(make_pair(from, to));
+      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
+      pending_inc.new_pg_upmap_items[ec_pgid] =
+        mempool::osdmap::vector<pair<int32_t,int32_t>>(
+          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+      tmp.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
+    }
+    {
+      // *clean_pg_upmaps* should not remove the above upmap_item
+      OSDMap::Incremental pending_inc(tmp.get_epoch() + 1);
+      clean_pg_upmaps(g_ceph_context, tmp, pending_inc);
+      tmp.apply_incremental(pending_inc);
+      ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid));
+    }
+  }
+
+  {
+    // TEST pg_upmap
+    {
+      // STEP-1: enumerate all children of up[0]'s parent,
+      // replace up[1] with one of them (other than up[0])
+      int parent = osdmap.crush->get_parent_of_type(up[0],
+        osdmap.crush->get_type_id("host"));
+      set<int> candidates;
+      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates);
+      ASSERT_LT(1U, candidates.size());
+      int replaced_by = -1;
+      for (auto c: candidates) {
+        if (c != up[0]) {
+          replaced_by = c;
+          break;
+        }
+      }
+      {
+        // Check we can handle a negative pg_upmap value
+        vector<int32_t> new_pg_upmap;
+        new_pg_upmap.push_back(up[0]);
+        new_pg_upmap.push_back(-823648512);
+        OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+        pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
+          new_pg_upmap.begin(), new_pg_upmap.end());
+        osdmap.apply_incremental(pending_inc);
+        vector<int> new_up;
+        int new_up_primary;
+        // crucial call - _apply_upmap should ignore the negative value
+        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+      }
+      ASSERT_NE(-1, replaced_by);
+      // generate a new pg_upmap item and apply
+      vector<int32_t> new_pg_upmap;
+      new_pg_upmap.push_back(up[0]);
+      new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
+        new_pg_upmap.begin(), new_pg_upmap.end());
+      osdmap.apply_incremental(pending_inc);
+      {
+        // validate pg_upmap is there
+        vector<int> new_up;
+        int new_up_primary;
+        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+        ASSERT_TRUE(up.size() == new_up.size());
+        ASSERT_TRUE(new_up[0] == new_pg_upmap[0]);
+        ASSERT_TRUE(new_up[1] == new_pg_upmap[1]);
+        // and we shall have two OSDs from a same host now..
+        int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
+          osdmap.crush->get_type_id("host"));
+        int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
+          osdmap.crush->get_type_id("host"));
+        ASSERT_TRUE(parent_0 == parent_1);
+      }
+    }
+    {
+      // STEP-2: apply cure
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
+      osdmap.apply_incremental(pending_inc);
+      {
+        // validate pg_upmap is gone (reverted)
+        vector<int> new_up;
+        int new_up_primary;
+        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+        ASSERT_TRUE(new_up == up);
+        // compare, do not assign: an assignment inside ASSERT_TRUE checks
+        // nothing and would fail spuriously whenever the primary is osd.0
+        ASSERT_EQ(up_primary, new_up_primary);
+      }
+    }
+  }
+
+  {
+    // TEST pg_upmap_items
+    // enumerate all used hosts first
+    set<int> parents;
+    for (auto u: up) {
+      int parent = osdmap.crush->get_parent_of_type(u,
+        osdmap.crush->get_type_id("host"));
+      ASSERT_GT(0, parent);
+      parents.insert(parent);
+    }
+    int candidate_parent = 0;
+    set<int> candidate_children;
+    vector<int> up_after_out;
+    {
+      // STEP-1: try mark out up[1] and all other OSDs from the same host
+      // (on a copy of the map only, to learn what crush would pick then)
+      int parent = osdmap.crush->get_parent_of_type(up[1],
+        osdmap.crush->get_type_id("host"));
+      set<int> children;
+      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
+        &children);
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      for (auto c: children) {
+        pending_inc.new_weight[c] = CEPH_OSD_OUT;
+      }
+      OSDMap tmpmap;
+      tmpmap.deepish_copy_from(osdmap);
+      tmpmap.apply_incremental(pending_inc);
+      vector<int> new_up;
+      int new_up_primary;
+      tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+      // verify that we'll have OSDs from a different host..
+      int will_choose = -1;
+      for (auto o: new_up) {
+        int parent = tmpmap.crush->get_parent_of_type(o,
+          osdmap.crush->get_type_id("host"));
+        if (!parents.count(parent)) {
+          will_choose = o;
+          candidate_parent = parent; // record
+          break;
+        }
+      }
+      ASSERT_LT(-1, will_choose); // it is an OSD!
+      ASSERT_TRUE(candidate_parent != 0);
+      osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent),
+        &candidate_children);
+      ASSERT_TRUE(candidate_children.count(will_choose));
+      candidate_children.erase(will_choose);
+      ASSERT_TRUE(!candidate_children.empty());
+      up_after_out = new_up; // needed for verification..
+    }
+    {
+      // Make sure we can handle a negative pg_upmap_item
+      int victim = up[0];
+      int replaced_by = -823648512;
+      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
+      // apply
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      pending_inc.new_pg_upmap_items[pgid] =
+        mempool::osdmap::vector<pair<int32_t,int32_t>>(
+          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+      osdmap.apply_incremental(pending_inc);
+      vector<int> new_up;
+      int new_up_primary;
+      // crucial call - _apply_upmap should ignore the negative value
+      osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+    }
+    {
+      // STEP-2: generating a new pg_upmap_items entry by
+      // replacing up[0] with one coming from candidate_children
+      int victim = up[0];
+      int replaced_by = *candidate_children.begin();
+      vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+      new_pg_upmap_items.push_back(make_pair(victim, replaced_by));
+      // apply
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      pending_inc.new_pg_upmap_items[pgid] =
+        mempool::osdmap::vector<pair<int32_t,int32_t>>(
+          new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+      osdmap.apply_incremental(pending_inc);
+      {
+        // validate pg_upmap_items is there
+        vector<int> new_up;
+        int new_up_primary;
+        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+        ASSERT_TRUE(up.size() == new_up.size());
+        ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) !=
+          new_up.end());
+        // and up[1] too
+        ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) !=
+          new_up.end());
+      }
+    }
+    {
+      // STEP-3: mark out up[1] and all other OSDs from the same host
+      int parent = osdmap.crush->get_parent_of_type(up[1],
+        osdmap.crush->get_type_id("host"));
+      set<int> children;
+      osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent),
+        &children);
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      for (auto c: children) {
+        pending_inc.new_weight[c] = CEPH_OSD_OUT;
+      }
+      osdmap.apply_incremental(pending_inc);
+      {
+        // validate we have two OSDs from the same host now..
+        vector<int> new_up;
+        int new_up_primary;
+        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+        ASSERT_TRUE(up.size() == new_up.size());
+        int parent_0 = osdmap.crush->get_parent_of_type(new_up[0],
+          osdmap.crush->get_type_id("host"));
+        int parent_1 = osdmap.crush->get_parent_of_type(new_up[1],
+          osdmap.crush->get_type_id("host"));
+        ASSERT_TRUE(parent_0 == parent_1);
+      }
+    }
+    {
+      // STEP-4: apply cure
+      OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+      clean_pg_upmaps(g_ceph_context, osdmap, pending_inc);
+      osdmap.apply_incremental(pending_inc);
+      {
+        // validate pg_upmap_items is gone (reverted)
+        vector<int> new_up;
+        int new_up_primary;
+        osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary);
+        ASSERT_TRUE(new_up == up_after_out);
+      }
+    }
+  }
+}
+
+// Regression scenario for tracker issue 38897. Builds a 12-OSD map with
+// two pools, each with its own crush rule that first emits osd.0 and then
+// chooses from a dedicated host bucket, seeds pg_upmap entries for pool1
+// and a pg_upmap_items entry for pool2, and finally runs calc_pg_upmaps()
+// restricted to pool1 with a max deviation of 0. Nothing is asserted about
+// the result of calc_pg_upmaps(); the test only requires that the call and
+// the following apply_incremental() complete.
+TEST_F(OSDMapTest, BUG_38897) {
+ // http://tracker.ceph.com/issues/38897
+ // build a fresh map with 12 OSDs, without any default pools
+ set_up_map(12, true);
+ const string pool_1("pool1");
+ const string pool_2("pool2");
+ // stays -1 until pool1 has been created below
+ int64_t pool_1_id = -1;
+
+ {
+ // build customized crush rule for "pool1"
+ string host_name = "host_for_pool_1";
+ // build a customized host to capture osd.1~4
+ // (the loop bound below is exclusive, so osd.5 is not moved)
+ for (int i = 1; i < 5; i++) {
+ stringstream osd_name;
+ vector<string> move_to;
+ osd_name << "osd." << i;
+ move_to.push_back("root=default");
+ string host_loc = "host=" + host_name;
+ move_to.push_back(host_loc);
+ auto r = crush_move(osdmap, osd_name.str(), move_to);
+ ASSERT_EQ(0, r);
+ }
+ CrushWrapper crush;
+ get_crush(osdmap, crush);
+ auto host_id = crush.get_item_id(host_name);
+ ASSERT_TRUE(host_id < 0);
+ string rule_name = "rule_for_pool1";
+ int rule_type = pg_pool_t::TYPE_REPLICATED;
+ ASSERT_TRUE(!crush.rule_exists(rule_name));
+ // first rule number that is used neither as a rule nor as a ruleset
+ int rno;
+ for (rno = 0; rno < crush.get_max_rules(); rno++) {
+ if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
+ break;
+ }
+ int min_size = 3;
+ int max_size = 3;
+ int steps = 7;
+ crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
+ int step = 0;
+ crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
+ // always choose osd.0
+ crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+ // then pick any other random osds
+ crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+ // every one of the `steps` slots of the rule must have been filled
+ ASSERT_TRUE(step == steps);
+ auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
+ ASSERT_TRUE(r >= 0);
+ crush.set_rule_name(rno, rule_name);
+ {
+ // push the modified crush map into osdmap
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ pending_inc.crush.clear();
+ crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+ osdmap.apply_incremental(pending_inc);
+ }
+
+ // create "pool1"
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ pending_inc.new_pool_max = osdmap.get_pool_max();
+ auto pool_id = ++pending_inc.new_pool_max;
+ pool_1_id = pool_id;
+ pg_pool_t empty;
+ auto p = pending_inc.get_new_pool(pool_id, &empty);
+ p->size = 3;
+ p->min_size = 1;
+ p->set_pg_num(3);
+ p->set_pgp_num(3);
+ p->type = pg_pool_t::TYPE_REPLICATED;
+ p->crush_rule = rno;
+ p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+ pending_inc.new_pool_names[pool_id] = pool_1;
+ osdmap.apply_incremental(pending_inc);
+ ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
+ ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1);
+ {
+ // for each of the 3 PGs of pool1: check the crush mapping starts with
+ // osd.0, then override it with a pg_upmap of just [1]
+ for (unsigned i = 0; i < 3; i++) {
+ // 1.x -> [1]
+ pg_t rawpg(i, pool_id);
+ pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
+ vector<int> up;
+ int up_primary;
+ osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+ ASSERT_TRUE(up.size() == 3);
+ ASSERT_TRUE(up[0] == 0);
+
+ // insert a new pg_upmap
+ vector<int32_t> new_up;
+ // and remap 1.x to osd.1 only
+ // this way osd.0 is deemed to be *underfull*
+ // and osd.1 is deemed to be *overfull*
+ new_up.push_back(1);
+ {
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
+ new_up.begin(), new_up.end());
+ osdmap.apply_incremental(pending_inc);
+ }
+ // the pg_upmap must now be the whole raw up set
+ osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+ ASSERT_TRUE(up.size() == 1);
+ ASSERT_TRUE(up[0] == 1);
+ }
+ }
+ }
+
+ {
+ // build customized crush rule for "pool2"
+ string host_name = "host_for_pool_2";
+ // build a customized host to capture osd.6~11
+ for (int i = 6; i < (int)get_num_osds(); i++) {
+ stringstream osd_name;
+ vector<string> move_to;
+ osd_name << "osd." << i;
+ move_to.push_back("root=default");
+ string host_loc = "host=" + host_name;
+ move_to.push_back(host_loc);
+ auto r = crush_move(osdmap, osd_name.str(), move_to);
+ ASSERT_EQ(0, r);
+ }
+ CrushWrapper crush;
+ get_crush(osdmap, crush);
+ auto host_id = crush.get_item_id(host_name);
+ ASSERT_TRUE(host_id < 0);
+ string rule_name = "rule_for_pool2";
+ int rule_type = pg_pool_t::TYPE_REPLICATED;
+ ASSERT_TRUE(!crush.rule_exists(rule_name));
+ // first rule number that is used neither as a rule nor as a ruleset
+ int rno;
+ for (rno = 0; rno < crush.get_max_rules(); rno++) {
+ if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno))
+ break;
+ }
+ int min_size = 3;
+ int max_size = 3;
+ int steps = 7;
+ crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
+ int step = 0;
+ crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
+ // always choose osd.0
+ crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+ // then pick any other random osds
+ crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0);
+ crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0);
+ // every one of the `steps` slots of the rule must have been filled
+ ASSERT_TRUE(step == steps);
+ auto r = crush_add_rule(crush.get_crush_map(), rule, rno);
+ ASSERT_TRUE(r >= 0);
+ crush.set_rule_name(rno, rule_name);
+ {
+ // push the modified crush map into osdmap
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ pending_inc.crush.clear();
+ crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+ osdmap.apply_incremental(pending_inc);
+ }
+
+ // create "pool2"
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ pending_inc.new_pool_max = osdmap.get_pool_max();
+ auto pool_id = ++pending_inc.new_pool_max;
+ pg_pool_t empty;
+ auto p = pending_inc.get_new_pool(pool_id, &empty);
+ p->size = 3;
+ // include a single PG
+ p->set_pg_num(1);
+ p->set_pgp_num(1);
+ p->type = pg_pool_t::TYPE_REPLICATED;
+ p->crush_rule = rno;
+ p->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+ pending_inc.new_pool_names[pool_id] = pool_2;
+ osdmap.apply_incremental(pending_inc);
+ ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
+ ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2);
+ // the single PG of pool2 has no upmap yet and maps to osd.0 first
+ pg_t rawpg(0, pool_id);
+ pg_t pgid = osdmap.raw_pg_to_pg(rawpg);
+ EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid));
+ vector<int> up;
+ int up_primary;
+ osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+ ASSERT_TRUE(up.size() == 3);
+ ASSERT_TRUE(up[0] == 0);
+
+ {
+ // build a pg_upmap_item that will
+ // remap pg out from *underfull* osd.0
+ vector<pair<int32_t,int32_t>> new_pg_upmap_items;
+ new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ pending_inc.new_pg_upmap_items[pgid] =
+ mempool::osdmap::vector<pair<int32_t,int32_t>>(
+ new_pg_upmap_items.begin(), new_pg_upmap_items.end());
+ osdmap.apply_incremental(pending_inc);
+ ASSERT_TRUE(osdmap.have_pg_upmaps(pgid));
+ // these locals shadow the outer up/up_primary on purpose: only the
+ // remapped result is inspected here
+ vector<int> up;
+ int up_primary;
+ osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+ ASSERT_TRUE(up.size() == 3);
+ ASSERT_TRUE(up[0] == 10);
+ }
+ }
+
+ // ready to go
+ {
+ // restrict the optimization to pool1
+ set<int64_t> only_pools;
+ ASSERT_TRUE(pool_1_id >= 0);
+ only_pools.insert(pool_1_id);
+ OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+ // require perfect distribution! (max deviation 0)
+ osdmap.calc_pg_upmaps(g_ceph_context,
+ 0, // so we can force optimizing
+ 100,
+ only_pools,
+ &pending_inc);
+ osdmap.apply_incremental(pending_inc);
+ }
+}
+
+TEST_F(OSDMapTest, BUG_40104) {
+  // http://tracker.ceph.com/issues/40104 -- times clean_pg_upmaps() on a map with one upmap per pg
+  const int osd_count = 5000;
+  const int pg_count = 10000;
+  set_up_map(osd_count, true);
+  int pool_id;
+  {
+    // create "big_pool": replicated, size 3, pg_count pgs, crush rule 0
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.new_pool_max = osdmap.get_pool_max();
+    pool_id = ++inc.new_pool_max;
+    pg_pool_t blank;
+    auto pool = inc.get_new_pool(pool_id, &blank);
+    pool->size = 3;
+    pool->min_size = 1;
+    pool->set_pg_num(pg_count);
+    pool->set_pgp_num(pg_count);
+    pool->type = pg_pool_t::TYPE_REPLICATED;
+    pool->crush_rule = 0;
+    pool->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+    inc.new_pool_names[pool_id] = "big_pool";
+    osdmap.apply_incremental(inc);
+    ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
+    ASSERT_TRUE(osdmap.get_pool_name(pool_id) == "big_pool");
+  }
+  {
+    // give every pg one pg_upmap_items pair: first raw-up osd -> random osd
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    for (int seed = 0; seed < pg_count; seed++) {
+      const pg_t pgid = osdmap.raw_pg_to_pg(pg_t(seed, pool_id));
+      vector<int> up;
+      int up_primary;
+      osdmap.pg_to_raw_up(pgid, &up, &up_primary);
+      ASSERT_TRUE(up.size() == 3);
+      const int victim = up[0];
+      const int replaced_by = random() % osd_count;
+      // the pair may or may not be a valid remapping; that is irrelevant here
+      vector<pair<int32_t,int32_t>> remap;
+      remap.push_back(make_pair(victim, replaced_by));
+      inc.new_pg_upmap_items[pgid] =
+        mempool::osdmap::vector<pair<int32_t,int32_t>>(
+          remap.begin(), remap.end());
+    }
+    osdmap.apply_incremental(inc);
+  }
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    const auto t0 = mono_clock::now();
+    clean_pg_upmaps(g_ceph_context, osdmap, inc);
+    const auto elapsed = mono_clock::now() - t0;
+    std::cout << "clean_pg_upmaps (~" << pg_count
+              << " pg_upmap_items) latency:" << timespan_str(elapsed)
+              << std::endl;
+  }
+}
+
+TEST_F(OSDMapTest, BUG_42052) {
+  // https://tracker.ceph.com/issues/42052
+  set_up_map(6, true);
+  const string pool_name("pool");
+  // hand-build a crush rule for "pool"
+  CrushWrapper crush;
+  get_crush(osdmap, crush);
+  string rule_name = "rule";
+  int rule_type = pg_pool_t::TYPE_REPLICATED;
+  ASSERT_TRUE(!crush.rule_exists(rule_name));
+  // first id that is in use neither as a rule nor as a ruleset
+  int rno = 0;
+  while (rno < crush.get_max_rules() &&
+         (crush.rule_exists(rno) || crush.ruleset_exists(rno)))
+    rno++;
+  int min_size = 3;
+  int max_size = 3;
+  int steps = 8;
+  crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size);
+  int pos = 0;
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0);
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0);
+  // intended to always choose osd.0, osd.1, osd.2
+  // NOTE(review): every TAKE below passes 0 as its first argument and varies only the second -- confirm
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_TAKE, 0, 0);
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_EMIT, 0, 0);
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_TAKE, 0, 1);
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_EMIT, 0, 0);
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_TAKE, 0, 2);
+  crush_rule_set_step(rule, pos++, CRUSH_RULE_EMIT, 0, 0);
+  ASSERT_TRUE(pos == steps);
+  auto rc = crush_add_rule(crush.get_crush_map(), rule, rno);
+  ASSERT_TRUE(rc >= 0);
+  crush.set_rule_name(rno, rule_name);
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.crush.clear();
+    crush.encode(inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+    osdmap.apply_incremental(inc);
+  }
+
+  // create "pool": one pg, replicated, size 3, using the rule added above
+  OSDMap::Incremental pool_inc(osdmap.get_epoch() + 1);
+  pool_inc.new_pool_max = osdmap.get_pool_max();
+  auto pool_id = ++pool_inc.new_pool_max;
+  pg_pool_t blank;
+  auto pool = pool_inc.get_new_pool(pool_id, &blank);
+  pool->size = 3;
+  pool->min_size = 1;
+  pool->set_pg_num(1);
+  pool->set_pgp_num(1);
+  pool->type = pg_pool_t::TYPE_REPLICATED;
+  pool->crush_rule = rno;
+  pool->set_flag(pg_pool_t::FLAG_HASHPSPOOL);
+  pool_inc.new_pool_names[pool_id] = pool_name;
+  osdmap.apply_incremental(pool_inc);
+  ASSERT_TRUE(osdmap.have_pg_pool(pool_id));
+  ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_name);
+  const pg_t pgid = osdmap.raw_pg_to_pg(pg_t(0, pool_id));
+  {
+    // pg_upmap for the pg: [2,3,5]
+    vector<int32_t> new_up{2,3,5};
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>(
+      new_up.begin(), new_up.end());
+    osdmap.apply_incremental(inc);
+  }
+  {
+    // pg_upmap_items for the pg: [0,3,4,5]
+    vector<pair<int32_t,int32_t>> remap;
+    remap.push_back(make_pair(0, 3));
+    remap.push_back(make_pair(4, 5));
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    inc.new_pg_upmap_items[pgid] =
+      mempool::osdmap::vector<pair<int32_t,int32_t>>(
+        remap.begin(), remap.end());
+    osdmap.apply_incremental(inc);
+  }
+  {
+    OSDMap::Incremental inc(osdmap.get_epoch() + 1);
+    clean_pg_upmaps(g_ceph_context, osdmap, inc);
+    osdmap.apply_incremental(inc);
+    ASSERT_FALSE(osdmap.have_pg_upmaps(pgid));
+  }
+}
+
+TEST(PGTempMap, basic)
+{
+  // seeds 3..999 of pool 1 get one osd each; seed 1 is added last, seed 2 never
+  PGTempMap temp;
+  for (int seed = 3; seed < 1000; ++seed) {
+    temp.set(pg_t(seed, 1), {seed});
+  }
+  const pg_t present(1, 1);
+  const pg_t absent(2, 1);
+  temp.set(present, {1, 2});
+  ASSERT_NE(temp.find(present), temp.end());
+  ASSERT_EQ(temp.find(present), temp.begin());
+  ASSERT_EQ(temp.find(absent), temp.end());
+  ASSERT_EQ(998u, temp.size());
+}
+
+TEST_F(OSDMapTest, BUG_48884)
+{
+  // Lays out 12 osds as three hosts of four under rack "localrack", reports
+  // all-zero statfs for osd.4 - osd.7, and checks the totals that
+  // print_osd_utilization() emits for the rack in its JSON output.
+  set_up_map(12);
+
+  for (unsigned int x = 0; x < get_num_osds(); x++) {
+    // four consecutive osds per host: osd.0-3 -> host-1, osd.4-7 -> host-2, ...
+    const string osd_name = "osd." + std::to_string(x);
+    const string host_loc = "host=host-" + std::to_string(x / 4 + 1);
+    vector<string> move_to{"root=default", "rack=localrack", host_loc};
+    int r = crush_move(osdmap, osd_name, move_to);
+    ASSERT_EQ(0, r);
+  }
+
+  // remove the "localhost" item from crush and apply the edited map
+  CrushWrapper crush;
+  get_crush(osdmap, crush);
+  auto host_id = crush.get_item_id("localhost");
+  crush.remove_item(g_ceph_context, host_id, false);
+  OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1);
+  pending_inc.crush.clear();
+  crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT);
+  osdmap.apply_incremental(pending_inc);
+
+  // per-osd stats: `stats` carries space figures, `stats_null` is all zero
+  PGMap pgmap;
+  osd_stat_t stats, stats_null;
+  stats.statfs.total = 500000;
+  stats.statfs.available = 50000;
+  stats.statfs.omap_allocated = 50000;
+  stats.statfs.internal_metadata = 50000;
+  stats_null.statfs.total = 0;
+  stats_null.statfs.available = 0;
+  stats_null.statfs.omap_allocated = 0;
+  stats_null.statfs.internal_metadata = 0;
+  for (unsigned int x=0; x < get_num_osds(); x++) {
+    if (x > 3 && x < 8) {
+      pgmap.osd_stat.insert({x,stats_null});
+    } else {
+      pgmap.osd_stat.insert({x,stats});
+    }
+  }
+
+  // dump the utilization tree as json and walk the array held by the first
+  // top-level member (presumably the node list -- confirm against the dumper)
+  stringstream ss;
+  boost::scoped_ptr<Formatter> f(Formatter::create("json-pretty"));
+  print_osd_utilization(osdmap, pgmap, ss, f.get(), true, "", "root");
+  const string json = ss.str();
+  JSONParser parser;
+  // fail the test, rather than dereference an empty result, on bad output
+  ASSERT_TRUE(parser.parse(json.c_str(), static_cast<int>(json.size())));
+  auto iter = parser.find_first();
+  ASSERT_FALSE(iter.end());
+  // bind by reference to avoid copying each element string
+  for (const auto& bucket : (*iter)->get_array_elements()) {
+    JSONParser parser2;
+    ASSERT_TRUE(parser2.parse(bucket.c_str(), static_cast<int>(bucket.size())));
+    // string value of a field, or a marker that matches nothing if the field
+    // is absent (find_obj() result is checked before use)
+    const auto value_of = [&parser2](const char *key) -> string {
+      auto *obj = parser2.find_obj(key);
+      return obj ? obj->get_data_val().str : string("<missing>");
+    };
+    // only the entry named "localrack" is checked; all others are skipped
+    if (value_of("name") == "localrack") {
+      ASSERT_EQ(value_of("kb"), "3904");
+      ASSERT_EQ(value_of("kb_used"), "3512");
+      ASSERT_EQ(value_of("kb_used_omap"), "384");
+      ASSERT_EQ(value_of("kb_used_meta"), "384");
+      ASSERT_EQ(value_of("kb_avail"), "384");
+    }
+  }
+}
diff --git a/src/test/osd/TestOSDScrub.cc b/src/test/osd/TestOSDScrub.cc
new file mode 100644
index 00000000..34a3b8d8
--- /dev/null
+++ b/src/test/osd/TestOSDScrub.cc
@@ -0,0 +1,147 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include <gtest/gtest.h>
+#include "osd/OSD.h"
+#include "os/ObjectStore.h"
+#include "mon/MonClient.h"
+#include "common/ceph_argparse.h"
+#include "msg/Messenger.h"
+
+class TestOSDScrub: public OSD {
+ // OSD subclass used by the test below; it re-exposes OSD::scrub_time_permit() through a public wrapper.
+public:
+ TestOSDScrub(CephContext *cct_,
+ ObjectStore *store_,
+ int id,
+ Messenger *internal,
+ Messenger *external,
+ Messenger *hb_front_client,
+ Messenger *hb_back_client,
+ Messenger *hb_front_server,
+ Messenger *hb_back_server,
+ Messenger *osdc_messenger,
+ MonClient *mc, const std::string &dev, const std::string &jdev) :
+ OSD(cct_, store_, id, internal, external, hb_front_client, hb_back_client, hb_front_server, hb_back_server, osdc_messenger, mc, dev, jdev)
+ {
+ }
+ // Forwards to OSD::scrub_time_permit() so the test can call it on a TestOSDScrub.
+ bool scrub_time_permit(utime_t now) {
+ return OSD::scrub_time_permit(now);
+ }
+};
+
+TEST(TestOSDScrub, scrub_time_permit) {
+  ObjectStore *store = ObjectStore::create(g_ceph_context,
+                                           g_conf()->osd_objectstore,
+                                           g_conf()->osd_data,
+                                           g_conf()->osd_journal);
+  std::string cluster_msgr_type = g_conf()->ms_cluster_type.empty() ? g_conf().get_val<std::string>("ms_type") : g_conf()->ms_cluster_type;
+  Messenger *ms = Messenger::create(g_ceph_context, cluster_msgr_type,
+                                    entity_name_t::OSD(0), "make_checker",
+                                    getpid(), 0);
+  ms->set_cluster_protocol(CEPH_OSD_PROTOCOL);
+  ms->set_default_policy(Messenger::Policy::stateless_server(0));
+  ms->bind(g_conf()->public_addr);
+  MonClient mc(g_ceph_context);
+  mc.build_initial_monmap();
+  TestOSDScrub* osd = new TestOSDScrub(g_ceph_context, store, 0, ms, ms, ms, ms, ms, ms, ms, &mc, "", "");  // NOTE(review): osd, ms and store are never released here
+  // Each stanza sets osd_scrub_begin_hour/end_hour and checks one local time. begin=0, end=24: 12:05 permitted
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "0");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "24");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  struct tm tm = {};   // zero-init: strptime() is not guaranteed to set fields the format omits
+  tm.tm_isdst = -1;    // negative: mktime() determines itself whether DST is in effect
+  strptime("2015-01-16 12:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  utime_t now = utime_t(mktime(&tm), 0);
+  bool ret = osd->scrub_time_permit(now);
+  ASSERT_TRUE(ret);
+  // begin=24, end=0: 12:05 rejected
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "24");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "0");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 12:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_FALSE(ret);
+  // begin=0, end=0: 12:05 permitted
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "0");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "0");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 12:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_TRUE(ret);
+  // begin=20, end=07 (window wraps past midnight): 01:05 permitted
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 01:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_TRUE(ret);
+  // begin=20, end=07: 20:05 permitted
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 20:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_TRUE(ret);
+  // begin=20, end=07: 08:05 rejected
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 08:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_FALSE(ret);
+  // begin=01, end=07: 20:05 rejected
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 20:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_FALSE(ret);
+  // begin=01, end=07: 00:05 rejected
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 00:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_FALSE(ret);
+  // begin=01, end=07: 04:05 permitted
+  g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01");
+  g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07");
+  g_ceph_context->_conf.apply_changes(nullptr);
+  strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm);
+  now = utime_t(mktime(&tm), 0);
+  ret = osd->scrub_time_permit(now);
+  ASSERT_TRUE(ret);
+}
+
+// Local Variables:
+// compile-command: "cd ../.. ; make unittest_osdscrub ; ./unittest_osdscrub --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* "
+// End:
diff --git a/src/test/osd/TestOpStat.cc b/src/test/osd/TestOpStat.cc
new file mode 100644
index 00000000..e8045fa4
--- /dev/null
+++ b/src/test/osd/TestOpStat.cc
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "include/interval_set.h"
+#include "include/buffer.h"
+#include <list>
+#include <map>
+#include <set>
+#include "RadosModel.h"
+#include "TestOpStat.h"
+
+void TestOpStat::begin(TestOp *in) {
+  // Start timing `in` under its op type; the scoped guard releases stat_lock on every exit path.
+  Mutex::Locker guard(stat_lock);
+  stats[in->getType()].begin(in);
+}
+
+void TestOpStat::end(TestOp *in) {
+  // Finish timing `in` under its op type; the scoped guard releases stat_lock on every exit path.
+  Mutex::Locker guard(stat_lock);
+  stats[in->getType()].end(in);
+}
+
+void TestOpStat::TypeStatus::export_latencies(map<double,uint64_t> &in) const
+{
+  // One ascending pass over the samples: when the share seen reaches the next key of `in`, store the sample there.
+  auto slot = in.begin();
+  int seen = 0;
+  for (auto sample = latencies.begin();
+       sample != latencies.end() && slot != in.end(); ++sample) {
+    ++seen;
+    if (slot->first <= (static_cast<double>(seen) / static_cast<double>(latencies.size())) * 100) {
+      slot->second = *sample;
+      ++slot;
+    }
+  }
+}
+
+std::ostream & operator<<(std::ostream &out, const TestOpStat &rhs)
+{
+  // Writes, for every op type in rhs.stats, the 10th/50th/90th/99th percentile
+  // latency. Samples are differences of gettime() values (microseconds); they
+  // are printed divided by 1000 with an "ms" suffix.
+  // Scoped guard: stat_lock is released on every exit path, including a throw.
+  Mutex::Locker guard(rhs.stat_lock);
+  for (const auto &entry : rhs.stats) {
+    map<double,uint64_t> latency;
+    latency[10] = 0;
+    latency[50] = 0;
+    latency[90] = 0;
+    latency[99] = 0;
+    entry.second.export_latencies(latency);
+
+    out << entry.first << " latency: " << std::endl;
+    for (const auto &pct : latency) {
+      // stop at the first percentile whose value is still 0
+      if (pct.second == 0) break;
+      out << "\t" << pct.first << "th percentile: "
+          << pct.second / 1000 << "ms" << std::endl;
+    }
+  }
+  return out;
+}
diff --git a/src/test/osd/TestOpStat.h b/src/test/osd/TestOpStat.h
new file mode 100644
index 00000000..314bb66c
--- /dev/null
+++ b/src/test/osd/TestOpStat.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "include/rados/librados.hpp"
+
+#ifndef TESTOPSTAT_H
+#define TESTOPSTAT_H
+
+class TestOp;
+
+class TestOpStat {
+public:
+  mutable Mutex stat_lock;  // mutable so that operator<< can lock a const TestOpStat
+  // Tracks in-flight TestOps and the latencies of completed ones, keyed by op type.
+  TestOpStat() : stat_lock("TestOpStat lock") {}
+  // Microseconds since the epoch; tv_sec is widened first so the multiply cannot overflow a narrow time_t.
+  static uint64_t gettime()
+  {
+    timeval t;
+    gettimeofday(&t, nullptr);
+    return static_cast<uint64_t>(t.tv_sec) * 1000000 + t.tv_usec;
+  }
+  // Book-keeping for a single op type.
+  class TypeStatus {
+  public:
+    map<TestOp*,uint64_t> inflight;   // op -> gettime() value recorded by begin()
+    multiset<uint64_t> latencies;     // end - begin differences, kept sorted
+    void begin(TestOp *in)
+    {
+      ceph_assert(!inflight.count(in));
+      inflight[in] = gettime();
+    }
+    // Records the elapsed time for `in` and forgets it; asserts that begin(in) came first.
+    void end(TestOp *in)
+    {
+      auto started = inflight.find(in);   // single lookup reused for read and erase
+      ceph_assert(started != inflight.end());
+      latencies.insert(gettime() - started->second);
+      inflight.erase(started);
+    }
+    // For each key of `in` (a percentile), stores the matching latency sample as its value.
+    void export_latencies(map<double,uint64_t> &in) const;
+  };
+  map<string,TypeStatus> stats;
+  // Wrappers that take stat_lock and dispatch on in->getType().
+  void begin(TestOp *in);
+  void end(TestOp *in);
+  friend std::ostream & operator<<(std::ostream &, const TestOpStat &);
+};
+
+std::ostream & operator<<(std::ostream &out, const TestOpStat &rhs);
+
+#endif
diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc
new file mode 100644
index 00000000..bdf62b9b
--- /dev/null
+++ b/src/test/osd/TestPGLog.cc
@@ -0,0 +1,3243 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include "gtest/gtest.h"
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "osd/PGLog.h"
+#include "osd/OSDMap.h"
+#include "include/coredumpctl.h"
+#include "../objectstore/store_test_fixture.h"
+struct PGLogTestBase {
+  // Static factories for the hobject_t / eversion_t / pg_log_entry_t values
+  // that the PGLog tests build their logs from.
+  static hobject_t mk_obj(unsigned id) {
+    hobject_t hoid;
+    hoid.oid = "obj_" + std::to_string(id);
+    hoid.set_hash(id);
+    hoid.pool = 1;
+    return hoid;
+  }
+  static eversion_t mk_evt(unsigned ep, unsigned v) {
+    return eversion_t(ep, v);
+  }
+  static pg_log_entry_t mk_ple_mod(  // MODIFY, marked unrollbackable
+    const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
+    pg_log_entry_t entry;
+    entry.mark_unrollbackable();
+    entry.op = pg_log_entry_t::MODIFY;
+    entry.soid = hoid;
+    entry.version = v;
+    entry.prior_version = pv;
+    entry.reqid = reqid;
+    return entry;
+  }
+  static pg_log_entry_t mk_ple_dt(  // DELETE, marked unrollbackable
+    const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
+    pg_log_entry_t entry;
+    entry.mark_unrollbackable();
+    entry.op = pg_log_entry_t::DELETE;
+    entry.soid = hoid;
+    entry.version = v;
+    entry.prior_version = pv;
+    entry.reqid = reqid;
+    return entry;
+  }
+  static pg_log_entry_t mk_ple_ldt(  // LOST_DELETE, marked unrollbackable, reqid left default
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    pg_log_entry_t entry;
+    entry.mark_unrollbackable();
+    entry.op = pg_log_entry_t::LOST_DELETE;
+    entry.soid = hoid;
+    entry.version = v;
+    entry.prior_version = pv;
+    return entry;
+  }
+  static pg_log_entry_t mk_ple_mod_rb(  // MODIFY, not marked unrollbackable
+    const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
+    pg_log_entry_t entry;
+    entry.op = pg_log_entry_t::MODIFY;
+    entry.soid = hoid;
+    entry.version = v;
+    entry.prior_version = pv;
+    entry.reqid = reqid;
+    return entry;
+  }
+  static pg_log_entry_t mk_ple_dt_rb(  // DELETE, not marked unrollbackable
+    const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) {
+    pg_log_entry_t entry;
+    entry.op = pg_log_entry_t::DELETE;
+    entry.soid = hoid;
+    entry.version = v;
+    entry.prior_version = pv;
+    entry.reqid = reqid;
+    return entry;
+  }
+  static pg_log_entry_t mk_ple_err(  // ERROR entry with prior_version (0,0)
+    const hobject_t &hoid, eversion_t v, osd_reqid_t reqid) {
+    pg_log_entry_t entry;
+    entry.op = pg_log_entry_t::ERROR;
+    entry.soid = hoid;
+    entry.version = v;
+    entry.prior_version = eversion_t(0, 0);
+    entry.reqid = reqid;
+    return entry;
+  }
+  static pg_log_entry_t mk_ple_mod(  // overloads below supply a default osd_reqid_t
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    return mk_ple_mod(hoid, v, pv, osd_reqid_t());
+  }
+  static pg_log_entry_t mk_ple_dt(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    return mk_ple_dt(hoid, v, pv, osd_reqid_t());
+  }
+  static pg_log_entry_t mk_ple_mod_rb(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    return mk_ple_mod_rb(hoid, v, pv, osd_reqid_t());
+  }
+  static pg_log_entry_t mk_ple_dt_rb(
+    const hobject_t &hoid, eversion_t v, eversion_t pv) {
+    return mk_ple_dt_rb(hoid, v, pv, osd_reqid_t());
+  }
+  static pg_log_entry_t mk_ple_err(
+    const hobject_t &hoid, eversion_t v) {
+    return mk_ple_err(hoid, v, osd_reqid_t());
+  }
+}; // PGLogTestBase
+
+
+class PGLogTest : virtual public ::testing::Test, protected PGLog, public PGLogTestBase {
+  // Fixture that is itself a PGLog (protected base), so test bodies can reach
+  // the inherited `log` and `missing` members directly.
+public:
+  PGLogTest() : PGLog(g_ceph_context) {}
+  void SetUp() override { missing.may_include_deletes = true; }
+
+  // Fixture teardown: invokes the inherited clear() (presumably resetting the
+  // PGLog state -- confirm in PGLog) after each test.
+  void TearDown() override {
+    clear();
+  }
+
+  // One merge scenario: a shared `base` log followed by an authoritative
+  // (`auth`) and a divergent (`div`) continuation, plus the expected outcome.
+  struct TestCase {
+    list<pg_log_entry_t> base;
+    list<pg_log_entry_t> auth;
+    list<pg_log_entry_t> div;
+    // missing set before the operation, and the one expected afterwards
+    pg_missing_t init;
+    pg_missing_t final;
+    // expected handler side effects
+    set<hobject_t> toremove;
+    list<pg_log_entry_t> torollback;
+    bool deletes_during_peering;
+
+  private:
+    IndexedLog fullauth;
+    IndexedLog fulldiv;
+    pg_info_t authinfo;
+    pg_info_t divinfo;
+  public:
+    TestCase() : deletes_during_peering(false) {}
+    void setup() {
+      init.may_include_deletes = !deletes_during_peering;
+      final.may_include_deletes = !deletes_during_peering;
+      fullauth.log.insert(fullauth.log.end(), base.begin(), base.end());
+      fullauth.log.insert(fullauth.log.end(), auth.begin(), auth.end());
+      fulldiv.log.insert(fulldiv.log.end(), base.begin(), base.end());
+      fulldiv.log.insert(fulldiv.log.end(), div.begin(), div.end());
+      // authoritative bounds; rbegin()/begin() are dereferenced, so base+auth must be non-empty
+      fullauth.head = authinfo.last_update = fullauth.log.rbegin()->version;
+      authinfo.last_complete = fullauth.log.rbegin()->version;
+      authinfo.log_tail = fullauth.log.begin()->version;
+      authinfo.log_tail.version--;
+      fullauth.tail = authinfo.log_tail;
+      authinfo.last_backfill = hobject_t::get_max();
+      // divergent bounds; likewise requires base+div to be non-empty
+      fulldiv.head = divinfo.last_update = fulldiv.log.rbegin()->version;
+      divinfo.last_complete = eversion_t();
+      divinfo.log_tail = fulldiv.log.begin()->version;
+      divinfo.log_tail.version--;
+      fulldiv.tail = divinfo.log_tail;
+      divinfo.last_backfill = hobject_t::get_max();
+      // last_complete: everything if nothing is missing, else the last entry below the first missing version
+      if (init.get_items().empty()) {
+        divinfo.last_complete = divinfo.last_update;
+      } else {
+        eversion_t fmissing = init.get_items().at(init.get_rmissing().begin()->second).need;
+        for (list<pg_log_entry_t>::const_iterator i = fulldiv.log.begin();
+             i != fulldiv.log.end();
+             ++i) {
+          if (i->version < fmissing)
+            divinfo.last_complete = i->version;
+          else
+            break;
+        }
+      }
+
+      fullauth.index();
+      fulldiv.index();
+    }
+    void set_div_bounds(eversion_t head, eversion_t tail) {
+      fulldiv.tail = divinfo.log_tail = tail;
+      fulldiv.head = divinfo.last_update = head;
+    }
+    void set_auth_bounds(eversion_t head, eversion_t tail) {
+      fullauth.tail = authinfo.log_tail = tail;
+      fullauth.head = authinfo.last_update = head;
+    }
+    const IndexedLog &get_fullauth() const { return fullauth; }
+    const IndexedLog &get_fulldiv() const { return fulldiv; }
+    const pg_info_t &get_authinfo() const { return authinfo; }
+    const pg_info_t &get_divinfo() const { return divinfo; }
+  }; // struct TestCase
+  // Handler that records which objects were removed and which entries were rolled back.
+  struct LogHandler : public PGLog::LogEntryHandler {
+    set<hobject_t> removed;
+    list<pg_log_entry_t> rolledback;
+
+    void rollback(
+      const pg_log_entry_t &entry) override {
+      rolledback.push_back(entry);
+    }
+    void rollforward(
+      const pg_log_entry_t &entry) override {}
+    void remove(
+      const hobject_t &hoid) override {
+      removed.insert(hoid);
+    }
+    void try_stash(const hobject_t &, version_t) override {
+      // lost/unfound cases are not tested yet
+    }
+    void trim(
+      const pg_log_entry_t &entry) override {}
+  };
+  // Checks `missing` item-by-item against tcase.final, then via debug_verify_from_init().
+  template <typename missing_t>
+  void verify_missing(
+    const TestCase &tcase,
+    const missing_t &missing) {
+    ASSERT_EQ(tcase.final.get_items().size(), missing.get_items().size());
+    for (auto i = missing.get_items().begin();
+         i != missing.get_items().end();
+         ++i) {
+      EXPECT_TRUE(tcase.final.get_items().count(i->first));
+      EXPECT_EQ(tcase.final.get_items().find(i->first)->second.need, i->second.need);
+      EXPECT_EQ(tcase.final.get_items().find(i->first)->second.have, i->second.have);
+    }
+    bool correct = missing.debug_verify_from_init(tcase.init, &(std::cout));
+    ASSERT_TRUE(correct);
+  }
+  // Compares the handler's recorded removals/rollbacks with the expected ones, in order.
+  void verify_sideeffects(
+    const TestCase &tcase,
+    const LogHandler &handler) {
+    ASSERT_EQ(tcase.toremove.size(), handler.removed.size());
+    ASSERT_EQ(tcase.torollback.size(), handler.rolledback.size());
+
+    {
+      list<pg_log_entry_t>::const_iterator titer = tcase.torollback.begin();
+      list<pg_log_entry_t>::const_iterator hiter = handler.rolledback.begin();
+      for (; titer != tcase.torollback.end(); ++titer, ++hiter) {
+        EXPECT_EQ(titer->version, hiter->version);
+      }
+    }
+
+    {
+      set<hobject_t>::const_iterator titer = tcase.toremove.begin();
+      set<hobject_t>::const_iterator hiter = handler.removed.begin();
+      for (; titer != tcase.toremove.end(); ++titer, ++hiter) {
+        EXPECT_EQ(*titer, *hiter);
+      }
+    }
+  }
+  // Starts from the divergent log and merges the authoritative one into it.
+  void test_merge_log(const TestCase &tcase) {
+    clear();
+    log = tcase.get_fulldiv();
+    pg_info_t info = tcase.get_divinfo();
+
+    missing = tcase.init;
+    missing.flush();
+
+    IndexedLog olog;
+    olog = tcase.get_fullauth();
+    pg_info_t oinfo = tcase.get_authinfo();
+
+    LogHandler h;
+    bool dirty_info = false;
+    bool dirty_big_info = false;
+    merge_log(
+      oinfo, olog, pg_shard_t(1, shard_id_t(0)), info,
+      &h, dirty_info, dirty_big_info);
+
+    ASSERT_EQ(info.last_update, oinfo.last_update);
+    verify_missing(tcase, missing);
+    verify_sideeffects(tcase, h);
+  }
+  // Starts from the authoritative log and processes the divergent one as a replica's log.
+  void test_proc_replica_log(const TestCase &tcase) {
+    clear();
+    log = tcase.get_fullauth();
+    pg_info_t info = tcase.get_authinfo();
+
+    pg_missing_t omissing = tcase.init;
+
+    IndexedLog olog;
+    olog = tcase.get_fulldiv();
+    pg_info_t oinfo = tcase.get_divinfo();
+
+    proc_replica_log(
+      oinfo, olog, omissing, pg_shard_t(1, shard_id_t(0)));
+
+    ceph_assert(oinfo.last_update >= log.tail);
+
+    if (!tcase.base.empty()) {
+      ASSERT_EQ(tcase.base.rbegin()->version, oinfo.last_update);
+    }
+    // replay the auth entries newer than oinfo.last_update onto omissing before comparing
+    for (list<pg_log_entry_t>::const_iterator i = tcase.auth.begin();
+         i != tcase.auth.end();
+         ++i) {
+      if (i->version > oinfo.last_update) {
+        if (i->is_delete() && tcase.deletes_during_peering) {
+          omissing.rm(i->soid, i->version);
+        } else {
+          omissing.add_next_event(*i);
+        }
+      }
+    }
+    verify_missing(tcase, omissing);
+  } // test_proc_replica_log
+  // Runs both directions for one TestCase.
+  void run_test_case(const TestCase &tcase) {
+    test_merge_log(tcase);
+    test_proc_replica_log(tcase);
+  }
+}; // class PGLogTest
+
+struct TestHandler : public PGLog::LogEntryHandler {
+  // LogEntryHandler whose only effect is appending removed objects to a list
+  // supplied by the caller; every other callback does nothing.
+  // Reference to the caller's list; remove() appends to it.
+  list<hobject_t> &removed;
+  explicit TestHandler(list<hobject_t> &removed) : removed(removed) {}
+
+  void rollback(const pg_log_entry_t &entry) override {}
+  void rollforward(const pg_log_entry_t &entry) override {}
+  void remove(const hobject_t &hoid) override {
+    removed.push_back(hoid);
+  }
+  // declared without `override`, unlike the other callbacks
+  void cant_rollback(const pg_log_entry_t &entry) {}
+  void try_stash(const hobject_t &, version_t) override {
+    // lost/unfound cases are not tested yet
+  }
+  void trim(const pg_log_entry_t &entry) override {}
+};
+
+TEST_F(PGLogTest, rewind_divergent_log) {
+ /*        +----------------+
+           |      log       |
+           +--------+-------+
+           |        |object |
+           |version | hash  |
+           |        |       |
+    tail > | (1,1)  |  x5   |
+           |        |       |
+           |        |       |
+           | (1,4)  |  x9   < newhead
+           | MODIFY |       |
+           |        |       |
+    head > | (1,5)  |  x9   |
+           | DELETE |       |
+           |        |       |
+           +--------+-------+
+    Rewinding to newhead (1,4) drops the (1,5) DELETE; object x9 becomes missing.
+ */
+ {
+ clear();
+
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ hobject_t divergent_object;
+ eversion_t divergent_version;
+ eversion_t newhead;
+
+ hobject_t divergent;
+ divergent.set_hash(0x9);
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = newhead = eversion_t(1, 4);
+ e.soid = divergent;
+ e.op = pg_log_entry_t::MODIFY;
+ log.log.push_back(e);
+ e.version = divergent_version = eversion_t(1, 5);
+ e.prior_version = eversion_t(1, 4);
+ e.soid = divergent;
+ divergent_object = e.soid;
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ info.last_update = log.head;
+ info.last_complete = log.head;
+ }
+ // preconditions: three entries, nothing missing, nothing dirty
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(3U, log.log.size());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(log.head, info.last_update);
+ EXPECT_EQ(log.head, info.last_complete);
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ rewind_divergent_log(newhead, info, &h,
+ dirty_info, dirty_big_info);
+ // afterwards: two entries remain, info points at newhead, everything is flagged dirty
+ EXPECT_TRUE(log.objects.count(divergent));
+ EXPECT_TRUE(missing.is_missing(divergent_object));
+ EXPECT_EQ(1U, log.objects.count(divergent_object));
+ EXPECT_EQ(2U, log.log.size());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(newhead, info.last_update);
+ EXPECT_EQ(newhead, info.last_complete);
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(dirty_info);
+ EXPECT_TRUE(dirty_big_info);
+ }
+
+ /*        +----------------+
+           |      log       |
+           +--------+-------+
+           |        |object |
+           |version | hash  |
+           |        |       |
+    tail > | (1,1)  | NULL  |
+           |        |       |
+           | (1,3)  | NULL  < newhead
+           |        |       |
+    head > | (1,5)  |  x9   |
+           | DELETE |       |
+           +--------+-------+
+    Only the (1,5) DELETE is in the log; tail (1,1) and newhead (1,3) have no entry.
+ */
+ {
+ clear();
+
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ hobject_t divergent_object;
+ eversion_t divergent_version;
+ eversion_t prior_version;
+ eversion_t newhead;
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ info.log_tail = log.tail = eversion_t(1, 1);
+ newhead = eversion_t(1, 3);
+ e.version = divergent_version = eversion_t(1, 5);
+ e.soid.set_hash(0x9);
+ divergent_object = e.soid;
+ e.op = pg_log_entry_t::DELETE;
+ e.prior_version = prior_version = eversion_t(0, 2);
+ log.log.push_back(e);
+ log.head = e.version;
+ }
+ // preconditions: one entry, nothing missing, nothing dirty
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.log.size());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ rewind_divergent_log(newhead, info, &h,
+ dirty_info, dirty_big_info);
+ // afterwards: the log is empty and the object is missing
+ EXPECT_TRUE(missing.is_missing(divergent_object));
+ EXPECT_EQ(0U, log.objects.count(divergent_object));
+ EXPECT_TRUE(log.empty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(dirty_info);
+ EXPECT_TRUE(dirty_big_info);
+ }
+
+ // Test for 13965
+ {
+ clear();
+
+ list<hobject_t> remove_snap;
+ pg_info_t info;
+ info.log_tail = log.tail = eversion_t(1, 5);
+ info.last_update = eversion_t(1, 6);
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+ e.version = eversion_t(1, 5);
+ e.soid.set_hash(0x9);
+ add(e);
+ }
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+ e.version = eversion_t(1, 6);
+ e.soid.set_hash(0x10);
+ add(e);
+ }
+ TestHandler h(remove_snap);
+ roll_forward_to(eversion_t(1, 6), &h);
+ rewind_divergent_log(eversion_t(1, 5), info, &h,
+ dirty_info, dirty_big_info);
+ pg_log_t log; // local, default-constructed; shadows the fixture's inherited `log`
+ reset_backfill_claim_log(log, &h);
+ }
+}
+
+TEST_F(PGLogTest, merge_old_entry) { // One scoped scenario per merge_old_entry() case; each starts from clear() and checks dirty/remove_snap/missing/log state.
+ // entries > last_backfill are silently ignored
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ info.last_backfill = hobject_t();
+ info.last_backfill.set_hash(100);
+ oe.soid.set_hash(2);
+ ASSERT_GT(oe.soid, info.last_backfill);
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_TRUE(log.empty());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_TRUE(log.empty());
+ }
+
+ // the new entry (from the logs) has a version that is higher than
+ // the old entry (from the log entry given in argument) : do
+ // nothing and return false (the return value is not captured anywhere in this test)
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ pg_log_entry_t ne;
+ ne.mark_unrollbackable();
+ ne.version = eversion_t(2,1);
+ log.add(ne);
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.log.size());
+ EXPECT_EQ(ne.version, log.log.front().version);
+
+ // the newer entry ( from the logs ) can be DELETE
+ {
+ log.log.front().op = pg_log_entry_t::DELETE;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ oe.version = eversion_t(1,1);
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+ }
+
+ // if the newer entry is not DELETE, the object must be in missing
+ {
+ pg_log_entry_t &ne = log.log.front();
+ ne.op = pg_log_entry_t::MODIFY;
+ missing.add_next_event(ne);
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ oe.version = eversion_t(1,1);
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ missing.rm(ne.soid, ne.version);
+ }
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(remove_snap.empty()); // NOTE(review): the case comment says "do nothing", yet the remove list is expected to be non-empty here
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.log.size());
+ EXPECT_EQ(ne.version, log.log.front().version);
+
+ }
+
+ // the new entry (from the logs) has a version that is lower than
+ // the old entry (from the log entry given in argument) and
+ // old and new are delete : do nothing and return false
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ pg_log_entry_t ne;
+ ne.mark_unrollbackable();
+ ne.version = eversion_t(1,1);
+ ne.op = pg_log_entry_t::DELETE;
+ log.add(ne);
+
+ oe.version = eversion_t(2,1);
+ oe.op = pg_log_entry_t::DELETE;
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.log.size());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.log.size());
+ }
+
+ // the new entry (from the logs) has a version that is lower than
+ // the old entry (from the log entry given in argument) and
+ // old is update and new is DELETE :
+ // if the object is in missing, it is removed
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ pg_log_entry_t ne;
+ ne.mark_unrollbackable();
+ ne.version = eversion_t(1,1);
+ ne.op = pg_log_entry_t::DELETE;
+ log.add(ne);
+
+ oe.version = eversion_t(2,1);
+ oe.op = pg_log_entry_t::MODIFY;
+ missing.add_next_event(oe);
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_TRUE(missing.is_missing(oe.soid));
+ EXPECT_EQ(1U, log.log.size());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.size() > 0);
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.log.size());
+ }
+
+ // there is no new entry (from the logs) and
+ // the old entry (from the log entry given in argument) is not a CLONE and
+ // the old entry prior_version is greater than the tail of the log :
+ // do nothing and return false
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ info.log_tail = eversion_t(1,1);
+ oe.op = pg_log_entry_t::MODIFY;
+ oe.prior_version = eversion_t(2,1);
+ missing_add(oe.soid, oe.prior_version, eversion_t());
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_TRUE(log.empty());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_TRUE(log.empty());
+ }
+
+ // there is no new entry (from the logs) and
+ // the old entry (from the log entry given in argument) is not a CLONE and
+ // the old entry (from the log entry given in argument) is not a DELETE and
+ // the old entry prior_version is lower than the tail of the log :
+ // add the old object to the remove_snap list and
+ // add the old object to divergent priors and
+ // add or update the prior_version of the object to missing and
+ // return false
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ info.log_tail = eversion_t(2,1);
+ oe.soid.set_hash(1);
+ oe.op = pg_log_entry_t::MODIFY;
+ oe.prior_version = eversion_t(1,1);
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_TRUE(log.empty());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ EXPECT_TRUE(is_dirty());
+ EXPECT_EQ(oe.soid, remove_snap.front());
+ EXPECT_TRUE(t.empty());
+ EXPECT_TRUE(missing.is_missing(oe.soid));
+ EXPECT_TRUE(log.empty());
+ }
+
+ // there is no new entry (from the logs) and
+ // the old entry (from the log entry given in argument) is not a CLONE and
+ // the old entry (from the log entry given in argument) is a DELETE and
+ // the old entry prior_version is lower than the tail of the log :
+ // add the old object to divergent priors and
+ // add or update the prior_version of the object to missing and
+ // return false
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ info.log_tail = eversion_t(2,1);
+ oe.soid.set_hash(1);
+ oe.op = pg_log_entry_t::DELETE;
+ oe.prior_version = eversion_t(1,1);
+
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_TRUE(log.empty());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_TRUE(missing.is_missing(oe.soid));
+ EXPECT_TRUE(log.empty());
+ }
+
+
+ // there is no new entry (from the logs) and
+ // the old entry (from the log entry given in argument) is not a CLONE and
+ // the old entry (from the log entry given in argument) is not a DELETE and
+ // the old entry prior_version is eversion_t() :
+ // add the old object to the remove_snap list and
+ // remove the prior_version of the object from missing, if any and
+ // return false
+ {
+ clear();
+
+ ObjectStore::Transaction t;
+ pg_log_entry_t oe;
+ oe.mark_unrollbackable();
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+
+ info.log_tail = eversion_t(10,1);
+ oe.soid.set_hash(1);
+ oe.op = pg_log_entry_t::MODIFY;
+ oe.prior_version = eversion_t();
+
+ missing.add(oe.soid, eversion_t(1,1), eversion_t(), false);
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(t.empty());
+ EXPECT_TRUE(missing.is_missing(oe.soid));
+ EXPECT_TRUE(log.empty());
+
+ TestHandler h(remove_snap);
+ merge_old_entry(t, oe, info, &h);
+
+ missing.flush();
+ EXPECT_FALSE(is_dirty());
+ EXPECT_EQ(oe.soid, remove_snap.front());
+ EXPECT_TRUE(t.empty());
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_TRUE(log.empty());
+ }
+
+}
+
+TEST_F(PGLogTest, merge_log) { // One scoped scenario per merge_log() case; each starts from clear() and compares state before and after the call.
+ // head and tail match, last_backfill is set:
+ // noop
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_shard_t fromosd;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, "");
+ info.last_backfill = last_backfill;
+ eversion_t stat_version(10, 1);
+ info.stats.version = stat_version;
+ log.tail = olog.tail = eversion_t(1, 1);
+ log.head = olog.head = eversion_t(2, 1);
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(0U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(last_backfill, info.last_backfill);
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ merge_log(oinfo, olog, fromosd, info, &h,
+ dirty_info, dirty_big_info);
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(0U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+ }
+
+ // head and tail match, last_backfill is not set: info.stats is
+ // copied from oinfo.stats but info.stats.reported_* is guaranteed to
+ // never be replaced by a lower version
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_shard_t fromosd;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ eversion_t stat_version(10, 1);
+ oinfo.stats.version = stat_version;
+ info.stats.reported_seq = 1;
+ info.stats.reported_epoch = 10;
+ oinfo.stats.reported_seq = 1;
+ oinfo.stats.reported_epoch = 1;
+ log.tail = olog.tail = eversion_t(1, 1);
+ log.head = olog.head = eversion_t(2, 1);
+ missing.may_include_deletes = false;
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(0U, log.log.size());
+ EXPECT_EQ(eversion_t(), info.stats.version);
+ EXPECT_EQ(1ull, info.stats.reported_seq);
+ EXPECT_EQ(10u, info.stats.reported_epoch);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(info.last_backfill.is_max());
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ merge_log(oinfo, olog, fromosd, info, &h,
+ dirty_info, dirty_big_info);
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(0U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_EQ(1ull, info.stats.reported_seq);
+ EXPECT_EQ(10u, info.stats.reported_epoch);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+ }
+
+ /* Before
+ +--------------------------+
+ | log olog |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ | | x5 | (1,1) < tail
+ | | | |
+ | | | |
+ tail > (1,4) | x7 | |
+ | | | |
+ | | | |
+ head > (1,5) | x9 | (1,5) < head
+ | | | |
+ | | | |
+ +--------+-------+---------+
+
+ After
+ +-----------------
+ | log |
+ +--------+-------+
+ | |object |
+ |version | hash |
+ | | |
+ tail > (1,1) | x5 |
+ | | |
+ | | |
+ | (1,4) | x7 |
+ | | |
+ | | |
+ head > (1,5) | x9 |
+ | | |
+ | | |
+ +--------+-------+
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_shard_t fromosd;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+ missing.may_include_deletes = false;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 4);
+ e.soid.set_hash(0x5); // NOTE(review): the diagram above labels the (1,4) entry x7, the code uses hash 0x5
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = eversion_t(1, 5);
+ e.soid.set_hash(0x9);
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ info.last_update = log.head;
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = eversion_t(1, 5);
+ e.soid.set_hash(0x9);
+ olog.log.push_back(e);
+ olog.head = e.version;
+ }
+
+ hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, "");
+ info.last_backfill = last_backfill;
+ eversion_t stat_version(10, 1);
+ info.stats.version = stat_version;
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(2U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(last_backfill, info.last_backfill);
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ merge_log(oinfo, olog, fromosd, info, &h,
+ dirty_info, dirty_big_info);
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(3U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(dirty_info);
+ EXPECT_TRUE(dirty_big_info);
+ }
+
+ /* +--------------------------+
+ | log olog |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ tail > (1,1) | x5 | (1,1) < tail
+ | | | |
+ | | | |
+ | (1,2) | x3 | (1,2) < lower_bound
+ | | | |
+ | | | |
+ head > (1,3) | x9 | |
+ | DELETE | | |
+ | | | |
+ | | x9 | (2,3) |
+ | | | MODIFY |
+ | | | |
+ | | x7 | (2,4) < head
+ | | | DELETE |
+ +--------+-------+---------+
+
+ The log entry (1,3) deletes the object x9 but the olog entry (2,3) modifies
+ it and is authoritative : the log entry (1,3) is divergent.
+ This case runs with missing.may_include_deletes = true.
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_shard_t fromosd;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ hobject_t divergent_object;
+ missing.may_include_deletes = true;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = eversion_t(1, 2);
+ e.soid.set_hash(0x3);
+ log.log.push_back(e);
+ e.version = eversion_t(1,3);
+ e.soid.set_hash(0x9);
+ divergent_object = e.soid;
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ info.last_update = log.head;
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = eversion_t(1, 2);
+ e.soid.set_hash(0x3);
+ olog.log.push_back(e);
+ e.version = eversion_t(2, 3);
+ e.soid.set_hash(0x9);
+ e.op = pg_log_entry_t::MODIFY;
+ olog.log.push_back(e);
+ e.version = eversion_t(2, 4);
+ e.soid.set_hash(0x7);
+ e.op = pg_log_entry_t::DELETE;
+ olog.log.push_back(e);
+ olog.head = e.version;
+ }
+
+ snapid_t purged_snap(1);
+ {
+ oinfo.last_update = olog.head;
+ oinfo.purged_snaps.insert(purged_snap);
+ }
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.objects.count(divergent_object));
+ EXPECT_EQ(3U, log.log.size());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(log.head, info.last_update);
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ merge_log(oinfo, olog, fromosd, info, &h,
+ dirty_info, dirty_big_info);
+
+ /* When the divergent entry is a DELETE and the authoritative
+ entry is a MODIFY, the object will be added to missing : it is
+ a verifiable side effect proving the entry was identified
+ to be divergent.
+ */
+ EXPECT_TRUE(missing.is_missing(divergent_object));
+ EXPECT_EQ(1U, log.objects.count(divergent_object));
+ EXPECT_EQ(4U, log.log.size());
+ /* DELETE entries from olog that are appended to the head of the
+ log, and the divergent version of the object is removed (added
+ to remove_snap)
+ */
+ EXPECT_EQ(0x9U, remove_snap.front().get_hash());
+ EXPECT_EQ(log.head, info.last_update);
+ EXPECT_TRUE(info.purged_snaps.contains(purged_snap));
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(dirty_info);
+ EXPECT_TRUE(dirty_big_info);
+ }
+
+ /* +--------------------------+
+ | log olog |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ tail > (1,1) | x5 | (1,1) < tail
+ | | | |
+ | | | |
+ | (1,2) | x3 | (1,2) < lower_bound
+ | | | |
+ | | | |
+ head > (1,3) | x9 | |
+ | DELETE | | |
+ | | | |
+ | | x9 | (2,3) |
+ | | | MODIFY |
+ | | | |
+ | | x7 | (2,4) < head
+ | | | DELETE |
+ +--------+-------+---------+
+
+ The log entry (1,3) deletes the object x9 but the olog entry (2,3) modifies
+ it and is authoritative : the log entry (1,3) is divergent.
+ Same layout as the previous case, but missing.may_include_deletes is set to false before merge_log.
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_shard_t fromosd;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ hobject_t divergent_object;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = eversion_t(1, 2);
+ e.soid.set_hash(0x3);
+ log.log.push_back(e);
+ e.version = eversion_t(1,3);
+ e.soid.set_hash(0x9);
+ divergent_object = e.soid;
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ info.last_update = log.head;
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = eversion_t(1, 2);
+ e.soid.set_hash(0x3);
+ olog.log.push_back(e);
+ e.version = eversion_t(2, 3);
+ e.soid.set_hash(0x9);
+ e.op = pg_log_entry_t::MODIFY;
+ olog.log.push_back(e);
+ e.version = eversion_t(2, 4);
+ e.soid.set_hash(0x7);
+ e.op = pg_log_entry_t::DELETE;
+ olog.log.push_back(e);
+ olog.head = e.version;
+ }
+
+ snapid_t purged_snap(1);
+ {
+ oinfo.last_update = olog.head;
+ oinfo.purged_snaps.insert(purged_snap);
+ }
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(1U, log.objects.count(divergent_object));
+ EXPECT_EQ(3U, log.log.size());
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(log.head, info.last_update);
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ missing.may_include_deletes = false;
+ merge_log(oinfo, olog, fromosd, info, &h,
+ dirty_info, dirty_big_info);
+
+ /* When the divergent entry is a DELETE and the authoritative
+ entry is a MODIFY, the object will be added to missing : it is
+ a verifiable side effect proving the entry was identified
+ to be divergent.
+ */
+ EXPECT_TRUE(missing.is_missing(divergent_object));
+ EXPECT_EQ(1U, log.objects.count(divergent_object));
+ EXPECT_EQ(4U, log.log.size());
+ /* DELETE entries from olog that are appended to the head of the
+ log, and the divergent version of the object is removed (added
+ to remove_snap). When peering handles deletes, it is the earlier
+ version that is in the removed list.
+ */
+ EXPECT_EQ(0x7U, remove_snap.front().get_hash());
+ EXPECT_EQ(log.head, info.last_update);
+ EXPECT_TRUE(info.purged_snaps.contains(purged_snap));
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(dirty_info);
+ EXPECT_TRUE(dirty_big_info);
+ }
+
+ /* +--------------------------+
+ | log olog |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ tail > (1,1) | x5 | (1,1) < tail
+ | | | |
+ | | | |
+ | (1,4) | x7 | (1,4) < head
+ | | | |
+ | | | |
+ head > (1,5) | x9 | |
+ | | | |
+ | | | |
+ +--------+-------+---------+
+
+ The head of the log entry (1,5) is divergent because it is greater than the
+ head of olog.
+
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_shard_t fromosd;
+ pg_info_t info;
+ list<hobject_t> remove_snap;
+ bool dirty_info = false;
+ bool dirty_big_info = false;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = eversion_t(1, 4);
+ e.soid.set_hash(0x7);
+ log.log.push_back(e);
+ e.version = eversion_t(1, 5);
+ e.soid.set_hash(0x9);
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ info.last_update = log.head;
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x5);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = eversion_t(1, 4);
+ e.soid.set_hash(0x7);
+ olog.log.push_back(e);
+ olog.head = e.version;
+ }
+
+ hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, "");
+ info.last_backfill = last_backfill;
+ eversion_t stat_version(10, 1);
+ info.stats.version = stat_version;
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(3U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_TRUE(remove_snap.empty());
+ EXPECT_EQ(last_backfill, info.last_backfill);
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_FALSE(is_dirty());
+ EXPECT_FALSE(dirty_info);
+ EXPECT_FALSE(dirty_big_info);
+
+ TestHandler h(remove_snap);
+ missing.may_include_deletes = false;
+ merge_log(oinfo, olog, fromosd, info, &h,
+ dirty_info, dirty_big_info);
+
+ EXPECT_FALSE(missing.have_missing());
+ EXPECT_EQ(2U, log.log.size());
+ EXPECT_EQ(stat_version, info.stats.version);
+ EXPECT_EQ(0x9U, remove_snap.front().get_hash());
+ EXPECT_TRUE(info.purged_snaps.empty());
+ EXPECT_TRUE(is_dirty());
+ EXPECT_TRUE(dirty_info);
+ EXPECT_TRUE(dirty_big_info);
+ }
+
+}
+
+TEST_F(PGLogTest, proc_replica_log) { // One scoped scenario per proc_replica_log() case; each checks omissing and oinfo.last_update/last_complete afterwards.
+ // empty log : no side effect
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_missing_t omissing;
+ pg_shard_t from;
+
+ eversion_t last_update(1, 1);
+ log.head = olog.head = oinfo.last_update = last_update;
+ eversion_t last_complete(1, 1);
+ oinfo.last_complete = last_complete;
+
+ EXPECT_FALSE(omissing.have_missing());
+ EXPECT_EQ(last_update, oinfo.last_update);
+ EXPECT_EQ(last_complete, oinfo.last_complete);
+
+ missing.may_include_deletes = false;
+ proc_replica_log(oinfo, olog, omissing, from);
+
+ EXPECT_FALSE(omissing.have_missing());
+ EXPECT_EQ(last_update, oinfo.last_update);
+ EXPECT_EQ(last_update, oinfo.last_complete);
+ }
+
+ /* +--------------------------+
+ | log olog |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ | | x3 | (1,1) < tail
+ | | | |
+ | | | |
+ tail > (1,2) | x5 | |
+ | | | |
+ | | | |
+ head > (1,3) | x9 | |
+ | DELETE | | |
+ | | | |
+ | | x9 | (2,3) < head
+ | | | DELETE |
+ | | | |
+ +--------+-------+---------+
+
+ The log entry (1,3) deletes the object x9 and the olog entry
+ (2,3) also deletes it : do nothing. The olog tail is ignored
+ because it is before the log tail.
+
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_missing_t omissing;
+ pg_shard_t from;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 2);
+ e.soid.set_hash(0x5);
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = eversion_t(1, 3);
+ e.soid.set_hash(0x9);
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x3);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = eversion_t(2, 3);
+ e.soid.set_hash(0x9);
+ e.op = pg_log_entry_t::DELETE;
+ olog.log.push_back(e);
+ olog.head = e.version;
+
+ oinfo.last_update = olog.head;
+ oinfo.last_complete = olog.head;
+ }
+
+ EXPECT_FALSE(omissing.have_missing());
+ EXPECT_EQ(olog.head, oinfo.last_update);
+ EXPECT_EQ(olog.head, oinfo.last_complete);
+
+ missing.may_include_deletes = false;
+ proc_replica_log(oinfo, olog, omissing, from);
+
+ EXPECT_FALSE(omissing.have_missing());
+ }
+
+ { // olog carries the same versions as log through (1,6), then diverges with (1,7)/(1,8): expects divergent_object missing at need (1,2), last_update (1,6), last_complete (1,1)
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_missing_t omissing;
+ pg_shard_t from;
+
+ hobject_t divergent_object;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ {
+ e.soid = divergent_object;
+ e.soid.set_hash(0x1);
+ e.version = eversion_t(1, 1);
+ log.tail = e.version;
+ log.log.push_back(e);
+
+ e.soid = divergent_object;
+ e.prior_version = eversion_t(1, 1);
+ e.version = eversion_t(1, 2);
+ log.tail = e.version; // NOTE(review): overwrites the tail assigned from (1,1) a few lines above
+ log.log.push_back(e);
+
+ e.soid.set_hash(0x3);
+ e.version = eversion_t(1, 4);
+ log.log.push_back(e);
+
+ e.soid.set_hash(0x7);
+ e.version = eversion_t(1, 5);
+ log.log.push_back(e);
+
+ e.soid.set_hash(0x8);
+ e.version = eversion_t(1, 6);
+ log.log.push_back(e);
+
+ e.soid.set_hash(0x9);
+ e.op = pg_log_entry_t::DELETE;
+ e.version = eversion_t(2, 7);
+ log.log.push_back(e);
+
+ e.soid.set_hash(0xa);
+ e.version = eversion_t(2, 8);
+ log.head = e.version;
+ log.log.push_back(e);
+ }
+ log.index();
+
+ {
+ e.soid = divergent_object;
+ e.soid.set_hash(0x1);
+ e.version = eversion_t(1, 1);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+
+ e.soid = divergent_object;
+ e.prior_version = eversion_t(1, 1);
+ e.version = eversion_t(1, 2);
+ olog.log.push_back(e);
+
+ e.prior_version = eversion_t(0, 0);
+ e.soid.set_hash(0x3);
+ e.version = eversion_t(1, 4);
+ olog.log.push_back(e);
+
+ e.soid.set_hash(0x7);
+ e.version = eversion_t(1, 5);
+ olog.log.push_back(e);
+
+ e.soid.set_hash(0x8);
+ e.version = eversion_t(1, 6);
+ olog.log.push_back(e);
+
+ e.soid.set_hash(0x9); // should not be added to missing, create
+ e.op = pg_log_entry_t::MODIFY;
+ e.version = eversion_t(1, 7);
+ olog.log.push_back(e);
+
+ e.soid = divergent_object; // should be added to missing at 1,2
+ e.op = pg_log_entry_t::MODIFY;
+ e.version = eversion_t(1, 8);
+ e.prior_version = eversion_t(1, 2);
+ olog.log.push_back(e);
+ olog.head = e.version;
+ }
+ oinfo.last_update = olog.head;
+ oinfo.last_complete = olog.head;
+ }
+
+ EXPECT_FALSE(omissing.have_missing());
+ EXPECT_EQ(olog.head, oinfo.last_update);
+ EXPECT_EQ(olog.head, oinfo.last_complete);
+
+ missing.may_include_deletes = false;
+ proc_replica_log(oinfo, olog, omissing, from);
+
+ EXPECT_TRUE(omissing.have_missing());
+ EXPECT_TRUE(omissing.is_missing(divergent_object));
+ EXPECT_EQ(eversion_t(1, 2), omissing.get_items().at(divergent_object).need);
+ EXPECT_EQ(eversion_t(1, 6), oinfo.last_update);
+ EXPECT_EQ(eversion_t(1, 1), oinfo.last_complete);
+ }
+
+ /* +--------------------------+
+ | olog log |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ tail > (1,1) | x9 | (1,1) < tail
+ | | | |
+ | | | |
+ | (1,2) | x3 | (1,2) |
+ | | | |
+ | | | |
+ head > (1,3) | x9 | |
+ | DELETE | | |
+ | | | |
+ | | x9 | (2,3) < head
+ | | | DELETE |
+ | | | |
+ +--------+-------+---------+
+
+ The olog entry (1,3) deletes the object x9 and the log entry
+ (2,3) also deletes it : the olog entry is divergent, so x9 is
+ expected in omissing (need 1,1) and oinfo.last_update becomes 1,2.
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_missing_t omissing;
+ pg_shard_t from;
+
+ eversion_t last_update(1, 2);
+ hobject_t divergent_object;
+ divergent_object.set_hash(0x9);
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = last_update;
+ e.soid.set_hash(0x3);
+ log.log.push_back(e);
+ e.version = eversion_t(2, 3);
+ e.prior_version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ e.version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = last_update;
+ e.soid.set_hash(0x3);
+ olog.log.push_back(e);
+ e.version = eversion_t(1, 3);
+ e.prior_version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ e.op = pg_log_entry_t::DELETE;
+ olog.log.push_back(e);
+ olog.head = e.version;
+
+ oinfo.last_update = olog.head;
+ oinfo.last_complete = olog.head;
+ }
+
+ EXPECT_FALSE(omissing.have_missing());
+ EXPECT_EQ(olog.head, oinfo.last_update);
+ EXPECT_EQ(olog.head, oinfo.last_complete);
+
+ missing.may_include_deletes = false;
+ proc_replica_log(oinfo, olog, omissing, from);
+
+ EXPECT_TRUE(omissing.have_missing());
+ EXPECT_TRUE(omissing.is_missing(divergent_object));
+ EXPECT_EQ(omissing.get_items().at(divergent_object).have, eversion_t(0, 0));
+ EXPECT_EQ(omissing.get_items().at(divergent_object).need, eversion_t(1, 1));
+ EXPECT_EQ(last_update, oinfo.last_update);
+ }
+
+ /* +--------------------------+
+ | olog log |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ tail > (1,1) | x9 | (1,1) < tail
+ | | | |
+ | | | |
+ | (1,2) | x3 | (1,2) |
+ | | | |
+ | | | |
+ head > (1,3) | x9 | |
+ | MODIFY | | |
+ | | | |
+ | | x9 | (2,3) < head
+ | | | DELETE |
+ | | | |
+ +--------+-------+---------+
+
+ The log entry (2,3) deletes the object but the olog entry
+ (1,3) modifies it : the omissing need is expected to move from
+ 1,3 back to 1,1. NOTE(review): divergent_object keeps the default hash here, not x9.
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_missing_t omissing;
+ pg_shard_t from;
+
+ eversion_t last_update(1, 2);
+ hobject_t divergent_object;
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = last_update;
+ e.soid.set_hash(0x3);
+ log.log.push_back(e);
+ e.version = eversion_t(2, 3);
+ e.prior_version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ e.version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = last_update;
+ e.soid.set_hash(0x3);
+ olog.log.push_back(e);
+ e.version = eversion_t(1, 3);
+ e.prior_version = eversion_t(1, 1);
+ e.soid = divergent_object;
+ divergent_object = e.soid;
+ omissing.add(divergent_object, e.version, eversion_t(), false);
+ e.op = pg_log_entry_t::MODIFY;
+ olog.log.push_back(e);
+ olog.head = e.version;
+
+ oinfo.last_update = olog.head;
+ oinfo.last_complete = olog.head;
+ }
+
+ EXPECT_TRUE(omissing.have_missing());
+ EXPECT_TRUE(omissing.is_missing(divergent_object));
+ EXPECT_EQ(eversion_t(1, 3), omissing.get_items().at(divergent_object).need);
+ EXPECT_EQ(olog.head, oinfo.last_update);
+ EXPECT_EQ(olog.head, oinfo.last_complete);
+
+ missing.may_include_deletes = false;
+ proc_replica_log(oinfo, olog, omissing, from);
+
+ EXPECT_TRUE(omissing.have_missing());
+ EXPECT_TRUE(omissing.is_missing(divergent_object));
+ EXPECT_EQ(omissing.get_items().at(divergent_object).have, eversion_t(0, 0));
+ EXPECT_EQ(omissing.get_items().at(divergent_object).need, eversion_t(1, 1));
+ EXPECT_EQ(last_update, oinfo.last_update);
+ }
+
+ /* +--------------------------+
+ | log olog |
+ +--------+-------+---------+
+ | |object | |
+ |version | hash | version |
+ | | | |
+ tail > (1,1) | x9 | (1,1) < tail
+ | | | |
+ | | | |
+ | (1,2) | x3 | (1,2) |
+ | | | |
+ | | | |
+ | | x9 | (1,3) < head
+ | | | MODIFY |
+ | | | |
+ head > (2,3) | x9 | |
+ | DELETE | | |
+ | | | |
+ +--------+-------+---------+
+
+ The log entry (2,3) deletes the object x9 but the olog entry
+ (1,3) modifies it : proc_replica_log should adjust missing to
+ 1,1 for that object until add_next_event in PG::activate processes
+ the delete.
+ */
+ {
+ clear();
+
+ pg_log_t olog;
+ pg_info_t oinfo;
+ pg_missing_t omissing;
+ pg_shard_t from;
+
+ eversion_t last_update(1, 2);
+ hobject_t divergent_object;
+ eversion_t new_version(2, 3);
+ eversion_t divergent_version(1, 3);
+
+ {
+ pg_log_entry_t e;
+ e.mark_unrollbackable();
+
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x9);
+ log.tail = e.version;
+ log.log.push_back(e);
+ e.version = last_update;
+ e.soid.set_hash(0x3);
+ log.log.push_back(e);
+ e.version = new_version;
+ e.prior_version = eversion_t(1, 1);
+ e.soid.set_hash(0x9);
+ e.op = pg_log_entry_t::DELETE;
+ log.log.push_back(e);
+ log.head = e.version;
+ log.index();
+
+ e.op = pg_log_entry_t::MODIFY;
+ e.version = eversion_t(1, 1);
+ e.soid.set_hash(0x9);
+ olog.tail = e.version;
+ olog.log.push_back(e);
+ e.version = last_update;
+ e.soid.set_hash(0x3);
+ olog.log.push_back(e);
+ e.version = divergent_version;
+ e.prior_version = eversion_t(1, 1);
+ e.soid.set_hash(0x9);
+ divergent_object = e.soid;
+ omissing.add(divergent_object, e.version, eversion_t(), false);
+ e.op = pg_log_entry_t::MODIFY;
+ olog.log.push_back(e);
+ olog.head = e.version;
+
+ oinfo.last_update = olog.head;
+ oinfo.last_complete = olog.head;
+ }
+
+ EXPECT_TRUE(omissing.have_missing());
+ EXPECT_TRUE(omissing.is_missing(divergent_object));
+ EXPECT_EQ(divergent_version, omissing.get_items().at(divergent_object).need);
+ EXPECT_EQ(olog.head, oinfo.last_update);
+ EXPECT_EQ(olog.head, oinfo.last_complete);
+
+ missing.may_include_deletes = false;
+ proc_replica_log(oinfo, olog, omissing, from);
+
+ EXPECT_TRUE(omissing.have_missing());
+ EXPECT_TRUE(omissing.get_items().begin()->second.need == eversion_t(1, 1));
+ EXPECT_EQ(last_update, oinfo.last_update);
+ EXPECT_EQ(eversion_t(0, 0), oinfo.last_complete);
+ }
+
+}
+
+// One mk_ple_mod entry diverges on top of the shared base entry.  The case
+// lists the object in `toremove` and in `final` with need (10,100), have (0,0).
+TEST_F(PGLogTest, merge_log_1) {
+  const auto obj = mk_obj(1);
+  const auto base_version = mk_evt(10, 100);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod(obj, base_version, mk_evt(8, 80)));
+  tc.div.push_back(mk_ple_mod(obj, mk_evt(10, 101), base_version));
+
+  tc.final.add(obj, base_version, mk_evt(0, 0), false);
+  tc.toremove.insert(obj);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Two mk_ple_mod_rb entries diverge on top of the base entry.  The expected
+// rollback list is exactly the divergent entries in reverse (newest first).
+TEST_F(PGLogTest, merge_log_2) {
+  const auto obj = mk_obj(1);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, mk_evt(10, 100), mk_evt(8, 80)));
+
+  tc.div.push_back(mk_ple_mod_rb(obj, mk_evt(10, 101), mk_evt(10, 100)));
+  tc.div.push_back(mk_ple_mod_rb(obj, mk_evt(10, 102), mk_evt(10, 101)));
+
+  tc.torollback.insert(tc.torollback.begin(),
+                       tc.div.rbegin(), tc.div.rend());
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Divergent entries mix mk_ple_mod (first) and mk_ple_mod_rb (second).  The
+// case lists the object in `toremove` and in `final` with need (10,100),
+// have (0,0).
+TEST_F(PGLogTest, merge_log_3) {
+  const auto obj = mk_obj(1);
+  const auto base_version = mk_evt(10, 100);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, mk_evt(8, 80)));
+
+  tc.div.push_back(mk_ple_mod(obj, mk_evt(10, 101), base_version));
+  tc.div.push_back(mk_ple_mod_rb(obj, mk_evt(10, 102), mk_evt(10, 101)));
+
+  tc.final.add(obj, base_version, mk_evt(0, 0), false);
+  tc.toremove.insert(obj);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Two mk_ple_mod_rb divergent entries, with the object already in `init` at
+// the newest divergent version (10,102).  `final` holds it at the base
+// version (10,100) instead.
+TEST_F(PGLogTest, merge_log_4) {
+  const auto obj = mk_obj(1);
+  const auto base_version = mk_evt(10, 100);
+  const auto newest_divergent = mk_evt(10, 102);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, mk_evt(8, 80)));
+
+  tc.div.push_back(mk_ple_mod_rb(obj, mk_evt(10, 101), base_version));
+  tc.div.push_back(mk_ple_mod_rb(obj, newest_divergent, mk_evt(10, 101)));
+
+  tc.init.add(obj, newest_divergent, mk_evt(0, 0), false);
+  tc.final.add(obj, base_version, mk_evt(0, 0), false);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Divergent entries (mk_ple_mod then mk_ple_mod_rb) compete with an auth
+// entry at (11,101).  The case lists the object in `toremove` and in `final`
+// with need (11,101), have (0,0).
+TEST_F(PGLogTest, merge_log_5) {
+  const auto obj = mk_obj(1);
+  const auto base_version = mk_evt(10, 100);
+  const auto auth_version = mk_evt(11, 101);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, mk_evt(8, 80)));
+
+  tc.div.push_back(mk_ple_mod(obj, mk_evt(10, 101), base_version));
+  tc.div.push_back(mk_ple_mod_rb(obj, mk_evt(10, 102), mk_evt(10, 101)));
+
+  tc.auth.push_back(mk_ple_mod(obj, auth_version, base_version));
+
+  tc.final.add(obj, auth_version, mk_evt(0, 0), false);
+  tc.toremove.insert(obj);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// No divergent entries; one auth entry (11,101) follows the base (10,100).
+// `final` holds the object with need (11,101), have (10,100).
+TEST_F(PGLogTest, merge_log_6) {
+  const auto obj = mk_obj(1);
+  const auto base_version = mk_evt(10, 100);
+  const auto auth_version = mk_evt(11, 101);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, mk_evt(8, 80)));
+  tc.auth.push_back(mk_ple_mod(obj, auth_version, base_version));
+
+  tc.final.add(obj, auth_version, base_version, false);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Same logs as merge_log_6, but the object starts in `init` with need
+// (10,100), have (8,80).  `final` keeps have (8,80) and moves need to (11,101).
+TEST_F(PGLogTest, merge_log_7) {
+  const auto obj = mk_obj(1);
+  const auto oldest = mk_evt(8, 80);
+  const auto base_version = mk_evt(10, 100);
+  const auto auth_version = mk_evt(11, 101);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, oldest));
+  tc.auth.push_back(mk_ple_mod(obj, auth_version, base_version));
+
+  tc.init.add(obj, base_version, oldest, false);
+  tc.final.add(obj, auth_version, oldest, false);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// The auth entry is built with mk_ple_dt and the object starts in `init`.
+// `final` holds need (11,101), have (8,80), with the last add() argument true.
+TEST_F(PGLogTest, merge_log_8) {
+  const auto obj = mk_obj(1);
+  const auto oldest = mk_evt(8, 80);
+  const auto base_version = mk_evt(10, 100);
+  const auto auth_version = mk_evt(11, 101);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, oldest));
+  tc.auth.push_back(mk_ple_dt(obj, auth_version, base_version));
+
+  tc.init.add(obj, base_version, oldest, false);
+  tc.final.add(obj, auth_version, oldest, true);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Same logs as merge_log_8 but with deletes_during_peering enabled.  Nothing
+// is added to `final`; the object is listed in `toremove` instead.
+TEST_F(PGLogTest, merge_log_9) {
+  const auto obj = mk_obj(1);
+  const auto oldest = mk_evt(8, 80);
+  const auto base_version = mk_evt(10, 100);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, oldest));
+  tc.auth.push_back(mk_ple_dt(obj, mk_evt(11, 101), base_version));
+
+  tc.init.add(obj, base_version, oldest, false);
+  tc.toremove.insert(obj);
+  tc.deletes_during_peering = true;
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Like merge_log_8, except the auth entry is built with mk_ple_ldt.  `final`
+// again holds need (11,101), have (8,80), with the last add() argument true.
+TEST_F(PGLogTest, merge_log_10) {
+  const auto obj = mk_obj(1);
+  const auto oldest = mk_evt(8, 80);
+  const auto base_version = mk_evt(10, 100);
+  const auto auth_version = mk_evt(11, 101);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, oldest));
+  tc.auth.push_back(mk_ple_ldt(obj, auth_version, base_version));
+
+  tc.init.add(obj, base_version, oldest, false);
+  tc.final.add(obj, auth_version, oldest, true);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// The object starts in `init` with need set to the divergent version
+// (10,101) and have set to that entry's prior version (10,100).  Nothing is
+// added to `final`.
+TEST_F(PGLogTest, merge_log_prior_version_have) {
+  const auto obj = mk_obj(1);
+  const auto base_version = mk_evt(10, 100);
+  const auto divergent_version = mk_evt(10, 101);
+
+  TestCase tc;
+  tc.base.push_back(mk_ple_mod_rb(obj, base_version, mk_evt(8, 80)));
+  tc.div.push_back(mk_ple_mod(obj, divergent_version, base_version));
+
+  tc.init.add(obj, divergent_version, base_version, false);
+
+  tc.setup();
+  run_test_case(tc);
+}
+
+// Auth and divergent logs with explicitly overridden bounds.  The bounds are
+// set after setup(), and the `final` entry is added after that.
+TEST_F(PGLogTest, merge_log_split_missing_entries_at_head) {
+  const auto obj = mk_obj(1);
+
+  TestCase tc;
+  tc.auth.push_back(mk_ple_mod_rb(obj, mk_evt(10, 100), mk_evt(8, 70)));
+  tc.auth.push_back(mk_ple_mod_rb(obj, mk_evt(15, 150), mk_evt(10, 100)));
+
+  tc.div.push_back(mk_ple_mod(obj, mk_evt(8, 70), mk_evt(8, 65)));
+
+  tc.setup();
+  tc.set_div_bounds(mk_evt(9, 79), mk_evt(8, 69));
+  tc.set_auth_bounds(mk_evt(15, 160), mk_evt(9, 77));
+  tc.final.add(obj, mk_evt(15, 150), mk_evt(8, 70), false);
+
+  run_test_case(tc);
+}
+
+// Three auth entries and no divergent entries, with bounds overridden after
+// setup().  `final` holds the object with need (15,155), have (15,150).
+TEST_F(PGLogTest, olog_tail_gt_log_tail_split) {
+  const auto obj = mk_obj(1);
+
+  TestCase tc;
+  tc.auth.push_back(mk_ple_mod(obj, mk_evt(10, 100), mk_evt(8, 70)));
+  tc.auth.push_back(mk_ple_mod(obj, mk_evt(15, 150), mk_evt(10, 100)));
+  tc.auth.push_back(mk_ple_mod(obj, mk_evt(15, 155), mk_evt(15, 150)));
+
+  tc.setup();
+  tc.set_div_bounds(mk_evt(15, 153), mk_evt(15, 151));
+  tc.set_auth_bounds(mk_evt(15, 156), mk_evt(10, 99));
+  tc.final.add(obj, mk_evt(15, 155), mk_evt(15, 150), false);
+
+  run_test_case(tc);
+}
+
+// Variant of olog_tail_gt_log_tail_split with one divergent entry at (15,153)
+// and the last auth entry at (16,155).  The case lists the object in
+// `toremove` and in `final` with need (16,155), have (0,0).
+TEST_F(PGLogTest, olog_tail_gt_log_tail_split2) {
+  const auto obj = mk_obj(1);
+
+  TestCase tc;
+  tc.auth.push_back(mk_ple_mod(obj, mk_evt(10, 100), mk_evt(8, 70)));
+  tc.auth.push_back(mk_ple_mod(obj, mk_evt(15, 150), mk_evt(10, 100)));
+  tc.auth.push_back(mk_ple_mod(obj, mk_evt(16, 155), mk_evt(15, 150)));
+  tc.div.push_back(mk_ple_mod(obj, mk_evt(15, 153), mk_evt(15, 150)));
+
+  tc.setup();
+  tc.set_div_bounds(mk_evt(15, 153), mk_evt(15, 151));
+  tc.set_auth_bounds(mk_evt(16, 156), mk_evt(10, 99));
+  tc.final.add(obj, mk_evt(16, 155), mk_evt(0, 0), false);
+  tc.toremove.insert(obj);
+
+  run_test_case(tc);
+}
+
+// Builds a log of num_objects entries spread over several namespaces and
+// checks how pg_log_t::filter_log() prunes it: once against the initial map,
+// once more (must be a no-op), and again after the pool's pg_num has grown.
+// Entries in the hit-set namespace must survive every pass.
+TEST_F(PGLogTest, filter_log_1) {
+  clear();
+
+  const int osd_id = 1;
+  const epoch_t epoch = 40;
+  const int64_t pool_id = 1;
+  const int bits = 2;
+  const int max_osd = 4;
+  const int pg_num = max_osd << bits;
+  const int num_objects = 1000;
+  const int num_internal = 10;
+
+  // Set up splitting map.  The map has automatic storage so that it is
+  // released on every exit path, including early returns from ASSERT_*.
+  OSDMap osdmap;
+  uuid_d test_uuid;
+  test_uuid.generate_random();
+  osdmap.build_simple_with_pool(g_ceph_context, epoch, test_uuid, max_osd, bits, bits);
+  osdmap.set_state(osd_id, CEPH_OSD_EXISTS);
+
+  const string hit_set_namespace("internal");
+
+  {
+    pg_log_entry_t e;
+    e.mark_unrollbackable();
+    e.op = pg_log_entry_t::MODIFY;
+    e.soid.pool = pool_id;
+
+    uuid_d uuid_name;
+    for (int i = 1; i <= num_objects; ++i) {
+      e.version = eversion_t(epoch, i);
+      // Use this to generate random file names
+      uuid_name.generate_random();
+      ostringstream name;
+      name << uuid_name;
+      e.soid.oid.name = name.str();
+      // First has no namespace
+      if (i != 1) {
+        // num_internal have the internal namespace
+        if (i <= num_internal + 1) {
+          e.soid.nspace = hit_set_namespace;
+        } else { // rest have different namespaces
+          ostringstream ns;
+          ns << "ns" << i;
+          e.soid.nspace = ns.str();
+        }
+      }
+      log.log.push_back(e);
+      if (i == 1)
+        log.tail = e.version;
+    }
+    log.head = e.version;
+    log.index();
+  }
+
+  spg_t pgid(pg_t(2, pool_id), shard_id_t::NO_SHARD);
+
+  // Replace `log` with the subset filter_log() keeps for pgid under the
+  // current state of osdmap.
+  auto refilter = [&] {
+    pg_log_t filtered, reject;
+    pg_log_t::filter_log(
+      pgid, osdmap, hit_set_namespace, log, filtered, reject);
+    log = IndexedLog(filtered);
+  };
+
+  // See if we created the right number of entries
+  int total = log.log.size();
+  ASSERT_EQ(total, num_objects);
+
+  // Some should be removed
+  refilter();
+  EXPECT_LE(log.log.size(), (size_t)total);
+
+  // If we filter a second time, there should be the same total
+  total = log.log.size();
+  refilter();
+  EXPECT_EQ(log.log.size(), (size_t)total);
+
+  // Increase pg_num as if there would be a split
+  const int new_pg_num = pg_num * 16;
+  OSDMap::Incremental inc(epoch + 1);
+  inc.fsid = test_uuid;
+  const pg_pool_t *pool = osdmap.get_pg_pool(pool_id);
+  pg_pool_t newpool;
+  newpool = *pool;
+  newpool.set_pg_num(new_pg_num);
+  newpool.set_pgp_num(new_pg_num);
+  inc.new_pools[pool_id] = newpool;
+  int ret = osdmap.apply_incremental(inc);
+  ASSERT_EQ(ret, 0);
+
+  // We should have fewer entries after a filter
+  refilter();
+  EXPECT_LE(log.log.size(), (size_t)total);
+
+  // Make sure all internal entries are retained
+  int count = 0;
+  for (const auto &entry : log.log) {
+    if (entry.soid.nspace == hit_set_namespace)
+      count++;
+  }
+  EXPECT_EQ(count, num_internal);
+}
+
+TEST_F(PGLogTest, get_request) {
+ clear();
+
+ // make sure writes, deletes, and errors are found
+ vector<pg_log_entry_t> entries;
+ hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
+ entries.push_back(
+ pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4),
+ 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1),
+ utime_t(0,1), -ENOENT));
+ entries.push_back(
+ pg_log_entry_t(pg_log_entry_t::MODIFY, oid, eversion_t(6,3), eversion_t(3,4),
+ 2, osd_reqid_t(entity_name_t::CLIENT(777), 8, 2),
+ utime_t(1,2), 0));
+ entries.push_back(
+ pg_log_entry_t(pg_log_entry_t::DELETE, oid, eversion_t(7,4), eversion_t(7,4),
+ 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 3),
+ utime_t(10,2), 0));
+ entries.push_back(
+ pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4),
+ 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4),
+ utime_t(20,1), -ENOENT));
+
+ for (auto &entry : entries) {
+ log.add(entry);
+ }
+
+ for (auto &entry : entries) {
+ eversion_t replay_version;
+ version_t user_version;
+ int return_code = 0;
+ bool got = log.get_request(
+ entry.reqid, &replay_version, &user_version, &return_code);
+ EXPECT_TRUE(got);
+ EXPECT_EQ(entry.return_code, return_code);
+ EXPECT_EQ(entry.version, replay_version);
+ EXPECT_EQ(entry.user_version, user_version);
+ }
+}
+
+TEST_F(PGLogTest, ErrorNotIndexedByObject) {
+ clear();
+
+ // make sure writes, deletes, and errors are found
+ hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
+ log.add(
+ pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4),
+ 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1),
+ utime_t(0,1), -ENOENT));
+
+ EXPECT_FALSE(log.logged_object(oid));
+
+ pg_log_entry_t modify(pg_log_entry_t::MODIFY, oid, eversion_t(6,3),
+ eversion_t(3,4), 2,
+ osd_reqid_t(entity_name_t::CLIENT(777), 8, 2),
+ utime_t(1,2), 0);
+ log.add(modify);
+
+ EXPECT_TRUE(log.logged_object(oid));
+ pg_log_entry_t *entry = log.objects[oid];
+ EXPECT_EQ(modify.op, entry->op);
+ EXPECT_EQ(modify.version, entry->version);
+ EXPECT_EQ(modify.prior_version, entry->prior_version);
+ EXPECT_EQ(modify.user_version, entry->user_version);
+ EXPECT_EQ(modify.reqid, entry->reqid);
+
+ pg_log_entry_t del(pg_log_entry_t::DELETE, oid, eversion_t(7,4),
+ eversion_t(7,4), 3,
+ osd_reqid_t(entity_name_t::CLIENT(777), 8, 3),
+ utime_t(10,2), 0);
+ log.add(del);
+
+ EXPECT_TRUE(log.logged_object(oid));
+ entry = log.objects[oid];
+ EXPECT_EQ(del.op, entry->op);
+ EXPECT_EQ(del.version, entry->version);
+ EXPECT_EQ(del.prior_version, entry->prior_version);
+ EXPECT_EQ(del.user_version, entry->user_version);
+ EXPECT_EQ(del.reqid, entry->reqid);
+
+ log.add(
+ pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4),
+ 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4),
+ utime_t(20,1), -ENOENT));
+
+ EXPECT_TRUE(log.logged_object(oid));
+ entry = log.objects[oid];
+ EXPECT_EQ(del.op, entry->op);
+ EXPECT_EQ(del.version, entry->version);
+ EXPECT_EQ(del.prior_version, entry->prior_version);
+ EXPECT_EQ(del.user_version, entry->user_version);
+ EXPECT_EQ(del.reqid, entry->reqid);
+}
+
+// split_into() must hand the parent's missing.may_include_deletes value to
+// the child log, and the child's rebuilt_missing_with_deletes flag must end
+// up with that same value.  Checked for true first, then false.
+TEST_F(PGLogTest, split_into_preserves_may_include_deletes) {
+  clear();
+
+  const bool cases[] = {true, false};
+  for (const bool with_deletes : cases) {
+    rebuilt_missing_with_deletes = false;
+    missing.may_include_deletes = with_deletes;
+
+    PGLog child_log(cct);
+    pg_t child_pg;
+    split_into(child_pg, 6, &child_log);
+
+    ASSERT_EQ(with_deletes, child_log.get_missing().may_include_deletes);
+    ASSERT_EQ(with_deletes, child_log.get_rebuilt_missing_with_deletes());
+  }
+}
+
+// Fixture for rebuild_missing_set_with_deletes() tests.  It combines the
+// PGLogTest fixture with a "memstore"-backed StoreTestFixture.  SetUp()
+// creates one collection containing a single object (existing_oid) whose
+// OI_ATTR attribute encodes an object_info_t at version (6,2);
+// nonexistent_oid is never written to the store.
+class PGLogTestRebuildMissing : public PGLogTest, public StoreTestFixture {
+public:
+  PGLogTestRebuildMissing() : PGLogTest(), StoreTestFixture("memstore") {}
+
+  void SetUp() override {
+    StoreTestFixture::SetUp();
+
+    // Create the collection; a failure here would otherwise only show up as
+    // a confusing error in the object-creating transaction below.
+    test_coll = coll_t(spg_t(pg_t(1, 1)));
+    ch = store->create_new_collection(test_coll);
+    ObjectStore::Transaction create_coll;
+    create_coll.create_collection(test_coll, 0);
+    ASSERT_EQ(0, store->queue_transaction(ch, std::move(create_coll)));
+
+    existing_oid = mk_obj(0);
+    nonexistent_oid = mk_obj(1);
+
+    // Encode the object info that will be stored on existing_oid.
+    object_info_t existing_info;
+    existing_info.version = eversion_t(6, 2);
+    bufferlist enc_oi;
+    encode(existing_info, enc_oi, 0);
+
+    const ghobject_t existing_ghobj(existing_oid);
+    ObjectStore::Transaction create_obj;
+    create_obj.touch(test_coll, existing_ghobj);
+    create_obj.setattr(test_coll, existing_ghobj, OI_ATTR, enc_oi);
+    ASSERT_EQ(0, store->queue_transaction(ch, std::move(create_obj)));
+
+    info.last_backfill = hobject_t::get_max();
+    info.last_complete = eversion_t();
+  }
+
+  void TearDown() override {
+    clear();
+    missing.may_include_deletes = false;
+    StoreTestFixture::TearDown();
+  }
+
+  pg_info_t info;
+  coll_t test_coll;
+  hobject_t existing_oid, nonexistent_oid;
+
+  // Runs rebuild_missing_set_with_deletes() against the fixture's store and
+  // asserts that the resulting missing items equal expected_missing_items.
+  void run_rebuild_missing_test(const map<hobject_t, pg_missing_item> &expected_missing_items) {
+    rebuild_missing_set_with_deletes(store.get(), ch, info);
+    ASSERT_EQ(expected_missing_items, missing.get_items());
+  }
+};
+
+// With no log entries, rebuilding must leave the missing items unchanged.
+TEST_F(PGLogTestRebuildMissing, EmptyLog) {
+  missing.add(existing_oid, mk_evt(6, 2), mk_evt(6, 3), false);
+  missing.add(nonexistent_oid, mk_evt(7, 4), mk_evt(0, 0), false);
+
+  // Snapshot (copy) of the items before the rebuild.
+  const map<hobject_t, pg_missing_item> before = missing.get_items();
+  run_rebuild_missing_test(before);
+}
+
+// The log holds a modify at (6,2), the same version the fixture stores for
+// existing_oid; after the rebuild the missing set is expected to be empty.
+TEST_F(PGLogTestRebuildMissing, SameVersionMod) {
+  const auto stored_version = mk_evt(6, 2);
+  const auto prior_version = mk_evt(6, 1);
+
+  missing.add(existing_oid, stored_version, prior_version, false);
+  log.add(mk_ple_mod(existing_oid, stored_version, prior_version));
+
+  const map<hobject_t, pg_missing_item> nothing_missing;
+  run_rebuild_missing_test(nothing_missing);
+}
+
+// The log holds an mk_ple_dt entry at (7,5) for the object present in the
+// store.  Expected item: need (7,5), have (6,2), third argument true.
+TEST_F(PGLogTestRebuildMissing, DelExisting) {
+  const auto delete_version = mk_evt(7, 5);
+
+  missing.add(existing_oid, mk_evt(6, 3), mk_evt(6, 2), false);
+  log.add(mk_ple_dt(existing_oid, delete_version, mk_evt(7, 4)));
+
+  map<hobject_t, pg_missing_item> expected;
+  expected.emplace(existing_oid,
+                   pg_missing_item(delete_version, mk_evt(6, 2), true));
+  run_rebuild_missing_test(expected);
+}
+
+// The log holds an mk_ple_dt entry at (7,5) for an object that is not in the
+// store.  Expected item: need (7,5), have (0,0), third argument true.
+TEST_F(PGLogTestRebuildMissing, DelNonexistent) {
+  const auto delete_version = mk_evt(7, 5);
+
+  log.add(mk_ple_dt(nonexistent_oid, delete_version, mk_evt(7, 4)));
+
+  map<hobject_t, pg_missing_item> expected;
+  expected.emplace(nonexistent_oid,
+                   pg_missing_item(delete_version, mk_evt(0, 0), true));
+  run_rebuild_missing_test(expected);
+}
+
+// A missing item for an object with no log entry (mk_obj(10)) must be kept
+// as is, next to the item derived from the logged mk_ple_dt entry.
+TEST_F(PGLogTestRebuildMissing, MissingNotInLog) {
+  const auto unlogged_oid = mk_obj(10);
+  const auto delete_version = mk_evt(7, 5);
+
+  missing.add(unlogged_oid, mk_evt(8, 12), mk_evt(8, 10), false);
+  log.add(mk_ple_dt(nonexistent_oid, delete_version, mk_evt(7, 4)));
+
+  map<hobject_t, pg_missing_item> expected;
+  expected.emplace(nonexistent_oid,
+                   pg_missing_item(delete_version, mk_evt(0, 0), true));
+  expected.emplace(unlogged_oid,
+                   pg_missing_item(mk_evt(8, 12), mk_evt(8, 10), false));
+  run_rebuild_missing_test(expected);
+}
+
+
+// Fixture for merge_log_dups() tests.  It is itself a PGLog (protected base)
+// backed by a "memstore" StoreTestFixture.  TearDown() writes the log to the
+// store and reads it back, so every test also checks that log.dups survives
+// a disk round trip.
+class PGLogMergeDupsTest : protected PGLog, public StoreTestFixture {
+
+public:
+
+  PGLogMergeDupsTest() : PGLog(g_ceph_context), StoreTestFixture("memstore") { }
+
+  void SetUp() override {
+    StoreTestFixture::SetUp();
+    ObjectStore::Transaction t;
+    test_coll = coll_t(spg_t(pg_t(1, 1)));
+    auto ch = store->create_new_collection(test_coll);
+    t.create_collection(test_coll, 0);
+    // Fail here rather than in a later, less obvious place if the
+    // collection cannot be created.
+    ASSERT_EQ(0, store->queue_transaction(ch, std::move(t)));
+  }
+
+  void TearDown() override {
+    test_disk_roundtrip();
+    clear();
+    StoreTestFixture::TearDown();
+  }
+
+  // Returns a dup entry at version (a, b) with user version a.
+  static pg_log_dup_t create_dup_entry(unsigned a, unsigned b) {
+    // make each dup_entry unique by using different client id's
+    static unsigned client_id = 777;
+    return pg_log_dup_t(eversion_t(a, b),
+                        a,
+                        osd_reqid_t(entity_name_t::CLIENT(client_id++), 8, 1),
+                        0);
+  }
+
+  // Five dups with versions from (10,11) to (13,99).
+  static std::vector<pg_log_dup_t> example_dups_1() {
+    std::vector<pg_log_dup_t> result = {
+      create_dup_entry(10, 11),
+      create_dup_entry(10, 12),
+      create_dup_entry(11, 1),
+      create_dup_entry(12, 3),
+      create_dup_entry(13, 99)
+    };
+    return result;
+  }
+
+  // Five dups with versions from (12,3) to (16,32).  The first two versions
+  // also occur in example_dups_1().
+  static std::vector<pg_log_dup_t> example_dups_2() {
+    std::vector<pg_log_dup_t> result = {
+      create_dup_entry(12, 3),
+      create_dup_entry(13, 99),
+      create_dup_entry(15, 11),
+      create_dup_entry(16, 14),
+      create_dup_entry(16, 32)
+    };
+    return result;
+  }
+
+  // Appends one new dup to this fixture's log and lowers write_from_dups to
+  // its version if that is smaller.
+  void add_dups(unsigned a, unsigned b) {
+    log.dups.push_back(create_dup_entry(a, b));
+    write_from_dups = std::min(write_from_dups, log.dups.back().version);
+  }
+
+  // Appends all of `l` to this fixture's log, lowering write_from_dups as
+  // needed.
+  void add_dups(const std::vector<pg_log_dup_t>& l) {
+    for (auto& i : l) {
+      log.dups.push_back(i);
+      write_from_dups = std::min(write_from_dups, log.dups.back().version);
+    }
+  }
+
+  // Appends `dups` to an arbitrary IndexedLog, without touching
+  // write_from_dups.
+  static void add_dups(IndexedLog& log, const std::vector<pg_log_dup_t>& dups) {
+    for (auto& i : dups) {
+      log.dups.push_back(i);
+    }
+  }
+
+  // Expects the dup versions to be strictly increasing.
+  void check_order() {
+    eversion_t prev(0, 0);
+
+    for (auto& i : log.dups) {
+      EXPECT_LT(prev, i.version) << "verify versions monotonically increase";
+      prev = i.version;
+    }
+  }
+
+  // Expects dup_index to hold exactly one entry per dup, keyed by reqid.
+  void check_index() {
+    EXPECT_EQ(log.dups.size(), log.dup_index.size());
+    for (auto& i : log.dups) {
+      EXPECT_EQ(1u, log.dup_index.count(i.reqid));
+    }
+  }
+
+  // Writes the log to the store, clears the in-memory state, reads the log
+  // back and asserts that the dups are unchanged.
+  void test_disk_roundtrip() {
+    ObjectStore::Transaction t;
+    hobject_t hoid;
+    hoid.pool = 1;
+    hoid.oid = "log";
+    ghobject_t log_oid(hoid);
+    map<string, bufferlist> km;
+    write_log_and_missing(t, &km, test_coll, log_oid, false);
+    if (!km.empty()) {
+      t.omap_setkeys(test_coll, log_oid, km);
+    }
+    auto ch = store->open_collection(test_coll);
+    ASSERT_EQ(0, store->queue_transaction(ch, std::move(t)));
+
+    auto orig_dups = log.dups;
+    clear();
+    ostringstream err;
+    read_log_and_missing(store.get(), ch, log_oid,
+                         pg_info_t(), err, false);
+    ASSERT_EQ(orig_dups.size(), log.dups.size());
+    ASSERT_EQ(orig_dups, log.dups);
+    // Element-wise comparison; iterate by reference to avoid copying dups.
+    auto dups_it = log.dups.begin();
+    for (const auto& orig_dup : orig_dups) {
+      ASSERT_EQ(orig_dup, *dups_it);
+      ++dups_it;
+    }
+  }
+
+  coll_t test_coll;
+};
+
+// Merging an empty other log must change nothing: the five local dups stay.
+TEST_F(PGLogMergeDupsTest, OtherEmpty) {
+  log.tail = eversion_t(14, 5);
+
+  IndexedLog other;
+
+  add_dups(example_dups_1());
+  index();
+
+  const bool changed = merge_log_dups(other);
+
+  EXPECT_FALSE(changed);
+  EXPECT_EQ(5u, log.dups.size());
+
+  // Only look at front()/back() when the size is as expected.
+  if (log.dups.size() == 5u) {
+    const auto &first = log.dups.front().version;
+    const auto &last = log.dups.back().version;
+    EXPECT_EQ(10u, first.epoch);
+    EXPECT_EQ(11u, first.version);
+    EXPECT_EQ(13u, last.epoch);
+    EXPECT_EQ(99u, last.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+// The local log has no dups and its tail (14,5) is past every dup in the
+// other log, so all five of the other log's dups are expected to be merged.
+TEST_F(PGLogMergeDupsTest, AmEmpty) {
+  log.tail = eversion_t(14, 5);
+  index();
+
+  IndexedLog other;
+  add_dups(other, example_dups_1());
+
+  const bool changed = merge_log_dups(other);
+
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(5u, log.dups.size());
+
+  // Only look at front()/back() when the size is as expected.
+  if (log.dups.size() == 5u) {
+    const auto &first = log.dups.front().version;
+    const auto &last = log.dups.back().version;
+    EXPECT_EQ(10u, first.epoch);
+    EXPECT_EQ(11u, first.version);
+
+    EXPECT_EQ(13u, last.epoch);
+    EXPECT_EQ(99u, last.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+// Like AmEmpty, but the local tail is (12,3).  Only four dups are expected
+// afterwards, the last one being (12,3).
+TEST_F(PGLogMergeDupsTest, AmEmptyOverlap) {
+  log.tail = eversion_t(12, 3);
+  index();
+
+  IndexedLog other;
+  add_dups(other, example_dups_1());
+
+  const bool changed = merge_log_dups(other);
+
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(4u, log.dups.size());
+
+  // Only look at front()/back() when the size is as expected.
+  if (log.dups.size() == 4u) {
+    const auto &first = log.dups.front().version;
+    const auto &last = log.dups.back().version;
+    EXPECT_EQ(10u, first.epoch);
+    EXPECT_EQ(11u, first.version);
+
+    EXPECT_EQ(12u, last.epoch);
+    EXPECT_EQ(3u, last.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+// Both logs carry dups at the same five versions; the merge must report no
+// change and leave five dups.
+TEST_F(PGLogMergeDupsTest, Same) {
+  log.tail = eversion_t(14, 1);
+
+  IndexedLog other;
+
+  add_dups(example_dups_1());
+  index();
+  add_dups(other, example_dups_1());
+
+  const bool changed = merge_log_dups(other);
+
+  EXPECT_FALSE(changed);
+  EXPECT_EQ(5u, log.dups.size());
+
+  // Only look at front()/back() when the size is as expected.
+  if (log.dups.size() == 5u) {
+    const auto &first = log.dups.front().version;
+    const auto &last = log.dups.back().version;
+    EXPECT_EQ(10u, first.epoch);
+    EXPECT_EQ(11u, first.version);
+
+    EXPECT_EQ(13u, last.epoch);
+    EXPECT_EQ(99u, last.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+// The other log's dups extend past the local ones.  With the local tail at
+// (16,14), seven dups are expected, ending at (16,14).
+TEST_F(PGLogMergeDupsTest, Later) {
+  log.tail = eversion_t(16, 14);
+
+  IndexedLog other;
+
+  add_dups(example_dups_1());
+  index();
+  add_dups(other, example_dups_2());
+
+  const bool changed = merge_log_dups(other);
+
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(7u, log.dups.size());
+
+  // Only look at front()/back() when the size is as expected.
+  if (log.dups.size() == 7u) {
+    const auto &first = log.dups.front().version;
+    const auto &last = log.dups.back().version;
+    EXPECT_EQ(10u, first.epoch);
+    EXPECT_EQ(11u, first.version);
+
+    EXPECT_EQ(16u, last.epoch);
+    EXPECT_EQ(14u, last.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+// The other log's dups start before the local ones.  The two example sets
+// overlap at versions (12,3) and (13,99), so the merged list is expected to
+// hold eight dups, from (10,11) to (16,32).
+TEST_F(PGLogMergeDupsTest, Earlier) {
+  log.tail = eversion_t(17, 2);
+
+  IndexedLog olog;
+
+  add_dups(example_dups_2());
+  index();
+  add_dups(olog, example_dups_1());
+
+  bool changed = merge_log_dups(olog);
+
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(8u, log.dups.size());
+
+  // The guard must match the expected size above; with the former value of
+  // 6 the front/back checks were skipped whenever the merge was correct.
+  if (8u == log.dups.size()) {
+    EXPECT_EQ(10u, log.dups.front().version.epoch);
+    EXPECT_EQ(11u, log.dups.front().version.version);
+
+    EXPECT_EQ(16u, log.dups.back().version.epoch);
+    EXPECT_EQ(32u, log.dups.back().version.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+// The other log has one dup before (9,5) and one after (15,11) the local
+// range; both are expected to be merged, giving seven dups in total.
+TEST_F(PGLogMergeDupsTest, Superset) {
+  log.tail = eversion_t(17, 2);
+
+  IndexedLog other;
+
+  add_dups(example_dups_1());
+  index();
+
+  other.dups.push_back(create_dup_entry(9, 5));
+  other.dups.push_back(create_dup_entry(15, 11));
+
+  const bool changed = merge_log_dups(other);
+
+  EXPECT_TRUE(changed);
+  EXPECT_EQ(7u, log.dups.size());
+
+  // Only look at front()/back() when the size is as expected.
+  if (log.dups.size() == 7u) {
+    const auto &first = log.dups.front().version;
+    const auto &last = log.dups.back().version;
+    EXPECT_EQ(9u, first.epoch);
+    EXPECT_EQ(5u, first.version);
+
+    EXPECT_EQ(15u, last.epoch);
+    EXPECT_EQ(11u, last.version);
+  }
+
+  check_order();
+  check_index();
+}
+
+
+// Fixture for IndexedLog::trim() tests.  Tests call SetUp(n) themselves to
+// set the "osd_pg_log_dups_tracked" option on the global CephContext before
+// exercising trim().
+struct PGLogTrimTest :
+  public ::testing::Test,
+  public PGLogTestBase,
+  public PGLog::IndexedLog
+{
+  CephContext *cct = g_ceph_context;
+
+  // Keep the framework's no-argument SetUp() visible next to the overload.
+  using ::testing::Test::SetUp;
+
+  // Sets osd_pg_log_dups_tracked to dup_track.  The value is formatted with
+  // std::to_string because a fixed 10-byte buffer cannot hold a 10-digit
+  // unsigned value plus its terminator and would silently truncate it.
+  void SetUp(unsigned dup_track) {
+    const std::string dup_track_s = std::to_string(dup_track);
+
+    cct->_conf.set_val_or_die("osd_pg_log_dups_tracked", dup_track_s.c_str());
+  }
+}; // struct PGLogTrimTest
+
+
+// Sanity check: the value passed to SetUp() is visible through the config.
+TEST_F(PGLogTrimTest, TestMakingCephContext)
+{
+  const unsigned tracked = 5;
+  SetUp(tracked);
+
+  EXPECT_EQ(5u, cct->_conf->osd_pg_log_dups_tracked);
+}
+
+
+// Trims a six-entry log in two steps, lowering osd_pg_log_dups_tracked from
+// 20 to 15 in between, and checks the log, dups and trimmed-set sizes plus
+// write_from_dups after each step.
+TEST_F(PGLogTrimTest, TestPartialTrim)
+{
+  SetUp(20);
+  PGLog::IndexedLog ilog;
+  ilog.head = mk_evt(24, 0);
+  ilog.skip_can_rollback_to_to_head();
+  ilog.head = mk_evt(9, 0);
+
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  ilog.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  ilog.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
+  ilog.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  ilog.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  // First trim, up to (19,157).
+  std::set<eversion_t> first_trimmed;
+  std::set<std::string> first_trimmed_dups;
+  eversion_t first_write_from = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(19, 157),
+            &first_trimmed, &first_trimmed_dups, &first_write_from);
+
+  EXPECT_EQ(eversion_t(15, 150), first_write_from);
+  EXPECT_EQ(3u, ilog.log.size());
+  EXPECT_EQ(3u, first_trimmed.size());
+  EXPECT_EQ(2u, ilog.dups.size());
+  EXPECT_EQ(0u, first_trimmed_dups.size());
+
+  // Second trim, up to (20,164), with fewer dups tracked.
+  SetUp(15);
+
+  std::set<eversion_t> second_trimmed;
+  std::set<std::string> second_trimmed_dups;
+  eversion_t second_write_from = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(20, 164),
+            &second_trimmed, &second_trimmed_dups, &second_write_from);
+
+  EXPECT_EQ(eversion_t(19, 160), second_write_from);
+  EXPECT_EQ(2u, ilog.log.size());
+  EXPECT_EQ(1u, second_trimmed.size());
+  EXPECT_EQ(2u, ilog.dups.size());
+  EXPECT_EQ(1u, second_trimmed_dups.size());
+}
+
+
+// trim() must accept null pointers for the trimmed and trimmed_dups output
+// sets and still update the log, the dups and write_from_dups.
+TEST_F(PGLogTrimTest, TestTrimNoTrimmed) {
+  SetUp(20);
+  PGLog::IndexedLog ilog;
+  ilog.head = mk_evt(20, 0);
+  ilog.skip_can_rollback_to_to_head();
+  ilog.head = mk_evt(9, 0);
+
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  ilog.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  ilog.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
+  ilog.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  ilog.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  eversion_t lowest_written_dup = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(19, 157), nullptr, nullptr, &lowest_written_dup);
+
+  EXPECT_EQ(eversion_t(15, 150), lowest_written_dup);
+  EXPECT_EQ(3u, ilog.log.size());
+  EXPECT_EQ(2u, ilog.dups.size());
+}
+
+
+// With osd_pg_log_dups_tracked set to 10, trimming to (19,157) removes three
+// log entries but is expected to keep no dups and leave write_from_dups at
+// eversion_t::max().
+TEST_F(PGLogTrimTest, TestTrimNoDups)
+{
+  SetUp(10);
+  PGLog::IndexedLog ilog;
+  ilog.head = mk_evt(20, 0);
+  ilog.skip_can_rollback_to_to_head();
+  ilog.head = mk_evt(9, 0);
+
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  ilog.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  ilog.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
+  ilog.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  ilog.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  std::set<eversion_t> trimmed_versions;
+  std::set<std::string> trimmed_dup_keys;
+  eversion_t lowest_written_dup = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(19, 157),
+            &trimmed_versions, &trimmed_dup_keys, &lowest_written_dup);
+
+  EXPECT_EQ(eversion_t::max(), lowest_written_dup);
+  EXPECT_EQ(3u, ilog.log.size());
+  EXPECT_EQ(3u, trimmed_versions.size());
+  EXPECT_EQ(0u, ilog.dups.size());
+  EXPECT_EQ(0u, trimmed_dup_keys.size());
+}
+
+// Trimming to (9,99), which is below every entry, must leave all six log
+// entries in place and produce no dups and no trimmed versions.
+TEST_F(PGLogTrimTest, TestNoTrim)
+{
+  SetUp(20);
+  PGLog::IndexedLog ilog;
+  ilog.head = mk_evt(24, 0);
+  ilog.skip_can_rollback_to_to_head();
+  ilog.head = mk_evt(9, 0);
+
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  ilog.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  ilog.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
+  ilog.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  ilog.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  std::set<eversion_t> trimmed_versions;
+  std::set<std::string> trimmed_dup_keys;
+  eversion_t lowest_written_dup = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(9, 99),
+            &trimmed_versions, &trimmed_dup_keys, &lowest_written_dup);
+
+  EXPECT_EQ(eversion_t::max(), lowest_written_dup);
+  EXPECT_EQ(6u, ilog.log.size());
+  EXPECT_EQ(0u, trimmed_versions.size());
+  EXPECT_EQ(0u, ilog.dups.size());
+  EXPECT_EQ(0u, trimmed_dup_keys.size());
+}
+
+// Trimming to (22,180), which is above every entry, must empty the log.
+// Five dups are expected to remain, while dup_index is expected to be empty
+// both before and after.
+TEST_F(PGLogTrimTest, TestTrimAll)
+{
+  SetUp(20);
+  PGLog::IndexedLog ilog;
+  EXPECT_EQ(0u, ilog.dup_index.size()); // Sanity check
+  ilog.head = mk_evt(24, 0);
+  ilog.skip_can_rollback_to_to_head();
+  ilog.head = mk_evt(9, 0);
+
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
+  ilog.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
+  ilog.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
+  ilog.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
+  ilog.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));
+
+  std::set<eversion_t> trimmed_versions;
+  std::set<std::string> trimmed_dup_keys;
+  eversion_t lowest_written_dup = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(22, 180),
+            &trimmed_versions, &trimmed_dup_keys, &lowest_written_dup);
+
+  EXPECT_EQ(eversion_t(15, 150), lowest_written_dup);
+  EXPECT_EQ(0u, ilog.log.size());
+  EXPECT_EQ(6u, trimmed_versions.size());
+  EXPECT_EQ(5u, ilog.dups.size());
+  EXPECT_EQ(0u, trimmed_dup_keys.size());
+  EXPECT_EQ(0u, ilog.dup_index.size()); // dup_index entry should be trimmed
+}
+
+
+// After trimming to (19,157), get_request() is checked for three reqids:
+// tid 5 is expected to resolve to (21,165), tid 3 to (15,155), and tid 1 is
+// expected not to be found.
+TEST_F(PGLogTrimTest, TestGetRequest) {
+  SetUp(20);
+  PGLog::IndexedLog ilog;
+  ilog.head = mk_evt(20, 0);
+  ilog.skip_can_rollback_to_to_head();
+  ilog.head = mk_evt(9, 0);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70),
+                      osd_reqid_t(client, 8, 1)));
+  ilog.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100),
+                     osd_reqid_t(client, 8, 2)));
+  ilog.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150),
+                         osd_reqid_t(client, 8, 3)));
+  ilog.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152),
+                      osd_reqid_t(client, 8, 4)));
+  ilog.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160),
+                      osd_reqid_t(client, 8, 5)));
+  ilog.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166),
+                        osd_reqid_t(client, 8, 6)));
+
+  eversion_t lowest_written_dup = eversion_t::max();
+
+  ilog.trim(cct, mk_evt(19, 157), nullptr, nullptr, &lowest_written_dup);
+
+  EXPECT_EQ(eversion_t(15, 150), lowest_written_dup);
+  EXPECT_EQ(3u, ilog.log.size());
+  EXPECT_EQ(2u, ilog.dups.size());
+
+  // Output slots shared by the three lookups below.
+  eversion_t found_version;
+  version_t found_user_version = 0;
+  int found_rc = 0;
+
+  const osd_reqid_t log_reqid(client, 8, 5);
+  const osd_reqid_t dup_reqid(client, 8, 3);
+  const osd_reqid_t bad_reqid(client, 8, 1);
+
+  EXPECT_TRUE(ilog.get_request(log_reqid, &found_version,
+                               &found_user_version, &found_rc));
+  EXPECT_EQ(mk_evt(21, 165), found_version);
+
+  EXPECT_TRUE(ilog.get_request(dup_reqid, &found_version,
+                               &found_user_version, &found_rc));
+  EXPECT_EQ(mk_evt(15, 155), found_version);
+
+  EXPECT_FALSE(ilog.get_request(bad_reqid, &found_version,
+                                &found_user_version, &found_rc));
+}
+
+// Crash-regression checks for _merge_object_divergent_entries(): both
+// scenarios pass original entries that include ERROR entries and only
+// require that the call returns (there are no assertions on the result).
+TEST_F(PGLogTest, _merge_object_divergent_entries) {
+  // Both scenarios operate on the same object.
+  const hobject_t hoid(object_t(/*name*/"notify.7"),
+                       /*key*/string(""),
+                       /*snap*/7,
+                       /*hash*/77,
+                       /*pool*/5,
+                       /*nspace*/string(""));
+
+  {
+    // Test for issue 20843
+    clear();
+    mempool::osd_pglog::list<pg_log_entry_t> divergent;
+    divergent.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952)));
+    divergent.push_back(mk_ple_err(hoid, eversion_t(8336, 958)));
+    divergent.push_back(mk_ple_err(hoid, eversion_t(8336, 959)));
+    divergent.push_back(mk_ple_mod(hoid, eversion_t(8336, 960), eversion_t(8336, 957)));
+
+    log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070)));
+    missing.add(hoid,
+                /*need*/eversion_t(8971, 1070),
+                /*have*/eversion_t(8336, 952),
+                false);
+
+    pg_info_t oinfo;
+    LogHandler rollbacker;
+    _merge_object_divergent_entries(log, hoid,
+                                    divergent, oinfo,
+                                    log.get_can_rollback_to(),
+                                    missing, &rollbacker,
+                                    this);
+    // No core dump
+  }
+
+  {
+    // skip leading error entries
+    clear();
+    mempool::osd_pglog::list<pg_log_entry_t> divergent;
+    divergent.push_back(mk_ple_err(hoid, eversion_t(8336, 956)));
+    divergent.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952)));
+
+    log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070)));
+    missing.add(hoid,
+                /*need*/eversion_t(8971, 1070),
+                /*have*/eversion_t(8336, 952),
+                false);
+
+    pg_info_t oinfo;
+    LogHandler rollbacker;
+    _merge_object_divergent_entries(log, hoid,
+                                    divergent, oinfo,
+                                    log.get_can_rollback_to(),
+                                    missing, &rollbacker,
+                                    this);
+    // No core dump
+  }
+}
+
+TEST(eversion_t, get_key_name) {
+  // The key is the first constructor argument zero-padded to 10 digits,
+  // a dot, then the second argument zero-padded to 20 digits.
+  eversion_t ev(1234, 5678);
+  EXPECT_EQ("0000001234.00000000000000005678", ev.get_key_name());
+}
+
+TEST(pg_log_dup_t, get_key_name) {
+  // A dup's key is its version key with a "dup_" prefix; the remaining
+  // constructor arguments do not show up in the expected key.
+  const osd_reqid_t reqid(entity_name_t::CLIENT(777), 8, 999);
+  pg_log_dup_t dup(eversion_t(1234, 5678), 13, reqid, 15);
+  EXPECT_EQ("dup_0000001234.00000000000000005678", dup.get_key_name());
+}
+
+
+// This tests trim() with SetUp(5): the 2 log entries that survive the
+// trim (107, 106) plus 3 additional dups make a total of 5.
+// Nothing from the original dups is kept.
+TEST_F(PGLogTrimTest, TestTrimDups) {
+  SetUp(5);
+  PGLog::IndexedLog log;
+  // head is raised to (21, 107) only for skip_can_rollback_to_to_head(),
+  // then reset to the tail before any entry is added.
+  log.head = mk_evt(21, 107);
+  log.skip_can_rollback_to_to_head();
+  log.tail = mk_evt(9, 99);
+  log.head = mk_evt(9, 99);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+  auto rid = [&client](int tid) { return osd_reqid_t(client, 8, tid); };
+
+  // One dup is already recorded at the tail version.
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 99), mk_evt(8, 98), rid(1)));
+
+  const pg_log_entry_t entries[] = {
+    mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), rid(1)),
+    mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), rid(2)),
+    mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), rid(3)),
+    mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), rid(4)),
+    mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), rid(5)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), rid(6)),
+  };
+  for (const pg_log_entry_t &entry : entries) {
+    log.add(entry);
+  }
+
+  eversion_t write_from_dups = eversion_t::max();
+  log.trim(cct, mk_evt(21, 105), nullptr, nullptr, &write_from_dups);
+
+  EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log;
+  EXPECT_EQ(2u, log.log.size()) << log;
+  EXPECT_EQ(3u, log.dups.size()) << log;
+}
+
+// This tests trim() with SetUp(9): the 4 log entries that survive the
+// trim (107, 106, 105, 104) plus 5 additional dups make a total of 9.
+// Only 1 of the 2 existing dups is kept.
+TEST_F(PGLogTrimTest, TestTrimDups2) {
+  SetUp(9);
+  PGLog::IndexedLog log;
+  // head is raised to (21, 107) only for skip_can_rollback_to_to_head(),
+  // then reset to the tail before any entry is added.
+  log.head = mk_evt(21, 107);
+  log.skip_can_rollback_to_to_head();
+  log.tail = mk_evt(9, 99);
+  log.head = mk_evt(9, 99);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+  auto rid = [&client](int tid) { return osd_reqid_t(client, 8, tid); };
+
+  // Two dups exist before the trim.
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 98), mk_evt(8, 97), rid(1)));
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 99), mk_evt(8, 98), rid(1)));
+
+  const pg_log_entry_t entries[] = {
+    mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), rid(1)),
+    mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), rid(2)),
+    mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), rid(3)),
+    mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), rid(4)),
+    mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), rid(5)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), rid(6)),
+  };
+  for (const pg_log_entry_t &entry : entries) {
+    log.add(entry);
+  }
+
+  eversion_t write_from_dups = eversion_t::max();
+  log.trim(cct, mk_evt(20, 103), nullptr, nullptr, &write_from_dups);
+
+  EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log;
+  EXPECT_EQ(4u, log.log.size()) << log;
+  EXPECT_EQ(5u, log.dups.size()) << log;
+}
+
+// This tests copy_up_to() with SetUp(5): the copy receives the newest
+// 2 log entries (107, 106) plus 3 additional dups, for a total of 5.
+// Nothing from the original dups is copied.
+TEST_F(PGLogTrimTest, TestCopyUpTo) {
+  SetUp(5);
+  PGLog::IndexedLog log;
+  PGLog::IndexedLog copy;
+  log.tail = mk_evt(9, 99);
+  log.head = mk_evt(9, 99);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+  auto rid = [&client](int tid) { return osd_reqid_t(client, 8, tid); };
+
+  // One dup is already recorded at the tail version.
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 99), mk_evt(8, 98), rid(1)));
+
+  const pg_log_entry_t entries[] = {
+    mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), rid(1)),
+    mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), rid(2)),
+    mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), rid(3)),
+    mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), rid(4)),
+    mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), rid(5)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), rid(6)),
+  };
+  for (const pg_log_entry_t &entry : entries) {
+    log.add(entry);
+  }
+
+  copy.copy_up_to(cct, log, 2);
+
+  EXPECT_EQ(2u, copy.log.size()) << copy;
+  EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy;
+  EXPECT_EQ(copy.tail, mk_evt(21, 105)) << copy;
+  // Tracking 5 means 3 additional as dups
+  EXPECT_EQ(3u, copy.dups.size()) << copy;
+}
+
+// This tests copy_up_to() to make copies of
+// 4 log entries (107, 106, 105, 104) and 5 additional for a total
+// of 9 dups. Only 1 of 2 existing dups is copied.
+TEST_F(PGLogTrimTest, TestCopyUpTo2) {
+  SetUp(9);
+  PGLog::IndexedLog log;
+  PGLog::IndexedLog copy;
+  log.tail = mk_evt(9, 99);
+  log.head = mk_evt(9, 99);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+  auto rid = [&client](int tid) { return osd_reqid_t(client, 8, tid); };
+
+  // Two dups exist before the copy.
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(8, 98), mk_evt(8, 97), rid(1)));
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 99), mk_evt(8, 98), rid(1)));
+
+  const pg_log_entry_t entries[] = {
+    mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), rid(1)),
+    mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), rid(2)),
+    mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), rid(3)),
+    mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), rid(4)),
+    mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), rid(5)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), rid(6)),
+  };
+  for (const pg_log_entry_t &entry : entries) {
+    log.add(entry);
+  }
+
+  copy.copy_up_to(cct, log, 4);
+
+  EXPECT_EQ(4u, copy.log.size()) << copy;
+  EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy;
+  EXPECT_EQ(copy.tail, mk_evt(20, 103)) << copy;
+  // Tracking 9 means 5 additional as dups
+  EXPECT_EQ(5u, copy.dups.size()) << copy;
+}
+
+// This tests copy_after() with SetUp(5) and a version that copies the
+// newest 2 log entries (107, 106) plus 3 additional dups, for a total
+// of 5. Nothing of the original dups is copied.
+TEST_F(PGLogTrimTest, TestCopyAfter) {
+  SetUp(5);
+  PGLog::IndexedLog log;
+  PGLog::IndexedLog copy;
+  log.tail = mk_evt(9, 99);
+  log.head = mk_evt(9, 99);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+  auto rid = [&client](int tid) { return osd_reqid_t(client, 8, tid); };
+
+  // One dup is already recorded at the tail version.
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 99), mk_evt(8, 98), rid(1)));
+
+  const pg_log_entry_t entries[] = {
+    mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), rid(1)),
+    mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), rid(2)),
+    mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), rid(3)),
+    mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), rid(4)),
+    mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), rid(5)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), rid(6)),
+  };
+  for (const pg_log_entry_t &entry : entries) {
+    log.add(entry);
+  }
+
+  copy.copy_after(cct, log, mk_evt(21, 105));
+
+  EXPECT_EQ(2u, copy.log.size()) << copy;
+  EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy;
+  EXPECT_EQ(copy.tail, mk_evt(21, 105)) << copy;
+  // Tracking 5 means 3 additional as dups
+  EXPECT_EQ(3u, copy.dups.size()) << copy;
+}
+
+// With SetUp(3000) and the tail version passed to copy_after(), every
+// log entry and every dup ends up in the copy.
+TEST_F(PGLogTrimTest, TestCopyAfter2) {
+  SetUp(3000);
+  PGLog::IndexedLog log;
+  PGLog::IndexedLog copy;
+  log.tail = mk_evt(9, 99);
+  log.head = mk_evt(9, 99);
+
+  const entity_name_t client = entity_name_t::CLIENT(777);
+  auto rid = [&client](int tid) { return osd_reqid_t(client, 8, tid); };
+
+  // Seven pre-existing dups: (8, 93) through (8, 98), then (9, 99).
+  for (unsigned v = 93; v <= 98; ++v) {
+    log.dups.emplace_back(
+      mk_ple_mod(mk_obj(1), mk_evt(8, v), mk_evt(8, v - 1), rid(1)));
+  }
+  log.dups.emplace_back(
+    mk_ple_mod(mk_obj(1), mk_evt(9, 99), mk_evt(8, 98), rid(1)));
+
+  const pg_log_entry_t entries[] = {
+    mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), rid(1)),
+    mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), rid(2)),
+    mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), rid(3)),
+    mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), rid(4)),
+    mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), rid(5)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), rid(6)),
+    mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), rid(6)),
+  };
+  for (const pg_log_entry_t &entry : entries) {
+    log.add(entry);
+  }
+
+  copy.copy_after(cct, log, mk_evt(9, 99));
+
+  EXPECT_EQ(8u, copy.log.size()) << copy;
+  EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy;
+  EXPECT_EQ(copy.tail, mk_evt(9, 99)) << copy;
+  // Tracking 3000 is larger than all entries, so all dups copied
+  EXPECT_EQ(7u, copy.dups.size()) << copy;
+}
+
+// Local Variables:
+// compile-command: "cd ../.. ; make unittest_pglog ; ./unittest_pglog --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* "
+// End:
diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc
new file mode 100644
index 00000000..3c7020e9
--- /dev/null
+++ b/src/test/osd/TestRados.cc
@@ -0,0 +1,660 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include "common/Mutex.h"
+#include "common/Cond.h"
+#include "common/errno.h"
+#include "common/version.h"
+
+#include <iostream>
+#include <sstream>
+#include <map>
+#include <numeric>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test/osd/RadosModel.h"
+
+
+using namespace std;
+
+class WeightedTestGenerator : public TestOpGenerator
+{
+public:
+
+ /**
+  * Set up a generator that hands out operations according to op_weights.
+  *
+  * @param ops           number of operations requested by the caller
+  * @param objects       number of objects the test works on
+  * @param op_weights    relative weight of each operation type
+  * @param stats         statistics sink passed on to the generated ops
+  * @param max_seconds   time limit for random op generation (0 disables it)
+  * @param ec_pool       initial writes are issued as appends when set
+  * @param balance_reads forwarded to every ReadOp
+  * @param set_redirect  run the set-redirect manifest set-up first
+  * @param set_chunk     run the set-chunk manifest set-up first
+  */
+ WeightedTestGenerator(int ops,
+                       int objects,
+                       map<TestOpType, unsigned int> op_weights,
+                       TestOpStat *stats,
+                       int max_seconds,
+                       bool ec_pool,
+                       bool balance_reads,
+                       bool set_redirect,
+                       bool set_chunk) :
+   m_nextop(NULL), m_op(0), m_ops(ops), m_seconds(max_seconds),
+   m_objects(objects), m_stats(stats),
+   m_total_weight(0),
+   m_ec_pool(ec_pool),
+   m_balance_reads(balance_reads),
+   m_set_redirect(set_redirect),
+   m_set_chunk(set_chunk)
+ {
+   m_start = time(0);
+
+   // Store running totals so an op type can later be chosen by comparing
+   // a random value against each cumulative weight.
+   for (const auto &entry : op_weights) {
+     m_total_weight += entry.second;
+     m_weight_sums.insert(std::make_pair(entry.first, m_total_weight));
+   }
+
+   // The manifest set-up steps count as operations too, so enlarge the
+   // budget; set_redirect takes precedence when both flags are given.
+   if (m_set_redirect) {
+     m_ops = ops + m_objects + m_objects;
+   } else if (m_set_chunk) {
+     /* create 10 chunks per an object*/
+     m_ops = ops + m_objects + m_objects * 10;
+   }
+ }
+
+ /**
+  * Hand out the next operation.
+  *
+  * Returns NULL when the operation budget is used up, when the time limit
+  * expires, or when the manifest set-up asks for the current step to be
+  * retried.
+  */
+ TestOp *next(RadosTestContext &context) override
+ {
+   ++m_op;
+   const bool manifest_mode = m_set_redirect || m_set_chunk;
+
+   // Without manifest set-up, the first m_objects ops create the objects
+   // with exclusive writes (issued as appends on an EC pool).
+   if (!manifest_mode && m_op <= m_objects) {
+     stringstream name;
+     name << m_op;
+     if (m_op % 2) {
+       // make it a long name
+       name << " " << string(300, 'o');
+     }
+     cout << m_op << ": write initial oid " << name.str() << std::endl;
+     context.oid_not_flushing.insert(name.str());
+     return new WriteOp(m_op, &context, name.str(), m_ec_pool, true);
+   }
+   if (m_op >= m_ops) {
+     return NULL;
+   }
+
+   TestOp *op = NULL;
+   if (manifest_mode && init_extensible_tier(context, op)) {
+     return op;
+   }
+
+   // An operation queued in m_nextop goes out before a new random one.
+   if (m_nextop) {
+     op = m_nextop;
+     m_nextop = NULL;
+     return op;
+   }
+
+   // gen_op() may decline to produce an op, so keep drawing until one is
+   // produced or the time limit is reached.
+   while (!op) {
+     const unsigned int draw = rand() % m_total_weight;
+
+     if (m_seconds && time(0) - m_start > m_seconds) {
+       break;
+     }
+
+     for (const auto &bucket : m_weight_sums) {
+       if (draw < bucket.second) {
+         op = gen_op(context, bucket.first);
+         break;
+       }
+     }
+   }
+   return op;
+ }
+
+ // Drives the manifest (set-redirect / set-chunk) set-up that runs before
+ // random operations start. The phase is selected from m_op:
+ //   m_op <= m_objects            write the initial objects
+ //   m_op <= copy_manifest_end    copy objects into the low tier pool
+ //   m_op <= make_manifest_end    issue set_redirect or set_chunk
+ //   m_op == make_manifest_end+1  wait until all ops have drained, then
+ //                                seed oid_redirect_not_in_use
+ // Returns true when this call decided the step: 'op' is the operation to
+ // run, or NULL with m_op decremented so the same step is tried again on
+ // the next call. Returns false (leaving 'op' untouched) once set-up is
+ // over and the caller should generate a random operation instead.
+ bool init_extensible_tier(RadosTestContext &context, TestOp *& op) {
+ /*
+ * set-redirect or set-chunk test (manifest test)
+ * 0. make default objects (using create op)
+ * 1. set-redirect or set-chunk
+ * 2. initialize target objects (using write op)
+ * 3. wait for set-* completion
+ */
+ // Last m_op of the copy phase: 2x objects for set_chunk, 3x otherwise.
+ int copy_manifest_end = 0;
+ if (m_set_chunk) {
+ copy_manifest_end = m_objects*2;
+ } else {
+ copy_manifest_end = m_objects*3;
+ }
+ // Last m_op of the set_redirect/set_chunk phase.
+ int make_manifest_end = copy_manifest_end;
+ if (m_set_chunk) {
+ /* make 10 chunks per an object*/
+ make_manifest_end = make_manifest_end + m_objects * 10;
+ } else {
+ /* redirect */
+ make_manifest_end = make_manifest_end + m_objects;
+ }
+
+ if (m_op <= m_objects) {
+ // Phase 0: create object number m_op; odd-numbered objects get a long name.
+ stringstream oid;
+ oid << m_op;
+ if (m_op % 2) {
+ oid << " " << string(300, 'o');
+ }
+ cout << m_op << ": write initial oid " << oid.str() << std::endl;
+ context.oid_not_flushing.insert(oid.str());
+ if (m_ec_pool) {
+ op = new WriteOp(m_op, &context, oid.str(), true, true);
+ } else {
+ op = new WriteOp(m_op, &context, oid.str(), false, true);
+ }
+ return true;
+ } else if (m_op <= copy_manifest_end) {
+ // Copy phase: the source object cycles through 1..m_objects, the target
+ // name carries the low tier pool name and wraps within the phase length.
+ stringstream oid, oid2;
+ //int _oid = m_op-m_objects;
+ int _oid = m_op % m_objects + 1;
+ oid << _oid;
+ if ((_oid) % 2) {
+ oid << " " << string(300, 'o');
+ }
+ int _oid2 = m_op - m_objects + 1;
+ if (_oid2 > copy_manifest_end - m_objects) {
+ _oid2 -= (copy_manifest_end - m_objects);
+ }
+ oid2 << _oid2 << " " << context.low_tier_pool_name;
+ if ((_oid2) % 2) {
+ oid2 << " " << string(300, 'm');
+ }
+ cout << m_op << ": " << "copy oid " << oid.str() << " target oid "
+ << oid2.str() << std::endl;
+ op = new CopyOp(m_op, &context, oid.str(), oid2.str(), context.low_tier_pool_name);
+ return true;
+ } else if (m_op <= make_manifest_end) {
+ if (m_set_redirect) {
+ // Redirect object N to the target that carries the same number.
+ stringstream oid, oid2;
+ int _oid = m_op-copy_manifest_end;
+ oid << _oid;
+ if ((_oid) % 2) {
+ oid << " " << string(300, 'o');
+ }
+ oid2 << _oid << " " << context.low_tier_pool_name;
+ if ((_oid) % 2) {
+ oid2 << " " << string(300, 'm');
+ }
+ if (context.oid_in_use.count(oid.str())) {
+ /* previous copy is not finished */
+ op = NULL;
+ // Undo the increment done by next() so this step is repeated.
+ m_op--;
+ cout << m_op << " retry set_redirect !" << std::endl;
+ return true;
+ }
+ cout << m_op << ": " << "set_redirect oid " << oid.str() << " target oid "
+ << oid2.str() << std::endl;
+ op = new SetRedirectOp(m_op, &context, oid.str(), oid2.str(), context.pool_name);
+ return true;
+ } else if (m_set_chunk) {
+ stringstream oid;
+ int _oid = m_op % m_objects +1;
+ oid << _oid;
+ if ((_oid) % 2) {
+ oid << " " << string(300, 'o');
+ }
+ if (context.oid_in_use.count(oid.str())) {
+ /* previous set-chunk is not finished */
+ op = NULL;
+ // Undo the increment done by next() so this step is repeated.
+ m_op--;
+ cout << m_op << " retry set_chunk !" << std::endl;
+ return true;
+ }
+ stringstream oid2;
+ oid2 << _oid << " " << context.low_tier_pool_name;
+ if ((_oid) % 2) {
+ oid2 << " " << string(300, 'm');
+ }
+
+ /* make a chunk (random offset, random length -->
+ * target object's random offset)
+ */
+ ObjectDesc contents, contents2;
+ context.find_object(oid.str(), &contents);
+ uint32_t max_len = contents.most_recent_gen()->get_length(contents.most_recent());
+ // NOTE(review): max_len is used as a modulus and the retry loop below only
+ // ends once a non-zero multiple of 512 fits, so this presumably relies on
+ // objects being at least 512 bytes long -- confirm.
+ uint32_t rand_offset = rand() % max_len;
+ uint32_t rand_length = rand() % max_len;
+ // Round both values down to a multiple of 512.
+ rand_offset = rand_offset - (rand_offset % 512);
+ rand_length = rand_length - (rand_length % 512);
+
+ // Redraw until the chunk is non-empty and lies inside the object.
+ while (rand_offset + rand_length > max_len || rand_length == 0) {
+ rand_offset = rand() % max_len;
+ rand_length = rand() % max_len;
+ rand_offset = rand_offset - (rand_offset % 512);
+ rand_length = rand_length - (rand_length % 512);
+ }
+ // The target offset is the same as the source offset.
+ uint32_t rand_tgt_offset = rand_offset;
+ cout << m_op << ": " << "set_chunk oid " << oid.str() << " offset: " << rand_offset
+ << " length: " << rand_length << " target oid " << oid2.str()
+ << " tgt_offset: " << rand_tgt_offset << std::endl;
+ op = new SetChunkOp(m_op, &context, oid.str(), rand_offset, rand_length, oid2.str(),
+ context.low_tier_pool_name, rand_tgt_offset, m_stats);
+ return true;
+ }
+ } else if (m_op == make_manifest_end + 1) {
+ int set_size = context.oid_not_in_use.size();
+ int set_manifest_size = context.oid_redirect_not_in_use.size();
+ cout << m_op << " oid_not_in_use " << set_size << " oid_redirect_not_in_use " << set_manifest_size << std::endl;
+ /* wait for redirect or set_chunk initialization */
+ if (set_size != m_objects || set_manifest_size != 0) {
+ op = NULL;
+ m_op--;
+ cout << m_op << " wait for manifest initialization " << std::endl;
+ return true;
+ }
+ // Every object is idle again: register targets m_objects+1 .. m_objects*2
+ // as available redirect targets, then fall through to "return false".
+ for (int t_op = m_objects+1; t_op <= m_objects*2; t_op++) {
+ stringstream oid;
+ oid << t_op << " " << context.low_tier_pool_name;
+ if (t_op % 2) {
+ oid << " " << string(300, 'm');
+ }
+ cout << " redirect_not_in_use: " << oid.str() << std::endl;
+ context.oid_redirect_not_in_use.insert(oid.str());
+ }
+ }
+
+ return false;
+ }
+
+private:
+
+ /**
+  * Build one operation of the requested type against randomly chosen
+  * objects or snapshots.
+  *
+  * Returns NULL only for TEST_OP_SNAP_REMOVE when every snapshot is in use.
+  * Aborts on an unknown type.
+  */
+ TestOp *gen_op(RadosTestContext &context, TestOpType type)
+ {
+   ceph_assert(context.oid_not_in_use.size());
+
+   // Random object that currently has no operation in flight.
+   auto idle_oid = [&context]() -> string {
+     return *(rand_choose(context.oid_not_in_use));
+   };
+
+   switch (type) {
+   case TEST_OP_READ: {
+     const string oid = idle_oid();
+     return new ReadOp(m_op, &context, oid, m_balance_reads, m_stats);
+   }
+
+   case TEST_OP_WRITE: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "write oid " << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new WriteOp(m_op, &context, oid, false, false, m_stats);
+   }
+
+   case TEST_OP_WRITE_EXCL: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "write (excl) oid "
+          << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new WriteOp(m_op, &context, oid, false, true, m_stats);
+   }
+
+   case TEST_OP_WRITESAME: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "writesame oid "
+          << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new WriteSameOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_DELETE: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "delete oid " << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new DeleteOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_SNAP_CREATE:
+     cout << m_op << ": " << "snap_create" << std::endl;
+     return new SnapCreateOp(m_op, &context, m_stats);
+
+   case TEST_OP_SNAP_REMOVE: {
+     // Nothing to remove unless at least one snapshot is not in use.
+     if (context.snaps.size() <= context.snaps_in_use.size()) {
+       return NULL;
+     }
+     int snap;
+     do {
+       snap = rand_choose(context.snaps)->first;
+     } while (context.snaps_in_use.lookup(snap));  // in use; try again!
+     cout << m_op << ": " << "snap_remove snap " << snap << std::endl;
+     return new SnapRemoveOp(m_op, &context, snap, m_stats);
+   }
+
+   case TEST_OP_ROLLBACK: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "rollback oid " << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new RollbackOp(m_op, &context, oid);
+   }
+
+   case TEST_OP_SETATTR: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "setattr oid " << oid
+          << " current snap is " << context.current_snap << std::endl;
+     return new SetAttrsOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_RMATTR: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "rmattr oid " << oid
+          << " current snap is " << context.current_snap << std::endl;
+     return new RemoveAttrsOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_WATCH: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "watch oid " << oid
+          << " current snap is " << context.current_snap << std::endl;
+     return new WatchOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_COPY_FROM: {
+     const string oid = idle_oid();
+     // The source must be a different object than the destination.
+     string oid2;
+     do {
+       oid2 = idle_oid();
+     } while (oid == oid2);
+     cout << m_op << ": " << "copy_from oid " << oid << " from oid " << oid2
+          << " current snap is " << context.current_snap << std::endl;
+     return new CopyFromOp(m_op, &context, oid, oid2, m_stats);
+   }
+
+   case TEST_OP_HIT_SET_LIST: {
+     const uint32_t hash = rjhash32(rand());
+     cout << m_op << ": " << "hit_set_list " << hash << std::endl;
+     return new HitSetListOp(m_op, &context, hash, m_stats);
+   }
+
+   case TEST_OP_UNDIRTY: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "undirty oid " << oid << std::endl;
+     return new UndirtyOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_IS_DIRTY: {
+     // Unlike the other ops this one draws from oid_not_flushing.
+     const string oid = *(rand_choose(context.oid_not_flushing));
+     return new IsDirtyOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_CACHE_FLUSH: {
+     const string oid = idle_oid();
+     return new CacheFlushOp(m_op, &context, oid, m_stats, true);
+   }
+
+   case TEST_OP_CACHE_TRY_FLUSH: {
+     const string oid = idle_oid();
+     return new CacheFlushOp(m_op, &context, oid, m_stats, false);
+   }
+
+   case TEST_OP_CACHE_EVICT: {
+     const string oid = idle_oid();
+     return new CacheEvictOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_APPEND: {
+     const string oid = idle_oid();
+     cout << "append oid " << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new WriteOp(m_op, &context, oid, true, false, m_stats);
+   }
+
+   case TEST_OP_APPEND_EXCL: {
+     const string oid = idle_oid();
+     cout << "append oid (excl) " << oid << " current snap is "
+          << context.current_snap << std::endl;
+     return new WriteOp(m_op, &context, oid, true, true, m_stats);
+   }
+
+   case TEST_OP_CHUNK_READ: {
+     const string oid = idle_oid();
+     // No target object is chosen for this op; the log line prints an
+     // empty target name.
+     const string oid2;
+     cout << m_op << ": " << "chunk read oid " << oid << " target oid " << oid2 << std::endl;
+     return new ChunkReadOp(m_op, &context, oid, context.pool_name, false, m_stats);
+   }
+
+   case TEST_OP_TIER_PROMOTE: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "tier_promote oid " << oid << std::endl;
+     return new TierPromoteOp(m_op, &context, oid, m_stats);
+   }
+
+   case TEST_OP_SET_REDIRECT: {
+     const string oid = idle_oid();
+     const string oid2 = *(rand_choose(context.oid_redirect_not_in_use));
+     cout << m_op << ": " << "set_redirect oid " << oid << " target oid " << oid2 << std::endl;
+     return new SetRedirectOp(m_op, &context, oid, oid2, context.pool_name, m_stats);
+   }
+
+   case TEST_OP_UNSET_REDIRECT: {
+     const string oid = idle_oid();
+     cout << m_op << ": " << "unset_redirect oid " << oid << std::endl;
+     return new UnsetRedirectOp(m_op, &context, oid, m_stats);
+   }
+
+   default:
+     cerr << m_op << ": Invalid op type " << type << std::endl;
+     ceph_abort();
+     return nullptr;
+   }
+ }
+
+ TestOp *m_nextop;
+ int m_op;
+ int m_ops;
+ int m_seconds;
+ int m_objects;
+ time_t m_start;
+ TestOpStat *m_stats;
+ map<TestOpType, unsigned int> m_weight_sums;
+ unsigned int m_total_weight;
+ bool m_ec_pool;
+ bool m_balance_reads;
+ bool m_set_redirect;
+ bool m_set_chunk;
+};
+
+/**
+ * ceph_test_rados entry point.
+ *
+ * Parses the command line, validates the resulting configuration, and runs
+ * a WeightedTestGenerator against a RadosTestContext until the generator
+ * runs out of operations.
+ *
+ * Every option that takes a value now fails with an error message when the
+ * value is missing; previously the element past the last argument was
+ * handed to atoi() or std::string. The 64-bit size options are parsed with
+ * strtoll() so they are no longer limited to the range of int.
+ *
+ * @return 0 after a completed run, 1 on a usage or configuration error
+ *         (some error paths leave through exit(1) instead).
+ */
+int main(int argc, char **argv)
+{
+  int ops = 1000;
+  int objects = 50;
+  int max_in_flight = 16;
+  int64_t size = 4000000; // 4 MB
+  int64_t min_stride_size = -1, max_stride_size = -1;
+  int max_seconds = 0;
+  bool pool_snaps = false;
+  bool write_fadvise_dontneed = false;
+
+  // Table of operation names accepted by --op; the entry with a NULL name
+  // terminates it.
+  struct {
+    TestOpType op;
+    const char *name;
+    bool ec_pool_valid;
+  } op_types[] = {
+    { TEST_OP_READ, "read", true },
+    { TEST_OP_WRITE, "write", false },
+    { TEST_OP_WRITE_EXCL, "write_excl", false },
+    { TEST_OP_WRITESAME, "writesame", false },
+    { TEST_OP_DELETE, "delete", true },
+    { TEST_OP_SNAP_CREATE, "snap_create", true },
+    { TEST_OP_SNAP_REMOVE, "snap_remove", true },
+    { TEST_OP_ROLLBACK, "rollback", true },
+    { TEST_OP_SETATTR, "setattr", true },
+    { TEST_OP_RMATTR, "rmattr", true },
+    { TEST_OP_WATCH, "watch", true },
+    { TEST_OP_COPY_FROM, "copy_from", true },
+    { TEST_OP_HIT_SET_LIST, "hit_set_list", true },
+    { TEST_OP_IS_DIRTY, "is_dirty", true },
+    { TEST_OP_UNDIRTY, "undirty", true },
+    { TEST_OP_CACHE_FLUSH, "cache_flush", true },
+    { TEST_OP_CACHE_TRY_FLUSH, "cache_try_flush", true },
+    { TEST_OP_CACHE_EVICT, "cache_evict", true },
+    { TEST_OP_APPEND, "append", true },
+    { TEST_OP_APPEND_EXCL, "append_excl", true },
+    { TEST_OP_SET_REDIRECT, "set_redirect", true },
+    { TEST_OP_UNSET_REDIRECT, "unset_redirect", true },
+    { TEST_OP_CHUNK_READ, "chunk_read", true },
+    { TEST_OP_TIER_PROMOTE, "tier_promote", true },
+    { TEST_OP_READ /* grr */, NULL },
+  };
+
+  map<TestOpType, unsigned int> op_weights;
+  string pool_name = "rbd";
+  string low_tier_pool_name = "";
+  bool ec_pool = false;
+  bool no_omap = false;
+  bool no_sparse = false;
+  bool balance_reads = false;
+  bool set_redirect = false;
+  bool set_chunk = false;
+
+  int i = 1;
+  // Returns the value that follows option argv[i] and advances i onto it;
+  // exits with an error when the option is the last argument.
+  auto option_value = [&]() -> const char * {
+    if (i + 1 >= argc) {
+      cerr << "Missing value after " << argv[i] << std::endl;
+      exit(1);
+    }
+    return argv[++i];
+  };
+
+  for (; i < argc; ++i) {
+    if (strcmp(argv[i], "--max-ops") == 0)
+      ops = atoi(option_value());
+    else if (strcmp(argv[i], "--pool") == 0)
+      pool_name = option_value();
+    else if (strcmp(argv[i], "--max-seconds") == 0)
+      max_seconds = atoi(option_value());
+    else if (strcmp(argv[i], "--objects") == 0)
+      objects = atoi(option_value());
+    else if (strcmp(argv[i], "--max-in-flight") == 0)
+      max_in_flight = atoi(option_value());
+    else if (strcmp(argv[i], "--size") == 0)
+      size = strtoll(option_value(), nullptr, 10);
+    else if (strcmp(argv[i], "--min-stride-size") == 0)
+      min_stride_size = strtoll(option_value(), nullptr, 10);
+    else if (strcmp(argv[i], "--max-stride-size") == 0)
+      max_stride_size = strtoll(option_value(), nullptr, 10);
+    else if (strcmp(argv[i], "--no-omap") == 0)
+      no_omap = true;
+    else if (strcmp(argv[i], "--no-sparse") == 0)
+      no_sparse = true;
+    else if (strcmp(argv[i], "--balance_reads") == 0)
+      balance_reads = true;
+    else if (strcmp(argv[i], "--pool-snaps") == 0)
+      pool_snaps = true;
+    else if (strcmp(argv[i], "--write-fadvise-dontneed") == 0)
+      write_fadvise_dontneed = true;
+    else if (strcmp(argv[i], "--ec-pool") == 0) {
+      // Ops are validated against ec_pool as they are parsed, so the flag
+      // has to come first.
+      if (!op_weights.empty()) {
+        cerr << "--ec-pool must be specified prior to any ops" << std::endl;
+        exit(1);
+      }
+      ec_pool = true;
+      no_omap = true;
+      no_sparse = true;
+    } else if (strcmp(argv[i], "--op") == 0) {
+      // --op <name> <weight>
+      i++;
+      if (i == argc) {
+        cerr << "Missing op after --op" << std::endl;
+        return 1;
+      }
+      int j;
+      for (j = 0; op_types[j].name; ++j) {
+        if (strcmp(op_types[j].name, argv[i]) == 0) {
+          break;
+        }
+      }
+      if (!op_types[j].name) {
+        cerr << "unknown op " << argv[i] << std::endl;
+        exit(1);
+      }
+      i++;
+      if (i == argc) {
+        cerr << "Weight unspecified." << std::endl;
+        return 1;
+      }
+      int weight = atoi(argv[i]);
+      if (weight < 0) {
+        cerr << "Weights must be nonnegative." << std::endl;
+        return 1;
+      } else if (weight > 0) {
+        if (ec_pool && !op_types[j].ec_pool_valid) {
+          cerr << "Error: cannot use op type " << op_types[j].name
+               << " with --ec-pool" << std::endl;
+          exit(1);
+        }
+        cout << "adding op weight " << op_types[j].name << " -> " << weight << std::endl;
+        op_weights.insert(pair<TestOpType, unsigned int>(op_types[j].op, weight));
+      }
+    } else if (strcmp(argv[i], "--set_redirect") == 0) {
+      set_redirect = true;
+    } else if (strcmp(argv[i], "--set_chunk") == 0) {
+      set_chunk = true;
+    } else if (strcmp(argv[i], "--low_tier_pool") == 0) {
+      /*
+       * disallow redirect or chunk object into the same pool
+       * to prevent the race. see https://github.com/ceph/ceph/pull/20096
+       */
+      low_tier_pool_name = option_value();
+    } else {
+      cerr << "unknown arg " << argv[i] << std::endl;
+      exit(1);
+    }
+  }
+
+  if (set_redirect || set_chunk) {
+    if (low_tier_pool_name.empty()) {
+      cerr << "low_tier_pool_name is needed" << std::endl;
+      exit(1);
+    }
+  }
+
+  if (op_weights.empty()) {
+    cerr << "No operations specified" << std::endl;
+    exit(1);
+  }
+
+  // Strides default to a tenth and a fifth of the object size.
+  if (min_stride_size < 0)
+    min_stride_size = size / 10;
+  if (max_stride_size < 0)
+    max_stride_size = size / 5;
+
+  cout << pretty_version_to_str() << std::endl;
+  cout << "Configuration:" << std::endl
+       << "\tNumber of operations: " << ops << std::endl
+       << "\tNumber of objects: " << objects << std::endl
+       << "\tMax in flight operations: " << max_in_flight << std::endl
+       << "\tObject size (in bytes): " << size << std::endl
+       << "\tWrite stride min: " << min_stride_size << std::endl
+       << "\tWrite stride max: " << max_stride_size << std::endl;
+
+  if (min_stride_size >= max_stride_size) {
+    cerr << "Error: max_stride_size must be more than min_stride_size"
+         << std::endl;
+    return 1;
+  }
+
+  if (min_stride_size > size || max_stride_size > size) {
+    cerr << "Error: min_stride_size and max_stride_size must be "
+         << "smaller than object size" << std::endl;
+    return 1;
+  }
+
+  if (max_in_flight * 2 > objects) {
+    cerr << "Error: max_in_flight must be <= than the number of objects / 2"
+         << std::endl;
+    return 1;
+  }
+
+  char *id = getenv("CEPH_CLIENT_ID");
+  RadosTestContext context(
+    pool_name,
+    max_in_flight,
+    size,
+    min_stride_size,
+    max_stride_size,
+    no_omap,
+    no_sparse,
+    pool_snaps,
+    write_fadvise_dontneed,
+    low_tier_pool_name,
+    id);
+
+  TestOpStat stats;
+  WeightedTestGenerator gen(
+    ops, objects,
+    op_weights, &stats, max_seconds,
+    ec_pool, balance_reads, set_redirect, set_chunk);
+  int r = context.init();
+  if (r < 0) {
+    cerr << "Error initializing rados test context: "
+         << cpp_strerror(r) << std::endl;
+    exit(1);
+  }
+  context.loop(&gen);
+
+  context.shutdown();
+  cerr << context.errors << " errors." << std::endl;
+  cerr << stats << std::endl;
+  return 0;
+}
diff --git a/src/test/osd/hitset.cc b/src/test/osd/hitset.cc
new file mode 100644
index 00000000..6234bdab
--- /dev/null
+++ b/src/test/osd/hitset.cc
@@ -0,0 +1,197 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ * Copyright 2013 Inktank
+ */
+
+#include "gtest/gtest.h"
+#include "osd/HitSet.h"
+#include <iostream>
+
+/**
+ * Helper mixed into the HitSet test fixtures in this file.
+ *
+ * Takes ownership of the HitSet passed to the constructor and offers helpers
+ * that insert a deterministic series of objects ("hitsettest_<i>") and verify
+ * that they are reported as contained.
+ */
+class HitSetTestStrap {
+public:
+  HitSet *hitset;  // owned by this object; freed in the destructor
+
+  explicit HitSetTestStrap(HitSet *h) : hitset(h) {}
+
+  // Each fixture hands in a freshly new'ed HitSet; release it here so the
+  // test cases no longer leak it.
+  ~HitSetTestStrap() { delete hitset; }
+
+  // Copying would make two objects delete the same HitSet.
+  HitSetTestStrap(const HitSetTestStrap&) = delete;
+  HitSetTestStrap& operator=(const HitSetTestStrap&) = delete;
+
+  /// Insert `count` objects named "hitsettest_0" .. "hitsettest_<count-1>"
+  /// and expect the set's insert counter to equal `count` afterwards.
+  void fill(unsigned count) {
+    char buf[50];
+    for (unsigned i = 0; i < count; ++i) {
+      // bounded formatting: cannot write past the end of buf
+      snprintf(buf, sizeof(buf), "hitsettest_%u", i);
+      hobject_t obj(object_t(buf), "", 0, i, 0, "");
+      hitset->insert(obj);
+    }
+    EXPECT_EQ(count, hitset->insert_count());
+  }
+
+  /// Expect every object that fill(count) would have inserted to be
+  /// reported as contained.
+  void verify_fill(unsigned count) {
+    char buf[50];
+    for (unsigned i = 0; i < count; ++i) {
+      snprintf(buf, sizeof(buf), "hitsettest_%u", i);
+      hobject_t obj(object_t(buf), "", 0, i, 0, "");
+      EXPECT_TRUE(hitset->contains(obj));
+    }
+  }
+
+};
+
+// Fixture whose HitSet wraps a BloomHitSet implementation.
+class BloomHitSetTest : public testing::Test, public HitSetTestStrap {
+public:
+
+  BloomHitSetTest()
+    : HitSetTestStrap(new HitSet(new BloomHitSet))
+  {}
+
+  // Overwrite the fixture's HitSet with one built from new bloom parameters;
+  // fp, target and seed are forwarded to BloomHitSet::Params.
+  void rebuild(double fp, uint64_t target, uint64_t seed) {
+    BloomHitSet::Params *bloom_params = new BloomHitSet::Params(fp, target, seed);
+    HitSet::Params wrapped(bloom_params);
+    HitSet replacement(wrapped);
+    *hitset = replacement;
+  }
+
+  // View the wrapped implementation as a BloomHitSet.
+  BloomHitSet *get_hitset() {
+    auto *raw_impl = hitset->impl.get();
+    return static_cast<BloomHitSet*>(raw_impl);
+  }
+};
+
+// Exercises the BloomHitSet::Params accessors and checks that the values
+// survive an encode/decode round trip.
+TEST_F(BloomHitSetTest, Params) {
+  BloomHitSet::Params original(0.01, 100, 5);
+  EXPECT_EQ(.01, original.get_fpp());
+  EXPECT_EQ((unsigned)100, original.target_size);
+  EXPECT_EQ((unsigned)5, original.seed);
+
+  original.set_fpp(0.1);
+  EXPECT_EQ(0.1, original.get_fpp());
+
+  // serialize, then decode into a default-constructed instance
+  bufferlist encoded;
+  original.encode(encoded);
+  BloomHitSet::Params decoded;
+  auto cursor = encoded.cbegin();
+  decoded.decode(cursor);
+
+  EXPECT_EQ(0.1, decoded.get_fpp());
+  EXPECT_EQ((unsigned)100, decoded.target_size);
+  EXPECT_EQ((unsigned)5, decoded.seed);
+}
+
+// The fixture's constructor must produce a bloom-typed implementation.
+TEST_F(BloomHitSetTest, Construct) {
+  const auto impl_type = hitset->impl->get_type();
+  ASSERT_EQ(impl_type, HitSet::TYPE_BLOOM);
+}
+
+// Rebuilding from explicit parameters keeps the implementation bloom-typed.
+TEST_F(BloomHitSetTest, Rebuild) {
+  rebuild(0.1, 100, 1);
+  const auto impl_type = hitset->impl->get_type();
+  ASSERT_EQ(impl_type, HitSet::TYPE_BLOOM);
+}
+
+// Inserting 50 objects into a set sized for 100 must report all of them as
+// present and must not mark the set as full.
+TEST_F(BloomHitSetTest, InsertsMatch) {
+  const unsigned inserted = 50;
+  rebuild(0.1, 100, 1);
+  fill(inserted);
+  /*
+   * The approximate unique count of a bloom filter is very imprecise;
+   * empirically this test yields 62 regardless of hitset size, so any
+   * value in [50, 62] is accepted.
+   */
+  EXPECT_FALSE(hitset->approx_unique_insert_count() < 50 ||
+               hitset->approx_unique_insert_count() > 62);
+  verify_fill(inserted);
+  EXPECT_FALSE(hitset->is_full());
+}
+
+// Inserting as many objects as the target size must mark the set as full.
+TEST_F(BloomHitSetTest, FillsUp) {
+  const unsigned capacity = 20;
+  rebuild(0.1, capacity, 1);
+  fill(capacity);
+  verify_fill(capacity);
+  EXPECT_TRUE(hitset->is_full());
+}
+
+// With a 0.001 false-positive setting, objects that were never inserted
+// must (almost) never be reported as contained.
+TEST_F(BloomHitSetTest, RejectsNoMatch) {
+  rebuild(0.001, 100, 1);
+  fill(100);
+  verify_fill(100);
+  EXPECT_TRUE(hitset->is_full());
+
+  char buf[50];
+  int matches = 0;
+  // probe 100 names that fill(100) did not insert
+  for (int i = 100; i < 200; ++i) {
+    // bounded formatting: cannot write past the end of buf
+    snprintf(buf, sizeof(buf), "hitsettest_%d", i);
+    hobject_t obj(object_t(buf), "", 0, i, 0, "");
+    if (hitset->contains(obj))
+      ++matches;
+  }
+  // we set a 1 in 1000 false positive; allow one in our 100
+  EXPECT_LT(matches, 2);
+}
+
+// Fixture whose HitSet wraps an ExplicitHashHitSet implementation.
+class ExplicitHashHitSetTest : public testing::Test, public HitSetTestStrap {
+public:
+
+  ExplicitHashHitSetTest()
+    : HitSetTestStrap(new HitSet(new ExplicitHashHitSet))
+  {}
+
+  // View the wrapped implementation as an ExplicitHashHitSet.
+  ExplicitHashHitSet *get_hitset() {
+    auto *raw_impl = hitset->impl.get();
+    return static_cast<ExplicitHashHitSet*>(raw_impl);
+  }
+};
+
+// The fixture's constructor must produce an explicit-hash implementation.
+TEST_F(ExplicitHashHitSetTest, Construct) {
+  const auto impl_type = hitset->impl->get_type();
+  ASSERT_EQ(impl_type, HitSet::TYPE_EXPLICIT_HASH);
+}
+
+// 50 inserted objects must all be found, counted exactly, and must not
+// mark the set as full.
+TEST_F(ExplicitHashHitSetTest, InsertsMatch) {
+  const unsigned inserted = 50;
+  fill(inserted);
+  verify_fill(inserted);
+  EXPECT_EQ(inserted, hitset->approx_unique_insert_count());
+  EXPECT_FALSE(hitset->is_full());
+}
+
+// Objects that were never inserted must not be reported as contained.
+TEST_F(ExplicitHashHitSetTest, RejectsNoMatch) {
+  fill(100);
+  verify_fill(100);
+  EXPECT_FALSE(hitset->is_full());
+
+  char buf[50];
+  int matches = 0;
+  // probe 100 names that fill(100) did not insert
+  for (int i = 100; i < 200; ++i) {
+    // bounded formatting: cannot write past the end of buf
+    snprintf(buf, sizeof(buf), "hitsettest_%d", i);
+    hobject_t obj(object_t(buf), "", 0, i, 0, "");
+    if (hitset->contains(obj)) {
+      ++matches;
+    }
+  }
+  EXPECT_EQ(matches, 0);
+}
+
+// Fixture whose HitSet wraps an ExplicitObjectHitSet implementation.
+class ExplicitObjectHitSetTest : public testing::Test, public HitSetTestStrap {
+public:
+
+  ExplicitObjectHitSetTest()
+    : HitSetTestStrap(new HitSet(new ExplicitObjectHitSet))
+  {}
+
+  // View the wrapped implementation as an ExplicitObjectHitSet.
+  ExplicitObjectHitSet *get_hitset() {
+    auto *raw_impl = hitset->impl.get();
+    return static_cast<ExplicitObjectHitSet*>(raw_impl);
+  }
+};
+
+// The fixture's constructor must produce an explicit-object implementation.
+TEST_F(ExplicitObjectHitSetTest, Construct) {
+  const auto impl_type = hitset->impl->get_type();
+  ASSERT_EQ(impl_type, HitSet::TYPE_EXPLICIT_OBJECT);
+}
+
+// 50 inserted objects must all be found, counted exactly, and must not
+// mark the set as full.
+TEST_F(ExplicitObjectHitSetTest, InsertsMatch) {
+  const unsigned inserted = 50;
+  fill(inserted);
+  verify_fill(inserted);
+  EXPECT_EQ(inserted, hitset->approx_unique_insert_count());
+  EXPECT_FALSE(hitset->is_full());
+}
+
+// Objects that were never inserted must not be reported as contained.
+TEST_F(ExplicitObjectHitSetTest, RejectsNoMatch) {
+  fill(100);
+  verify_fill(100);
+  EXPECT_FALSE(hitset->is_full());
+
+  char buf[50];
+  int matches = 0;
+  // probe 100 names that fill(100) did not insert
+  for (int i = 100; i < 200; ++i) {
+    // bounded formatting: cannot write past the end of buf
+    snprintf(buf, sizeof(buf), "hitsettest_%d", i);
+    hobject_t obj(object_t(buf), "", 0, i, 0, "");
+    if (hitset->contains(obj)) {
+      ++matches;
+    }
+  }
+  EXPECT_EQ(matches, 0);
+}
diff --git a/src/test/osd/osdcap.cc b/src/test/osd/osdcap.cc
new file mode 100644
index 00000000..55dd0e52
--- /dev/null
+++ b/src/test/osd/osdcap.cc
@@ -0,0 +1,1353 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2012 Inktank
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <iostream>
+
+#include "include/stringify.h"
+#include "osd/OSDCap.h"
+
+#include "gtest/gtest.h"
+
+const char *parse_good[] = { // capability strings that the ParseGood test expects OSDCap::parse() to accept
+ "allow *",
+ "allow r",
+ "allow rwx",
+ "allow r pool foo ",
+ "allow r pool=foo",
+ "allow wx pool taco",
+ "allow pool foo r",
+ "allow pool taco wx",
+ "allow wx pool taco object_prefix obj",
+ "allow wx pool taco object_prefix obj_with_underscores_and_no_quotes",
+ "allow pool taco object_prefix obj wx",
+ "allow pool taco object_prefix obj_with_underscores_and_no_quotes wx",
+ "allow rwx pool 'weird name'",
+ "allow rwx pool \"weird name with ''s\"",
+ "allow rwx pool foo, allow r pool bar",
+ "allow rwx pool foo ; allow r pool bar",
+ "allow rwx pool foo ;allow r pool bar",
+ "allow rwx pool foo; allow r pool bar",
+ "allow pool foo rwx, allow pool bar r",
+ "allow pool foo.froo.foo rwx, allow pool bar r",
+ "allow pool foo rwx ; allow pool bar r",
+ "allow pool foo rwx ;allow pool bar r",
+ "allow pool foo rwx; allow pool bar r",
+ "allow pool data rw, allow pool rbd rwx, allow pool images class rbd foo",
+ "allow class-read",
+ "allow class-write",
+ "allow class-read class-write",
+ "allow r class-read pool foo",
+ "allow rw class-read class-write pool foo",
+ "allow r class-read pool foo",
+ "allow pool bar rwx; allow pool baz r class-read",
+ "allow class foo",
+ "allow class clsname \"clsthingidon'tunderstand\"",
+ " allow rwx pool foo; allow r pool bar ",
+ " allow rwx pool foo; allow r pool bar ",
+ " allow pool foo rwx; allow pool bar r ",
+ " allow pool foo rwx; allow pool bar r ",
+ " allow wx pool taco",
+ "\tallow\nwx\tpool \n taco\t",
+ "allow class-read object_prefix rbd_children, allow pool libvirt-pool-test rwx",
+ "allow class-read object_prefix rbd-children, allow pool libvirt_pool_test rwx",
+ "allow pool foo namespace nfoo rwx, allow pool bar namespace=nbar r",
+ "allow pool foo namespace=nfoo rwx ; allow pool bar namespace=nbar r",
+ "allow pool foo namespace nfoo rwx ;allow pool bar namespace nbar r",
+ "allow pool foo namespace=nfoo rwx; allow pool bar namespace nbar object_prefix rbd r",
+ "allow rwx namespace=nfoo tag cephfs data=cephfs_a",
+ "allow rwx namespace foo tag cephfs data =cephfs_a",
+ "allow pool foo namespace=nfoo* rwx",
+ "allow pool foo namespace=\"\" rwx; allow pool bar namespace='' object_prefix rbd r",
+ "allow pool foo namespace \"\" rwx; allow pool bar namespace '' object_prefix rbd r",
+ "profile abc, profile abc pool=bar, profile abc pool=bar namespace=foo",
+ "allow rwx tag application key=value",
+ "allow rwx tag application key = value",
+ "allow rwx tag application key =value",
+ "allow rwx tag application key= value",
+ "allow rwx tag application key = value",
+ "allow all tag application all=all",
+ "allow rwx network 127.0.0.1/8",
+ "allow rwx network ::1/128",
+ "allow rwx network [ff::1]/128",
+ "profile foo network 127.0.0.1/8",
+ "allow rwx namespace foo tag cephfs data =cephfs_a network 127.0.0.1/8",
+ "allow pool foo rwx network 1.2.3.4/24",
+ 0 // null sentinel: the ParseGood loop stops at this entry
+};
+
+// Every entry of parse_good (up to the null sentinel) must parse successfully.
+TEST(OSDCap, ParseGood) {
+  for (const char **entry = parse_good; *entry; ++entry) {
+    string input = *entry;
+    OSDCap cap;
+    std::cout << "Testing good input: '" << input << "'" << std::endl;
+    ASSERT_TRUE(cap.parse(input, &cout));
+  }
+}
+
+const char *parse_bad[] = { // capability strings that the ParseBad test expects OSDCap::parse() to reject
+ "allow r poolfoo",
+ "allow r w",
+ "ALLOW r",
+ "allow rwx,",
+ "allow rwx x",
+ "allow r pool foo r",
+ "allow wwx pool taco",
+ "allow wwx pool taco^funny&chars",
+ "allow rwx pool 'weird name''",
+ "allow rwx object_prefix \"beforepool\" pool weird",
+ "allow rwx auid 123 pool asdf",
+ "allow xrwx pool foo,, allow r pool bar",
+ ";allow rwx pool foo rwx ; allow r pool bar",
+ "allow rwx pool foo ;allow r pool bar gibberish",
+ "allow rwx auid 123 pool asdf namespace=foo",
+ "allow rwx auid 123 namespace",
+ "allow rwx namespace",
+ "allow namespace",
+ "allow namespace=foo",
+ "allow namespace=f*oo",
+ "allow rwx auid 123 namespace asdf",
+ "allow wwx pool ''",
+ "allow rwx tag application key value",
+ "allow rwx auid 123",
+ "allow auid 123 rwx",
+ "allow r pool foo object_prefix blah ; allow w auid 5",
+ 0 // null sentinel: the ParseBad loop stops at this entry
+};
+
+// Every entry of parse_bad (up to the null sentinel) must be rejected.
+TEST(OSDCap, ParseBad) {
+  for (const char **entry = parse_bad; *entry; ++entry) {
+    string input = *entry;
+    OSDCap cap;
+    std::cout << "Testing bad input: '" << input << "'" << std::endl;
+    ASSERT_FALSE(cap.parse(input, &cout));
+  }
+}
+
+TEST(OSDCap, AllowAll) { // allow_all() must be true only after parsing "allow *", not for any r/w/x combination
+ OSDCap cap;
+ entity_addr_t addr;
+ ASSERT_FALSE(cap.allow_all()); // a freshly constructed cap grants nothing
+
+ ASSERT_TRUE(cap.parse("allow r", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear(); // reset before parsing the next cap string
+
+ ASSERT_TRUE(cap.parse("allow w", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow x", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow rwx", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow rw", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow rx", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow wx", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow *", NULL));
+ ASSERT_TRUE(cap.allow_all());
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr));
+ // 'allow *' overrides the class whitelist: the same requests still pass when the class entry's last flag (presumably "whitelisted" - confirm against is_capable) is false
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr));
+}
+
+TEST(OSDCap, AllowPool) { // "allow rwx pool foo": requests whose first argument is "foo" pass, those for "bar" fail
+ OSDCap cap;
+ entity_addr_t addr;
+ bool r = cap.parse("allow rwx pool foo", NULL);
+ ASSERT_TRUE(r);
+
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr));
+
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); // "bar" is not named by the cap
+ ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+}
+
+TEST(OSDCap, AllowPools) { // two grants: rwx on "foo", r on "bar"; any other first argument ("baz") is denied
+ entity_addr_t addr;
+ OSDCap cap;
+ bool r = cap.parse("allow rwx pool foo, allow r pool bar", NULL);
+ ASSERT_TRUE(r);
+
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "", true, false, {}, addr)); // (true, false, no classes) - presumably a plain read - is allowed on "bar"
+ ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); // anything beyond that is denied on "bar"
+ ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("baz", "", {}, "", true, false, {}, addr)); // "baz" appears in no grant
+ ASSERT_FALSE(cap.is_capable("baz", "ns", {}, "", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("baz", "ns", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr));
+}
+
+TEST(OSDCap, AllowPools2) { // an unscoped "allow r" combined with "allow rwx pool foo"
+ entity_addr_t addr;
+ OSDCap cap;
+ bool r = cap.parse("allow r, allow rwx pool foo", NULL);
+ ASSERT_TRUE(r);
+
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+ // the same request flips from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "", true, false, {}, addr)); // the unscoped "allow r" still covers "bar" for (true, false, no classes)
+}
+
+TEST(OSDCap, ObjectPrefix) { // "object_prefix foo": only requests whose fourth argument starts with "foo" pass
+ entity_addr_t addr;
+ OSDCap cap;
+ bool r = cap.parse("allow rwx object_prefix foo", NULL);
+ ASSERT_TRUE(r);
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "_foo", true, true, {{"cls", "", true, true, true}}, addr)); // "foo" is not at the start
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, " foo ", true, true, {{"cls", "", true, true, true}}, addr)); // leading space breaks the prefix match
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "fo", true, true, {{"cls", "", true, true, true}}, addr)); // shorter than the prefix
+}
+
+TEST(OSDCap, ObjectPoolAndPrefix) { // "pool bar object_prefix foo": both the pool and the prefix must match
+ entity_addr_t addr;
+ OSDCap cap;
+ bool r = cap.parse("allow rwx pool bar object_prefix foo", NULL);
+ ASSERT_TRUE(r);
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); // prefix matches but first argument is "baz", not "bar"
+ ASSERT_FALSE(cap.is_capable("baz", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("baz", "", {}, "fo", true, true, {{"cls", "", true, true, true}}, addr));
+}
+
+TEST(OSDCap, Namespace) { // "namespace=nfoo" matches only when the second argument is exactly "nfoo"
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rw namespace=nfoo"));
+
+ ASSERT_TRUE(cap.is_capable("bar", "nfoo", {}, "foo", true, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); // empty namespace does not match
+ ASSERT_FALSE(cap.is_capable("bar", "nfoobar", {}, "foo", true, true, {}, addr)); // no implicit prefix matching without a trailing '*'
+}
+
+TEST(OSDCap, NamespaceGlob) { // "namespace=nfoo*" matches any second argument that starts with "nfoo"
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rw namespace=nfoo*"));
+
+ ASSERT_TRUE(cap.is_capable("bar", "nfoo", {}, "foo", true, true, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "nfoobar", {}, "foo", true, true, {}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); // empty namespace does not match
+ ASSERT_FALSE(cap.is_capable("bar", "nfo", {}, "foo", true, true, {}, addr)); // shorter than the fixed part of the pattern
+}
+
+TEST(OSDCap, BasicR) { // "allow r": only the (true, false, no classes) request - presumably a plain read - is capable
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow r", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); // denied once a class entry is attached
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr));
+}
+
+TEST(OSDCap, BasicW) { // "allow w": only the (false, true, no classes) request - presumably a plain write - is capable
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow w", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); // denied once a class entry is attached
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr));
+}
+
+TEST(OSDCap, BasicX) { // "allow x": class-only requests pass (when whitelisted); requests with either top-level flag set fail
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow x", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr));
+
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr));
+}
+
+TEST(OSDCap, BasicRW) { // "allow rw": every class-free flag combination tested passes; any request carrying a class entry fails
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rw", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+}
+
+TEST(OSDCap, BasicRX) { // "allow rx": (true, false) requests and whitelisted class entries pass; requests with the sixth flag set fail
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rx", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, true, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr));
+}
+
+TEST(OSDCap, BasicWX) { // "allow wx": (false, true) requests and whitelisted class entries pass; requests with the fifth flag set fail
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow wx", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr));
+}
+
+TEST(OSDCap, BasicRWX) { // "allow rwx": every tested flag/class combination passes as long as the class entry is whitelisted
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rwx", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr));
+ // same requests flip from true to false when the class is not on the whitelist (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr));
+}
+
+TEST(OSDCap, BasicRWClassRClassW) { // "allow rw class-read class-write": asserts the same outcomes as the BasicRWX test
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rw class-read class-write", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr));
+ // same requests flip from true to false when the class is not whitelisted (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr));
+}
+
+TEST(OSDCap, ClassR) { // "allow class-read": only a whitelisted class entry with flags (true, false) and no top-level flags passes
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow class-read", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr));
+ // the same request flips from true to false when the class is not whitelisted (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); // class flags (false, true) - presumably a class write - are not covered
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+}
+
+TEST(OSDCap, ClassW) { // "allow class-write": only a whitelisted class entry with flags (false, true) and no top-level flags passes
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow class-write", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr));
+ // the same request flips from true to false when the class is not whitelisted (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); // class flags (true, false) - presumably a class read - are not covered
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+}
+
+TEST(OSDCap, ClassRW) { // "allow class-read class-write": whitelisted class entries pass; any request with a top-level flag set fails
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow class-read class-write", NULL));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr));
+ // same requests flip from true to false when the class is not whitelisted (last class flag false)
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr));
+}
+
+TEST(OSDCap, BasicRClassR) {
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow r class-read", nullptr));
+ // empty namespace: reads and whitelisted class calls are accepted
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ // anything with the write flag (6th argument) set is refused
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // namespace "any": same outcomes, since the grant names no namespace
+ ASSERT_TRUE(caps.is_capable("bar", "any", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "any", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "any", {}, "foo", true, false, {}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("bar", "any", {}, "foo", false, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "any", {}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "any", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ // anything with the write flag set is refused in namespace "any" as well
+ ASSERT_FALSE(caps.is_capable("bar", "any", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "any", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "any", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "any", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "any", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+}
+
+TEST(OSDCap, PoolClassR) {
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow pool bar r class-read, allow pool foo rwx", nullptr));
+ // pool "bar", empty namespace: reads and whitelisted class calls are accepted
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ // pool "bar": anything with the write flag (6th argument) set is refused
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // pool "bar", namespace "ns": same outcomes, since the grant names no namespace
+ ASSERT_TRUE(caps.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "ns", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // pool "foo" holds rwx: reads, writes and whitelisted class calls are all accepted
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ // pool "foo", namespace "ns": same outcomes as the empty namespace
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", false, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ // pool "baz" is named by neither grant, so every request is refused
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+}
+
+TEST(OSDCap, PoolClassRNS) {
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow pool bar namespace='' r class-read, allow pool foo namespace=ns rwx", nullptr));
+ // pool "bar", empty namespace: reads and whitelisted class calls are accepted
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, client_addr));
+ // pool "bar": anything with the write flag (6th argument) set is refused
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // pool "bar", namespace "ns": the grant is limited to the empty namespace, so even reads are refused
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {}, "foo", true, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, client_addr));
+ // pool "bar", namespace "other": write-flagged requests are refused
+ ASSERT_FALSE(caps.is_capable("bar", "other", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "other", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "other", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "other", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "other", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // pool "foo", empty namespace: the rwx grant is limited to namespace "ns", so everything is refused
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ // pool "foo", namespace "ns": reads, writes and whitelisted class calls are all accepted
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", false, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ // pool "baz" is named by neither grant, so every request is refused
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+}
+
+TEST(OSDCap, NSClassR) {
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow namespace '' rw class-read class-write, allow namespace test r", nullptr));
+ // empty namespace, pool "bar": reads, writes and whitelisted class calls are all accepted
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ // empty namespace, pool "foo": same outcomes, since the grant names no pool
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, client_addr));
+ // namespace "test" holds only "r": a plain read is accepted ...
+ ASSERT_TRUE(caps.is_capable("bar", "test", {}, "foo", true, false, {}, client_addr));
+ // ... while writes and class calls are refused
+ ASSERT_FALSE(caps.is_capable("bar", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "test", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "test", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, client_addr)); // NOTE(review): repeats the first assertion of this group
+ ASSERT_FALSE(caps.is_capable("bar", "test", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("bar", "test", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // namespace "test", pool "foo": same outcomes as pool "bar"
+ ASSERT_TRUE(caps.is_capable("foo", "test", {}, "foo", true, false, {}, client_addr));
+
+ ASSERT_FALSE(caps.is_capable("foo", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "test", {}, "foo", true, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "test", {}, "foo", true, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, client_addr)); // NOTE(review): repeats the first assertion of this group
+ ASSERT_FALSE(caps.is_capable("foo", "test", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "test", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // namespace "bad" is named by neither grant, so every request is refused
+ ASSERT_FALSE(caps.is_capable("foo", "bad", {}, "foo", true, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "bad", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "bad", {}, "foo", false, false, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "bad", {}, "foo", false, false, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "bad", {{"application", {{"key", "value"}}}}, "foo", false, false, {{"cls", "", false, true, true}}, client_addr));
+}
+
+TEST(OSDCap, PoolTagBasic) {
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow rwx tag application key=value", nullptr));
+ // the 3rd argument carries the pool's application metadata; an exact key=value match is accepted
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // extra keys or extra applications next to the matching pair do not matter
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ // a missing pair, a wrong key, a wrong application name or no metadata at all is refused
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ // with matching metadata every read/write flag combination and any namespace is accepted
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, false, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // without metadata the same requests are refused
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, false, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr)); // NOTE(review): repeats the first assertion of this group
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {}, client_addr));
+ // whitelisted class calls are accepted when the metadata matches
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, false, true}}, client_addr));
+ // the same class calls are refused once the class is not whitelisted
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, true, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, false, false}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, true, false}}, client_addr));
+ // whitelisted class calls are still refused when the metadata is absent
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, false, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", false, false, true}}, client_addr));
+}
+
+TEST(OSDCap, PoolTagWildK) {
+ // Wildcard key: any key under "application" whose value is "value" satisfies the grant.
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow rwx tag application *=value", nullptr));
+
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // unrelated extra keys or applications do not matter
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ // a differently named key with the right value is accepted; no pair with that value, or a wrong application, is refused
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+}
+
+TEST(OSDCap, PoolTagWildV) {
+ // Wildcard value: the key "key" must be present under "application".
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow rwx tag application key=*", nullptr));
+
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // unrelated extra keys or applications do not matter
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ // metadata lacking the key "key" under "application" is refused
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+}
+
+TEST(OSDCap, PoolTagWildKV) {
+ // Wildcard key and value: only the presence of the "application" entry is required.
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow rwx tag application *=*", nullptr));
+
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // unrelated extra keys or applications do not matter
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ // even an empty key set under "application" is accepted; a missing "application" entry is refused
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+ ASSERT_TRUE(caps.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, client_addr));
+}
+
+TEST(OSDCap, NSPool) {
+ // Namespace and application tag combined: both must match for the request to pass.
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow rwx namespace ns tag application key=value", nullptr));
+
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // a wrong namespace, missing metadata, or a wrong key or value is refused
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns2", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {{"application", {{"key", "value2"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+}
+
+TEST(OSDCap, NSPoolGlob) {
+ // Trailing-star namespace pattern combined with an application tag.
+ entity_addr_t client_addr;
+ OSDCap caps;
+ ASSERT_TRUE(caps.parse("allow rwx namespace ns* tag application key=value", nullptr));
+
+ ASSERT_TRUE(caps.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ // "ns2" matches the pattern too; the empty namespace and mismatching metadata are refused
+ ASSERT_TRUE(caps.is_capable("foo", "ns2", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {{"application", {{"key", "value2"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "ns", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, client_addr));
+ ASSERT_FALSE(caps.is_capable("foo", "", {}, "foo", true, true, {}, client_addr));
+}
+
+TEST(OSDCap, OutputParsed)
+{
+ // Table-driven check: each `input` must parse, and stringify() of the parsed OSDCap must equal `output`.
+ struct CapsTest {
+ const char *input;
+ const char *output;
+ };
+ const CapsTest test_values[] = {
+ {"allow *",
+ "osdcap[grant(*)]"},
+ {"allow r",
+ "osdcap[grant(r)]"},
+ {"allow rx",
+ "osdcap[grant(rx)]"},
+ {"allow rwx",
+ "osdcap[grant(rwx)]"},
+ {"allow rw class-read class-write",
+ "osdcap[grant(rwx)]"},
+ {"allow rw class-read",
+ "osdcap[grant(rw class-read)]"},
+ {"allow rw class-write",
+ "osdcap[grant(rw class-write)]"},
+ {"allow rwx pool images",
+ "osdcap[grant(pool images rwx)]"},
+ {"allow r pool images",
+ "osdcap[grant(pool images r)]"},
+ {"allow pool images rwx",
+ "osdcap[grant(pool images rwx)]"},
+ {"allow pool images r",
+ "osdcap[grant(pool images r)]"},
+ {"allow pool images w",
+ "osdcap[grant(pool images w)]"},
+ {"allow pool images x",
+ "osdcap[grant(pool images x)]"},
+ {"allow r pool images namespace ''",
+ "osdcap[grant(pool images namespace \"\" r)]"},
+ {"allow r pool images namespace foo",
+ "osdcap[grant(pool images namespace foo r)]"},
+ {"allow r pool images namespace \"\"",
+ "osdcap[grant(pool images namespace \"\" r)]"},
+ {"allow r namespace foo",
+ "osdcap[grant(namespace foo r)]"},
+ {"allow pool images r; allow pool rbd rwx",
+ "osdcap[grant(pool images r),grant(pool rbd rwx)]"},
+ {"allow pool images r, allow pool rbd rwx",
+ "osdcap[grant(pool images r),grant(pool rbd rwx)]"},
+ {"allow class-read object_prefix rbd_children, allow pool libvirt-pool-test rwx",
+ "osdcap[grant(object_prefix rbd_children class-read),grant(pool libvirt-pool-test rwx)]"},
+ {"allow rwx tag application key=value",
+ "osdcap[grant(app application key key val value rwx)]"},
+ {"allow rwx namespace ns* tag application key=value",
+ "osdcap[grant(namespace ns* app application key key val value rwx)]"},
+ {"allow all",
+ "osdcap[grant(*)]"},
+ {"allow rwx tag application all=all",
+ "osdcap[grant(app application key * val * rwx)]"},
+ {"allow rwx network 1.2.3.4/24",
+ "osdcap[grant(rwx network 1.2.3.4/24)]"},
+ };
+
+ // Range-for walks the table directly, so no element count has to be derived with sizeof.
+ for (const auto& tc : test_values) {
+ OSDCap cap;
+ std::cout << "Testing input '" << tc.input << "'" << std::endl;
+ ASSERT_TRUE(cap.parse(tc.input));
+ ASSERT_EQ(tc.output, stringify(cap));
+ }
+}
+
+// A lone "allow class foo" grant must authorize calls into class foo and
+// refuse calls into class bar, for every flag combination listed below.
+TEST(OSDCap, AllowClass) {
+  entity_addr_t addr;
+  OSDCap cap;
+  ASSERT_TRUE(cap.parse("allow class foo", NULL));
+
+  // Flag triples attached to each class call, in the order they are checked.
+  // NOTE(review): field order assumed to be (read, write, whitelisted) --
+  // confirm against the class-call descriptor that is_capable() accepts.
+  struct Flags {
+    bool read;
+    bool write;
+    bool whitelisted;
+  };
+  const Flags combos[] = {
+    {true, false, true},
+    {false, true, true},
+    {true, true, true},
+    {true, false, false},
+    {false, true, false},
+    {true, true, false},
+  };
+
+  // class foo is callable whether or not the call is whitelisted
+  // (the streamed text is only emitted when an assertion fails)
+  for (const Flags &f : combos) {
+    ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false,
+                               {{"foo", "", f.read, f.write, f.whitelisted}}, addr))
+      << "class foo: read=" << f.read << " write=" << f.write
+      << " whitelisted=" << f.whitelisted;
+  }
+
+  // class bar was never granted, so every combination is refused
+  for (const Flags &f : combos) {
+    ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false,
+                                {{"bar", "", f.read, f.write, f.whitelisted}}, addr))
+      << "class bar: read=" << f.read << " write=" << f.write
+      << " whitelisted=" << f.whitelisted;
+  }
+}
+
+// "allow class foo xyz" must authorize calls to foo's xyz method and refuse
+// calls into class bar, with or without a method name.
+TEST(OSDCap, AllowClassMethod) {
+  entity_addr_t addr;
+  OSDCap cap;
+  ASSERT_TRUE(cap.parse("allow class foo xyz", NULL));
+
+  // Flag triples attached to each class call, in the order they are checked.
+  // NOTE(review): field order assumed to be (read, write, whitelisted) --
+  // confirm against the class-call descriptor that is_capable() accepts.
+  struct Flags {
+    bool read;
+    bool write;
+    bool whitelisted;
+  };
+  const Flags combos[] = {
+    {true, false, true},
+    {false, true, true},
+    {true, true, true},
+    {true, false, false},
+    {false, true, false},
+    {true, true, false},
+  };
+
+  // foo.xyz is callable whether or not the call is whitelisted
+  // (the streamed text is only emitted when an assertion fails)
+  for (const Flags &f : combos) {
+    ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false,
+                               {{"foo", "xyz", f.read, f.write, f.whitelisted}}, addr))
+      << "foo.xyz: read=" << f.read << " write=" << f.write
+      << " whitelisted=" << f.whitelisted;
+  }
+
+  // class bar is refused: first with an empty method name, then with "xyz"
+  const char *const bar_methods[] = {"", "xyz"};
+  for (const char *method : bar_methods) {
+    for (const Flags &f : combos) {
+      ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false,
+                                  {{"bar", method, f.read, f.write, f.whitelisted}}, addr))
+        << "bar method='" << method << "': read=" << f.read
+        << " write=" << f.write << " whitelisted=" << f.whitelisted;
+    }
+  }
+}
+
+// With both "allow class foo" and "allow class bar" granted, calls into
+// either class must be authorized for every flag combination listed below.
+TEST(OSDCap, AllowClass2) {
+  entity_addr_t addr;
+  OSDCap cap;
+  ASSERT_TRUE(cap.parse("allow class foo, allow class bar", NULL));
+
+  // Flag triples attached to each class call, in the order they are checked.
+  // NOTE(review): field order assumed to be (read, write, whitelisted) --
+  // confirm against the class-call descriptor that is_capable() accepts.
+  struct Flags {
+    bool read;
+    bool write;
+    bool whitelisted;
+  };
+  const Flags combos[] = {
+    {true, false, true},
+    {false, true, true},
+    {true, true, true},
+    {true, false, false},
+    {false, true, false},
+    {true, true, false},
+  };
+
+  // All foo combinations are checked first, then all bar combinations.
+  // (the streamed text is only emitted when an assertion fails)
+  const char *const granted_classes[] = {"foo", "bar"};
+  for (const char *cls : granted_classes) {
+    for (const Flags &f : combos) {
+      ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false,
+                                 {{cls, "", f.read, f.write, f.whitelisted}}, addr))
+        << "class " << cls << ": read=" << f.read << " write=" << f.write
+        << " whitelisted=" << f.whitelisted;
+    }
+  }
+}
+
+// "allow rwx, allow class foo": class foo is always callable, while class
+// bar is callable only when the call is flagged as whitelisted.
+TEST(OSDCap, AllowClassRWX) {
+  entity_addr_t addr;
+  OSDCap cap;
+  ASSERT_TRUE(cap.parse("allow rwx, allow class foo", NULL));
+
+  // The first two flags of each class call, in the order they are checked.
+  // NOTE(review): assumed to mean (read, write), with the third flag being
+  // "whitelisted" -- confirm against the descriptor is_capable() accepts.
+  struct Access {
+    bool read;
+    bool write;
+  };
+  const Access modes[] = {
+    {true, false},
+    {false, true},
+    {true, true},
+  };
+
+  // class foo is callable whether or not the call is whitelisted
+  // (the streamed text is only emitted when an assertion fails)
+  const bool whitelist_states[] = {true, false};
+  for (bool whitelisted : whitelist_states) {
+    for (const Access &m : modes) {
+      ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false,
+                                 {{"foo", "", m.read, m.write, whitelisted}}, addr))
+        << "class foo: read=" << m.read << " write=" << m.write
+        << " whitelisted=" << whitelisted;
+    }
+  }
+
+  // class bar without the whitelist flag is refused
+  for (const Access &m : modes) {
+    ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false,
+                                {{"bar", "", m.read, m.write, false}}, addr))
+      << "class bar (not whitelisted): read=" << m.read
+      << " write=" << m.write;
+  }
+
+  // class bar with the whitelist flag is accepted
+  for (const Access &m : modes) {
+    ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false,
+                               {{"bar", "", m.read, m.write, true}}, addr))
+      << "class bar (whitelisted): read=" << m.read
+      << " write=" << m.write;
+  }
+}
+
+// Exercises ops that call into two classes (foo and bar) at once, across
+// every combination of the three per-call flags, first with only class foo
+// granted and then with both classes granted.
+TEST(OSDCap, AllowClassMulti) {
+  entity_addr_t addr;
+  OSDCap cap;
+  ASSERT_TRUE(cap.parse("allow class foo", NULL));
+
+  // Flag triple attached to one class call.
+  // NOTE(review): field order assumed to be (read, write, whitelisted) --
+  // confirm against the class-call descriptor that is_capable() accepts.
+  struct Flags {
+    bool read;
+    bool write;
+    bool whitelisted;
+  };
+  // Expands the low three bits of `bits` into a triple: bit 2 -> read,
+  // bit 1 -> write, bit 0 -> whitelisted.  Counting from 7 down to 0 walks
+  // the triples from (true,true,true) to (false,false,false).
+  const auto flags_of = [](int bits) {
+    return Flags{(bits & 4) != 0, (bits & 2) != 0, (bits & 1) != 0};
+  };
+  const int all_set = 7;           // (true, true, true)
+  const int whitelisted_only = 1;  // (false, false, true)
+
+  // Only foo is granted, so the whole op is rejected for every bar triple
+  // except (false, false, true): a whitelisted bar call that reads or
+  // writes still needs rwx/class-read,write.
+  // (the streamed text is only emitted when an assertion fails)
+  for (int foo_bits = all_set; foo_bits >= 0; --foo_bits) {
+    const Flags foo = flags_of(foo_bits);
+    for (int bar_bits = all_set; bar_bits >= 0; --bar_bits) {
+      if (bar_bits == whitelisted_only) {
+        continue;  // asserted separately below, after all rejections
+      }
+      const Flags bar = flags_of(bar_bits);
+      ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false,
+                                  {{"foo", "", foo.read, foo.write, foo.whitelisted},
+                                   {"bar", "", bar.read, bar.write, bar.whitelisted}},
+                                  addr))
+        << "foo(read=" << foo.read << " write=" << foo.write
+        << " whitelisted=" << foo.whitelisted << ") bar(read=" << bar.read
+        << " write=" << bar.write << " whitelisted=" << bar.whitelisted << ")";
+    }
+  }
+
+  // these are OK because 'bar' is on the whitelist BUT the calls don't read or write
+  for (int foo_bits = all_set; foo_bits >= 0; --foo_bits) {
+    const Flags foo = flags_of(foo_bits);
+    ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false,
+                               {{"foo", "", foo.read, foo.write, foo.whitelisted},
+                                {"bar", "", false, false, true}},
+                               addr))
+      << "foo(read=" << foo.read << " write=" << foo.write
+      << " whitelisted=" << foo.whitelisted
+      << ") bar(read=0 write=0 whitelisted=1)";
+  }
+
+  // can call any method on foo or bar regardless of whitelist status
+  OSDCap cap2;
+  ASSERT_TRUE(cap2.parse("allow class foo, allow class bar", NULL));
+
+  for (int foo_bits = all_set; foo_bits >= 0; --foo_bits) {
+    const Flags foo = flags_of(foo_bits);
+    for (int bar_bits = all_set; bar_bits >= 0; --bar_bits) {
+      const Flags bar = flags_of(bar_bits);
+      ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false,
+                                  {{"foo", "", foo.read, foo.write, foo.whitelisted},
+                                   {"bar", "", bar.read, bar.write, bar.whitelisted}},
+                                  addr))
+        << "foo(read=" << foo.read << " write=" << foo.write
+        << " whitelisted=" << foo.whitelisted << ") bar(read=" << bar.read
+        << " write=" << bar.write << " whitelisted=" << bar.whitelisted << ")";
+    }
+  }
+}
+
+TEST(OSDCap, AllowClassMultiRWX) {
+ entity_addr_t addr;
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow rwx, allow class foo", NULL));
+
+ // can call anything on foo, but only whitelisted methods on bar
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr));
+
+ // fails because bar not whitelisted
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr));
+
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr));
+
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, false}}, addr));
+ ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr));
+}
+
+// Checks caps built from "profile ..." grants: a read-only/read-write pair
+// first, then the rbd and rbd-read-only profiles, re-parsing into the same
+// OSDCap after clearing its previous grants each time.
+// NOTE(review): the two bare booleans passed to is_capable() look like
+// "op reads" / "op writes", and each braced tuple like a class-call
+// descriptor -- confirm against the is_capable() declaration.
+TEST(OSDCap, AllowProfile) {
+ entity_addr_t addr;
+ OSDCap cap;
+ // the read-only grant carries no pool clause; read-write is tied to pool "abc"
+ ASSERT_TRUE(cap.parse("profile read-only, profile read-write pool abc", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", false, true, {}, addr));
+
+ // RBD
+ cap.grants.clear();
+ ASSERT_TRUE(cap.parse("profile rbd pool abc", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {}, addr));
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "rbd_children", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "rbd_children", false, false,
+ {{"rbd", "", true, false, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", true, true,
+ {{"rbd", "", true, true, true}}, addr));
+
+ // RBD read-only: start again from an empty grant list
+ cap.grants.clear();
+ ASSERT_TRUE(cap.parse("profile rbd-read-only pool abc", NULL));
+ ASSERT_FALSE(cap.allow_all());
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "rbd_children", true, false, {}, addr));
+ ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", true, false,
+ {{"rbd", "", true, false, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("abc", "", {}, "asdf", true, true, {}, addr));
+ // on rbd_header.ABC the tuples naming child_attach / child_detach are
+ // accepted, while the one naming "other function" is refused
+ ASSERT_TRUE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false,
+ {{"rbd", "child_attach", true, true, true}}, addr));
+ ASSERT_TRUE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false,
+ {{"rbd", "child_detach", true, true, true}}, addr));
+ ASSERT_FALSE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false,
+ {{"rbd", "other function", true, true, true}}, addr));
+}
+
+// Grants restricted by "network <cidr>" must only apply to client addresses
+// inside one of the listed networks.
+TEST(OSDCap, network) {
+ entity_addr_t a, b, c;
+ // a lies in 10.0.0.0/8, b in 192.168.0.0/16; c (192.167.x.x) is in neither
+ // NOTE(review): the results of these parse() calls are not checked
+ a.parse("10.1.2.3");
+ b.parse("192.168.2.3");
+ c.parse("192.167.2.3");
+
+ OSDCap cap;
+ ASSERT_TRUE(cap.parse("allow * network 192.168.0.0/16, allow * network 10.0.0.0/8", NULL));
+
+ // identical request each time; only the client address differs
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, a));
+ ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, b));
+ ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, c));
+}
diff --git a/src/test/osd/safe-to-destroy.sh b/src/test/osd/safe-to-destroy.sh
new file mode 100755
index 00000000..670e5ad7
--- /dev/null
+++ b/src/test/osd/safe-to-destroy.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+set -e
+
+# Test driver: exports a monitor address and CEPH_ARGS, then runs each
+# requested TEST_* function between setup and teardown.
+#   $1   - working directory handed to setup, teardown and every test
+#   $2.. - optional test function names; when none are given, every shell
+#          function whose name matches TEST_[0-9a-z_]* is run
+# Returns 1 as soon as setup, a test, or teardown fails.
+# get_unused_port, setup and teardown are not defined in this file;
+# presumably they come from the sourced ceph-helpers.sh.
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:$(get_unused_port)"
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ # NOTE(review): errexit was already enabled at the top of the script
+ set -e
+
+ # default to every TEST_* function name found in the output of `set`
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+# Exercises "ceph osd safe-to-destroy" on a 4-OSD cluster in three states:
+# before any pool exists, after pool "foo" (128 PGs) has been created and
+# is clean, and after osd.0 has been marked out and the cluster is clean
+# again.
+#   $1 - working directory passed to the run_mon/run_mgr/run_osd helpers
+function TEST_safe_to_destroy() {
+    local dir=$1
+
+    run_mon $dir a
+    run_mgr $dir x
+    run_osd $dir 0
+    run_osd $dir 1
+    run_osd $dir 2
+    run_osd $dir 3
+
+    flush_pg_stats
+
+    # no pool has been created yet: each OSD is expected to be reported safe
+    ceph osd safe-to-destroy 0
+    ceph osd safe-to-destroy 1
+    ceph osd safe-to-destroy 2
+    ceph osd safe-to-destroy 3
+
+    ceph osd pool create foo 128
+    sleep 2
+    flush_pg_stats
+    wait_for_clean
+
+    # With the pool in place every OSD must be refused. expect_failure is
+    # handed the pattern 'pgs currently' followed by the full command line,
+    # so the command has to start with the ceph binary (the first call used
+    # to pass a bare "osd ..." and never ran the CLI at all).
+    expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 0
+    expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 1
+    expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 2
+    expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 3
+
+    # after osd.0 is marked out and the cluster is clean again, it is
+    # expected to be safe to destroy
+    ceph osd out 0
+    sleep 2
+    flush_pg_stats
+    wait_for_clean
+
+    ceph osd safe-to-destroy 0
+
+    # even osds without osd_stat are ok if all pgs are active+clean
+    id=`ceph osd create`
+    ceph osd safe-to-destroy $id
+}
+
+# Exercises "ceph osd ok-to-stop" on a 4-OSD cluster holding pool "foo"
+# (size 3), first with min_size 2 and then with min_size 1.
+#   $1 - working directory passed to the run_mon/run_mgr/run_osd helpers
+function TEST_ok_to_stop() {
+ local dir=$1
+
+ run_mon $dir a
+ run_mgr $dir x
+ run_osd $dir 0
+ run_osd $dir 1
+ run_osd $dir 2
+ run_osd $dir 3
+
+ ceph osd pool create foo 128
+ ceph osd pool set foo size 3
+ ceph osd pool set foo min_size 2
+ sleep 1
+ flush_pg_stats
+ wait_for_clean
+
+ # size 3 / min_size 2: any single OSD may stop, but not osd.0 and osd.1 together
+ ceph osd ok-to-stop 0
+ ceph osd ok-to-stop 1
+ ceph osd ok-to-stop 2
+ ceph osd ok-to-stop 3
+ expect_failure $dir degraded ceph osd ok-to-stop 0 1
+
+ # size 3 / min_size 1: pairs may stop, three or more may not
+ ceph osd pool set foo min_size 1
+ sleep 1
+ flush_pg_stats
+ wait_for_clean
+ ceph osd ok-to-stop 0 1
+ ceph osd ok-to-stop 1 2
+ ceph osd ok-to-stop 2 3
+ # NOTE(review): osd 4 is never started in this function (only 0-3 are);
+ # confirm whether naming a non-existent OSD here is intentional
+ ceph osd ok-to-stop 3 4
+ expect_failure $dir degraded ceph osd ok-to-stop 0 1 2
+ expect_failure $dir degraded ceph osd ok-to-stop 0 1 2 3
+}
+
+main safe-to-destroy "$@"
diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc
new file mode 100644
index 00000000..98669667
--- /dev/null
+++ b/src/test/osd/test_ec_transaction.cc
@@ -0,0 +1,124 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <gtest/gtest.h>
+#include "osd/PGTransaction.h"
+#include "osd/ECTransaction.h"
+
+#include "test/unit.cc"
+
+// Minimal DoutPrefixProvider for the tests in this file: gen_prefix()
+// writes the literal "foo", the context is the global g_ceph_context and
+// the subsystem is ceph_subsys_osd. A single instance, dpp, is defined
+// alongside the type.
+struct mydpp : public DoutPrefixProvider {
+  std::ostream& gen_prefix(std::ostream& out) const override {
+    out << "foo";
+    return out;
+  }
+
+  CephContext *get_cct() const override {
+    return g_ceph_context;
+  }
+
+  unsigned get_subsys() const override {
+    return ceph_subsys_osd;
+  }
+} dpp;
+
+#define dout_context g_ceph_context
+
+// Two writes to a freshly created object with a gap between them: the
+// resulting plan is expected to need no reads and to hold exactly one
+// will_write entry.
+TEST(ectransaction, two_writes_separated)
+{
+ hobject_t h;
+ PGTransactionUPtr t(new PGTransaction);
+ bufferlist a, b;
+ t->create(h);
+ // first write: zeros over [0, 565760)
+ a.append_zero(565760);
+ t->write(h, 0, a.length(), a, 0);
+ // second write starts at 669856, i.e. 104096 bytes past the end of the first
+ b.append_zero(2437120);
+ t->write(h, 669856, b.length(), b, 0);
+
+ // NOTE(review): arguments are presumably (stripe size, stripe width in
+ // bytes) -- confirm against ECUtil::stripe_info_t
+ ECUtil::stripe_info_t sinfo(2, 8192);
+ auto plan = ECTransaction::get_write_plan(
+ sinfo,
+ std::move(t),
+ [&](const hobject_t &i) {
+ // lookup stub: hands back a fresh HashInfo(1) whatever the object
+ ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1));
+ return ref;
+ },
+ &dpp);
+ generic_derr << "to_read " << plan.to_read << dendl;
+ generic_derr << "will_write " << plan.will_write << dendl;
+
+ ASSERT_EQ(0u, plan.to_read.size());
+ ASSERT_EQ(1u, plan.will_write.size());
+}
+
+// Same shape as two_writes_separated, but the gap between the writes is
+// only 4096 bytes, so both writes partially cover one 8192-byte stripe.
+// The plan is still expected to need no reads and to hold one will_write
+// entry.
+TEST(ectransaction, two_writes_nearby)
+{
+ hobject_t h;
+ PGTransactionUPtr t(new PGTransaction);
+ bufferlist a, b;
+ t->create(h);
+
+ // two nearby writes, both partly touching the same 8192-byte stripe
+ ECUtil::stripe_info_t sinfo(2, 8192);
+ // first write ends at 565760, 512 bytes into the stripe [565248, 573440)
+ a.append_zero(565760);
+ t->write(h, 0, a.length(), a, 0);
+ // second write starts at 569856, 4608 bytes into that same stripe
+ b.append_zero(2437120);
+ t->write(h, 569856, b.length(), b, 0);
+
+ auto plan = ECTransaction::get_write_plan(
+ sinfo,
+ std::move(t),
+ [&](const hobject_t &i) {
+ // lookup stub: hands back a fresh HashInfo(1) whatever the object
+ ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1));
+ return ref;
+ },
+ &dpp);
+ generic_derr << "to_read " << plan.to_read << dendl;
+ generic_derr << "will_write " << plan.will_write << dendl;
+
+ ASSERT_EQ(0u, plan.to_read.size());
+ ASSERT_EQ(1u, plan.will_write.size());
+}
+
+// Seven small writes to a freshly created object: four back-to-back
+// 512-byte writes, a 2048-byte gap, then three back-to-back 4096-byte
+// writes. The plan is expected to need no reads and to hold one will_write
+// entry.
+TEST(ectransaction, many_writes)
+{
+ hobject_t h;
+ PGTransactionUPtr t(new PGTransaction);
+ bufferlist a, b;
+ a.append_zero(512);
+ b.append_zero(4096);
+ t->create(h);
+
+ ECUtil::stripe_info_t sinfo(2, 8192);
+ // write 2801664~512
+ // write 2802176~512
+ // write 2802688~512
+ // write 2803200~512
+ // (contiguous: together they cover [2801664, 2803712))
+ t->write(h, 2801664, a.length(), a, 0);
+ t->write(h, 2802176, a.length(), a, 0);
+ t->write(h, 2802688, a.length(), a, 0);
+ t->write(h, 2803200, a.length(), a, 0);
+
+ // write 2805760~4096
+ // write 2809856~4096
+ // write 2813952~4096
+ // (contiguous: together they cover [2805760, 2818048))
+ t->write(h, 2805760, b.length(), b, 0);
+ t->write(h, 2809856, b.length(), b, 0);
+ t->write(h, 2813952, b.length(), b, 0);
+
+ auto plan = ECTransaction::get_write_plan(
+ sinfo,
+ std::move(t),
+ [&](const hobject_t &i) {
+ // lookup stub: hands back a fresh HashInfo(1) whatever the object
+ ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1));
+ return ref;
+ },
+ &dpp);
+ generic_derr << "to_read " << plan.to_read << dendl;
+ generic_derr << "will_write " << plan.will_write << dendl;
+
+ ASSERT_EQ(0u, plan.to_read.size());
+ ASSERT_EQ(1u, plan.will_write.size());
+}
diff --git a/src/test/osd/test_extent_cache.cc b/src/test/osd/test_extent_cache.cc
new file mode 100644
index 00000000..04b638a9
--- /dev/null
+++ b/src/test/osd/test_extent_cache.cc
@@ -0,0 +1,280 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#include <gtest/gtest.h>
+#include "osd/ExtentCache.h"
+#include <iostream>
+
+// Builds an extent_map with one zero-filled buffer per (offset, length)
+// pair of `in`; each buffer is inserted at its offset with the buffer's
+// own length.
+extent_map imap_from_vector(vector<pair<uint64_t, uint64_t> > &&in)
+{
+  extent_map result;
+  for (auto &&range: in) {
+    bufferlist zeros;
+    zeros.append_zero(range.second);
+    result.insert(range.first, zeros.length(), zeros);
+  }
+  return result;
+}
+
+// Builds an extent_map covering the same (offset, length) intervals as
+// `set`, each one backed by a zero-filled buffer of that length.
+extent_map imap_from_iset(const extent_set &set)
+{
+  extent_map result;
+  for (auto &&interval: set) {
+    bufferlist zeros;
+    zeros.append_zero(interval.second);
+    result.insert(interval.first, interval.second, zeros);
+  }
+  return result;
+}
+
+// Builds an extent_set by inserting every (first, second) pair of `in`
+// in order.
+extent_set iset_from_vector(vector<pair<uint64_t, uint64_t> > &&in)
+{
+  extent_set result;
+  for (auto &&range: in) {
+    result.insert(range.first, range.second);
+  }
+  return result;
+}
+
+// Single read-modify-write against an otherwise idle cache: reserve the
+// extents, check nothing is left pending, present the update, release the
+// pin.
+TEST(extentcache, simple_write)
+{
+ hobject_t oid;
+
+ ExtentCache c;
+ ExtentCache::write_pin pin;
+ c.open_write_pin(pin);
+
+ auto to_read = iset_from_vector(
+ {{0, 2}, {8, 2}, {20, 2}});
+ auto to_write = iset_from_vector(
+ {{0, 10}, {20, 4}});
+ // no other write is in flight, so every requested extent must be read
+ auto must_read = c.reserve_extents_for_rmw(
+ oid, pin, to_write, to_read);
+ ASSERT_EQ(
+ must_read,
+ to_read);
+
+ c.print(std::cerr);
+
+ // NOTE(review): `got` is not used after this point
+ auto got = imap_from_iset(must_read);
+ // to_read minus must_read is empty here, so nothing remains to fetch
+ auto pending_read = to_read;
+ pending_read.subtract(must_read);
+
+ auto pending = c.get_remaining_extents_for_rmw(
+ oid,
+ pin,
+ pending_read);
+ ASSERT_TRUE(pending.empty());
+
+ auto write_map = imap_from_iset(to_write);
+ c.present_rmw_update(
+ oid,
+ pin,
+ write_map);
+
+ c.release_write_pin(pin);
+}
+
+// Two overlapping read-modify-writes on the same object: the second write
+// must only read the extents the first one is not going to write, and
+// later receives the remainder from the cache.
+TEST(extentcache, write_write_overlap)
+{
+ hobject_t oid;
+
+ ExtentCache c;
+ ExtentCache::write_pin pin;
+ c.open_write_pin(pin);
+
+ // start write 1
+ auto to_read = iset_from_vector(
+ {{0, 2}, {8, 2}, {20, 2}});
+ auto to_write = iset_from_vector(
+ {{0, 10}, {20, 4}});
+ auto must_read = c.reserve_extents_for_rmw(
+ oid, pin, to_write, to_read);
+ ASSERT_EQ(
+ must_read,
+ to_read);
+
+ c.print(std::cerr);
+
+ // start write 2
+ ExtentCache::write_pin pin2;
+ c.open_write_pin(pin2);
+ auto to_read2 = iset_from_vector(
+ {{2, 4}, {10, 4}, {18, 4}});
+ auto to_write2 = iset_from_vector(
+ {{2, 12}, {18, 12}});
+ auto must_read2 = c.reserve_extents_for_rmw(
+ oid, pin2, to_write2, to_read2);
+ // [2,6) and [20,22) fall inside write 1's [0,10) and [20,24), which
+ // leaves only [10,14) and [18,20) to be read
+ ASSERT_EQ(
+ must_read2,
+ iset_from_vector({{10, 4}, {18, 2}}));
+
+ c.print(std::cerr);
+
+ // complete read for write 1 and start commit
+ // NOTE(review): `got` is not used after this point
+ auto got = imap_from_iset(must_read);
+ auto pending_read = to_read;
+ pending_read.subtract(must_read);
+ auto pending = c.get_remaining_extents_for_rmw(
+ oid,
+ pin,
+ pending_read);
+ ASSERT_TRUE(pending.empty());
+
+ auto write_map = imap_from_iset(to_write);
+ c.present_rmw_update(
+ oid,
+ pin,
+ write_map);
+
+ c.print(std::cerr);
+
+ // complete read for write 2 and start commit
+ // pending_read2 is what write 2 did not have to read itself:
+ // {2,4} and {20,2}
+ auto pending_read2 = to_read2;
+ pending_read2.subtract(must_read2);
+ auto pending2 = c.get_remaining_extents_for_rmw(
+ oid,
+ pin2,
+ pending_read2);
+ ASSERT_EQ(
+ pending2,
+ imap_from_iset(pending_read2));
+
+ auto write_map2 = imap_from_iset(to_write2);
+ c.present_rmw_update(
+ oid,
+ pin2,
+ write_map2);
+
+ c.print(std::cerr);
+
+ c.release_write_pin(pin);
+
+ c.print(std::cerr);
+
+ c.release_write_pin(pin2);
+}
+
+// Three writes on one object: writes 1 and 2 cover two adjacent 4096-byte
+// extents without reading anything, and write 3 reads and rewrites exactly
+// their union, so it is expected to have nothing to read itself.
+TEST(extentcache, write_write_overlap2)
+{
+ hobject_t oid;
+
+ ExtentCache c;
+ ExtentCache::write_pin pin;
+ c.open_write_pin(pin);
+
+ // start write 1
+ // writes [659456, 663552), reads nothing
+ auto to_read = extent_set();
+ auto to_write = iset_from_vector(
+ {{659456, 4096}});
+ auto must_read = c.reserve_extents_for_rmw(
+ oid, pin, to_write, to_read);
+ ASSERT_EQ(
+ must_read,
+ to_read);
+
+ c.print(std::cerr);
+
+ // start write 2
+ // writes [663552, 667648), reads nothing
+ ExtentCache::write_pin pin2;
+ c.open_write_pin(pin2);
+ auto to_read2 = extent_set();
+ auto to_write2 = iset_from_vector(
+ {{663552, 4096}});
+ auto must_read2 = c.reserve_extents_for_rmw(
+ oid, pin2, to_write2, to_read2);
+ ASSERT_EQ(
+ must_read2,
+ to_read2);
+
+
+ // start write 3
+ // reads and writes [659456, 667648), the union of writes 1 and 2
+ ExtentCache::write_pin pin3;
+ c.open_write_pin(pin3);
+ auto to_read3 = iset_from_vector({{659456, 8192}});
+ auto to_write3 = iset_from_vector({{659456, 8192}});
+ auto must_read3 = c.reserve_extents_for_rmw(
+ oid, pin3, to_write3, to_read3);
+ ASSERT_EQ(
+ must_read3,
+ extent_set());
+
+ c.print(std::cerr);
+
+ // complete read for write 1 and start commit
+ // NOTE(review): `got` is not used after this point
+ auto got = imap_from_iset(must_read);
+ auto pending_read = to_read;
+ pending_read.subtract(must_read);
+ auto pending = c.get_remaining_extents_for_rmw(
+ oid,
+ pin,
+ pending_read);
+ ASSERT_TRUE(pending.empty());
+
+ auto write_map = imap_from_iset(to_write);
+ c.present_rmw_update(
+ oid,
+ pin,
+ write_map);
+
+ c.print(std::cerr);
+
+ // complete read for write 2 and start commit
+ auto pending_read2 = to_read2;
+ pending_read2.subtract(must_read2);
+ auto pending2 = c.get_remaining_extents_for_rmw(
+ oid,
+ pin2,
+ pending_read2);
+ ASSERT_EQ(
+ pending2,
+ imap_from_iset(pending_read2));
+
+ auto write_map2 = imap_from_iset(to_write2);
+ c.present_rmw_update(
+ oid,
+ pin2,
+ write_map2);
+
+ // complete read for write 3 and start commit
+ // must_read3 was empty, so the whole 8192-byte range is still pending
+ auto pending_read3 = to_read3;
+ pending_read3.subtract(must_read3);
+ auto pending3 = c.get_remaining_extents_for_rmw(
+ oid,
+ pin3,
+ pending_read3);
+ ASSERT_EQ(
+ pending3,
+ imap_from_iset(pending_read3));
+
+ auto write_map3 = imap_from_iset(to_write3);
+ c.present_rmw_update(
+ oid,
+ pin3,
+ write_map3);
+
+
+ c.print(std::cerr);
+
+ c.release_write_pin(pin);
+
+ c.print(std::cerr);
+
+ c.release_write_pin(pin2);
+
+ c.print(std::cerr);
+
+ c.release_write_pin(pin3);
+}
diff --git a/src/test/osd/test_pg_transaction.cc b/src/test/osd/test_pg_transaction.cc
new file mode 100644
index 00000000..63b6197b
--- /dev/null
+++ b/src/test/osd/test_pg_transaction.cc
@@ -0,0 +1,129 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 Red Hat
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <gtest/gtest.h>
+#include "osd/PGTransaction.h"
+
+// A transaction holding a single nop must report itself non-empty and
+// hand exactly that one object, with init type None, to the traversal
+// callback.
+TEST(pgtransaction, simple)
+{
+  hobject_t oid;
+  PGTransaction txn;
+  ASSERT_TRUE(txn.empty());
+  txn.nop(oid);
+  ASSERT_FALSE(txn.empty());
+
+  unsigned visited = 0;
+  txn.safe_create_traverse(
+    [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &entry) {
+      using Init = PGTransaction::ObjectOperation::Init;
+      ASSERT_EQ(entry.first, oid);
+      ASSERT_TRUE(boost::get<Init::None>(&entry.second.init_type));
+      ++visited;
+    });
+  ASSERT_EQ(visited, 1u);
+}
+
+// clone(h, h2) on top of nop(h2): the traversal must hand out the clone
+// target h first and its source h2 second, and must visit exactly those
+// two objects.
+TEST(pgtransaction, clone_safe_create_traverse)
+{
+  hobject_t h, h2;
+  h2.snap = 1;
+  PGTransaction t;
+  ASSERT_TRUE(t.empty());
+  t.nop(h2);
+  ASSERT_FALSE(t.empty());
+  t.clone(h, h2);
+  unsigned num = 0;
+  t.safe_create_traverse(
+    [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) {
+      using T = PGTransaction::ObjectOperation::Init;
+      if (num == 0) {
+        // first callback: the clone target, recording h2 as its source
+        ASSERT_EQ(p.first, h);
+        ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type));
+        ASSERT_EQ(
+          boost::get<T::Clone>(&p.second.init_type)->source,
+          h2);
+      } else if (num == 1) {
+        // second callback: the source object itself
+        ASSERT_EQ(p.first, h2);
+        ASSERT_TRUE(boost::get<T::None>(&p.second.init_type));
+      } else {
+        // any further callback is one too many
+        ASSERT_LT(num, 2u);
+      }
+      ++num;
+    });
+  // The checks above only bound the count from above; without this a
+  // traversal that visited fewer entries (or none) would pass vacuously.
+  ASSERT_EQ(num, 2u);
+}
+
+// Chain of clones h <- h2 <- h3 (h2 is removed and then re-created as a
+// clone of h3): the traversal must visit h, then h2, then h3, and nothing
+// else.
+TEST(pgtransaction, clone_safe_create_traverse2)
+{
+  hobject_t h, h2, h3;
+  h.snap = 10;
+  h2.snap = 5;
+  h3.snap = 3;
+  PGTransaction t;
+  ASSERT_TRUE(t.empty());
+  t.nop(h3);
+  ASSERT_FALSE(t.empty());
+  t.clone(h, h2);
+  t.remove(h2);
+  t.clone(h2, h3);
+  unsigned num = 0;
+  t.safe_create_traverse(
+    [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) {
+      using T = PGTransaction::ObjectOperation::Init;
+      if (num == 0) {
+        // first callback: h, cloned from h2
+        ASSERT_EQ(p.first, h);
+        ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type));
+        ASSERT_EQ(
+          boost::get<T::Clone>(&p.second.init_type)->source,
+          h2);
+      } else if (num == 1) {
+        // second callback: h2, cloned from h3
+        ASSERT_EQ(p.first, h2);
+        ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type));
+        ASSERT_EQ(
+          boost::get<T::Clone>(&p.second.init_type)->source,
+          h3);
+      } else if (num == 2) {
+        // third callback: h3, which only carries the nop
+        ASSERT_EQ(p.first, h3);
+        ASSERT_TRUE(boost::get<T::None>(&p.second.init_type));
+      } else {
+        // any further callback is one too many
+        ASSERT_LT(num, 3u);
+      }
+      ++num;
+    });
+  // The checks above only bound the count from above; without this a
+  // traversal that visited fewer entries (or none) would pass vacuously.
+  ASSERT_EQ(num, 3u);
+}
+
+// remove(h), remove(h2) and then clone(h2, h3), with h3 never touched by
+// the transaction itself: the callback must see h as a delete and h2 as a
+// clone of h3, two callbacks in total.
+TEST(pgtransaction, clone_safe_create_traverse3)
+{
+  hobject_t h, h2, h3;
+  h.snap = 10;
+  h2.snap = 5;
+  h3.snap = 3;
+  PGTransaction t;
+  t.remove(h);
+  t.remove(h2);
+  t.clone(h2, h3);
+  unsigned num = 0;
+  t.safe_create_traverse(
+    [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) {
+      using T = PGTransaction::ObjectOperation::Init;
+      // no visiting order is assumed here: entries are identified by key
+      if (p.first == h) {
+        ASSERT_TRUE(p.second.is_delete());
+      } else if (p.first == h2) {
+        ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type));
+        ASSERT_EQ(
+          boost::get<T::Clone>(&p.second.init_type)->source,
+          h3);
+      }
+      ASSERT_LT(num, 2u);
+      ++num;
+    });
+  // ASSERT_LT above only bounds the count from above; without this a
+  // traversal that visited fewer entries (or none) would pass vacuously.
+  ASSERT_EQ(num, 2u);
+}
diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc
new file mode 100644
index 00000000..6aba395c
--- /dev/null
+++ b/src/test/osd/types.cc
@@ -0,0 +1,1830 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "include/types.h"
+#include "osd/osd_types.h"
+#include "osd/OSDMap.h"
+#include "gtest/gtest.h"
+#include "include/coredumpctl.h"
+#include "common/Thread.h"
+#include "include/stringify.h"
+#include "osd/ReplicatedBackend.h"
+
+#include <sstream>
+
+// 12 bits of 0xE947FA20 in pool 0: a single prefix is expected. The pool
+// appears as 16 hex digits, followed by '.' and the three lowest hash
+// nibbles written lowest-first ("02A").
+TEST(hobject, prefixes0)
+{
+  const uint32_t mask = 0xE947FA20;
+  const uint32_t bits = 12;
+  const int64_t pool = 0;
+
+  const set<string> expected = {
+    "0000000000000000.02A",
+  };
+
+  const set<string> actual(hobject_t::get_prefixes(bits, mask, pool));
+  ASSERT_EQ(actual, expected);
+}
+
+// 6 bits of 0x0000000F in pool 20 (0x14): one full nibble ("F") plus a
+// partially fixed second nibble, giving four expected prefixes.
+TEST(hobject, prefixes1)
+{
+  const uint32_t mask = 0x0000000F;
+  const uint32_t bits = 6;
+  const int64_t pool = 20;
+
+  const set<string> expected = {
+    "0000000000000014.F0",
+    "0000000000000014.F4",
+    "0000000000000014.F8",
+    "0000000000000014.FC",
+  };
+
+  const set<string> actual(hobject_t::get_prefixes(bits, mask, pool));
+  ASSERT_EQ(actual, expected);
+}
+
+// 25 bits of 0xDEADBEAF in pool 0: six full nibbles ("FAEBDA") plus a
+// seventh nibble with one bit fixed, giving eight expected prefixes.
+TEST(hobject, prefixes2)
+{
+  const uint32_t mask = 0xDEADBEAF;
+  const uint32_t bits = 25;
+  const int64_t pool = 0;
+
+  const set<string> expected = {
+    "0000000000000000.FAEBDA0",
+    "0000000000000000.FAEBDA2",
+    "0000000000000000.FAEBDA4",
+    "0000000000000000.FAEBDA6",
+    "0000000000000000.FAEBDA8",
+    "0000000000000000.FAEBDAA",
+    "0000000000000000.FAEBDAC",
+    "0000000000000000.FAEBDAE",
+  };
+
+  const set<string> actual(hobject_t::get_prefixes(bits, mask, pool));
+  ASSERT_EQ(actual, expected);
+}
+
+// All 32 bits of 0xE947FA20 in pool 0x23: a single prefix carrying all
+// eight hash nibbles, written lowest-first ("02AF749E").
+TEST(hobject, prefixes3)
+{
+  const uint32_t mask = 0xE947FA20;
+  const uint32_t bits = 32;
+  const int64_t pool = 0x23;
+
+  const set<string> expected = {
+    "0000000000000023.02AF749E",
+  };
+
+  const set<string> actual(hobject_t::get_prefixes(bits, mask, pool));
+  ASSERT_EQ(actual, expected);
+}
+
+// Zero bits in pool 0x23: the only expected prefix is the pool part and
+// the '.' separator, with no hash digits at all.
+TEST(hobject, prefixes4)
+{
+  const uint32_t mask = 0xE947FA20;
+  const uint32_t bits = 0;
+  const int64_t pool = 0x23;
+
+  const set<string> expected = {
+    "0000000000000023.",
+  };
+
+  const set<string> actual(hobject_t::get_prefixes(bits, mask, pool));
+  ASSERT_EQ(actual, expected);
+}
+
+// A single bit of 0xDEADBEAF in pool 0x34AC5D00: only the lowest bit of the
+// first nibble is fixed (to 1), so the eight odd nibbles are expected.
+TEST(hobject, prefixes5)
+{
+  const uint32_t mask = 0xDEADBEAF;
+  const uint32_t bits = 1;
+  const int64_t pool = 0x34AC5D00;
+
+  const set<string> expected = {
+    "0000000034AC5D00.1",
+    "0000000034AC5D00.3",
+    "0000000034AC5D00.5",
+    "0000000034AC5D00.7",
+    "0000000034AC5D00.9",
+    "0000000034AC5D00.B",
+    "0000000034AC5D00.D",
+    "0000000034AC5D00.F",
+  };
+
+  const set<string> actual(hobject_t::get_prefixes(bits, mask, pool));
+  ASSERT_EQ(actual, expected);
+}
+
+TEST(pg_interval_t, check_new_interval)
+{
+// iterate through all 4 combinations
+for (unsigned i = 0; i < 4; ++i) {
+ //
+ // Create a situation where osdmaps are the same so that
+ // each test case can diverge from it using minimal code.
+ //
+ int osd_id = 1;
+ epoch_t epoch = 40;
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ std::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+ epoch_t same_interval_since = epoch;
+ epoch_t last_epoch_clean = same_interval_since;
+ int64_t pool_id = 200;
+ int pg_num = 4;
+ __u8 min_size = 2;
+ boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(new ReplicatedBackend::RPCRecPred());
+ {
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_pools[pool_id].set_pg_num_pending(pg_num);
+ inc.new_up_thru[osd_id] = epoch + 1;
+ osdmap->apply_incremental(inc);
+ lastmap->apply_incremental(inc);
+ }
+ vector<int> new_acting;
+ new_acting.push_back(osd_id);
+ new_acting.push_back(osd_id + 1);
+ vector<int> old_acting = new_acting;
+ int old_primary = osd_id;
+ int new_primary = osd_id;
+ vector<int> new_up;
+ new_up.push_back(osd_id);
+ int old_up_primary = osd_id;
+ int new_up_primary = osd_id;
+ vector<int> old_up = new_up;
+ pg_t pgid;
+ pgid.set_pool(pool_id);
+
+ //
+ // Do nothing if there are no modifications in
+ // acting, up or pool size and that the pool is not
+ // being split
+ //
+ {
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_FALSE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals));
+ ASSERT_TRUE(past_intervals.empty());
+ }
+
+ //
+ // The acting set has changed
+ //
+ {
+ vector<int> new_acting;
+ int _new_primary = osd_id + 1;
+ new_acting.push_back(_new_primary);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals));
+ old_primary = new_primary;
+ }
+
+ //
+ // The up set has changed
+ //
+ {
+ vector<int> new_up;
+ int _new_primary = osd_id + 1;
+ new_up.push_back(_new_primary);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // The up primary has changed
+ //
+ {
+ vector<int> new_up;
+ int _new_up_primary = osd_id + 1;
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ _new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG is splitting
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ int new_pg_num = pg_num ^ 2;
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(new_pg_num);
+ osdmap->apply_incremental(inc);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG is pre-merge source
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1);
+ osdmap->apply_incremental(inc);
+ cout << "pg_num " << pg_num << std::endl;
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pg_t(pg_num - 1, pool_id),
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG was pre-merge source
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1);
+ osdmap->apply_incremental(inc);
+
+ cout << "pg_num " << pg_num << std::endl;
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ lastmap, // reverse order!
+ osdmap,
+ pg_t(pg_num - 1, pool_id),
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG is merge source
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num - 1);
+ osdmap->apply_incremental(inc);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pg_t(pg_num - 1, pool_id),
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG is pre-merge target
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1);
+ osdmap->apply_incremental(inc);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pg_t(pg_num / 2 - 1, pool_id),
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG was pre-merge target
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1);
+ osdmap->apply_incremental(inc);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ lastmap, // reverse order!
+ osdmap,
+ pg_t(pg_num / 2 - 1, pool_id),
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG is merge target
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num - 1);
+ osdmap->apply_incremental(inc);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pg_t(pg_num / 2 - 1, pool_id),
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // PG size has changed
+ //
+ {
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ __u8 new_min_size = min_size + 1;
+ inc.new_pools[pool_id].min_size = new_min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ osdmap->apply_incremental(inc);
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals));
+ }
+
+ //
+ // The old acting set was empty : the previous interval could not
+ // have been rw
+ //
+ {
+ vector<int> old_acting;
+
+ PastIntervals past_intervals;
+
+ ostringstream out;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals,
+ &out));
+ ASSERT_NE(string::npos, out.str().find("acting set is too small"));
+ }
+
+ //
+ // The old acting set did not have enough osd : it could
+ // not have been rw
+ //
+ {
+ vector<int> old_acting;
+ old_acting.push_back(osd_id);
+
+ //
+ // see http://tracker.ceph.com/issues/5780
+ // the size of the old acting set should be compared
+ // with the min_size of the old osdmap
+ //
+ // The new osdmap is created so that it triggers the
+ // bug.
+ //
+ std::shared_ptr<OSDMap> osdmap(new OSDMap());
+ osdmap->set_max_osd(10);
+ osdmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ osdmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ __u8 new_min_size = old_acting.size();
+ inc.new_pools[pool_id].min_size = new_min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ osdmap->apply_incremental(inc);
+
+ ostringstream out;
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals,
+ &out));
+ ASSERT_NE(string::npos, out.str().find("acting set is too small"));
+ }
+
+ //
+ // The acting set changes. The old acting set primary was up during the
+ // previous interval and may have been rw.
+ //
+ {
+ vector<int> new_acting;
+ new_acting.push_back(osd_id + 4);
+ new_acting.push_back(osd_id + 5);
+
+ ostringstream out;
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals,
+ &out));
+ ASSERT_NE(string::npos, out.str().find("includes interval"));
+ }
+ //
+ // The acting set changes. The old acting set primary was not up
+ // during the old interval but last_epoch_clean is in the
+ // old interval and it may have been rw.
+ //
+ {
+ vector<int> new_acting;
+ new_acting.push_back(osd_id + 4);
+ new_acting.push_back(osd_id + 5);
+
+ std::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_up_thru[osd_id] = epoch - 10;
+ lastmap->apply_incremental(inc);
+
+ ostringstream out;
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals,
+ &out));
+ ASSERT_NE(string::npos, out.str().find("presumed to have been rw"));
+ }
+
+ //
+ // The acting set changes. The old acting set primary was not up
+ // during the old interval and last_epoch_clean is before the
+ // old interval : the previous interval could not possibly have
+ // been rw.
+ //
+ {
+ vector<int> new_acting;
+ new_acting.push_back(osd_id + 4);
+ new_acting.push_back(osd_id + 5);
+
+ epoch_t last_epoch_clean = epoch - 10;
+
+ std::shared_ptr<OSDMap> lastmap(new OSDMap());
+ lastmap->set_max_osd(10);
+ lastmap->set_state(osd_id, CEPH_OSD_EXISTS);
+ lastmap->set_epoch(epoch);
+ OSDMap::Incremental inc(epoch + 1);
+ inc.new_pools[pool_id].min_size = min_size;
+ inc.new_pools[pool_id].set_pg_num(pg_num);
+ inc.new_up_thru[osd_id] = last_epoch_clean;
+ lastmap->apply_incremental(inc);
+
+ ostringstream out;
+
+ PastIntervals past_intervals;
+
+ ASSERT_TRUE(past_intervals.empty());
+ ASSERT_TRUE(PastIntervals::check_new_interval(old_primary,
+ new_primary,
+ old_acting,
+ new_acting,
+ old_up_primary,
+ new_up_primary,
+ old_up,
+ new_up,
+ same_interval_since,
+ last_epoch_clean,
+ osdmap,
+ lastmap,
+ pgid,
+ recoverable.get(),
+ &past_intervals,
+ &out));
+ ASSERT_NE(string::npos, out.str().find("does not include interval"));
+ }
+} // end for, didn't want to reindent
+}
+
+// Checks pg_t::get_ancestor() against hard-coded expectations, first for
+// single calls driven from a table, then for chained calls.
+TEST(pg_t, get_ancestor)
+{
+  struct ancestor_case {
+    int seed;      // seed of the pg the call is made on (pool 0)
+    int pg_num;    // argument handed to get_ancestor()
+    int expected;  // seed of the pg expected back (pool 0)
+  };
+  const ancestor_case single_step[] = {
+    {16, 16, 0},
+    {17, 16, 1},
+    {16, 8, 0},
+    {16, 80, 16},
+    {16, 83, 16},
+  };
+  for (const auto &c : single_step) {
+    ASSERT_EQ(pg_t(c.expected, 0), pg_t(c.seed, 0).get_ancestor(c.pg_num));
+  }
+
+  // going through an intermediate pg_num of 123 first
+  ASSERT_EQ(pg_t(1, 0), pg_t(1321, 0).get_ancestor(123).get_ancestor(8));
+  ASSERT_EQ(pg_t(3, 0), pg_t(1323, 0).get_ancestor(123).get_ancestor(8));
+  // direct call for the same starting pg
+  ASSERT_EQ(pg_t(3, 0), pg_t(1323, 0).get_ancestor(8));
+}
+
+// Walks pg_t::is_split(old_pg_num, new_pg_num, children) through a series
+// of pg_num changes for pgs 0, 1 and 3 of pool 0, checking both the return
+// value and the exact set of children reported.
+TEST(pg_t, split)
+{
+  pg_t parent(0, 0);
+  set<pg_t> kids;
+  bool did_split;
+
+  // ---- pg 0.0 ----
+  kids.clear();
+  did_split = parent.is_split(1, 1, &kids);
+  ASSERT_FALSE(did_split);
+
+  kids.clear();
+  did_split = parent.is_split(2, 4, nullptr);
+  ASSERT_TRUE(did_split);
+  did_split = parent.is_split(2, 4, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(1u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(2, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(2, 8, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(3u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(2, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(4, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(6, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(3, 8, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(1u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(4, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(6, 8, nullptr);
+  ASSERT_FALSE(did_split);
+  did_split = parent.is_split(6, 8, &kids);
+  ASSERT_FALSE(did_split);
+  ASSERT_EQ(0u, kids.size());
+
+  // ---- pg 0.1 ----
+  parent = pg_t(1, 0);
+
+  kids.clear();
+  did_split = parent.is_split(2, 4, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(1u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(3, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(2, 6, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(2u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(3, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(5, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(2, 8, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(3u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(3, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(5, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(7, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(4, 8, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(1u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(5, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(3, 8, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(3u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(3, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(5, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(7, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(6, 8, &kids);
+  ASSERT_FALSE(did_split);
+  ASSERT_EQ(0u, kids.size());
+
+  // ---- pg 0.3 ----
+  parent = pg_t(3, 0);
+
+  kids.clear();
+  did_split = parent.is_split(7, 8, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(1u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(7, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(7, 12, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(2u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(7, 0)));
+  ASSERT_EQ(1u, kids.count(pg_t(11, 0)));
+
+  kids.clear();
+  did_split = parent.is_split(7, 11, &kids);
+  ASSERT_TRUE(did_split);
+  ASSERT_EQ(1u, kids.size());
+  ASSERT_EQ(1u, kids.count(pg_t(7, 0)));
+
+}
+
+// For pg 0.7, checks is_merge_source() across several (old, new) pg_num
+// pairs, the parent it reports, and that the parent answers true to
+// is_merge_target(). The last case expects no merge at all.
+TEST(pg_t, merge)
+{
+  pg_t source(7, 0);
+  pg_t target;
+
+  ASSERT_TRUE(source.is_merge_source(8, 7, &target));
+  ASSERT_EQ(target, pg_t(3, 0));
+  ASSERT_TRUE(target.is_merge_target(8, 7));
+
+  ASSERT_TRUE(source.is_merge_source(8, 5, &target));
+  ASSERT_EQ(target, pg_t(3, 0));
+  ASSERT_TRUE(target.is_merge_target(8, 5));
+
+  ASSERT_TRUE(source.is_merge_source(8, 4, &target));
+  ASSERT_EQ(target, pg_t(3, 0));
+  ASSERT_TRUE(target.is_merge_target(8, 4));
+
+  ASSERT_TRUE(source.is_merge_source(8, 3, &target));
+  ASSERT_EQ(target, pg_t(1, 0));
+  // NOTE(review): the target check uses (8, 4) while the source check above
+  // uses (8, 3) -- kept as found; confirm whether that is intentional.
+  ASSERT_TRUE(target.is_merge_target(8, 4));
+
+  // growing from 8 to 9 pgs is not a merge; target keeps whatever the call left in it
+  ASSERT_FALSE(source.is_merge_source(9, 8, &target));
+  ASSERT_FALSE(target.is_merge_target(9, 8));
+}
+
+// A default-constructed pg_missing_t is expected to hold no missing items.
+TEST(pg_missing_t, constructor)
+{
+  pg_missing_t fresh;
+  EXPECT_EQ(0u, fresh.num_missing());
+  EXPECT_FALSE(fresh.have_missing());
+}
+
+// have_missing() is expected to flip from false to true after one add().
+TEST(pg_missing_t, have_missing)
+{
+  hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+  pg_missing_t tracker;
+  EXPECT_FALSE(tracker.have_missing());
+
+  tracker.add(obj, eversion_t(), eversion_t(), false);
+  EXPECT_TRUE(tracker.have_missing());
+}
+
+// After claim(), the claiming pg_missing_t is expected to report the
+// missing item that was added to the donor.
+TEST(pg_missing_t, claim)
+{
+  hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+
+  // donor starts empty and receives one item
+  pg_missing_t donor;
+  EXPECT_FALSE(donor.have_missing());
+  donor.add(obj, eversion_t(), eversion_t(), false);
+  EXPECT_TRUE(donor.have_missing());
+
+  // receiver starts empty and takes over the donor's content
+  pg_missing_t receiver;
+  EXPECT_FALSE(receiver.have_missing());
+
+  receiver.claim(donor);
+  EXPECT_TRUE(receiver.have_missing());
+}
+
+// Covers both is_missing() overloads: by object id only, and by object id
+// plus a specific version.
+TEST(pg_missing_t, is_missing)
+{
+  // pg_missing_t::is_missing(const hobject_t& oid) const
+  {
+    hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+    pg_missing_t tracker;
+    EXPECT_FALSE(tracker.is_missing(obj));
+    tracker.add(obj, eversion_t(), eversion_t(), false);
+    EXPECT_TRUE(tracker.is_missing(obj));
+  }
+
+  // bool pg_missing_t::is_missing(const hobject_t& oid, eversion_t v) const
+  {
+    hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+    pg_missing_t tracker;
+    eversion_t wanted(10,5);
+    EXPECT_FALSE(tracker.is_missing(obj, eversion_t()));
+    tracker.add(obj, wanted, eversion_t(), false);
+    EXPECT_TRUE(tracker.is_missing(obj));
+    // only the version that was recorded as needed is expected to match
+    EXPECT_FALSE(tracker.is_missing(obj, eversion_t()));
+    EXPECT_TRUE(tracker.is_missing(obj, wanted));
+  }
+}
+
+// Feeds pg_log_entry_t events of several kinds into
+// pg_missing_t::add_next_event() and checks the resulting missing item
+// (have / need / is_delete) and the size of the missing and rmissing maps.
+// Each scenario starts from a copy of sample_e and a fresh pg_missing_t.
+TEST(pg_missing_t, add_next_event)
+{
+ hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
+ hobject_t oid_other(object_t("other"), "key", 9123, 9456, 0, "");
+ eversion_t version(10,5);
+ eversion_t prior_version(3,4);
+ // template entry: every scenario copies it and overrides op / versions
+ pg_log_entry_t sample_e(pg_log_entry_t::DELETE, oid, version, prior_version,
+ 0, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
+ utime_t(8,9), 0);
+
+ // new object (MODIFY)
+ {
+ pg_missing_t missing;
+ pg_log_entry_t e = sample_e;
+
+ e.op = pg_log_entry_t::MODIFY;
+ // an empty prior_version is what makes this a "new object" case
+ e.prior_version = eversion_t();
+ EXPECT_TRUE(e.is_update());
+ EXPECT_TRUE(e.object_is_indexed());
+ EXPECT_TRUE(e.reqid_is_indexed());
+ EXPECT_FALSE(missing.is_missing(oid));
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have);
+ EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+
+ // adding the same object replaces the previous one
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+ }
+
+ // new object (CLONE)
+ {
+ pg_missing_t missing;
+ pg_log_entry_t e = sample_e;
+
+ e.op = pg_log_entry_t::CLONE;
+ e.prior_version = eversion_t();
+ EXPECT_TRUE(e.is_clone());
+ EXPECT_TRUE(e.object_is_indexed());
+ // unlike MODIFY, a CLONE entry is expected not to index its reqid
+ EXPECT_FALSE(e.reqid_is_indexed());
+ EXPECT_FALSE(missing.is_missing(oid));
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have);
+ EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+
+ // adding the same object replaces the previous one
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+ }
+
+ // existing object (MODIFY)
+ {
+ pg_missing_t missing;
+ pg_log_entry_t e = sample_e;
+
+ e.op = pg_log_entry_t::MODIFY;
+ e.prior_version = eversion_t();
+ EXPECT_TRUE(e.is_update());
+ EXPECT_TRUE(e.object_is_indexed());
+ EXPECT_TRUE(e.reqid_is_indexed());
+ EXPECT_FALSE(missing.is_missing(oid));
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have);
+ EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+
+ // adding the same object with a different version
+ // (only prior_version changes; `have` is still expected to be empty)
+ e.prior_version = prior_version;
+ missing.add_next_event(e);
+ EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+ }
+
+ // object with prior version (MODIFY)
+ {
+ pg_missing_t missing;
+ pg_log_entry_t e = sample_e;
+
+ // prior_version is left as set in sample_e
+ e.op = pg_log_entry_t::MODIFY;
+ EXPECT_TRUE(e.is_update());
+ EXPECT_TRUE(e.object_is_indexed());
+ EXPECT_TRUE(e.reqid_is_indexed());
+ EXPECT_FALSE(missing.is_missing(oid));
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_EQ(prior_version, missing.get_items().at(oid).have);
+ EXPECT_EQ(version, missing.get_items().at(oid).need);
+ EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+ }
+
+ // adding a DELETE matching an existing event
+ {
+ pg_missing_t missing;
+ pg_log_entry_t e = sample_e;
+
+ e.op = pg_log_entry_t::MODIFY;
+ EXPECT_TRUE(e.is_update());
+ EXPECT_TRUE(e.object_is_indexed());
+ EXPECT_TRUE(e.reqid_is_indexed());
+ EXPECT_FALSE(missing.is_missing(oid));
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+
+ // same version as the MODIFY above, only the op differs
+ e.op = pg_log_entry_t::DELETE;
+ EXPECT_TRUE(e.is_delete());
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_TRUE(missing.get_items().at(oid).is_delete());
+ EXPECT_EQ(prior_version, missing.get_items().at(oid).have);
+ EXPECT_EQ(version, missing.get_items().at(oid).need);
+ EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+ }
+
+ // adding a LOST_DELETE after an existing event
+ {
+ pg_missing_t missing;
+ pg_log_entry_t e = sample_e;
+
+ e.op = pg_log_entry_t::MODIFY;
+ EXPECT_TRUE(e.is_update());
+ EXPECT_TRUE(e.object_is_indexed());
+ EXPECT_TRUE(e.reqid_is_indexed());
+ EXPECT_FALSE(missing.is_missing(oid));
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_FALSE(missing.get_items().at(oid).is_delete());
+
+ e.op = pg_log_entry_t::LOST_DELETE;
+ // bump the version so this event comes after the MODIFY
+ e.version.version++;
+ EXPECT_TRUE(e.is_delete());
+ missing.add_next_event(e);
+ EXPECT_TRUE(missing.is_missing(oid));
+ EXPECT_TRUE(missing.get_items().at(oid).is_delete());
+ EXPECT_EQ(prior_version, missing.get_items().at(oid).have);
+ // need is compared with the bumped e.version, not the original `version`
+ EXPECT_EQ(e.version, missing.get_items().at(oid).need);
+ EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version));
+ EXPECT_EQ(1U, missing.num_missing());
+ EXPECT_EQ(1U, missing.get_rmissing().size());
+ }
+}
+
+// revise_need() is expected to create an entry when none exists and, on an
+// existing entry, to replace `need` while leaving `have` alone.
+TEST(pg_missing_t, revise_need)
+{
+  hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+  pg_missing_t tracker;
+
+  // first call on an unknown object creates the entry
+  EXPECT_FALSE(tracker.is_missing(obj));
+  eversion_t first_need(10,10);
+  tracker.revise_need(obj, first_need, false);
+  EXPECT_TRUE(tracker.is_missing(obj));
+  EXPECT_EQ(eversion_t(), tracker.get_items().at(obj).have);
+  EXPECT_EQ(first_need, tracker.get_items().at(obj).need);
+
+  // second call updates need; the have set in between must survive
+  eversion_t current_have(1,1);
+  tracker.revise_have(obj, current_have);
+  eversion_t second_need(10,12);
+  EXPECT_EQ(current_have, tracker.get_items().at(obj).have);
+  tracker.revise_need(obj, second_need, false);
+  EXPECT_EQ(current_have, tracker.get_items().at(obj).have);
+  EXPECT_EQ(second_need, tracker.get_items().at(obj).need);
+}
+
+// revise_have() is expected to do nothing for an unknown object and to
+// replace `have` (keeping `need`) for a known one.
+TEST(pg_missing_t, revise_have)
+{
+  hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+  pg_missing_t tracker;
+
+  // unknown object: the call must not create an entry
+  EXPECT_FALSE(tracker.is_missing(obj));
+  eversion_t old_have(1,1);
+  tracker.revise_have(obj, old_have);
+  EXPECT_FALSE(tracker.is_missing(obj));
+
+  // known object: have is replaced, need is untouched
+  eversion_t wanted(10,12);
+  tracker.add(obj, wanted, old_have, false);
+  EXPECT_TRUE(tracker.is_missing(obj));
+  eversion_t updated_have(2,2);
+  EXPECT_EQ(old_have, tracker.get_items().at(obj).have);
+  tracker.revise_have(obj, updated_have);
+  EXPECT_EQ(updated_have, tracker.get_items().at(obj).have);
+  EXPECT_EQ(wanted, tracker.get_items().at(obj).need);
+}
+
+// add() is expected to record the object together with the given need and
+// have versions.
+TEST(pg_missing_t, add)
+{
+  hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+  pg_missing_t tracker;
+  EXPECT_FALSE(tracker.is_missing(obj));
+
+  eversion_t have_v(1,1);
+  eversion_t need_v(10,10);
+  tracker.add(obj, need_v, have_v, false);
+
+  EXPECT_TRUE(tracker.is_missing(obj));
+  EXPECT_EQ(have_v, tracker.get_items().at(obj).have);
+  EXPECT_EQ(need_v, tracker.get_items().at(obj).need);
+}
+
+// Covers both rm() overloads: by object id plus version, and by iterator.
+TEST(pg_missing_t, rm)
+{
+  // void pg_missing_t::rm(const hobject_t& oid, eversion_t v)
+  {
+    hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+    pg_missing_t tracker;
+    EXPECT_FALSE(tracker.is_missing(obj));
+    epoch_t base_epoch = 10;
+    eversion_t wanted(base_epoch,10);
+    tracker.add(obj, wanted, eversion_t(), false);
+    EXPECT_TRUE(tracker.is_missing(obj));
+    // an older version leaves the entry in place
+    tracker.rm(obj, eversion_t(base_epoch / 2,20));
+    EXPECT_TRUE(tracker.is_missing(obj));
+    // a later version drops the entry
+    tracker.rm(obj, eversion_t(base_epoch * 2,20));
+    EXPECT_FALSE(tracker.is_missing(obj));
+  }
+  // void pg_missing_t::rm(const std::map<hobject_t, pg_missing_item>::iterator &m)
+  {
+    hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+    pg_missing_t tracker;
+    EXPECT_FALSE(tracker.is_missing(obj));
+    tracker.add(obj, eversion_t(), eversion_t(), false);
+    EXPECT_TRUE(tracker.is_missing(obj));
+    auto pos = tracker.get_items().find(obj);
+    tracker.rm(pos);
+    EXPECT_FALSE(tracker.is_missing(obj));
+  }
+}
+
+// Covers both got() overloads. The version-taking overload is expected to
+// abort (death test) for an unknown object or for a version older than the
+// recorded need.
+TEST(pg_missing_t, got)
+{
+  // void pg_missing_t::got(const hobject_t& oid, eversion_t v)
+  {
+    hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+    pg_missing_t tracker;
+    // must die when the object was never recorded
+    {
+      PrCtl unset_dumpable;
+      EXPECT_DEATH(tracker.got(obj, eversion_t()), "");
+    }
+    EXPECT_FALSE(tracker.is_missing(obj));
+    epoch_t base_epoch = 10;
+    eversion_t wanted(base_epoch,10);
+    tracker.add(obj, wanted, eversion_t(), false);
+    EXPECT_TRUE(tracker.is_missing(obj));
+    // must die when the version passed in is lower than the version needed
+    {
+      PrCtl unset_dumpable;
+      EXPECT_DEATH(tracker.got(obj, eversion_t(base_epoch / 2,20)), "");
+    }
+    // a later version removes the object
+    tracker.got(obj, eversion_t(base_epoch * 2,20));
+    EXPECT_FALSE(tracker.is_missing(obj));
+  }
+  // void pg_missing_t::got(const std::map<hobject_t, pg_missing_item>::iterator &m)
+  {
+    hobject_t obj(object_t("objname"), "key", 123, 456, 0, "");
+    pg_missing_t tracker;
+    EXPECT_FALSE(tracker.is_missing(obj));
+    tracker.add(obj, eversion_t(), eversion_t(), false);
+    EXPECT_TRUE(tracker.is_missing(obj));
+    auto pos = tracker.get_items().find(obj);
+    tracker.got(pos);
+    EXPECT_FALSE(tracker.is_missing(obj));
+  }
+}
+
+// Two objects with hashes 1 and 2 are recorded as missing; after
+// split_into() for child seed 1 with one split bit, the hash-1 object is
+// expected in the child and the hash-2 object in the parent.
+TEST(pg_missing_t, split_into)
+{
+  uint32_t odd_hash = 1;
+  hobject_t odd_obj(object_t("objname"), "key1", 123, odd_hash, 0, "");
+  uint32_t even_hash = 2;
+  hobject_t even_obj(object_t("objname"), "key2", 123, even_hash, 0, "");
+
+  pg_missing_t parent_missing;
+  parent_missing.add(odd_obj, eversion_t(), eversion_t(), false);
+  parent_missing.add(even_obj, eversion_t(), eversion_t(), false);
+
+  pg_t child_pg;
+  child_pg.m_seed = 1;
+  pg_missing_t child_missing;
+  unsigned bits = 1;
+  parent_missing.split_into(child_pg, bits, &child_missing);
+
+  EXPECT_TRUE(child_missing.is_missing(odd_obj));
+  EXPECT_FALSE(child_missing.is_missing(even_obj));
+  EXPECT_FALSE(parent_missing.is_missing(odd_obj));
+  EXPECT_TRUE(parent_missing.is_missing(even_obj));
+}
+
+// Checks pg_pool_t::get_pg_num_divisor() for every pg of pool 1, first with
+// pg_num 16 and then with pg_num 12.
+TEST(pg_pool_t_test, get_pg_num_divisor) {
+  pg_pool_t pool;
+  pool.set_pg_num(16);
+  pool.set_pgp_num(16);
+
+  // with 16 pgs every pg is expected to report 16
+  for (int ps = 0; ps < 16; ++ps) {
+    ASSERT_EQ(16u, pool.get_pg_num_divisor(pg_t(ps, 1)));
+  }
+
+  pool.set_pg_num(12);
+  pool.set_pgp_num(12);
+
+  // with 12 pgs, seeds 4..7 are expected to report 8 and the rest 16
+  const unsigned expected[12] = {
+    16u, 16u, 16u, 16u,
+    8u, 8u, 8u, 8u,
+    16u, 16u, 16u, 16u,
+  };
+  for (int ps = 0; ps < 12; ++ps) {
+    ASSERT_EQ(expected[ps], pool.get_pg_num_divisor(pg_t(ps, 1)));
+  }
+}
+
+// For 100 randomly sized pools, picks a random pg, asks the pool for a
+// random position inside it and checks that raw_hash_to_pg() maps that
+// position back to the same placement seed. Seeded from the pid, so the
+// inputs differ between runs.
+TEST(pg_pool_t_test, get_random_pg_position) {
+  srand(getpid());
+  for (int round = 0; round < 100; ++round) {
+    pg_pool_t pool;
+    pool.set_pg_num(1 + (rand() % 1000));
+    pool.set_pgp_num(pool.get_pg_num());
+    pg_t picked(rand() % pool.get_pg_num(), 1);
+    uint32_t position = pool.get_random_pg_position(picked, rand());
+    uint32_t mapped_ps = pool.raw_hash_to_pg(position);
+    cout << pool.get_pg_num() << " " << picked << ": "
+         << position << " -> " << pg_t(mapped_ps, 1) << std::endl;
+    ASSERT_EQ(picked.ps(), mapped_ps);
+  }
+}
+
+// Streams a set of shard ids and checks the text produced, then checks that
+// shard 0 compares greater than NO_SHARD.
+TEST(shard_id_t, iostream) {
+  set<shard_id_t> ids;
+  for (int n = 0; n < 3; ++n) {
+    ids.insert(shard_id_t(n));
+  }
+  ostringstream rendered;
+  rendered << ids;
+  ASSERT_EQ(rendered.str(), "0,1,2");
+
+  shard_id_t none = shard_id_t::NO_SHARD;
+  shard_id_t first(0);
+  ASSERT_GT(first, none);
+}
+
+// Round trip: stringify() an spg_t, parse() the text back and compare, once
+// without a shard and once with shard 2.
+TEST(spg_t, parse) {
+  spg_t unsharded(pg_t(1,2), shard_id_t::NO_SHARD);
+  spg_t unsharded_back, sharded_back;
+  spg_t sharded(pg_t(3,2), shard_id_t(2));
+
+  std::string text = stringify(unsharded);
+  ASSERT_TRUE(unsharded_back.parse(text.c_str()));
+  ASSERT_EQ(unsharded, unsharded_back);
+
+  text = stringify(sharded);
+  ASSERT_TRUE(sharded_back.parse(text.c_str()));
+  ASSERT_EQ(sharded, sharded_back);
+}
+
+// coll_t::parse() must accept every name in `ok` (and to_str() must give the
+// same text back) and must reject every name in `bad`. Both lists end with
+// a null entry.
+TEST(coll_t, parse) {
+  const char *ok[] = {
+    "meta",
+    "1.2_head",
+    "1.2_TEMP",
+    "1.2s3_head",
+    "1.3s2_TEMP",
+    "1.2s0_head",
+    0
+  };
+  const char *bad[] = {
+    "foo",
+    "1.2_food",
+    "1.2_head ",
+    //" 1.2_head", // hrm, this parses, which is not ideal.. pg_t's fault?
+    "1.2_temp",
+    "1.2_HEAD",
+    "1.xS3_HEAD",
+    "1.2s_HEAD",
+    "1.2sfoo_HEAD",
+    0
+  };
+  coll_t a;
+  for (const char **name = ok; *name; ++name) {
+    cout << "check ok " << *name << std::endl;
+    ASSERT_TRUE(a.parse(*name));
+    ASSERT_EQ(string(*name), a.to_str());
+  }
+  for (const char **name = bad; *name; ++name) {
+    cout << "check bad " << *name << std::endl;
+    ASSERT_FALSE(a.parse(*name));
+  }
+}
+
+// The temp collection derived from a pg collection must be named
+// "<pgid>_TEMP", report is_temp(), and hand back the original pgid.
+TEST(coll_t, temp) {
+  spg_t original_pg;
+  coll_t head_coll(original_pg);
+  ASSERT_EQ(head_coll.to_str(), string("0.0_head"));
+
+  coll_t temp_coll = head_coll.get_temp();
+  ASSERT_EQ(temp_coll.to_str(), string("0.0_TEMP"));
+
+  spg_t recovered_pg;
+  ASSERT_TRUE(temp_coll.is_temp());
+  ASSERT_TRUE(temp_coll.is_temp(&recovered_pg));
+  ASSERT_EQ(original_pg, recovered_pg);
+}
+
+// Chained assignment must copy the name into each target, and each coll_t
+// must end up with its own c_str() buffer.
+TEST(coll_t, assigment) {
+  spg_t pg;
+  coll_t source(pg);
+  ASSERT_EQ(source.to_str(), string("0.0_head"));
+
+  coll_t outer, inner;
+
+  // default-constructed collections are named "meta"
+  ASSERT_EQ(outer.to_str(), string("meta"));
+  ASSERT_EQ(inner.to_str(), string("meta"));
+
+  outer = inner = source;
+
+  ASSERT_EQ(outer.to_str(), string("0.0_head"));
+  ASSERT_EQ(inner.to_str(), string("0.0_head"));
+
+  // pointer comparison: no two objects may share the same buffer
+  ASSERT_NE(inner.c_str(), source.c_str());
+  ASSERT_NE(outer.c_str(), inner.c_str());
+}
+
+// Every string in the null-terminated list must parse into an hobject_t and
+// stringify() back to exactly the same text.
+TEST(hobject_t, parse) {
+  const char *v[] = {
+    "MIN",
+    "MAX",
+    "-1:60c2fa6d:::inc_osdmap.1:0",
+    "-1:60c2fa6d:::inc_osdmap.1:333",
+    "0:00000000::::head",
+    "1:00000000:nspace:key:obj:head",
+    "-40:00000000:nspace::obj:head",
+    "20:00000000::key:obj:head",
+    "20:00000000:::o%fdj:head",
+    "20:00000000:::o%02fdj:head",
+    "20:00000000:::_zero_%00_:head",
+    NULL
+  };
+
+  for (const char **cur = v; *cur; ++cur) {
+    hobject_t o;
+    bool parsed = o.parse(*cur);
+    if (!parsed) {
+      cout << "failed to parse " << *cur << std::endl;
+      ASSERT_TRUE(false);
+    }
+    string round_trip = stringify(o);
+    if (round_trip != *cur) {
+      // print the mismatch before the assertion reports it
+      cout << *cur << " -> " << o << " -> " << round_trip << std::endl;
+      ASSERT_EQ(round_trip, string(*cur));
+    }
+  }
+}
+
+// Ordering checks on ghobject_t: a default-constructed object must sort
+// before a shard-1 / pool -1 separator, and an object in pool 1 must sort
+// after that separator once its shard is reset to NO_SHARD.
+TEST(ghobject_t, cmp) {
+  ghobject_t lowest;
+  ghobject_t separator;
+  separator.set_shard(shard_id_t(1));
+  separator.hobj.pool = -1;
+  cout << lowest << " < " << separator << std::endl;
+  ASSERT_TRUE(lowest < separator);
+
+  separator.set_shard(shard_id_t::NO_SHARD);
+  cout << "sep shard " << separator.shard_id << std::endl;
+  hobject_t pool1_head(object_t(), string(), CEPH_NOSNAP, 0x42,
+                       1, string());
+  ghobject_t o(pool1_head);
+  cout << "o " << o << std::endl;
+  ASSERT_TRUE(o > separator);
+}
+
+// Every string in the null-terminated list must parse into a ghobject_t and
+// stringify() back to exactly the same text.
+TEST(ghobject_t, parse) {
+  const char *v[] = {
+    "GHMIN",
+    "GHMAX",
+    "13#0:00000000::::head#",
+    "13#0:00000000::::head#deadbeef",
+    "#-1:60c2fa6d:::inc_osdmap.1:333#deadbeef",
+    "#-1:60c2fa6d:::inc%02osdmap.1:333#deadbeef",
+    "#-1:60c2fa6d:::inc_osdmap.1:333#",
+    "1#MIN#deadbeefff",
+    "1#MAX#",
+    "#MAX#123",
+    "#-40:00000000:nspace::obj:head#",
+    NULL
+  };
+
+  for (const char **cur = v; *cur; ++cur) {
+    ghobject_t o;
+    bool parsed = o.parse(*cur);
+    if (!parsed) {
+      cout << "failed to parse " << *cur << std::endl;
+      ASSERT_TRUE(false);
+    }
+    string round_trip = stringify(o);
+    if (round_trip != *cur) {
+      // print the mismatch before the assertion reports it
+      cout << *cur << " -> " << o << " -> " << round_trip << std::endl;
+      ASSERT_EQ(round_trip, string(*cur));
+    }
+  }
+}
+
+// An unknown option name must not be recognised, and asking for its
+// descriptor is expected to abort (death test).
+TEST(pool_opts_t, invalid_opt) {
+  const bool known = pool_opts_t::is_opt_name("INVALID_OPT");
+  EXPECT_FALSE(known);
+  PrCtl unset_dumpable;
+  EXPECT_DEATH(pool_opts_t::get_opt_desc("INVALID_OPT"), "");
+}
+
+// scrub_min_interval: name lookup, descriptor (DOUBLE), and the
+// unset -> set -> unset life cycle. get() without an output pointer is
+// expected to abort while the option is unset.
+TEST(pool_opts_t, scrub_min_interval) {
+  const auto key = pool_opts_t::SCRUB_MIN_INTERVAL;
+
+  EXPECT_TRUE(pool_opts_t::is_opt_name("scrub_min_interval"));
+  EXPECT_EQ(pool_opts_t::get_opt_desc("scrub_min_interval"),
+            pool_opts_t::opt_desc_t(key, pool_opts_t::DOUBLE));
+
+  pool_opts_t opts;
+  EXPECT_FALSE(opts.is_set(key));
+  {
+    PrCtl unset_dumpable;
+    EXPECT_DEATH(opts.get(key), "");
+  }
+  double stored;
+  EXPECT_FALSE(opts.get(key, &stored));
+  opts.set(key, static_cast<double>(2015));
+  EXPECT_TRUE(opts.get(key, &stored));
+  EXPECT_EQ(stored, 2015);
+  opts.unset(key);
+  EXPECT_FALSE(opts.is_set(key));
+}
+
+// scrub_max_interval: name lookup, descriptor (DOUBLE), and the
+// unset -> set -> unset life cycle. get() without an output pointer is
+// expected to abort while the option is unset.
+TEST(pool_opts_t, scrub_max_interval) {
+  const auto key = pool_opts_t::SCRUB_MAX_INTERVAL;
+
+  EXPECT_TRUE(pool_opts_t::is_opt_name("scrub_max_interval"));
+  EXPECT_EQ(pool_opts_t::get_opt_desc("scrub_max_interval"),
+            pool_opts_t::opt_desc_t(key, pool_opts_t::DOUBLE));
+
+  pool_opts_t opts;
+  EXPECT_FALSE(opts.is_set(key));
+  {
+    PrCtl unset_dumpable;
+    EXPECT_DEATH(opts.get(key), "");
+  }
+  double stored;
+  EXPECT_FALSE(opts.get(key, &stored));
+  opts.set(key, static_cast<double>(2015));
+  EXPECT_TRUE(opts.get(key, &stored));
+  EXPECT_EQ(stored, 2015);
+  opts.unset(key);
+  EXPECT_FALSE(opts.is_set(key));
+}
+
+// deep_scrub_interval: name lookup, descriptor (DOUBLE), and the
+// unset -> set -> unset life cycle. get() without an output pointer is
+// expected to abort while the option is unset.
+TEST(pool_opts_t, deep_scrub_interval) {
+  const auto key = pool_opts_t::DEEP_SCRUB_INTERVAL;
+
+  EXPECT_TRUE(pool_opts_t::is_opt_name("deep_scrub_interval"));
+  EXPECT_EQ(pool_opts_t::get_opt_desc("deep_scrub_interval"),
+            pool_opts_t::opt_desc_t(key, pool_opts_t::DOUBLE));
+
+  pool_opts_t opts;
+  EXPECT_FALSE(opts.is_set(key));
+  {
+    PrCtl unset_dumpable;
+    EXPECT_DEATH(opts.get(key), "");
+  }
+  double stored;
+  EXPECT_FALSE(opts.get(key, &stored));
+  opts.set(key, static_cast<double>(2015));
+  EXPECT_TRUE(opts.get(key, &stored));
+  EXPECT_EQ(stored, 2015);
+  opts.unset(key);
+  EXPECT_FALSE(opts.is_set(key));
+}
+
+// Recoverability predicate for the tests: true once the set of shards
+// passed in has at least `required_size` members.
+struct RequiredPredicate : IsPGRecoverablePredicate {
+  unsigned required_size;
+
+  explicit RequiredPredicate(unsigned minimum) : required_size(minimum) {}
+
+  bool operator()(const set<pg_shard_t> &have) const override {
+    const bool too_few = have.size() < required_size;
+    return !too_few;
+  }
+};
+
+using namespace std;
+// Test stand-in for an osd state lookup: answers from a fixed
+// osd id -> (state, lost_at epoch) table built at construction.
+struct MapPredicate {
+  map<int, pair<PastIntervals::osd_state_t, epoch_t>> states;
+
+  explicit MapPredicate(
+    const vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> &_states)
+    : states(_states.begin(), _states.end()) {}
+
+  // `start` is ignored. An osd missing from the table makes map::at() throw.
+  // When `lost_at` is non-null it receives the epoch stored for the osd.
+  PastIntervals::osd_state_t operator()(epoch_t start, int osd, epoch_t *lost_at) {
+    const auto &entry = states.at(osd);
+    if (lost_at != nullptr) {
+      *lost_at = entry.second;
+    }
+    return entry.first;
+  }
+};
+
+// Short aliases that keep the PastIntervals test tables in this file compact.
+using sit = shard_id_t;
+using PI = PastIntervals;
+using pst = pg_shard_t;
+using ival = PastIntervals::pg_interval_t;
+using ivallst = std::list<ival>;
+// Marks an empty slot in the up/acting vectors of the EC pool test cases.
+const int N = 0x7fffffff /* CRUSH_ITEM_NONE, can't import crush.h here */;
+
+// Fixture for the PastIntervals prior-set tests.
+struct PITest : ::testing::Test {
+  PITest() {}
+
+  // Builds the expected PriorSet directly from probe / down / blocked_by /
+  // pg_down, then feeds `intervals` into a PastIntervals, asks it for the
+  // prior set given the osd states and the current up/acting sets, and
+  // asserts that both are equal.
+  void run(
+    bool ec_pool,
+    ivallst intervals,
+    epoch_t last_epoch_started,
+    unsigned min_to_peer,
+    vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> osd_states,
+    vector<int> up,
+    vector<int> acting,
+    set<pg_shard_t> probe,
+    set<int> down,
+    map<int, epoch_t> blocked_by,
+    bool pg_down) {
+    RequiredPredicate recoverable(min_to_peer);
+    MapPredicate osd_lookup(osd_states);
+
+    // each PriorSet is handed its own heap copy of the predicate
+    PI::PriorSet expected(
+      ec_pool,
+      probe,
+      down,
+      blocked_by,
+      pg_down,
+      new RequiredPredicate(recoverable));
+
+    PastIntervals history;
+    for (auto &interval : intervals) {
+      history.add_interval(ec_pool, interval);
+    }
+
+    PI::PriorSet actual = history.get_prior_set(
+      ec_pool,
+      last_epoch_started,
+      new RequiredPredicate(recoverable),
+      osd_lookup,
+      up,
+      acting,
+      nullptr);
+    ASSERT_EQ(expected, actual);
+  }
+};
+
+// Replicated pool, four past intervals, osd 2 down at the end: the expected
+// prior set probes osds 0 and 1, lists osd 2 as down, and the pg is not down.
+TEST_F(PITest, past_intervals_rep) {
+  const bool ec_pool = false;
+  ivallst intervals{
+    ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0},
+    ival{{1, 2}, {1, 2}, 21, 30, true, 1, 1},
+    ival{{2}, {2}, 31, 35, false, 2, 2},
+    ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0},
+  };
+  const epoch_t last_epoch_started = 5;
+  const unsigned min_to_peer = 1;
+  // osd states at end
+  vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> osd_states{
+    make_pair(0, make_pair(PI::UP, 0)),
+    make_pair(1, make_pair(PI::UP, 0)),
+    make_pair(2, make_pair(PI::DOWN, 0)),
+  };
+  vector<int> up{0, 1};
+  vector<int> acting{0, 1};
+  set<pg_shard_t> probe{pst(0), pst(1)};
+  set<int> down{2};
+  map<int, epoch_t> blocked_by;
+  const bool pg_down = false;
+
+  run(ec_pool, intervals, last_epoch_started, min_to_peer, osd_states,
+      up, acting, probe, down, blocked_by, pg_down);
+}
+
+// EC pool, two past intervals, osd 0 down at the end: the expected prior set
+// probes shards 1 and 2 on osds 1 and 2, lists osd 0 as down, and the pg is
+// not down.
+TEST_F(PITest, past_intervals_ec) {
+  const bool ec_pool = true;
+  ivallst intervals{
+    ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0},
+    ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1},
+  };
+  const epoch_t last_epoch_started = 5;
+  const unsigned min_to_peer = 2;
+  // osd states at end
+  vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> osd_states{
+    make_pair(0, make_pair(PI::DOWN, 0)),
+    make_pair(1, make_pair(PI::UP, 0)),
+    make_pair(2, make_pair(PI::UP, 0)),
+  };
+  vector<int> up{N, 1, 2};
+  vector<int> acting{N, 1, 2};
+  set<pg_shard_t> probe{pst(1, sit(1)), pst(2, sit(2))};
+  set<int> down{0};
+  map<int, epoch_t> blocked_by;
+  const bool pg_down = false;
+
+  run(ec_pool, intervals, last_epoch_started, min_to_peer, osd_states,
+      up, acting, probe, down, blocked_by, pg_down);
+}
+
+TEST_F(PITest, past_intervals_rep_down) { // replicated pool; osd 2 DOWN, expects pg_down with blocked_by {2}
+ run(
+ /* ec_pool */ false,
+ /* intervals */
+ { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
+ , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
+ , ival{{ 2}, { 2}, 31, 35, true, 2, 2}
+ , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
+ },
+ /* les */ 5,
+ /* min_peer */ 1,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::UP , 0))
+ , make_pair(2, make_pair(PI::DOWN , 0))
+ },
+ /* up */ {0, 1 },
+ /* acting */ {0, 1 },
+ /* probe */ {pst(0), pst(1)},
+ /* down */ {2},
+ /* blocked_by */ {{2, 0}},
+ /* pg_down */ true);
+}
+
+TEST_F(PITest, past_intervals_ec_down) { // EC pool; osd 1 DOWN, expects pg_down with blocked_by {1}
+ run(
+ /* ec_pool */ true,
+ /* intervals */
+ { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
+ , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1}
+ , ival{{N, N, 2}, {N, N, 2}, 31, 35, false, 2, 2}
+ },
+ /* les */ 5,
+ /* min_peer */ 2,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::DOWN , 0))
+ , make_pair(2, make_pair(PI::UP , 0))
+ },
+ /* up */ {0, N, 2},
+ /* acting */ {0, N, 2},
+ /* probe */ {pst(0, sit(0)), pst(2, sit(2))},
+ /* down */ {1},
+ /* blocked_by */ {{1, 0}},
+ /* pg_down */ true);
+}
+
+TEST_F(PITest, past_intervals_rep_no_subsets) { // replicated pool; osd 2 DOWN, expects pg_down == false
+ run(
+ /* ec_pool */ false,
+ /* intervals */
+ { ival{{0, 2}, {0, 2}, 10, 20, true, 0, 0}
+ , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
+ , ival{{0, 1 }, {0, 1 }, 31, 35, true, 0, 0}
+ },
+ /* les */ 5,
+ /* min_peer */ 1,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::UP , 0))
+ , make_pair(2, make_pair(PI::DOWN , 0))
+ },
+ /* up */ {0, 1 },
+ /* acting */ {0, 1 },
+ /* probe */ {pst(0), pst(1)},
+ /* down */ {2},
+ /* blocked_by */ {},
+ /* pg_down */ false);
+}
+
+TEST_F(PITest, past_intervals_ec_no_subsets) { // EC pool; osd 1 DOWN, expects pg_down with blocked_by {1}
+ run(
+ /* ec_pool */ true,
+ /* intervals */
+ { ival{{0, N, 2}, {0, N, 2}, 10, 20, true, 0, 0}
+ , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1}
+ , ival{{0, 1, N}, {0, 1, N}, 31, 35, true, 0, 0}
+ },
+ /* les */ 5,
+ /* min_peer */ 2,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::DOWN , 0))
+ , make_pair(2, make_pair(PI::UP , 0))
+ },
+ /* up */ {0, N, 2},
+ /* acting */ {0, N, 2},
+ /* probe */ {pst(0, sit(0)), pst(2, sit(2))},
+ /* down */ {1},
+ /* blocked_by */ {{1, 0}},
+ /* pg_down */ true);
+}
+
+TEST_F(PITest, past_intervals_ec_no_subsets2) { // EC pool, les 31; osd 1 DOWN, expects pg_down == false
+ run(
+ /* ec_pool */ true,
+ /* intervals */
+ { ival{{N, 1, 2}, {N, 1, 2}, 10, 20, true, 0, 0}
+ , ival{{0, N, 2}, {0, N, 2}, 21, 30, true, 1, 1}
+ , ival{{0, 3, N}, {0, 3, N}, 31, 35, true, 0, 0}
+ },
+ /* les */ 31,
+ /* min_peer */ 2,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::DOWN , 0))
+ , make_pair(2, make_pair(PI::UP , 0))
+ , make_pair(3, make_pair(PI::UP , 0))
+ },
+ /* up */ {0, N, 2},
+ /* acting */ {0, N, 2},
+ /* probe */ {pst(0, sit(0)), pst(2, sit(2)), pst(3, sit(1))},
+ /* down */ {1},
+ /* blocked_by */ {},
+ /* pg_down */ false);
+}
+
+TEST_F(PITest, past_intervals_rep_lost) { // replicated pool; osd 2 LOST with lost_at 55, expects pg_down == false
+ run(
+ /* ec_pool */ false,
+ /* intervals */
+ { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0}
+ , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1}
+ , ival{{ 2}, { 2}, 31, 35, true, 2, 2}
+ , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0}
+ },
+ /* les */ 5,
+ /* min_peer */ 1,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::UP , 0))
+ , make_pair(2, make_pair(PI::LOST , 55))
+ },
+ /* up */ {0, 1 },
+ /* acting */ {0, 1 },
+ /* probe */ {pst(0), pst(1)},
+ /* down */ {2},
+ /* blocked_by */ {},
+ /* pg_down */ false);
+}
+
+TEST_F(PITest, past_intervals_ec_lost) { // EC pool; osd 1 LOST with lost_at 36, expects pg_down == false
+ run(
+ /* ec_pool */ true,
+ /* intervals */
+ { ival{{0, N, 2}, {0, N, 2}, 10, 20, true, 0, 0}
+ , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1}
+ , ival{{0, 1, N}, {0, 1, N}, 31, 35, true, 0, 0}
+ },
+ /* les */ 5,
+ /* min_peer */ 2,
+ /* osd states at end */
+ { make_pair(0, make_pair(PI::UP , 0))
+ , make_pair(1, make_pair(PI::LOST , 36))
+ , make_pair(2, make_pair(PI::UP , 0))
+ },
+ /* up */ {0, N, 2},
+ /* acting */ {0, N, 2},
+ /* probe */ {pst(0, sit(0)), pst(2, sit(2))},
+ /* down */ {1},
+ /* blocked_by */ {},
+ /* pg_down */ false);
+}
+
+
+/*
+ * Local Variables:
+ * compile-command: "cd ../.. ;
+ * make unittest_osd_types ;
+ * ./unittest_osd_types # --gtest_filter=pg_missing_t.constructor
+ * "
+ * End:
+ */
diff --git a/src/test/osdc/CMakeLists.txt b/src/test/osdc/CMakeLists.txt
new file mode 100644
index 00000000..297c2672
--- /dev/null
+++ b/src/test/osdc/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_executable(ceph_test_objectcacher_stress
+ object_cacher_stress.cc # test driver (main, stress_test, correctness_test)
+ FakeWriteback.cc # writeback stub used by stress_test
+ MemWriteback.cc # in-memory writeback used by correctness_test
+ )
+target_link_libraries(ceph_test_objectcacher_stress
+ osdc
+ global
+ ${EXTRALIBS}
+ ${CMAKE_DL_LIBS}
+ )
+install(TARGETS ceph_test_objectcacher_stress
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/src/test/osdc/FakeWriteback.cc b/src/test/osdc/FakeWriteback.cc
new file mode 100644
index 00000000..56d208b7
--- /dev/null
+++ b/src/test/osdc/FakeWriteback.cc
@@ -0,0 +1,94 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <time.h>
+
+#include <thread>
+#include "common/debug.h"
+#include "common/Cond.h"
+#include "common/Finisher.h"
+#include "common/Mutex.h"
+#include "include/ceph_assert.h"
+#include "common/ceph_time.h"
+
+#include "FakeWriteback.h"
+
+#define dout_subsys ceph_subsys_objectcacher
+#undef dout_prefix
+#define dout_prefix *_dout << "FakeWriteback(" << this << ") "
+
+// Queued on FakeWriteback's Finisher: waits m_delay, fakes read data, then completes m_con under m_lock.
+class C_Delay : public Context {
+  CephContext *m_cct;
+  Context *m_con;
+  ceph::timespan m_delay;
+  Mutex *m_lock;
+  bufferlist *m_bl;  // destination for reads; write() passes NULL here
+  uint64_t m_off;    // only used in the log line below
+public:
+  C_Delay(CephContext *cct, Context *c, Mutex *lock, uint64_t off,
+          bufferlist *pbl, uint64_t delay_ns=0)
+    : m_cct(cct), m_con(c), m_delay(delay_ns * std::chrono::nanoseconds(1)),
+      m_lock(lock), m_bl(pbl), m_off(off) {}
+  void finish(int r) override {
+    std::this_thread::sleep_for(m_delay);
+    if (m_bl != nullptr) {  // read: the "data" is simply r zero bytes
+      buffer::ptr zeros(r);
+      zeros.zero();
+      m_bl->append(zeros);
+      ldout(m_cct, 20) << "finished read " << m_off << "~" << r << dendl;
+    }
+    m_lock->Lock();
+    m_con->complete(r);
+    m_lock->Unlock();
+  }
+};
+
+FakeWriteback::FakeWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns)
+  : m_cct(cct), m_lock(lock), m_delay_ns(delay_ns),
+    m_finisher(new Finisher(cct))
+{
+  m_finisher->start();  // read()/write() queue their delayed completions on it
+}
+
+FakeWriteback::~FakeWriteback()
+{
+ m_finisher->stop(); // stop the finisher before freeing it
+ delete m_finisher; // allocated with new in the constructor
+}
+
+// No backing store is consulted: a C_Delay is queued with len as its completion value.
+void FakeWriteback::read(const object_t& oid, uint64_t object_no,
+                         const object_locator_t& oloc,
+                         uint64_t off, uint64_t len, snapid_t snapid,
+                         bufferlist *pbl, uint64_t trunc_size,
+                         __u32 trunc_seq, int op_flags,
+                         const ZTracer::Trace &parent_trace,
+                         Context *onfinish)
+{
+  m_finisher->queue(new C_Delay(m_cct, onfinish, m_lock, off, pbl, m_delay_ns),
+                    len);
+}
+
+// The data in bl is ignored: a buffer-less C_Delay is queued and the next tid is returned.
+ceph_tid_t FakeWriteback::write(const object_t& oid,
+                                const object_locator_t& oloc,
+                                uint64_t off, uint64_t len,
+                                const SnapContext& snapc,
+                                const bufferlist &bl, ceph::real_time mtime,
+                                uint64_t trunc_size, __u32 trunc_seq,
+                                ceph_tid_t journal_tid,
+                                const ZTracer::Trace &parent_trace,
+                                Context *oncommit)
+{
+  m_finisher->queue(new C_Delay(m_cct, oncommit, m_lock, off, nullptr,
+                                m_delay_ns), 0);
+  return ++m_tid;
+}
+
+bool FakeWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t,
+ snapid_t)
+{
+ return false; // unconditionally false; all arguments are ignored
+}
diff --git a/src/test/osdc/FakeWriteback.h b/src/test/osdc/FakeWriteback.h
new file mode 100644
index 00000000..0d3705c8
--- /dev/null
+++ b/src/test/osdc/FakeWriteback.h
@@ -0,0 +1,48 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_TEST_OSDC_FAKEWRITEBACK_H
+#define CEPH_TEST_OSDC_FAKEWRITEBACK_H
+
+#include "include/Context.h"
+#include "include/types.h"
+#include "osd/osd_types.h"
+#include "osdc/WritebackHandler.h"
+
+#include <atomic>
+
+class Finisher;
+class Mutex;
+
+class FakeWriteback : public WritebackHandler { // WritebackHandler stub; behaviour is defined in FakeWriteback.cc
+public:
+ FakeWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns);
+ ~FakeWriteback() override;
+
+ void read(const object_t& oid, uint64_t object_no,
+ const object_locator_t& oloc, uint64_t off, uint64_t len,
+ snapid_t snapid, bufferlist *pbl, uint64_t trunc_size,
+ __u32 trunc_seq, int op_flags,
+ const ZTracer::Trace &parent_trace,
+ Context *onfinish) override;
+
+ ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len,
+ const SnapContext& snapc, const bufferlist &bl,
+ ceph::real_time mtime, uint64_t trunc_size,
+ __u32 trunc_seq, ceph_tid_t journal_tid,
+ const ZTracer::Trace &parent_trace,
+ Context *oncommit) override;
+
+ using WritebackHandler::write;
+
+ bool may_copy_on_write(const object_t&, uint64_t, uint64_t,
+ snapid_t) override;
+private:
+ CephContext *m_cct;
+ Mutex *m_lock; // held while completion contexts are invoked
+ uint64_t m_delay_ns; // delay applied before each completion, in nanoseconds
+ std::atomic<unsigned> m_tid = { 0 }; // incremented and returned by write()
+ Finisher *m_finisher; // created in the constructor, deleted in the destructor
+};
+
+#endif
diff --git a/src/test/osdc/MemWriteback.cc b/src/test/osdc/MemWriteback.cc
new file mode 100644
index 00000000..32d3a38f
--- /dev/null
+++ b/src/test/osdc/MemWriteback.cc
@@ -0,0 +1,168 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <time.h>
+
+#include <thread>
+#include "common/debug.h"
+#include "common/Cond.h"
+#include "common/Finisher.h"
+#include "common/Mutex.h"
+#include "include/ceph_assert.h"
+#include "common/ceph_time.h"
+
+#include "MemWriteback.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_objectcacher
+#undef dout_prefix
+#define dout_prefix *_dout << "MemWriteback(" << this << ") "
+
+// Queued by MemWriteback::read(): after m_delay, reads the stored bytes into *m_bl and completes m_con.
+class C_DelayRead : public Context {
+  MemWriteback *wb;
+  CephContext *m_cct;
+  Context *m_con;  // optional; skipped in finish() when null
+  ceph::timespan m_delay;
+  Mutex *m_lock;
+  object_t m_oid;
+  uint64_t m_off;
+  uint64_t m_len;
+  bufferlist *m_bl;
+public:
+  C_DelayRead(MemWriteback *mwb, CephContext *cct, Context *c, Mutex *lock,
+              const object_t& oid, uint64_t off, uint64_t len, bufferlist *pbl,
+              uint64_t delay_ns=0)
+    : wb(mwb), m_cct(cct), m_con(c),
+      m_delay(delay_ns * std::chrono::nanoseconds(1)),
+      m_lock(lock), m_oid(oid), m_off(off), m_len(len), m_bl(pbl) {}
+  void finish(int r) override {
+    std::this_thread::sleep_for(m_delay);
+    m_lock->Lock();
+    // the queued r is discarded; the store's return code is what gets reported
+    const int result = wb->read_object_data(m_oid, m_off, m_len, m_bl);
+    if (m_con != nullptr) m_con->complete(result);
+    m_lock->Unlock();
+  }
+};
+
+// Queued by MemWriteback::write(): after m_delay, stores the data in the MemWriteback and completes m_con.
+class C_DelayWrite : public Context {
+  MemWriteback *wb;
+  CephContext *m_cct;
+  Context *m_con;  // optional; skipped in finish() when null
+  ceph::timespan m_delay;
+  Mutex *m_lock;
+  object_t m_oid;
+  uint64_t m_off, m_len;
+  // Held by value: finish() runs later from the Finisher, so a reference to the caller's list could dangle or change.
+  bufferlist m_bl;
+public:
+  C_DelayWrite(MemWriteback *mwb, CephContext *cct, Context *c, Mutex *lock,
+               const object_t& oid, uint64_t off, uint64_t len,
+               const bufferlist& bl, uint64_t delay_ns=0)
+    : wb(mwb), m_cct(cct), m_con(c),
+      m_delay(delay_ns * std::chrono::nanoseconds(1)),
+      m_lock(lock), m_oid(oid), m_off(off), m_len(len), m_bl(bl) {}
+  void finish(int r) override {
+    std::this_thread::sleep_for(m_delay);
+    m_lock->Lock();
+    wb->write_object_data(m_oid, m_off, m_len, m_bl);
+    if (m_con)
+      m_con->complete(r);
+    m_lock->Unlock();
+  }
+};
+
+MemWriteback::MemWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns)
+  : m_cct(cct), m_lock(lock), m_delay_ns(delay_ns),
+    m_finisher(new Finisher(cct))
+{
+  m_finisher->start();  // read()/write() queue their delayed operations on it
+}
+
+MemWriteback::~MemWriteback()
+{
+ m_finisher->stop(); // stop the finisher before freeing it
+ delete m_finisher; // allocated with new in the constructor
+}
+
+// Queues a C_DelayRead that later copies the stored bytes into *pbl; only CEPH_NOSNAP is accepted.
+void MemWriteback::read(const object_t& oid, uint64_t object_no,
+                        const object_locator_t& oloc,
+                        uint64_t off, uint64_t len, snapid_t snapid,
+                        bufferlist *pbl, uint64_t trunc_size,
+                        __u32 trunc_seq, int op_flags,
+                        const ZTracer::Trace &parent_trace,
+                        Context *onfinish)
+{
+  ceph_assert(snapid == CEPH_NOSNAP);
+  m_finisher->queue(new C_DelayRead(this, m_cct, onfinish, m_lock, oid, off, len,
+                                    pbl, m_delay_ns), len);
+}
+
+// Queues a C_DelayWrite that later stores bl in memory and returns the next tid; requires snapc.seq == 0.
+ceph_tid_t MemWriteback::write(const object_t& oid,
+                               const object_locator_t& oloc,
+                               uint64_t off, uint64_t len,
+                               const SnapContext& snapc,
+                               const bufferlist &bl, ceph::real_time mtime,
+                               uint64_t trunc_size, __u32 trunc_seq,
+                               ceph_tid_t journal_tid,
+                               const ZTracer::Trace &parent_trace,
+                               Context *oncommit)
+{
+  ceph_assert(snapc.seq == 0);
+  m_finisher->queue(new C_DelayWrite(this, m_cct, oncommit, m_lock, oid, off, len,
+                                     bl, m_delay_ns), 0);
+  return ++m_tid;
+}
+
+// Overwrite bytes [off, off+len) of oid with data_bl, zero-extending the
+// stored object first when the write reaches past its current end.
+void MemWriteback::write_object_data(const object_t& oid, uint64_t off, uint64_t len,
+                                     const bufferlist& data_bl)
+{
+  dout(1) << "writing " << oid << " " << off << "~" << len << dendl;
+  ceph_assert(len == data_bl.length());
+  const uint64_t end = off + len;
+  // operator[] yields an empty list for an object seen for the first time
+  bufferlist& stored = object_data[oid];
+  if (end > stored.length()) {
+    stored.append_zero(end - stored.length());
+  }
+  // rebuild as: old head + new data + old tail, then swap into place
+  bufferlist rebuilt, tail;
+  rebuilt.substr_of(stored, 0, off);
+  rebuilt.append(data_bl);
+  tail.substr_of(stored, end, stored.length() - end);
+  rebuilt.append(tail);
+  stored.swap(rebuilt);
+  // `stored` now refers to the rebuilt contents
+  dout(1) << oid << " final size " << stored.length() << dendl;
+}
+
+// Copy up to len bytes of oid, starting at off, into *data_bl. Returns -ENOENT if oid
+// was never written, else 0; a read starting past the end yields an empty *data_bl.
+int MemWriteback::read_object_data(const object_t& oid, uint64_t off, uint64_t len,
+                                   bufferlist *data_bl)
+{
+  dout(1) << "reading " << oid << " " << off << "~" << len << dendl;
+  auto obj_i = object_data.find(oid);
+  if (obj_i == object_data.end()) {
+    dout(1) << oid << "DNE!" << dendl;
+    return -ENOENT;
+  }
+  const bufferlist& obj_bl = obj_i->second;
+  dout(1) << "reading " << oid << " from total size " << obj_bl.length() << dendl;
+  const uint64_t obj_len = obj_bl.length(), start = std::min(off, obj_len); // clamp: length-off would underflow when off > length
+  data_bl->substr_of(obj_bl, start, std::min(len, obj_len - start));
+  return 0;
+}
+
+bool MemWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t,
+ snapid_t)
+{
+ return false; // unconditionally false; all arguments are ignored
+}
diff --git a/src/test/osdc/MemWriteback.h b/src/test/osdc/MemWriteback.h
new file mode 100644
index 00000000..84ea78f5
--- /dev/null
+++ b/src/test/osdc/MemWriteback.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_TEST_OSDC_MEMWRITEBACK_H
+#define CEPH_TEST_OSDC_MEMWRITEBACK_H
+
+#include "include/Context.h"
+#include "include/types.h"
+#include "osd/osd_types.h"
+#include "osdc/WritebackHandler.h"
+
+#include <atomic>
+
+class Finisher;
+class Mutex;
+
+class MemWriteback : public WritebackHandler { // WritebackHandler that keeps object contents in a std::map; see MemWriteback.cc
+public:
+ MemWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns);
+ ~MemWriteback() override;
+
+ void read(const object_t& oid, uint64_t object_no,
+ const object_locator_t& oloc, uint64_t off, uint64_t len,
+ snapid_t snapid, bufferlist *pbl, uint64_t trunc_size,
+ __u32 trunc_seq, int op_flags,
+ const ZTracer::Trace &parent_trace,
+ Context *onfinish) override;
+
+ ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,
+ uint64_t off, uint64_t len,
+ const SnapContext& snapc, const bufferlist &bl,
+ ceph::real_time mtime, uint64_t trunc_size,
+ __u32 trunc_seq, ceph_tid_t journal_tid,
+ const ZTracer::Trace &parent_trace,
+ Context *oncommit) override;
+
+ using WritebackHandler::write;
+
+ bool may_copy_on_write(const object_t&, uint64_t, uint64_t,
+ snapid_t) override;
+ void write_object_data(const object_t& oid, uint64_t off, uint64_t len,
+ const bufferlist& data_bl); // called by C_DelayWrite::finish() with m_lock held
+ int read_object_data(const object_t& oid, uint64_t off, uint64_t len,
+ bufferlist *data_bl); // called by C_DelayRead::finish() with m_lock held; -ENOENT for unknown oid
+private:
+ std::map<object_t, bufferlist> object_data; // full contents of every object written so far
+ CephContext *m_cct;
+ Mutex *m_lock; // held while data is applied and completions are invoked
+ uint64_t m_delay_ns; // delay applied before each operation, in nanoseconds
+ std::atomic<unsigned> m_tid = { 0 }; // incremented and returned by write()
+ Finisher *m_finisher; // created in the constructor, deleted in the destructor
+};
+
+#endif
diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc
new file mode 100644
index 00000000..cd65e10a
--- /dev/null
+++ b/src/test/osdc/object_cacher_stress.cc
@@ -0,0 +1,426 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <cstdlib>
+#include <ctime>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <boost/scoped_ptr.hpp>
+
+#include "common/ceph_argparse.h"
+#include "common/common_init.h"
+#include "common/config.h"
+#include "common/Mutex.h"
+#include "common/snap_types.h"
+#include "global/global_init.h"
+#include "include/buffer.h"
+#include "include/Context.h"
+#include "include/stringify.h"
+#include "osdc/ObjectCacher.h"
+
+#include "FakeWriteback.h"
+#include "MemWriteback.h"
+
+#include <atomic>
+
+// XXX: Only tests default namespace
+struct op_data {  // one generated stress-test operation plus its completion state
+  ObjectExtent extent;
+  bool is_read;
+  ceph::bufferlist result;         // handed to prepare_read() for read ops
+  std::atomic<unsigned> done{0};   // incremented by C_Count::finish()
+
+  op_data(const std::string &oid, uint64_t offset, uint64_t len, bool read)
+    : extent(oid, 0, offset, len, 0), is_read(read)
+  {
+    extent.buffer_extents.push_back(make_pair(0, len));  // single buffer extent (0, len)
+    extent.oloc.pool = 0;
+  }
+};
+
+// Read completion for stress_test: bumps the op's done counter and drops the outstanding-read count.
+class C_Count : public Context {
+  op_data *m_op;
+  std::atomic<unsigned> *m_outstanding = nullptr;
+public:
+  C_Count(op_data *op, std::atomic<unsigned> *outstanding) : m_op(op), m_outstanding(outstanding) {}
+  void finish(int r) override {
+    ++m_op->done;
+    ceph_assert(*m_outstanding > 0);
+    --(*m_outstanding);
+  }
+};
+// Issue num_ops random reads/writes through an ObjectCacher backed by FakeWriteback, wait for the reads, then flush and release. Returns EXIT_SUCCESS or EXIT_FAILURE.
+int stress_test(uint64_t num_ops, uint64_t num_objs,
+ uint64_t max_obj_size, uint64_t delay_ns,
+ uint64_t max_op_len, float percent_reads)
+{
+ Mutex lock("object_cacher_stress::object_cacher");
+ FakeWriteback writeback(g_ceph_context, &lock, delay_ns);
+
+ ObjectCacher obc(g_ceph_context, "test", writeback, lock, NULL, NULL,
+ g_conf()->client_oc_size,
+ g_conf()->client_oc_max_objects,
+ g_conf()->client_oc_max_dirty,
+ g_conf()->client_oc_target_dirty,
+ g_conf()->client_oc_max_dirty_age,
+ true);
+ obc.start();
+
+ std::atomic<unsigned> outstanding_reads = { 0 }; // incremented per issued read, decremented by C_Count
+ vector<std::shared_ptr<op_data> > ops;
+ ObjectCacher::ObjectSet object_set(NULL, 0, 0);
+ SnapContext snapc;
+ ceph::buffer::ptr bp(max_op_len);
+ ceph::bufferlist bl; // zero-filled payload of max_op_len bytes, passed to every prepare_write()
+ uint64_t journal_tid = 0;
+ bp.zero();
+ bl.append(bp);
+
+ // schedule ops
+ std::cout << "Test configuration:\n\n"
+ << setw(10) << "ops: " << num_ops << "\n"
+ << setw(10) << "objects: " << num_objs << "\n"
+ << setw(10) << "obj size: " << max_obj_size << "\n"
+ << setw(10) << "delay: " << delay_ns << "\n"
+ << setw(10) << "max op len: " << max_op_len << "\n"
+ << setw(10) << "percent reads: " << percent_reads << "\n\n";
+
+ for (uint64_t i = 0; i < num_ops; ++i) {
+ uint64_t offset = random() % max_obj_size;
+ uint64_t max_len = std::min(max_obj_size - offset, max_op_len);
+ // no zero-length operations
+ uint64_t length = random() % (std::max<uint64_t>(max_len - 1, 1)) + 1;
+ std::string oid = "test" + stringify(random() % num_objs);
+ bool is_read = random() < percent_reads * RAND_MAX; // percent_reads is used as a fraction of RAND_MAX
+ std::shared_ptr<op_data> op(new op_data(oid, offset, length, is_read));
+ ops.push_back(op);
+ std::cout << "op " << i << " " << (is_read ? "read" : "write")
+ << " " << op->extent << "\n";
+ if (op->is_read) {
+ ObjectCacher::OSDRead *rd = obc.prepare_read(CEPH_NOSNAP, &op->result, 0);
+ rd->extents.push_back(op->extent);
+ outstanding_reads++;
+ Context *completion = new C_Count(op.get(), &outstanding_reads);
+ lock.Lock();
+ int r = obc.readx(rd, &object_set, completion);
+ lock.Unlock();
+ ceph_assert(r >= 0);
+ if ((uint64_t)r == length) // readx returned the full length up front: fire the completion here
+ completion->complete(r);
+ else
+ ceph_assert(r == 0); // otherwise only 0 is accepted; the wait loop below then expects the completion to run later
+ } else {
+ ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, bl,
+ ceph::real_time::min(), 0,
+ ++journal_tid);
+ wr->extents.push_back(op->extent);
+ lock.Lock();
+ obc.writex(wr, &object_set, NULL);
+ lock.Unlock();
+ }
+ }
+
+ // check that all reads completed
+ for (uint64_t i = 0; i < num_ops; ++i) {
+ if (!ops[i]->is_read)
+ continue;
+ std::cout << "waiting for read " << i << ops[i]->extent << std::endl;
+ uint64_t done = 0;
+ while (done == 0) { // poll the op's atomic done counter
+ done = ops[i]->done;
+ if (!done) {
+ usleep(500);
+ }
+ }
+ if (done > 1) {
+ std::cout << "completion called more than once!\n" << std::endl;
+ return EXIT_FAILURE; // NOTE(review): returns without obc.stop(), unlike correctness_test's failure path
+ }
+ }
+
+ lock.Lock();
+ obc.release_set(&object_set);
+ lock.Unlock();
+
+ int r = 0;
+ Mutex mylock("librbd::ImageCtx::flush_cache");
+ Cond cond;
+ bool done; // NOTE(review): not initialized here; presumably C_SafeCond's constructor sets it - confirm
+ Context *onfinish = new C_SafeCond(&mylock, &cond, &done, &r);
+ lock.Lock();
+ bool already_flushed = obc.flush_set(&object_set, onfinish);
+ std::cout << "already flushed = " << already_flushed << std::endl;
+ lock.Unlock();
+ mylock.Lock();
+ while (!done) {
+ cond.Wait(mylock);
+ }
+ mylock.Unlock();
+
+ lock.Lock();
+ bool unclean = obc.release_set(&object_set);
+ lock.Unlock();
+
+ if (unclean) {
+ std::cout << "unclean buffers left over!" << std::endl;
+ return EXIT_FAILURE; // NOTE(review): also returns without obc.stop()
+ }
+
+ obc.stop();
+
+ std::cout << "Test completed successfully." << std::endl;
+
+ return EXIT_SUCCESS;
+}
+// ObjectCacher check against MemWriteback: build a 4MB object with 0xff markers, flush, read it back while overwriting, validate the markers. Returns EXIT_SUCCESS or EXIT_FAILURE.
+int correctness_test(uint64_t delay_ns)
+{
+ std::cerr << "starting correctness test" << std::endl;
+ Mutex lock("object_cacher_stress::object_cacher");
+ MemWriteback writeback(g_ceph_context, &lock, delay_ns);
+
+ ObjectCacher obc(g_ceph_context, "test", writeback, lock, NULL, NULL,
+ 1<<21, // max cache size, 2MB
+ 1, // max objects, just one
+ 1<<18, // max dirty, 256KB
+ 1<<17, // target dirty, 128KB
+ g_conf()->client_oc_max_dirty_age,
+ true);
+ obc.start();
+ std::cerr << "just start()ed ObjectCacher" << std::endl;
+
+ SnapContext snapc;
+ ceph_tid_t journal_tid = 0;
+ std::string oid("correctness_test_obj");
+ ObjectCacher::ObjectSet object_set(NULL, 0, 0);
+ ceph::bufferlist zeroes_bl;
+ zeroes_bl.append_zero(1<<20);
+
+ // set up a 4MB all-zero object
+ std::cerr << "writing 4x1MB object" << std::endl;
+ std::map<int, C_SaferCond> create_finishers; // keys: 0..3 for the zero writes, byte offsets for the 0xff writes
+ for (int i = 0; i < 4; ++i) {
+ ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, zeroes_bl,
+ ceph::real_time::min(), 0,
+ ++journal_tid);
+ ObjectExtent extent(oid, 0, zeroes_bl.length()*i, zeroes_bl.length(), 0);
+ extent.oloc.pool = 0;
+ extent.buffer_extents.push_back(make_pair(0, 1<<20));
+ wr->extents.push_back(extent);
+ lock.Lock();
+ obc.writex(wr, &object_set, &create_finishers[i]);
+ lock.Unlock();
+ }
+
+ // write some 1-valued bits at 256-KB intervals for checking consistency
+ std::cerr << "Writing some 0xff values" << std::endl;
+ ceph::buffer::ptr ones(1<<16); // 64KB marker block
+ memset(ones.c_str(), 0xff, ones.length());
+ ceph::bufferlist ones_bl;
+ ones_bl.append(ones);
+ for (int i = 1<<18; i < 1<<22; i+=1<<18) { // every 256KB offset from 256KB up to, but excluding, 4MB
+ ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, ones_bl,
+ ceph::real_time::min(), 0,
+ ++journal_tid);
+ ObjectExtent extent(oid, 0, i, ones_bl.length(), 0);
+ extent.oloc.pool = 0;
+ extent.buffer_extents.push_back(make_pair(0, 1<<16));
+ wr->extents.push_back(extent);
+ lock.Lock();
+ obc.writex(wr, &object_set, &create_finishers[i]);
+ lock.Unlock();
+ }
+
+ for (auto i = create_finishers.begin(); i != create_finishers.end(); ++i) {
+ i->second.wait();
+ }
+ std::cout << "Finished setting up object" << std::endl;
+ lock.Lock();
+ C_SaferCond flushcond;
+ bool done = obc.flush_all(&flushcond);
+ if (!done) {
+ std::cout << "Waiting for flush" << std::endl;
+ lock.Unlock(); // drop the cache lock while blocking on the flush
+ flushcond.wait();
+ lock.Lock();
+ }
+ lock.Unlock();
+
+ /* now read the back half of the object in, check consistency,
+ */
+ std::cout << "Reading back half of object (1<<21~1<<21)" << std::endl;
+ bufferlist readbl;
+ C_SaferCond backreadcond;
+ ObjectCacher::OSDRead *back_half_rd = obc.prepare_read(CEPH_NOSNAP, &readbl, 0);
+ ObjectExtent back_half_extent(oid, 0, 1<<21, 1<<21, 0);
+ back_half_extent.oloc.pool = 0;
+ back_half_extent.buffer_extents.push_back(make_pair(0, 1<<21));
+ back_half_rd->extents.push_back(back_half_extent);
+ lock.Lock();
+ int r = obc.readx(back_half_rd, &object_set, &backreadcond);
+ lock.Unlock();
+ ceph_assert(r >= 0);
+ if (r == 0) {
+ std::cout << "Waiting to read data into cache" << std::endl;
+ r = backreadcond.wait();
+ }
+
+ ceph_assert(r == 1<<21);
+
+ /* Read the whole object in,
+ * verify we have to wait for it to complete,
+ * overwrite a small piece, (http://tracker.ceph.com/issues/16002),
+ * and check consistency */
+
+ readbl.clear();
+ std::cout<< "Reading whole object (0~1<<22)" << std::endl;
+ C_SaferCond frontreadcond;
+ ObjectCacher::OSDRead *whole_rd = obc.prepare_read(CEPH_NOSNAP, &readbl, 0);
+ ObjectExtent whole_extent(oid, 0, 0, 1<<22, 0);
+ whole_extent.oloc.pool = 0;
+ whole_extent.buffer_extents.push_back(make_pair(0, 1<<22));
+ whole_rd->extents.push_back(whole_extent);
+ lock.Lock(); // stays held across the overwrite below, released after writex()
+ r = obc.readx(whole_rd, &object_set, &frontreadcond);
+ // we cleared out the cache by reading back half, it shouldn't pass immediately!
+ ceph_assert(r == 0);
+ std::cout << "Data (correctly) not available without fetching" << std::endl;
+
+ ObjectCacher::OSDWrite *verify_wr = obc.prepare_write(snapc, ones_bl,
+ ceph::real_time::min(), 0,
+ ++journal_tid);
+ ObjectExtent verify_extent(oid, 0, (1<<18)+(1<<16), ones_bl.length(), 0);
+ verify_extent.oloc.pool = 0;
+ verify_extent.buffer_extents.push_back(make_pair(0, 1<<16));
+ verify_wr->extents.push_back(verify_extent);
+ C_SaferCond verify_finisher;
+ obc.writex(verify_wr, &object_set, &verify_finisher);
+ lock.Unlock();
+ std::cout << "wrote dirtying data" << std::endl;
+
+ std::cout << "Waiting to read data into cache" << std::endl;
+ frontreadcond.wait();
+ verify_finisher.wait();
+
+ std::cout << "Validating data" << std::endl;
+
+ for (int i = 1<<18; i < 1<<22; i+=1<<18) { // same offsets as the 0xff writes above
+ bufferlist ones_maybe;
+ ones_maybe.substr_of(readbl, i, ones_bl.length());
+ ceph_assert(0 == memcmp(ones_maybe.c_str(), ones_bl.c_str(), ones_bl.length()));
+ }
+ bufferlist ones_maybe; // the block overwritten while the whole-object read was pending
+ ones_maybe.substr_of(readbl, (1<<18)+(1<<16), ones_bl.length());
+ ceph_assert(0 == memcmp(ones_maybe.c_str(), ones_bl.c_str(), ones_bl.length()));
+
+ std::cout << "validated that data is 0xff where it should be" << std::endl;
+
+ lock.Lock();
+ C_SaferCond flushcond2;
+ done = obc.flush_all(&flushcond2);
+ if (!done) {
+ std::cout << "Waiting for final write flush" << std::endl;
+ lock.Unlock();
+ flushcond2.wait();
+ lock.Lock();
+ }
+
+ bool unclean = obc.release_set(&object_set);
+ if (unclean) {
+ std::cout << "unclean buffers left over!" << std::endl;
+ vector<ObjectExtent> discard_extents;
+ int i = 0;
+ for (auto oi = object_set.objects.begin(); !oi.end(); ++oi) {
+ discard_extents.emplace_back(oid, i++, 0, 1<<22, 0);
+ }
+ obc.discard_set(&object_set, discard_extents); // discard what is left, then stop the cacher before reporting failure
+ lock.Unlock();
+ obc.stop();
+ goto fail;
+ }
+ lock.Unlock();
+
+ obc.stop();
+
+ std::cout << "Testing ObjectCacher correctness complete" << std::endl;
+ return EXIT_SUCCESS;
+
+ fail:
+ return EXIT_FAILURE;
+}
+// Parse options, then run stress_test (--stress-test) or else correctness_test (--correctness-test); with neither flag no test is run.
+int main(int argc, const char **argv)
+{
+ std::vector<const char*> args;
+ argv_to_vec(argc, argv, args);
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+
+ long long delay_ns = 0; // defaults, overridable by the options parsed below
+ long long num_ops = 1000;
+ long long obj_bytes = 4 << 20;
+ long long max_len = 128 << 10;
+ long long num_objs = 10;
+ float percent_reads = 0.90;
+ int seed = time(0) % 100000; // only used for srandom() before the stress test
+ bool stress = false;
+ bool correctness = false;
+ std::ostringstream err;
+ std::vector<const char*>::iterator i;
+ for (i = args.begin(); i != args.end();) { // no increment here; the loop relies on the ceph_argparse_* calls (which take i) or an early return to make progress
+ if (ceph_argparse_witharg(args, i, &delay_ns, err, "--delay-ns", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_witharg(args, i, &num_ops, err, "--ops", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_witharg(args, i, &num_objs, err, "--objects", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_witharg(args, i, &obj_bytes, err, "--obj-size", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_witharg(args, i, &max_len, err, "--max-op-size", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_witharg(args, i, &percent_reads, err, "--percent-read", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_witharg(args, i, &seed, err, "--seed", (char*)NULL)) {
+ if (!err.str().empty()) {
+ cerr << argv[0] << ": " << err.str() << std::endl;
+ return EXIT_FAILURE;
+ }
+ } else if (ceph_argparse_flag(args, i, "--stress-test", NULL)) {
+ stress = true;
+ } else if (ceph_argparse_flag(args, i, "--correctness-test", NULL)) {
+ correctness = true;
+ } else {
+ cerr << "unknown option " << *i << std::endl;
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (stress) { // takes precedence: when both flags are given only the stress test runs
+ srandom(seed);
+ return stress_test(num_ops, num_objs, obj_bytes, delay_ns, max_len, percent_reads);
+ }
+ if (correctness) {
+ return correctness_test(delay_ns);
+ }
+} // neither flag given: control falls off the end of main without running a test