diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/osd/SnapMapper.cc | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/osd/SnapMapper.cc')
-rw-r--r-- | src/osd/SnapMapper.cc | 975 |
1 files changed, 975 insertions, 0 deletions
diff --git a/src/osd/SnapMapper.cc b/src/osd/SnapMapper.cc new file mode 100644 index 000000000..7893bc08f --- /dev/null +++ b/src/osd/SnapMapper.cc @@ -0,0 +1,975 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "SnapMapper.h" + +#include <fmt/printf.h> +#include <fmt/ranges.h> + +#include "global/global_context.h" +#include "osd/osd_types_fmt.h" +#include "SnapMapReaderI.h" + +#define dout_context cct +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix *_dout << "snap_mapper." + +using std::make_pair; +using std::map; +using std::pair; +using std::set; +using std::string; +using std::vector; + +using ceph::decode; +using ceph::encode; +using ceph::timespan_str; +using result_t = Scrub::SnapMapReaderI::result_t; +using code_t = Scrub::SnapMapReaderI::result_t::code_t; + + +const string SnapMapper::LEGACY_MAPPING_PREFIX = "MAP_"; +const string SnapMapper::MAPPING_PREFIX = "SNA_"; +const string SnapMapper::OBJECT_PREFIX = "OBJ_"; + +const char *SnapMapper::PURGED_SNAP_PREFIX = "PSN_"; + +/* + + We have a bidirectional mapping, (1) from each snap+obj to object, + sorted by snapshot, such that we can enumerate to identify all clones + mapped to a particular snapshot, and (2) from object to snaps, so we + can identify which reverse mappings exist for any given object (and, + e.g., clean up on deletion). + + "MAP_" + + ("%016x" % snapid) + + "_" + + (".%x" % shard_id) + + "_" + + hobject_t::to_str() ("%llx.%8x.%lx.name...." 
% pool, hash, snap) + -> SnapMapping::Mapping { snap, hoid } + + "SNA_" + + ("%lld" % poolid) + + "_" + + ("%016x" % snapid) + + "_" + + (".%x" % shard_id) + + "_" + + hobject_t::to_str() ("%llx.%8x.%lx.name...." % pool, hash, snap) + -> SnapMapping::Mapping { snap, hoid } + + "OBJ_" + + + (".%x" % shard_id) + + hobject_t::to_str() + -> SnapMapper::object_snaps { oid, set<snapid_t> } + + */ + +#ifdef WITH_SEASTAR +#include "crimson/common/log.h" +#include "crimson/osd/pg_interval_interrupt_condition.h" + template <typename ValuesT = void> + using interruptible_future = + ::crimson::interruptible::interruptible_future< + ::crimson::osd::IOInterruptCondition, ValuesT>; + using interruptor = + ::crimson::interruptible::interruptor< + ::crimson::osd::IOInterruptCondition>; + +#define CRIMSON_DEBUG(FMT_MSG, ...) crimson::get_logger(ceph_subsys_).debug(FMT_MSG, ##__VA_ARGS__) +int OSDriver::get_keys( + const std::set<std::string> &keys, + std::map<std::string, ceph::buffer::list> *out) +{ + CRIMSON_DEBUG("OSDriver::{}:{}", __func__, __LINE__); + using crimson::os::FuturizedStore; + return interruptor::green_get(os->omap_get_values( + ch, hoid, keys + ).safe_then([out] (FuturizedStore::Shard::omap_values_t&& vals) { + // just the difference in comparator (`std::less<>` in omap_values_t`) + reinterpret_cast<FuturizedStore::Shard::omap_values_t&>(*out) = std::move(vals); + return 0; + }, FuturizedStore::Shard::read_errorator::all_same_way([] (auto& e) { + assert(e.value() > 0); + return -e.value(); + }))); // this requires seastar::thread + CRIMSON_DEBUG("OSDriver::{}:{}", __func__, __LINE__); +} + +int OSDriver::get_next( + const std::string &key, + std::pair<std::string, ceph::buffer::list> *next) +{ + CRIMSON_DEBUG("OSDriver::{}:{}", __func__, __LINE__); + using crimson::os::FuturizedStore; + return interruptor::green_get(os->omap_get_values( + ch, hoid, key + ).safe_then_unpack([&key, next] (bool, FuturizedStore::Shard::omap_values_t&& vals) { + 
CRIMSON_DEBUG("OSDriver::{}:{}", "get_next", __LINE__); + if (auto nit = std::begin(vals); nit == std::end(vals)) { + CRIMSON_DEBUG("OSDriver::{}:{}", "get_next", __LINE__); + return -ENOENT; + } else { + CRIMSON_DEBUG("OSDriver::{}:{}", "get_next", __LINE__); + assert(nit->first > key); + *next = *nit; + return 0; + } + }, FuturizedStore::Shard::read_errorator::all_same_way([] { + CRIMSON_DEBUG("OSDriver::{}:{}", "get_next", __LINE__); + return -EINVAL; + }))); // this requires seastar::thread + CRIMSON_DEBUG("OSDriver::{}:{}", __func__, __LINE__); +} + +int OSDriver::get_next_or_current( + const std::string &key, + std::pair<std::string, ceph::buffer::list> *next_or_current) +{ + CRIMSON_DEBUG("OSDriver::{}:{}", __func__, __LINE__); + using crimson::os::FuturizedStore; + // let's try to get current first + return interruptor::green_get(os->omap_get_values( + ch, hoid, FuturizedStore::Shard::omap_keys_t{key} + ).safe_then([&key, next_or_current] (FuturizedStore::Shard::omap_values_t&& vals) { + assert(vals.size() == 1); + *next_or_current = std::make_pair(key, std::move(vals[0])); + return 0; + }, FuturizedStore::Shard::read_errorator::all_same_way( + [next_or_current, &key, this] { + // no current, try next + return get_next(key, next_or_current); + }))); // this requires seastar::thread + CRIMSON_DEBUG("OSDriver::{}:{}", __func__, __LINE__); +} +#else +int OSDriver::get_keys( + const std::set<std::string> &keys, + std::map<std::string, ceph::buffer::list> *out) +{ + return os->omap_get_values(ch, hoid, keys, out); +} + +int OSDriver::get_next( + const std::string &key, + std::pair<std::string, ceph::buffer::list> *next) +{ + ObjectMap::ObjectMapIterator iter = + os->get_omap_iterator(ch, hoid); + if (!iter) { + ceph_abort(); + return -EINVAL; + } + iter->upper_bound(key); + if (iter->valid()) { + if (next) + *next = make_pair(iter->key(), iter->value()); + return 0; + } else { + return -ENOENT; + } +} + +int OSDriver::get_next_or_current( + const std::string 
&key, + std::pair<std::string, ceph::buffer::list> *next_or_current) +{ + ObjectMap::ObjectMapIterator iter = + os->get_omap_iterator(ch, hoid); + if (!iter) { + ceph_abort(); + return -EINVAL; + } + iter->lower_bound(key); + if (iter->valid()) { + if (next_or_current) + *next_or_current = make_pair(iter->key(), iter->value()); + return 0; + } else { + return -ENOENT; + } +} +#endif // WITH_SEASTAR + +string SnapMapper::get_prefix(int64_t pool, snapid_t snap) +{ + static_assert(sizeof(pool) == 8, "assumed by the formatting code"); + return fmt::sprintf("%s%lld_%.16X_", + MAPPING_PREFIX, + pool, + snap); +} + +string SnapMapper::to_raw_key( + const pair<snapid_t, hobject_t> &in) const +{ + return get_prefix(in.second.pool, in.first) + shard_prefix + in.second.to_str(); +} + +std::string SnapMapper::to_raw_key(snapid_t snap, const hobject_t &clone) const +{ + return get_prefix(clone.pool, snap) + shard_prefix + clone.to_str(); +} + +pair<string, ceph::buffer::list> SnapMapper::to_raw( + const pair<snapid_t, hobject_t> &in) const +{ + ceph::buffer::list bl; + encode(Mapping(in), bl); + return make_pair(to_raw_key(in), bl); +} + +pair<snapid_t, hobject_t> SnapMapper::from_raw( + const pair<std::string, ceph::buffer::list> &image) +{ + using ceph::decode; + Mapping map; + ceph::buffer::list bl(image.second); + auto bp = bl.cbegin(); + decode(map, bp); + return make_pair(map.snap, map.hoid); +} + +std::pair<snapid_t, hobject_t> SnapMapper::from_raw( + const ceph::buffer::list &image) +{ + using ceph::decode; + Mapping map; + auto bp = image.cbegin(); + decode(map, bp); + return make_pair(map.snap, map.hoid); +} + +bool SnapMapper::is_mapping(const string &to_test) +{ + return to_test.substr(0, MAPPING_PREFIX.size()) == MAPPING_PREFIX; +} + +string SnapMapper::to_object_key(const hobject_t &hoid) const +{ + return OBJECT_PREFIX + shard_prefix + hoid.to_str(); +} + +void SnapMapper::object_snaps::encode(ceph::buffer::list &bl) const +{ + ENCODE_START(1, 1, bl); + 
encode(oid, bl); + encode(snaps, bl); + ENCODE_FINISH(bl); +} + +void SnapMapper::object_snaps::decode(ceph::buffer::list::const_iterator &bl) +{ + DECODE_START(1, bl); + decode(oid, bl); + decode(snaps, bl); + DECODE_FINISH(bl); +} + +bool SnapMapper::check(const hobject_t &hoid) const +{ + if (hoid.match(mask_bits, match)) { + return true; + } + derr << __func__ << " " << hoid << " mask_bits " << mask_bits + << " match 0x" << std::hex << match << std::dec << " is false" + << dendl; + return false; +} + +int SnapMapper::get_snaps(const hobject_t &oid, object_snaps *out) const +{ + auto snaps = get_snaps_common(oid); + if (snaps) { + *out = *snaps; + return 0; + } + switch (auto e = snaps.error(); e.code) { + case code_t::backend_error: + return e.backend_error; + case code_t::not_found: + return -ENOENT; + case code_t::inconsistent: + // As this is a legacy interface, we cannot surprise the user with + // a new error code here. + return -ENOENT; + default: + // Can't happen. Just to keep the compiler happy. 
+ ceph_abort("get_snaps_common() returned invalid error code"); + } +} + +tl::expected<std::set<snapid_t>, Scrub::SnapMapReaderI::result_t> +SnapMapper::get_snaps(const hobject_t &oid) const +{ + auto snaps = get_snaps_common(oid); + if (snaps) { + return snaps->snaps; + } + return tl::unexpected(snaps.error()); +} + +tl::expected<SnapMapper::object_snaps, Scrub::SnapMapReaderI::result_t> +SnapMapper::get_snaps_common(const hobject_t &oid) const +{ + ceph_assert(check(oid)); + set<string> keys{to_object_key(oid)}; + dout(20) << fmt::format("{}: key string: {} oid:{}", __func__, keys, oid) + << dendl; + + map<string, ceph::buffer::list> got; + int r = backend.get_keys(keys, &got); + if (r < 0) { + dout(10) << __func__ << " " << oid << " got err " << r << dendl; + return tl::unexpected(result_t{code_t::backend_error, r}); + } + if (got.empty()) { + dout(10) << __func__ << " " << oid << " got.empty()" << dendl; + return tl::unexpected(result_t{code_t::not_found, -ENOENT}); + } + + object_snaps out; + auto bp = got.begin()->second.cbegin(); + try { + decode(out, bp); + } catch (...) 
{ + dout(1) << __func__ << ": " << oid << " decode failed" << dendl; + return tl::unexpected(result_t{code_t::backend_error, -EIO}); + } + + dout(20) << __func__ << " " << oid << " " << out.snaps << dendl; + if (out.snaps.empty()) { + dout(1) << __func__ << " " << oid << " empty snapset" << dendl; + ceph_assert(!cct->_conf->osd_debug_verify_snaps); + } + return out; +} + +std::set<std::string> SnapMapper::to_raw_keys( + const hobject_t &clone, + const std::set<snapid_t> &snaps) const +{ + std::set<std::string> keys; + for (auto snap : snaps) { + keys.insert(to_raw_key(snap, clone)); + } + dout(20) << fmt::format( + "{}: clone:{} snaps:{} -> keys: {}", __func__, clone, snaps, + keys) + << dendl; + return keys; +} + +tl::expected<std::set<snapid_t>, result_t> +SnapMapper::get_snaps_check_consistency(const hobject_t &hoid) const +{ + // derive the set of snaps from the 'OBJ_' entry + auto obj_snaps = get_snaps(hoid); + if (!obj_snaps) { + return obj_snaps; + } + + // make sure we have the expected set of SNA_ entries: + // we have the clone oid and the set of snaps relevant to this clone. + // Let's construct all expected SNA_ key, then fetch them. + + auto mapping_keys = to_raw_keys(hoid, *obj_snaps); + map<string, ceph::buffer::list> kvmap; + auto r = backend.get_keys(mapping_keys, &kvmap); + if (r < 0) { + dout(10) << fmt::format( + "{}: backend error ({}) for cobject {}", __func__, r, hoid) + << dendl; + // that's a backend error, but for the SNA_ entries. Let's treat it as an + // internal consistency error (although a backend error would have made + // sense too). 
+ return tl::unexpected(result_t{code_t::inconsistent, r}); + } + + std::set<snapid_t> snaps_from_mapping; + for (auto &[k, v] : kvmap) { + dout(20) << __func__ << " " << hoid << " " << k << dendl; + // extract the object ID from the value fetched for an SNA mapping key + auto [sn, obj] = SnapMapper::from_raw(v); + if (obj != hoid) { + dout(1) << fmt::format( + "{}: unexpected object ID {} for key{} (expected {})", + __func__, obj, k, hoid) + << dendl; + return tl::unexpected(result_t{code_t::inconsistent}); + } + snaps_from_mapping.insert(sn); + } + + if (snaps_from_mapping != *obj_snaps) { + dout(10) << fmt::format( + "{}: hoid:{} -> mapper internal inconsistency ({} vs {})", + __func__, hoid, *obj_snaps, snaps_from_mapping) + << dendl; + return tl::unexpected(result_t{code_t::inconsistent}); + } + dout(10) << fmt::format( + "{}: snaps for {}: {}", __func__, hoid, snaps_from_mapping) + << dendl; + return obj_snaps; +} + +void SnapMapper::clear_snaps( + const hobject_t &oid, + MapCacher::Transaction<std::string, ceph::buffer::list> *t) +{ + dout(20) << __func__ << " " << oid << dendl; + ceph_assert(check(oid)); + set<string> to_remove; + to_remove.insert(to_object_key(oid)); + if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) { + for (auto& i : to_remove) { + dout(20) << __func__ << " rm " << i << dendl; + } + } + backend.remove_keys(to_remove, t); +} + +void SnapMapper::set_snaps( + const hobject_t &oid, + const object_snaps &in, + MapCacher::Transaction<std::string, ceph::buffer::list> *t) +{ + ceph_assert(check(oid)); + map<string, ceph::buffer::list> to_set; + ceph::buffer::list bl; + encode(in, bl); + to_set[to_object_key(oid)] = bl; + dout(20) << __func__ << " " << oid << " " << in.snaps << dendl; + if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) { + for (auto& i : to_set) { + dout(20) << __func__ << " set " << i.first << dendl; + } + } + backend.set_keys(to_set, t); +} + +int SnapMapper::update_snaps( + const hobject_t &oid, + const 
set<snapid_t> &new_snaps, + const set<snapid_t> *old_snaps_check, + MapCacher::Transaction<std::string, ceph::buffer::list> *t) +{ + dout(20) << __func__ << " " << oid << " " << new_snaps + << " was " << (old_snaps_check ? *old_snaps_check : set<snapid_t>()) + << dendl; + ceph_assert(check(oid)); + if (new_snaps.empty()) + return remove_oid(oid, t); + + object_snaps out; + int r = get_snaps(oid, &out); + // Tolerate missing keys but not disk errors + if (r < 0 && r != -ENOENT) + return r; + if (old_snaps_check) + ceph_assert(out.snaps == *old_snaps_check); + + object_snaps in(oid, new_snaps); + set_snaps(oid, in, t); + + set<string> to_remove; + for (set<snapid_t>::iterator i = out.snaps.begin(); + i != out.snaps.end(); + ++i) { + if (!new_snaps.count(*i)) { + to_remove.insert(to_raw_key(make_pair(*i, oid))); + } + } + if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) { + for (auto& i : to_remove) { + dout(20) << __func__ << " rm " << i << dendl; + } + } + backend.remove_keys(to_remove, t); + return 0; +} + +void SnapMapper::add_oid( + const hobject_t &oid, + const set<snapid_t>& snaps, + MapCacher::Transaction<std::string, ceph::buffer::list> *t) +{ + dout(20) << __func__ << " " << oid << " " << snaps << dendl; + ceph_assert(!snaps.empty()); + ceph_assert(check(oid)); + { + object_snaps out; + int r = get_snaps(oid, &out); + if (r != -ENOENT) { + derr << __func__ << " found existing snaps mapped on " << oid + << ", removing" << dendl; + ceph_assert(!cct->_conf->osd_debug_verify_snaps); + remove_oid(oid, t); + } + } + + object_snaps _snaps(oid, snaps); + set_snaps(oid, _snaps, t); + + map<string, ceph::buffer::list> to_add; + for (set<snapid_t>::iterator i = snaps.begin(); + i != snaps.end(); + ++i) { + to_add.insert(to_raw(make_pair(*i, oid))); + } + if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) { + for (auto& i : to_add) { + dout(20) << __func__ << " set " << i.first << dendl; + } + } + backend.set_keys(to_add, t); +} + +int 
SnapMapper::get_next_objects_to_trim( + snapid_t snap, + unsigned max, + vector<hobject_t> *out) +{ + ceph_assert(out); + ceph_assert(out->empty()); + + // if max would be 0, we return ENOENT and the caller would mistakenly + // trim the snaptrim queue + ceph_assert(max > 0); + int r = 0; + + /// \todo cache the prefixes-set in update_bits() + for (set<string>::iterator i = prefixes.begin(); + i != prefixes.end() && out->size() < max && r == 0; + ++i) { + string prefix(get_prefix(pool, snap) + *i); + string pos = prefix; + while (out->size() < max) { + pair<string, ceph::buffer::list> next; + r = backend.get_next(pos, &next); + dout(20) << __func__ << " get_next(" << pos << ") returns " << r + << " " << next << dendl; + if (r != 0) { + break; // Done + } + + if (next.first.substr(0, prefix.size()) != + prefix) { + break; // Done with this prefix + } + + ceph_assert(is_mapping(next.first)); + + dout(20) << __func__ << " " << next.first << dendl; + pair<snapid_t, hobject_t> next_decoded(from_raw(next)); + ceph_assert(next_decoded.first == snap); + ceph_assert(check(next_decoded.second)); + + out->push_back(next_decoded.second); + pos = next.first; + } + } + if (out->size() == 0) { + return -ENOENT; + } else { + return 0; + } +} + + +int SnapMapper::remove_oid( + const hobject_t &oid, + MapCacher::Transaction<std::string, ceph::buffer::list> *t) +{ + dout(20) << __func__ << " " << oid << dendl; + ceph_assert(check(oid)); + return _remove_oid(oid, t); +} + +int SnapMapper::_remove_oid( + const hobject_t &oid, + MapCacher::Transaction<std::string, ceph::buffer::list> *t) +{ + dout(20) << __func__ << " " << oid << dendl; + object_snaps out; + int r = get_snaps(oid, &out); + if (r < 0) + return r; + + clear_snaps(oid, t); + + set<string> to_remove; + for (set<snapid_t>::iterator i = out.snaps.begin(); + i != out.snaps.end(); + ++i) { + to_remove.insert(to_raw_key(make_pair(*i, oid))); + } + if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) { + for (auto& i : 
to_remove) { + dout(20) << __func__ << " rm " << i << dendl; + } + } + backend.remove_keys(to_remove, t); + return 0; +} + +int SnapMapper::get_snaps( + const hobject_t &oid, + std::set<snapid_t> *snaps) const +{ + ceph_assert(check(oid)); + object_snaps out; + int r = get_snaps(oid, &out); + if (r < 0) + return r; + if (snaps) + snaps->swap(out.snaps); + return 0; +} + + +// -- purged snaps -- + +string SnapMapper::make_purged_snap_key(int64_t pool, snapid_t last) +{ + return fmt::sprintf("%s_%lld_%016llx", + PURGED_SNAP_PREFIX, + pool, + last); +} + +void SnapMapper::make_purged_snap_key_value( + int64_t pool, snapid_t begin, snapid_t end, map<string,ceph::buffer::list> *m) +{ + string k = make_purged_snap_key(pool, end - 1); + auto& v = (*m)[k]; + ceph::encode(pool, v); + ceph::encode(begin, v); + ceph::encode(end, v); +} + +int SnapMapper::_lookup_purged_snap( + CephContext *cct, + OSDriver& backend, + int64_t pool, snapid_t snap, + snapid_t *begin, snapid_t *end) +{ + string k = make_purged_snap_key(pool, snap); + std::pair<std::string, ceph::buffer::list> kv; + if (auto ret = backend.get_next_or_current(k, &kv); ret == -ENOENT) { + dout(20) << __func__ << " pool " << pool << " snap " << snap + << " key '" << k << "' lower_bound not found" << dendl; + return -ENOENT; + } + if (kv.first.find(PURGED_SNAP_PREFIX) != 0) { + dout(20) << __func__ << " pool " << pool << " snap " << snap + << " key '" << k << "' lower_bound got mismatched prefix '" + << kv.first << "'" << dendl; + return -ENOENT; + } + ceph::buffer::list v = kv.second; + auto p = v.cbegin(); + int64_t gotpool; + decode(gotpool, p); + decode(*begin, p); + decode(*end, p); + if (snap < *begin || snap >= *end) { + dout(20) << __func__ << " pool " << pool << " snap " << snap + << " found [" << *begin << "," << *end << "), no overlap" << dendl; + return -ENOENT; + } + return 0; +} + +void SnapMapper::record_purged_snaps( + CephContext *cct, + OSDriver& backend, + OSDriver::OSTransaction&& txn, + 
map<epoch_t,mempool::osdmap::map<int64_t,snap_interval_set_t>> purged_snaps) +{ + dout(10) << __func__ << " purged_snaps " << purged_snaps << dendl; + map<string,ceph::buffer::list> m; + set<string> rm; + for (auto& [epoch, bypool] : purged_snaps) { + // index by (pool, snap) + for (auto& [pool, snaps] : bypool) { + for (auto i = snaps.begin(); + i != snaps.end(); + ++i) { + snapid_t begin = i.get_start(); + snapid_t end = i.get_end(); + snapid_t before_begin, before_end; + snapid_t after_begin, after_end; + int b = _lookup_purged_snap(cct, backend, + pool, begin - 1, &before_begin, &before_end); + int a = _lookup_purged_snap(cct, backend, + pool, end, &after_begin, &after_end); + if (!b && !a) { + dout(10) << __func__ + << " [" << begin << "," << end << ") - joins [" + << before_begin << "," << before_end << ") and [" + << after_begin << "," << after_end << ")" << dendl; + // erase only the begin record; we'll overwrite the end one + rm.insert(make_purged_snap_key(pool, before_end - 1)); + make_purged_snap_key_value(pool, before_begin, after_end, &m); + } else if (!b) { + dout(10) << __func__ + << " [" << begin << "," << end << ") - join with earlier [" + << before_begin << "," << before_end << ")" << dendl; + rm.insert(make_purged_snap_key(pool, before_end - 1)); + make_purged_snap_key_value(pool, before_begin, end, &m); + } else if (!a) { + dout(10) << __func__ + << " [" << begin << "," << end << ") - join with later [" + << after_begin << "," << after_end << ")" << dendl; + // overwrite after record + make_purged_snap_key_value(pool, begin, after_end, &m); + } else { + make_purged_snap_key_value(pool, begin, end, &m); + } + } + } + } + txn.remove_keys(rm); + txn.set_keys(m); + dout(10) << __func__ << " rm " << rm.size() << " keys, set " << m.size() + << " keys" << dendl; +} + + +#ifndef WITH_SEASTAR +bool SnapMapper::Scrubber::_parse_p() +{ + if (!psit->valid()) { + pool = -1; + return false; + } + if (psit->key().find(PURGED_SNAP_PREFIX) != 0) { + pool = -1; + 
return false; + } + ceph::buffer::list v = psit->value(); + auto p = v.cbegin(); + ceph::decode(pool, p); + ceph::decode(begin, p); + ceph::decode(end, p); + dout(20) << __func__ << " purged_snaps pool " << pool + << " [" << begin << "," << end << ")" << dendl; + psit->next(); + return true; +} + +bool SnapMapper::Scrubber::_parse_m() +{ + if (!mapit->valid()) { + return false; + } + if (mapit->key().find(MAPPING_PREFIX) != 0) { + return false; + } + auto v = mapit->value(); + auto p = v.cbegin(); + mapping.decode(p); + + { + unsigned long long p, s; + long sh; + string k = mapit->key(); + int r = sscanf(k.c_str(), "SNA_%lld_%llx.%lx", &p, &s, &sh); + if (r != 1) { + shard = shard_id_t::NO_SHARD; + } else { + shard = shard_id_t(sh); + } + } + dout(20) << __func__ << " mapping pool " << mapping.hoid.pool + << " snap " << mapping.snap + << " shard " << shard + << " " << mapping.hoid << dendl; + mapit->next(); + return true; +} + +void SnapMapper::Scrubber::run() +{ + dout(10) << __func__ << dendl; + + psit = store->get_omap_iterator(ch, purged_snaps_hoid); + psit->upper_bound(PURGED_SNAP_PREFIX); + _parse_p(); + + mapit = store->get_omap_iterator(ch, mapping_hoid); + mapit->upper_bound(MAPPING_PREFIX); + + while (_parse_m()) { + // advance to next purged_snaps range? 
+ while (pool >= 0 && + (mapping.hoid.pool > pool || + (mapping.hoid.pool == pool && mapping.snap >= end))) { + _parse_p(); + } + if (pool < 0) { + dout(10) << __func__ << " passed final purged_snaps interval, rest ok" + << dendl; + break; + } + if (mapping.hoid.pool < pool || + mapping.snap < begin) { + // ok + dout(20) << __func__ << " ok " << mapping.hoid + << " snap " << mapping.snap + << " precedes pool " << pool + << " purged_snaps [" << begin << "," << end << ")" << dendl; + } else { + assert(mapping.snap >= begin && + mapping.snap < end && + mapping.hoid.pool == pool); + // invalid + dout(10) << __func__ << " stray " << mapping.hoid + << " snap " << mapping.snap + << " in pool " << pool + << " shard " << shard + << " purged_snaps [" << begin << "," << end << ")" << dendl; + stray.emplace_back(std::tuple<int64_t,snapid_t,uint32_t,shard_id_t>( + pool, mapping.snap, mapping.hoid.get_hash(), + shard + )); + } + } + + dout(10) << __func__ << " end, found " << stray.size() << " stray" << dendl; + psit = ObjectMap::ObjectMapIterator(); + mapit = ObjectMap::ObjectMapIterator(); +} +#endif // !WITH_SEASTAR + + +// ------------------------------------- +// legacy conversion/support + +string SnapMapper::get_legacy_prefix(snapid_t snap) +{ + return fmt::sprintf("%s%.16X_", + LEGACY_MAPPING_PREFIX, + snap); +} + +string SnapMapper::to_legacy_raw_key( + const pair<snapid_t, hobject_t> &in) +{ + return get_legacy_prefix(in.first) + shard_prefix + in.second.to_str(); +} + +bool SnapMapper::is_legacy_mapping(const string &to_test) +{ + return to_test.substr(0, LEGACY_MAPPING_PREFIX.size()) == + LEGACY_MAPPING_PREFIX; +} + +#ifndef WITH_SEASTAR +/* Octopus modified the SnapMapper key format from + * + * <LEGACY_MAPPING_PREFIX><snapid>_<shardid>_<hobject_t::to_str()> + * + * to + * + * <MAPPING_PREFIX><pool>_<snapid>_<shardid>_<hobject_t::to_str()> + * + * We can't reconstruct the new key format just from the value since the + * Mapping object contains an hobject rather than 
a ghobject. Instead, + * we exploit the fact that the new format is identical starting at <snapid>. + * + * Note that the original version of this conversion introduced in 94ebe0ea + * had a crucial bug which essentially destroyed legacy keys by mapping + * them to + * + * <MAPPING_PREFIX><poolid>_<snapid>_ + * + * without the object-unique suffix. + * See https://tracker.ceph.com/issues/56147 + */ +std::string SnapMapper::convert_legacy_key( + const std::string& old_key, + const ceph::buffer::list& value) +{ + auto old = from_raw(make_pair(old_key, value)); + std::string object_suffix = old_key.substr( + SnapMapper::LEGACY_MAPPING_PREFIX.length()); + return SnapMapper::MAPPING_PREFIX + std::to_string(old.second.pool) + + "_" + object_suffix; +} + +int SnapMapper::convert_legacy( + CephContext *cct, + ObjectStore *store, + ObjectStore::CollectionHandle& ch, + ghobject_t hoid, + unsigned max) +{ + uint64_t n = 0; + + ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, hoid); + if (!iter) { + return -EIO; + } + + auto start = ceph::mono_clock::now(); + + iter->upper_bound(SnapMapper::LEGACY_MAPPING_PREFIX); + map<string,ceph::buffer::list> to_set; + while (iter->valid()) { + bool valid = SnapMapper::is_legacy_mapping(iter->key()); + if (valid) { + to_set.emplace( + convert_legacy_key(iter->key(), iter->value()), + iter->value()); + ++n; + iter->next(); + } + if (!valid || !iter->valid() || to_set.size() >= max) { + ObjectStore::Transaction t; + t.omap_setkeys(ch->cid, hoid, to_set); + int r = store->queue_transaction(ch, std::move(t)); + ceph_assert(r == 0); + to_set.clear(); + if (!valid) { + break; + } + dout(10) << __func__ << " converted " << n << " keys" << dendl; + } + } + + auto end = ceph::mono_clock::now(); + + dout(1) << __func__ << " converted " << n << " keys in " + << timespan_str(end - start) << dendl; + + // remove the old keys + { + ObjectStore::Transaction t; + string end = SnapMapper::LEGACY_MAPPING_PREFIX; + ++end[end.size()-1]; // 
turn _ to whatever comes after _ + t.omap_rmkeyrange(ch->cid, hoid, + SnapMapper::LEGACY_MAPPING_PREFIX, + end); + int r = store->queue_transaction(ch, std::move(t)); + ceph_assert(r == 0); + } + return 0; +} +#endif // !WITH_SEASTAR |