From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- src/crimson/os/cyanstore/cyan_store.cc | 835 +++++++++++++++++++++++++++++++++ 1 file changed, 835 insertions(+) create mode 100644 src/crimson/os/cyanstore/cyan_store.cc (limited to 'src/crimson/os/cyanstore/cyan_store.cc') diff --git a/src/crimson/os/cyanstore/cyan_store.cc b/src/crimson/os/cyanstore/cyan_store.cc new file mode 100644 index 000000000..eb93d72ec --- /dev/null +++ b/src/crimson/os/cyanstore/cyan_store.cc @@ -0,0 +1,835 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "cyan_store.h" + +#include +#include +#include + +#include "common/safe_io.h" +#include "os/Transaction.h" + +#include "crimson/common/buffer_io.h" +#include "crimson/common/config_proxy.h" +#include "cyan_collection.h" +#include "cyan_object.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +using crimson::common::local_conf; + +namespace crimson::os { + +using ObjectRef = boost::intrusive_ptr; + +CyanStore::CyanStore(const std::string& path) + : path{path} +{} + +CyanStore::~CyanStore() = default; + +seastar::future<> CyanStore::mount() +{ + ceph::bufferlist bl; + std::string fn = path + "/collections"; + std::string err; + if (int r = bl.read_file(fn.c_str(), &err); r < 0) { + throw std::runtime_error("read_file"); + } + + std::set collections; + auto p = bl.cbegin(); + ceph::decode(collections, p); + + for (auto& coll : collections) { + std::string fn = fmt::format("{}/{}", path, coll); + ceph::bufferlist cbl; + if (int r = cbl.read_file(fn.c_str(), &err); r < 0) { + throw std::runtime_error("read_file"); + } + boost::intrusive_ptr c{new Collection{coll}}; + auto p = cbl.cbegin(); + c->decode(p); + coll_map[coll] = c; + used_bytes += c->used_bytes(); + } + return seastar::now(); +} + +seastar::future<> CyanStore::umount() +{ + return seastar::do_with(std::set{}, [this](auto& collections) { + return seastar::do_for_each(coll_map, [&collections, this](auto& coll) { + auto& [col, ch] = coll; + collections.insert(col); + ceph::bufferlist bl; + ceph_assert(ch); + ch->encode(bl); + std::string fn = fmt::format("{}/{}", path, col); + return crimson::write_file(std::move(bl), fn); + }).then([&collections, this] { + ceph::bufferlist bl; + ceph::encode(collections, bl); + std::string fn = fmt::format("{}/collections", path); + return crimson::write_file(std::move(bl), fn); + }); + }); +} + +seastar::future<> CyanStore::mkfs(uuid_d new_osd_fsid) +{ + return read_meta("fsid").then([=](auto&& ret) { + auto& [r, fsid_str] = ret; + if (r == -ENOENT) { + if (new_osd_fsid.is_zero()) { + osd_fsid.generate_random(); + } else { + osd_fsid = new_osd_fsid; + } + return write_meta("fsid", fmt::format("{}", osd_fsid)); + } else if (r < 0) { + throw std::runtime_error("read_meta"); + } else { + logger().info("{} already has fsid {}", __func__, fsid_str); + if (!osd_fsid.parse(fsid_str.c_str())) { + throw std::runtime_error("failed to parse fsid"); + } else if (osd_fsid != new_osd_fsid) { + logger().error("on-disk fsid {} != provided {}", osd_fsid, new_osd_fsid); + throw std::runtime_error("unmatched osd_fsid"); + } else { + return seastar::now(); + } + } + }).then([this]{ + std::string fn = path + "/collections"; + ceph::bufferlist bl; + std::set collections; + ceph::encode(collections, bl); + return crimson::write_file(std::move(bl), fn); + }).then([this] { + return write_meta("type", "memstore"); + }); +} + +seastar::future CyanStore::stat() const +{ + logger().debug("{}", __func__); + store_statfs_t st; + st.total = crimson::common::local_conf().get_val("memstore_device_bytes"); + st.available = st.total - used_bytes; + return seastar::make_ready_future(std::move(st)); +} + +seastar::future, ghobject_t>> +CyanStore::list_objects(CollectionRef ch, + const ghobject_t& start, + const ghobject_t& end, + uint64_t limit) const +{ + auto c = static_cast(ch.get()); + logger().debug("{} {} {} {} {}", + __func__, c->get_cid(), start, end, limit); + std::vector objects; + objects.reserve(limit); + ghobject_t next = ghobject_t::get_max(); + for (const auto& [oid, obj] : + boost::make_iterator_range(c->object_map.lower_bound(start), + c->object_map.end())) { + std::ignore = obj; + if (oid >= end || objects.size() >= limit) { + next = oid; + break; + } + objects.push_back(oid); + } + return seastar::make_ready_future, ghobject_t>>( + std::make_tuple(std::move(objects), next)); +} + +seastar::future CyanStore::create_new_collection(const coll_t& cid) +{ + auto c = new Collection{cid}; + new_coll_map[cid] = c; + return seastar::make_ready_future(c); +} + +seastar::future CyanStore::open_collection(const coll_t& cid) +{ + return seastar::make_ready_future(_get_collection(cid)); +} + +seastar::future> CyanStore::list_collections() +{ + std::vector collections; + for (auto& coll : coll_map) { + collections.push_back(coll.first); + } + return seastar::make_ready_future>(std::move(collections)); +} + +CyanStore::read_errorator::future CyanStore::read( + CollectionRef ch, + const ghobject_t& oid, + uint64_t offset, + size_t len, + uint32_t op_flags) +{ + auto c = static_cast(ch.get()); + logger().debug("{} {} {} {}~{}", + __func__, c->get_cid(), oid, offset, len); + if (!c->exists) { + return crimson::ct_error::enoent::make(); + } + ObjectRef o = c->get_object(oid); + if (!o) { + return crimson::ct_error::enoent::make(); + } + if (offset >= o->get_size()) + return read_errorator::make_ready_future(); + size_t l = len; + if (l == 0 && offset == 0) // note: len == 0 means read the entire object + l = o->get_size(); + else if (offset + l > o->get_size()) + l = o->get_size() - offset; + return read_errorator::make_ready_future(o->read(offset, l)); +} + +CyanStore::read_errorator::future CyanStore::readv( + CollectionRef ch, + const ghobject_t& oid, + interval_set& m, + uint32_t op_flags) +{ + return seastar::do_with(ceph::bufferlist{}, + [this, ch, oid, &m, op_flags](auto& bl) { + return crimson::do_for_each(m, + [this, ch, oid, op_flags, &bl](auto& p) { + return read(ch, oid, p.first, p.second, op_flags) + .safe_then([&bl](auto ret) { + bl.claim_append(ret); + }); + }).safe_then([&bl] { + return read_errorator::make_ready_future(std::move(bl)); + }); + }); +} + + +CyanStore::get_attr_errorator::future CyanStore::get_attr( + CollectionRef ch, + const ghobject_t& oid, + std::string_view name) const +{ + auto c = static_cast(ch.get()); + logger().debug("{} {} {}", + __func__, c->get_cid(), oid); + auto o = c->get_object(oid); + if (!o) { + return crimson::ct_error::enoent::make(); + } + if (auto found = o->xattr.find(name); found != o->xattr.end()) { + return get_attr_errorator::make_ready_future(found->second); + } else { + return crimson::ct_error::enodata::make(); + } +} + +CyanStore::get_attrs_ertr::future CyanStore::get_attrs( + CollectionRef ch, + const ghobject_t& oid) +{ + auto c = static_cast(ch.get()); + logger().debug("{} {} {}", + __func__, c->get_cid(), oid); + auto o = c->get_object(oid); + if (!o) { + return crimson::ct_error::enoent::make(); + } + return get_attrs_ertr::make_ready_future(o->xattr); +} + +auto CyanStore::omap_get_values(CollectionRef ch, + const ghobject_t& oid, + const omap_keys_t& keys) + -> read_errorator::future +{ + auto c = static_cast(ch.get()); + logger().debug("{} {} {}", __func__, c->get_cid(), oid); + auto o = c->get_object(oid); + if (!o) { + return crimson::ct_error::enoent::make(); + } + omap_values_t values; + for (auto& key : keys) { + if (auto found = o->omap.find(key); found != o->omap.end()) { + values.insert(*found); + } + } + return seastar::make_ready_future(std::move(values)); +} + +auto +CyanStore::omap_get_values(CollectionRef ch, + const ghobject_t &oid, + const std::optional &start) + -> read_errorator::future> +{ + auto c = static_cast(ch.get()); + logger().debug("{} {} {}", __func__, c->get_cid(), oid); + auto o = c->get_object(oid); + if (!o) { + return crimson::ct_error::enoent::make(); + } + omap_values_t values; + for (auto i = start ? o->omap.upper_bound(*start) : o->omap.begin(); + values.size() < MAX_KEYS_PER_OMAP_GET_CALL && i != o->omap.end(); + ++i) { + values.insert(*i); + } + return seastar::make_ready_future>( + std::make_tuple(true, std::move(values))); +} + +auto +CyanStore::omap_get_header(CollectionRef ch, + const ghobject_t& oid) + -> read_errorator::future +{ + auto c = static_cast(ch.get()); + auto o = c->get_object(oid); + if (!o) { + return crimson::ct_error::enoent::make(); + } + + return read_errorator::make_ready_future( + o->omap_header); +} + +seastar::future<> CyanStore::do_transaction(CollectionRef ch, + ceph::os::Transaction&& t) +{ + using ceph::os::Transaction; + int r = 0; + try { + auto i = t.begin(); + while (i.have_op()) { + r = 0; + switch (auto op = i.decode_op(); op->op) { + case Transaction::OP_NOP: + break; + case Transaction::OP_REMOVE: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + r = _remove(cid, oid); + if (r == -ENOENT) { + r = 0; + } + } + break; + case Transaction::OP_TOUCH: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + r = _touch(cid, oid); + } + break; + case Transaction::OP_WRITE: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + uint64_t off = op->off; + uint64_t len = op->len; + uint32_t fadvise_flags = i.get_fadvise_flags(); + ceph::bufferlist bl; + i.decode_bl(bl); + r = _write(cid, oid, off, len, bl, fadvise_flags); + } + break; + case Transaction::OP_ZERO: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + uint64_t off = op->off; + uint64_t len = op->len; + r = _zero(cid, oid, off, len); + } + break; + case Transaction::OP_TRUNCATE: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + uint64_t off = op->off; + r = _truncate(cid, oid, off); + } + break; + case Transaction::OP_SETATTR: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + std::string name = i.decode_string(); + ceph::bufferlist bl; + i.decode_bl(bl); + std::map to_set; + to_set[name] = bufferptr(bl.c_str(), bl.length()); + r = _setattrs(cid, oid, to_set); + } + break; + case Transaction::OP_RMATTR: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + std::string name = i.decode_string(); + r = _rm_attr(cid, oid, name); + } + break; + case Transaction::OP_MKCOLL: + { + coll_t cid = i.get_cid(op->cid); + r = _create_collection(cid, op->split_bits); + } + break; + case Transaction::OP_OMAP_CLEAR: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + r = _omap_clear(cid, oid); + } + break; + case Transaction::OP_OMAP_SETKEYS: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + std::map aset; + i.decode_attrset(aset); + r = _omap_set_values(cid, oid, std::move(aset)); + } + break; + case Transaction::OP_OMAP_SETHEADER: + { + const coll_t &cid = i.get_cid(op->cid); + const ghobject_t &oid = i.get_oid(op->oid); + ceph::bufferlist bl; + i.decode_bl(bl); + r = _omap_set_header(cid, oid, bl); + } + break; + case Transaction::OP_OMAP_RMKEYS: + { + const coll_t &cid = i.get_cid(op->cid); + const ghobject_t &oid = i.get_oid(op->oid); + omap_keys_t keys; + i.decode_keyset(keys); + r = _omap_rmkeys(cid, oid, keys); + } + break; + case Transaction::OP_OMAP_RMKEYRANGE: + { + const coll_t &cid = i.get_cid(op->cid); + const ghobject_t &oid = i.get_oid(op->oid); + string first, last; + first = i.decode_string(); + last = i.decode_string(); + r = _omap_rmkeyrange(cid, oid, first, last); + } + break; + case Transaction::OP_COLL_HINT: + { + ceph::bufferlist hint; + i.decode_bl(hint); + // ignored + break; + } + default: + logger().error("bad op {}", static_cast(op->op)); + abort(); + } + if (r < 0) { + break; + } + } + } catch (std::exception &e) { + logger().error("{} got exception {}", __func__, e); + r = -EINVAL; + } + if (r < 0) { + logger().error(" transaction dump:\n"); + JSONFormatter f(true); + f.open_object_section("transaction"); + t.dump(&f); + f.close_section(); + std::stringstream str; + f.flush(str); + logger().error("{}", str.str()); + ceph_assert(r == 0); + } + for (auto i : { + t.get_on_applied(), + t.get_on_commit(), + t.get_on_applied_sync()}) { + if (i) { + i->complete(0); + } + } + return seastar::now(); +} + +int CyanStore::_remove(const coll_t& cid, const ghobject_t& oid) +{ + logger().debug("{} cid={} oid={}", + __func__, cid, oid); + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + auto i = c->object_hash.find(oid); + if (i == c->object_hash.end()) + return -ENOENT; + used_bytes -= i->second->get_size(); + c->object_hash.erase(i); + c->object_map.erase(oid); + return 0; +} + +int CyanStore::_touch(const coll_t& cid, const ghobject_t& oid) +{ + logger().debug("{} cid={} oid={}", + __func__, cid, oid); + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + c->get_or_create_object(oid); + return 0; +} + +int CyanStore::_write(const coll_t& cid, const ghobject_t& oid, + uint64_t offset, size_t len, const ceph::bufferlist& bl, + uint32_t fadvise_flags) +{ + logger().debug("{} {} {} {} ~ {}", + __func__, cid, oid, offset, len); + assert(len == bl.length()); + + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_or_create_object(oid); + if (len > 0 && !local_conf()->memstore_debug_omit_block_device_write) { + const ssize_t old_size = o->get_size(); + o->write(offset, bl); + used_bytes += (o->get_size() - old_size); + } + + return 0; +} + +int CyanStore::_zero(const coll_t& cid, const ghobject_t& oid, + uint64_t offset, size_t len) +{ + logger().debug("{} {} {} {} ~ {}", + __func__, cid, oid, offset, len); + + ceph::buffer::list bl; + bl.append_zero(len); + return _write(cid, oid, offset, len, bl, 0); +} + +int CyanStore::_omap_clear( + const coll_t& cid, + const ghobject_t& oid) +{ + logger().debug("{} {} {}", __func__, cid, oid); + + auto c = _get_collection(cid); + if (!c) { + return -ENOENT; + } + ObjectRef o = c->get_object(oid); + if (!o) { + return -ENOENT; + } + o->omap.clear(); + o->omap_header.clear(); + return 0; +} + +int CyanStore::_omap_set_values( + const coll_t& cid, + const ghobject_t& oid, + std::map &&aset) +{ + logger().debug( + "{} {} {} {} keys", + __func__, cid, oid, aset.size()); + + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_or_create_object(oid); + for (auto &&i: aset) { + o->omap.insert(std::move(i)); + } + return 0; +} + +int CyanStore::_omap_set_header( + const coll_t& cid, + const ghobject_t& oid, + const ceph::bufferlist &header) +{ + logger().debug( + "{} {} {} {} bytes", + __func__, cid, oid, header.length()); + + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_or_create_object(oid); + o->omap_header = header; + return 0; +} + +int CyanStore::_omap_rmkeys( + const coll_t& cid, + const ghobject_t& oid, + const omap_keys_t& aset) +{ + logger().debug( + "{} {} {} {} keys", + __func__, cid, oid, aset.size()); + + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_or_create_object(oid); + for (auto &i: aset) { + o->omap.erase(i); + } + return 0; +} + +int CyanStore::_omap_rmkeyrange( + const coll_t& cid, + const ghobject_t& oid, + const std::string &first, + const std::string &last) +{ + logger().debug( + "{} {} {} first={} last={}", + __func__, cid, oid, first, last); + + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_or_create_object(oid); + for (auto i = o->omap.lower_bound(first); + i != o->omap.end() && i->first <= last; + o->omap.erase(i++)); + return 0; +} + +int CyanStore::_truncate(const coll_t& cid, const ghobject_t& oid, uint64_t size) +{ + logger().debug("{} cid={} oid={} size={}", + __func__, cid, oid, size); + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_object(oid); + if (!o) + return -ENOENT; + if (local_conf()->memstore_debug_omit_block_device_write) + return 0; + const ssize_t old_size = o->get_size(); + int r = o->truncate(size); + used_bytes += (o->get_size() - old_size); + return r; +} + +int CyanStore::_setattrs(const coll_t& cid, const ghobject_t& oid, + std::map& aset) +{ + logger().debug("{} cid={} oid={}", + __func__, cid, oid); + auto c = _get_collection(cid); + if (!c) + return -ENOENT; + + ObjectRef o = c->get_object(oid); + if (!o) + return -ENOENT; + for (std::map::const_iterator p = aset.begin(); + p != aset.end(); ++p) + o->xattr[p->first] = p->second; + return 0; +} + +int CyanStore::_rm_attr(const coll_t& cid, const ghobject_t& oid, + std::string_view name) +{ + logger().debug("{} cid={} oid={} name={}", __func__, cid, oid, name); + auto c = _get_collection(cid); + if (!c) { + return -ENOENT; + } + ObjectRef o = c->get_object(oid); + if (!o) { + return -ENOENT; + } + auto i = o->xattr.find(name); + if (i == o->xattr.end()) { + return -ENODATA; + } + o->xattr.erase(i); + return 0; +} + +int CyanStore::_create_collection(const coll_t& cid, int bits) +{ + auto result = coll_map.try_emplace(cid); + if (!result.second) + return -EEXIST; + auto p = new_coll_map.find(cid); + assert(p != new_coll_map.end()); + result.first->second = p->second; + result.first->second->bits = bits; + new_coll_map.erase(p); + return 0; +} + +boost::intrusive_ptr CyanStore::_get_collection(const coll_t& cid) +{ + auto cp = coll_map.find(cid); + if (cp == coll_map.end()) + return {}; + return cp->second; +} + +seastar::future<> CyanStore::write_meta(const std::string& key, + const std::string& value) +{ + std::string v = value; + v += "\n"; + if (int r = safe_write_file(path.c_str(), key.c_str(), + v.c_str(), v.length(), 0600); + r < 0) { + throw std::runtime_error{fmt::format("unable to write_meta({})", key)}; + } + return seastar::make_ready_future<>(); +} + +seastar::future> +CyanStore::read_meta(const std::string& key) +{ + std::string fsid(4096, '\0'); + int r = safe_read_file(path.c_str(), key.c_str(), fsid.data(), fsid.size()); + if (r > 0) { + fsid.resize(r); + // drop trailing newlines + boost::algorithm::trim_right_if(fsid, + [](unsigned char c) {return isspace(c);}); + } else { + fsid.clear(); + } + return seastar::make_ready_future>( + std::make_tuple(r, fsid)); +} + +uuid_d CyanStore::get_fsid() const +{ + return osd_fsid; +} + +unsigned CyanStore::get_max_attr_name_length() const +{ + // arbitrary limitation exactly like in the case of MemStore. + return 256; +} + +seastar::future CyanStore::get_omap_iterator( + CollectionRef ch, + const ghobject_t& oid) +{ + auto c = static_cast(ch.get()); + auto o = c->get_object(oid); + if (!o) { + throw std::runtime_error(fmt::format("object does not exist: {}", oid)); + } + return seastar::make_ready_future( + new CyanStore::CyanOmapIterator(o)); +} + +seastar::future> +CyanStore::fiemap( + CollectionRef ch, + const ghobject_t& oid, + uint64_t off, + uint64_t len) +{ + auto c = static_cast(ch.get()); + + ObjectRef o = c->get_object(oid); + if (!o) { + throw std::runtime_error(fmt::format("object does not exist: {}", oid)); + } + std::map m{{0, o->get_size()}}; + return seastar::make_ready_future>(std::move(m)); +} + +seastar::future +CyanStore::stat( + CollectionRef ch, + const ghobject_t& oid) +{ + auto c = static_cast(ch.get()); + auto o = c->get_object(oid); + if (!o) { + throw std::runtime_error(fmt::format("object does not exist: {}", oid)); + } + struct stat st; + st.st_size = o->get_size(); + return seastar::make_ready_future(std::move(st)); +} + +seastar::future<> CyanStore::CyanOmapIterator::seek_to_first() +{ + iter = obj->omap.begin(); + return seastar::make_ready_future<>(); +} + +seastar::future<> CyanStore::CyanOmapIterator::upper_bound(const std::string& after) +{ + iter = obj->omap.upper_bound(after); + return seastar::make_ready_future<>(); +} + +seastar::future<> CyanStore::CyanOmapIterator::lower_bound(const std::string &to) +{ + iter = obj->omap.lower_bound(to); + return seastar::make_ready_future<>(); +} + +bool CyanStore::CyanOmapIterator::valid() const +{ + return iter != obj->omap.end(); +} + +seastar::future<> CyanStore::CyanOmapIterator::next() +{ + ++iter; + return seastar::make_ready_future<>(); +} + +} -- cgit v1.2.3