From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- src/osd/osd_op_util.cc | 263 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 src/osd/osd_op_util.cc (limited to 'src/osd/osd_op_util.cc') diff --git a/src/osd/osd_op_util.cc b/src/osd/osd_op_util.cc new file mode 100644 index 000000000..54c590ee2 --- /dev/null +++ b/src/osd/osd_op_util.cc @@ -0,0 +1,263 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "osd/osd_op_util.h" + +#include "osd/ClassHandler.h" +#include "messages/MOSDOp.h" + +using std::ostream; +using std::string; +using std::vector; + +using ceph::bufferlist; + +bool OpInfo::check_rmw(int flag) const { + ceph_assert(rmw_flags != 0); + return rmw_flags & flag; +} +bool OpInfo::may_read() const { + return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ); +} +bool OpInfo::may_write() const { + return need_write_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE); +} +bool OpInfo::may_cache() const { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); } +bool OpInfo::rwordered_forced() const { + return check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED); +} +bool OpInfo::rwordered() const { + return may_write() || may_cache() || rwordered_forced(); +} + +bool OpInfo::includes_pg_op() const { + return check_rmw(CEPH_OSD_RMW_FLAG_PGOP); +} +bool OpInfo::need_read_cap() const { + return check_rmw(CEPH_OSD_RMW_FLAG_READ); +} +bool OpInfo::need_write_cap() const { + return check_rmw(CEPH_OSD_RMW_FLAG_WRITE); +} +bool OpInfo::need_promote() const { + return check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); +} +bool OpInfo::need_skip_handle_cache() const { + return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); +} +bool OpInfo::need_skip_promote() const { + return check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); +} +bool OpInfo::allows_returnvec() const { + return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); +} + +void OpInfo::set_rmw_flags(int flags) { + rmw_flags |= flags; +} + +void OpInfo::set_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ); } +void OpInfo::set_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE); } +void OpInfo::set_class_read() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ); } +void OpInfo::set_class_write() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE); } +void OpInfo::set_pg_op() { set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP); } +void OpInfo::set_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE); } +void OpInfo::set_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE); } +void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE); } +void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); } +void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } +void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } + + +int OpInfo::set_from_op( + const MOSDOp *m, + const OSDMap &osdmap) +{ + vector::const_iterator iter; + + // client flags have no bearing on whether an op is a read, write, etc. + clear(); + + if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) { + set_force_rwordered(); + } + if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) { + set_returnvec(); + } + + // set bits based on op codes, called methods. + for (iter = m->ops.begin(); iter != m->ops.end(); ++iter) { + if ((iter->op.op == CEPH_OSD_OP_WATCH && + iter->op.watch.op == CEPH_OSD_WATCH_OP_PING)) { + /* This a bit odd. PING isn't actually a write. It can't + * result in an update to the object_info. PINGs also aren't + * resent, so there's no reason to write out a log entry. + * + * However, we pipeline them behind writes, so let's force + * the write_ordered flag. + */ + set_force_rwordered(); + } else { + if (ceph_osd_op_mode_modify(iter->op.op)) + set_write(); + } + if (ceph_osd_op_mode_read(iter->op.op)) + set_read(); + + // set READ flag if there are src_oids + if (iter->soid.oid.name.length()) + set_read(); + + // set PGOP flag if there are PG ops + if (ceph_osd_op_type_pg(iter->op.op)) + set_pg_op(); + + if (ceph_osd_op_mode_cache(iter->op.op)) + set_cache(); + + // check for ec base pool + int64_t poolid = m->get_pg().pool(); + const pg_pool_t *pool = osdmap.get_pg_pool(poolid); + if (pool && pool->is_tier()) { + const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of); + if (base_pool && base_pool->require_rollback()) { + if ((iter->op.op != CEPH_OSD_OP_READ) && + (iter->op.op != CEPH_OSD_OP_CHECKSUM) && + (iter->op.op != CEPH_OSD_OP_CMPEXT) && + (iter->op.op != CEPH_OSD_OP_STAT) && + (iter->op.op != CEPH_OSD_OP_ISDIRTY) && + (iter->op.op != CEPH_OSD_OP_UNDIRTY) && + (iter->op.op != CEPH_OSD_OP_GETXATTR) && + (iter->op.op != CEPH_OSD_OP_GETXATTRS) && + (iter->op.op != CEPH_OSD_OP_CMPXATTR) && + (iter->op.op != CEPH_OSD_OP_ASSERT_VER) && + (iter->op.op != CEPH_OSD_OP_LIST_WATCHERS) && + (iter->op.op != CEPH_OSD_OP_LIST_SNAPS) && + (iter->op.op != CEPH_OSD_OP_SETALLOCHINT) && + (iter->op.op != CEPH_OSD_OP_WRITEFULL) && + (iter->op.op != CEPH_OSD_OP_ROLLBACK) && + (iter->op.op != CEPH_OSD_OP_CREATE) && + (iter->op.op != CEPH_OSD_OP_DELETE) && + (iter->op.op != CEPH_OSD_OP_SETXATTR) && + (iter->op.op != CEPH_OSD_OP_RMXATTR) && + (iter->op.op != CEPH_OSD_OP_STARTSYNC) && + (iter->op.op != CEPH_OSD_OP_COPY_GET) && + (iter->op.op != CEPH_OSD_OP_COPY_FROM) && + (iter->op.op != CEPH_OSD_OP_COPY_FROM2)) { + set_promote(); + } + } + } + + switch (iter->op.op) { + case CEPH_OSD_OP_CALL: + { + bufferlist::iterator bp = const_cast(iter->indata).begin(); + int is_write, is_read; + string cname, mname; + bp.copy(iter->op.cls.class_len, cname); + bp.copy(iter->op.cls.method_len, mname); + + ClassHandler::ClassData *cls; + int r = ClassHandler::get_instance().open_class(cname, &cls); + if (r) { + if (r == -ENOENT) + r = -EOPNOTSUPP; + else if (r != -EPERM) // propagate permission errors + r = -EIO; + return r; + } + int flags = cls->get_method_flags(mname); + if (flags < 0) { + if (flags == -ENOENT) + r = -EOPNOTSUPP; + else + r = flags; + return r; + } + is_read = flags & CLS_METHOD_RD; + is_write = flags & CLS_METHOD_WR; + bool is_promote = flags & CLS_METHOD_PROMOTE; + + if (is_read) + set_class_read(); + if (is_write) + set_class_write(); + if (is_promote) + set_promote(); + add_class(std::move(cname), std::move(mname), is_read, is_write, + cls->allowed); + break; + } + + case CEPH_OSD_OP_WATCH: + // force the read bit for watch since it is depends on previous + // watch state (and may return early if the watch exists) or, in + // the case of ping, is simply a read op. + set_read(); + // fall through + case CEPH_OSD_OP_NOTIFY: + case CEPH_OSD_OP_NOTIFY_ACK: + { + set_promote(); + break; + } + + case CEPH_OSD_OP_DELETE: + // if we get a delete with FAILOK we can skip handle cache. without + // FAILOK we still need to promote (or do something smarter) to + // determine whether to return ENOENT or 0. + if (iter == m->ops.begin() && + iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) { + set_skip_handle_cache(); + } + // skip promotion when proxying a delete op + if (m->ops.size() == 1) { + set_skip_promote(); + } + break; + + case CEPH_OSD_OP_CACHE_TRY_FLUSH: + case CEPH_OSD_OP_CACHE_FLUSH: + case CEPH_OSD_OP_CACHE_EVICT: + // If try_flush/flush/evict is the only op, can skip handle cache. + if (m->ops.size() == 1) { + set_skip_handle_cache(); + } + break; + + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SYNC_READ: + case CEPH_OSD_OP_SPARSE_READ: + case CEPH_OSD_OP_CHECKSUM: + case CEPH_OSD_OP_WRITEFULL: + if (m->ops.size() == 1 && + (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE || + iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) { + set_skip_promote(); + } + break; + + // force promotion when pin an object in cache tier + case CEPH_OSD_OP_CACHE_PIN: + set_promote(); + break; + + default: + break; + } + } + + if (rmw_flags == 0) + return -EINVAL; + + return 0; + +} + +ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i) +{ + out << "class " << i.class_name << " method " << i.method_name + << " rd " << i.read << " wr " << i.write << " allowed " << i.allowed; + return out; +} -- cgit v1.2.3