summaryrefslogtreecommitdiffstats
path: root/src/osd/osd_op_util.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/osd/osd_op_util.cc
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/osd/osd_op_util.cc')
-rw-r--r--src/osd/osd_op_util.cc263
1 files changed, 263 insertions, 0 deletions
diff --git a/src/osd/osd_op_util.cc b/src/osd/osd_op_util.cc
new file mode 100644
index 000000000..54c590ee2
--- /dev/null
+++ b/src/osd/osd_op_util.cc
@@ -0,0 +1,263 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "osd/osd_op_util.h"
+
+#include "osd/ClassHandler.h"
+#include "messages/MOSDOp.h"
+
+using std::ostream;
+using std::string;
+using std::vector;
+
+using ceph::bufferlist;
+
+// Test whether any bit of `flag` is present in rmw_flags.
+// Asserts that rmw_flags is non-zero, i.e. that at least one flag has
+// already been set before any predicate is queried.
+bool OpInfo::check_rmw(int flag) const
+{
+  ceph_assert(rmw_flags != 0);
+  return (rmw_flags & flag) != 0;
+}
+// True if the READ flag or the CLASS_READ flag is set.
+bool OpInfo::may_read() const
+{
+  if (need_read_cap())
+    return true;
+  return check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ);
+}
+// True if the WRITE flag or the CLASS_WRITE flag is set.
+bool OpInfo::may_write() const
+{
+  if (need_write_cap())
+    return true;
+  return check_rmw(CEPH_OSD_RMW_FLAG_CLASS_WRITE);
+}
+// True if the CACHE flag is set.
+bool OpInfo::may_cache() const
+{
+  const bool cache_flag_set = check_rmw(CEPH_OSD_RMW_FLAG_CACHE);
+  return cache_flag_set;
+}
+// True if the RWORDERED flag is set.
+bool OpInfo::rwordered_forced() const
+{
+  const bool forced = check_rmw(CEPH_OSD_RMW_FLAG_RWORDERED);
+  return forced;
+}
+// True if the op may write, may touch the cache, or has read/write
+// ordering forced. The three checks run in that order and stop at the
+// first one that holds.
+bool OpInfo::rwordered() const
+{
+  if (may_write())
+    return true;
+  if (may_cache())
+    return true;
+  return rwordered_forced();
+}
+
+// True if the PGOP flag is set.
+bool OpInfo::includes_pg_op() const
+{
+  const bool has_pg_op = check_rmw(CEPH_OSD_RMW_FLAG_PGOP);
+  return has_pg_op;
+}
+// True if the READ flag is set.
+bool OpInfo::need_read_cap() const
+{
+  const bool read_flag_set = check_rmw(CEPH_OSD_RMW_FLAG_READ);
+  return read_flag_set;
+}
+// True if the WRITE flag is set.
+bool OpInfo::need_write_cap() const
+{
+  const bool write_flag_set = check_rmw(CEPH_OSD_RMW_FLAG_WRITE);
+  return write_flag_set;
+}
+// True if the FORCE_PROMOTE flag is set.
+bool OpInfo::need_promote() const
+{
+  const bool force_promote = check_rmw(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE);
+  return force_promote;
+}
+// True if the SKIP_HANDLE_CACHE flag is set.
+bool OpInfo::need_skip_handle_cache() const
+{
+  const bool skip = check_rmw(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE);
+  return skip;
+}
+// True if the SKIP_PROMOTE flag is set.
+bool OpInfo::need_skip_promote() const
+{
+  const bool skip = check_rmw(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE);
+  return skip;
+}
+// True if the RETURNVEC flag is set.
+bool OpInfo::allows_returnvec() const
+{
+  const bool returnvec = check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC);
+  return returnvec;
+}
+
+// OR the given bits into rmw_flags; bits that are already set stay set.
+void OpInfo::set_rmw_flags(int flags)
+{
+  rmw_flags = rmw_flags | flags;
+}
+
+// Add CEPH_OSD_RMW_FLAG_READ to the rmw flags.
+void OpInfo::set_read()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_READ);
+}
+// Add CEPH_OSD_RMW_FLAG_WRITE to the rmw flags.
+void OpInfo::set_write()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_WRITE);
+}
+// Add CEPH_OSD_RMW_FLAG_CLASS_READ to the rmw flags.
+void OpInfo::set_class_read()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_READ);
+}
+// Add CEPH_OSD_RMW_FLAG_CLASS_WRITE to the rmw flags.
+void OpInfo::set_class_write()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_CLASS_WRITE);
+}
+// Add CEPH_OSD_RMW_FLAG_PGOP to the rmw flags.
+void OpInfo::set_pg_op()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_PGOP);
+}
+// Add CEPH_OSD_RMW_FLAG_CACHE to the rmw flags.
+void OpInfo::set_cache()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_CACHE);
+}
+// Add CEPH_OSD_RMW_FLAG_FORCE_PROMOTE to the rmw flags.
+void OpInfo::set_promote()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_FORCE_PROMOTE);
+}
+// Add CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE to the rmw flags.
+void OpInfo::set_skip_handle_cache()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HANDLE_CACHE);
+}
+// Add CEPH_OSD_RMW_FLAG_SKIP_PROMOTE to the rmw flags.
+void OpInfo::set_skip_promote()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE);
+}
+// Add CEPH_OSD_RMW_FLAG_RWORDERED to the rmw flags.
+void OpInfo::set_force_rwordered()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED);
+}
+// Add CEPH_OSD_RMW_FLAG_RETURNVEC to the rmw flags.
+void OpInfo::set_returnvec()
+{
+  set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC);
+}
+
+
+/**
+ * Derive this OpInfo's rmw flags (and its list of called classes) from a
+ * client request.
+ *
+ * Any previous state is cleared first. Flags are then ORed in from the
+ * message-level flags and from every sub-op in m->ops, in order.
+ *
+ * @param m       request whose flags, target pg and ops are inspected
+ * @param osdmap  used to look up the target pool and, when that pool is a
+ *                tier, the pool it is a tier of
+ * @return 0 on success, or a negative errno:
+ *         - -EINVAL      no rmw flag ended up set
+ *         - -EOPNOTSUPP  a CEPH_OSD_OP_CALL names a class or method for
+ *                        which the lookup reported -ENOENT
+ *         - -EPERM       open_class() reported -EPERM
+ *         - -EIO         open_class() failed with any other error
+ *         - other        negative value from get_method_flags(), passed on
+ *         On an error return from the CALL handling, the flags accumulated
+ *         up to that point are left as they are.
+ */
+int OpInfo::set_from_op(
+  const MOSDOp *m,
+  const OSDMap &osdmap)
+{
+  // client flags have no bearing on whether an op is a read, write, etc.
+  clear();
+
+  if (m->has_flag(CEPH_OSD_FLAG_RWORDERED)) {
+    set_force_rwordered();
+  }
+  if (m->has_flag(CEPH_OSD_FLAG_RETURNVEC)) {
+    set_returnvec();
+  }
+
+  // check for ec base pool: is the target pool a tier of a pool that
+  // requires rollback? The answer depends only on the message's pg and
+  // the osdmap, not on the individual sub-op, so compute it once here
+  // rather than once per op.
+  bool tier_of_rollback_base = false;
+  {
+    const int64_t poolid = m->get_pg().pool();
+    const pg_pool_t *pool = osdmap.get_pg_pool(poolid);
+    if (pool && pool->is_tier()) {
+      const pg_pool_t *base_pool = osdmap.get_pg_pool(pool->tier_of);
+      tier_of_rollback_base = base_pool && base_pool->require_rollback();
+    }
+  }
+
+  // set bits based on op codes, called methods.
+  for (auto iter = m->ops.begin(); iter != m->ops.end(); ++iter) {
+    if (iter->op.op == CEPH_OSD_OP_WATCH &&
+        iter->op.watch.op == CEPH_OSD_WATCH_OP_PING) {
+      /* This is a bit odd.  PING isn't actually a write.  It can't
+       * result in an update to the object_info.  PINGs also aren't
+       * resent, so there's no reason to write out a log entry.
+       *
+       * However, we pipeline them behind writes, so let's force
+       * the write_ordered flag.
+       */
+      set_force_rwordered();
+    } else if (ceph_osd_op_mode_modify(iter->op.op)) {
+      set_write();
+    }
+    if (ceph_osd_op_mode_read(iter->op.op))
+      set_read();
+
+    // set READ flag if there are src_oids
+    if (iter->soid.oid.name.length())
+      set_read();
+
+    // set PGOP flag if there are PG ops
+    if (ceph_osd_op_type_pg(iter->op.op))
+      set_pg_op();
+
+    if (ceph_osd_op_mode_cache(iter->op.op))
+      set_cache();
+
+    // On a tier of a rollback-requiring base pool, every op except the
+    // ones listed here forces promotion.
+    if (tier_of_rollback_base) {
+      switch (iter->op.op) {
+      case CEPH_OSD_OP_READ:
+      case CEPH_OSD_OP_CHECKSUM:
+      case CEPH_OSD_OP_CMPEXT:
+      case CEPH_OSD_OP_STAT:
+      case CEPH_OSD_OP_ISDIRTY:
+      case CEPH_OSD_OP_UNDIRTY:
+      case CEPH_OSD_OP_GETXATTR:
+      case CEPH_OSD_OP_GETXATTRS:
+      case CEPH_OSD_OP_CMPXATTR:
+      case CEPH_OSD_OP_ASSERT_VER:
+      case CEPH_OSD_OP_LIST_WATCHERS:
+      case CEPH_OSD_OP_LIST_SNAPS:
+      case CEPH_OSD_OP_SETALLOCHINT:
+      case CEPH_OSD_OP_WRITEFULL:
+      case CEPH_OSD_OP_ROLLBACK:
+      case CEPH_OSD_OP_CREATE:
+      case CEPH_OSD_OP_DELETE:
+      case CEPH_OSD_OP_SETXATTR:
+      case CEPH_OSD_OP_RMXATTR:
+      case CEPH_OSD_OP_STARTSYNC:
+      case CEPH_OSD_OP_COPY_GET:
+      case CEPH_OSD_OP_COPY_FROM:
+      case CEPH_OSD_OP_COPY_FROM2:
+        break;
+      default:
+        set_promote();
+        break;
+      }
+    }
+
+    switch (iter->op.op) {
+    case CEPH_OSD_OP_CALL:
+      {
+        // indata starts with the class name followed by the method name;
+        // their lengths come from the op header.
+        bufferlist::iterator bp = const_cast<bufferlist&>(iter->indata).begin();
+        string cname, mname;
+        bp.copy(iter->op.cls.class_len, cname);
+        bp.copy(iter->op.cls.method_len, mname);
+
+        ClassHandler::ClassData *cls = nullptr;
+        int r = ClassHandler::get_instance().open_class(cname, &cls);
+        if (r) {
+          if (r == -ENOENT)
+            r = -EOPNOTSUPP;
+          else if (r != -EPERM) // propagate permission errors
+            r = -EIO;
+          return r;
+        }
+        const int flags = cls->get_method_flags(mname);
+        if (flags < 0) {
+          return flags == -ENOENT ? -EOPNOTSUPP : flags;
+        }
+        // Kept as the raw masked ints so add_class() receives exactly the
+        // values it did before.
+        const int is_read = flags & CLS_METHOD_RD;
+        const int is_write = flags & CLS_METHOD_WR;
+        const bool is_promote = flags & CLS_METHOD_PROMOTE;
+
+        if (is_read)
+          set_class_read();
+        if (is_write)
+          set_class_write();
+        if (is_promote)
+          set_promote();
+        add_class(std::move(cname), std::move(mname), is_read, is_write,
+                  cls->allowed);
+        break;
+      }
+
+    case CEPH_OSD_OP_WATCH:
+      // force the read bit for watch since it depends on previous
+      // watch state (and may return early if the watch exists) or, in
+      // the case of ping, is simply a read op.
+      set_read();
+      // fall through
+    case CEPH_OSD_OP_NOTIFY:
+    case CEPH_OSD_OP_NOTIFY_ACK:
+      {
+        set_promote();
+        break;
+      }
+
+    case CEPH_OSD_OP_DELETE:
+      // if we get a delete with FAILOK we can skip handle cache. without
+      // FAILOK we still need to promote (or do something smarter) to
+      // determine whether to return ENOENT or 0.
+      if (iter == m->ops.begin() &&
+          iter->op.flags == CEPH_OSD_OP_FLAG_FAILOK) {
+        set_skip_handle_cache();
+      }
+      // skip promotion when proxying a delete op
+      if (m->ops.size() == 1) {
+        set_skip_promote();
+      }
+      break;
+
+    case CEPH_OSD_OP_CACHE_TRY_FLUSH:
+    case CEPH_OSD_OP_CACHE_FLUSH:
+    case CEPH_OSD_OP_CACHE_EVICT:
+      // If try_flush/flush/evict is the only op, can skip handle cache.
+      if (m->ops.size() == 1) {
+        set_skip_handle_cache();
+      }
+      break;
+
+    case CEPH_OSD_OP_READ:
+    case CEPH_OSD_OP_SYNC_READ:
+    case CEPH_OSD_OP_SPARSE_READ:
+    case CEPH_OSD_OP_CHECKSUM:
+    case CEPH_OSD_OP_WRITEFULL:
+      // a lone op carrying a NOCACHE or DONTNEED fadvise hint skips
+      // promotion
+      if (m->ops.size() == 1 &&
+          (iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE ||
+           iter->op.flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED)) {
+        set_skip_promote();
+      }
+      break;
+
+    // force promotion when pinning an object in the cache tier
+    case CEPH_OSD_OP_CACHE_PIN:
+      set_promote();
+      break;
+
+    default:
+      break;
+    }
+  }
+
+  // No op contributed any flag: the request cannot be classified.
+  if (rmw_flags == 0)
+    return -EINVAL;
+
+  return 0;
+}
+
+// Stream a ClassInfo as
+// "class <class_name> method <method_name> rd <read> wr <write> allowed <allowed>".
+ostream& operator<<(ostream& out, const OpInfo::ClassInfo& i)
+{
+  out << "class " << i.class_name;
+  out << " method " << i.method_name;
+  out << " rd " << i.read;
+  out << " wr " << i.write;
+  out << " allowed " << i.allowed;
+  return out;
+}