summaryrefslogtreecommitdiffstats
path: root/src/rgw/services
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rgw/services/svc_bi.h44
-rw-r--r--src/rgw/services/svc_bi_rados.cc509
-rw-r--r--src/rgw/services/svc_bi_rados.h166
-rw-r--r--src/rgw/services/svc_bilog_rados.cc220
-rw-r--r--src/rgw/services/svc_bilog_rados.h60
-rw-r--r--src/rgw/services/svc_bucket.cc25
-rw-r--r--src/rgw/services/svc_bucket.h111
-rw-r--r--src/rgw/services/svc_bucket_sobj.cc644
-rw-r--r--src/rgw/services/svc_bucket_sobj.h180
-rw-r--r--src/rgw/services/svc_bucket_sync.h55
-rw-r--r--src/rgw/services/svc_bucket_sync_sobj.cc903
-rw-r--r--src/rgw/services/svc_bucket_sync_sobj.h123
-rw-r--r--src/rgw/services/svc_bucket_types.h38
-rw-r--r--src/rgw/services/svc_cls.cc478
-rw-r--r--src/rgw/services/svc_cls.h166
-rw-r--r--src/rgw/services/svc_config_key.h31
-rw-r--r--src/rgw/services/svc_config_key_rados.cc50
-rw-r--r--src/rgw/services/svc_config_key_rados.h54
-rw-r--r--src/rgw/services/svc_finisher.cc58
-rw-r--r--src/rgw/services/svc_finisher.h44
-rw-r--r--src/rgw/services/svc_mdlog.cc549
-rw-r--r--src/rgw/services/svc_mdlog.h118
-rw-r--r--src/rgw/services/svc_meta.cc46
-rw-r--r--src/rgw/services/svc_meta.h48
-rw-r--r--src/rgw/services/svc_meta_be.cc193
-rw-r--r--src/rgw/services/svc_meta_be.h294
-rw-r--r--src/rgw/services/svc_meta_be_otp.cc73
-rw-r--r--src/rgw/services/svc_meta_be_otp.h89
-rw-r--r--src/rgw/services/svc_meta_be_params.h25
-rw-r--r--src/rgw/services/svc_meta_be_sobj.cc246
-rw-r--r--src/rgw/services/svc_meta_be_sobj.h194
-rw-r--r--src/rgw/services/svc_meta_be_types.h26
-rw-r--r--src/rgw/services/svc_notify.cc515
-rw-r--r--src/rgw/services/svc_notify.h106
-rw-r--r--src/rgw/services/svc_otp.cc186
-rw-r--r--src/rgw/services/svc_otp.h95
-rw-r--r--src/rgw/services/svc_otp_types.h29
-rw-r--r--src/rgw/services/svc_quota.cc18
-rw-r--r--src/rgw/services/svc_quota.h22
-rw-r--r--src/rgw/services/svc_rados.cc445
-rw-r--r--src/rgw/services/svc_rados.h252
-rw-r--r--src/rgw/services/svc_role_rados.cc82
-rw-r--r--src/rgw/services/svc_role_rados.h50
-rw-r--r--src/rgw/services/svc_sync_modules.cc44
-rw-r--r--src/rgw/services/svc_sync_modules.h34
-rw-r--r--src/rgw/services/svc_sys_obj.cc183
-rw-r--r--src/rgw/services/svc_sys_obj.h270
-rw-r--r--src/rgw/services/svc_sys_obj_cache.cc670
-rw-r--r--src/rgw/services/svc_sys_obj_cache.h222
-rw-r--r--src/rgw/services/svc_sys_obj_core.cc666
-rw-r--r--src/rgw/services/svc_sys_obj_core.h145
-rw-r--r--src/rgw/services/svc_sys_obj_core_types.h34
-rw-r--r--src/rgw/services/svc_sys_obj_types.h15
-rw-r--r--src/rgw/services/svc_tier_rados.cc36
-rw-r--r--src/rgw/services/svc_tier_rados.h154
-rw-r--r--src/rgw/services/svc_user.cc11
-rw-r--r--src/rgw/services/svc_user.h127
-rw-r--r--src/rgw/services/svc_user_rados.cc968
-rw-r--r--src/rgw/services/svc_user_rados.h211
-rw-r--r--src/rgw/services/svc_zone.cc1100
-rw-r--r--src/rgw/services/svc_zone.h165
-rw-r--r--src/rgw/services/svc_zone_utils.cc64
-rw-r--r--src/rgw/services/svc_zone_utils.h38
63 files changed, 12817 insertions, 0 deletions
diff --git a/src/rgw/services/svc_bi.h b/src/rgw/services/svc_bi.h
new file mode 100644
index 000000000..bd811e162
--- /dev/null
+++ b/src/rgw/services/svc_bi.h
@@ -0,0 +1,44 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+class RGWBucketInfo;
+struct RGWBucketEnt;
+
+
+/**
+ * Abstract interface of the bucket index service.
+ *
+ * Backends derive from this class and implement index creation and
+ * cleanup, stats retrieval, and the hook invoked when a bucket info
+ * record is overwritten. All operations report status as an int.
+ */
+class RGWSI_BucketIndex : public RGWServiceInstance
+{
+public:
+  RGWSI_BucketIndex(CephContext *cct) : RGWServiceInstance(cct) {}
+  virtual ~RGWSI_BucketIndex() = default;
+
+  /// Create the index described by idx_layout for the given bucket.
+  virtual int init_index(const DoutPrefixProvider *dpp,
+                         RGWBucketInfo& bucket_info,
+                         const rgw::bucket_index_layout_generation& idx_layout) = 0;
+
+  /// Clean the index described by idx_layout for the given bucket.
+  virtual int clean_index(const DoutPrefixProvider *dpp,
+                          RGWBucketInfo& bucket_info,
+                          const rgw::bucket_index_layout_generation& idx_layout) = 0;
+
+  /// Read the bucket's stats into *stats.
+  virtual int read_stats(const DoutPrefixProvider *dpp,
+                         const RGWBucketInfo& bucket_info,
+                         RGWBucketEnt *stats,
+                         optional_yield y) = 0;
+
+  /// Hook taking the new bucket info (info) and the one it replaces
+  /// (orig_info).
+  virtual int handle_overwrite(const DoutPrefixProvider *dpp,
+                               const RGWBucketInfo& info,
+                               const RGWBucketInfo& orig_info,
+                               optional_yield y) = 0;
+};
diff --git a/src/rgw/services/svc_bi_rados.cc b/src/rgw/services/svc_bi_rados.cc
new file mode 100644
index 000000000..6002b986f
--- /dev/null
+++ b/src/rgw/services/svc_bi_rados.cc
@@ -0,0 +1,509 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_bi_rados.h"
+#include "svc_bilog_rados.h"
+#include "svc_zone.h"
+
+#include "rgw_bucket.h"
+#include "rgw_zone.h"
+#include "rgw_datalog.h"
+
+#include "cls/rgw/cls_rgw_client.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Prefix of every bucket index object name; the bucket id is appended to it.
+// Never modified after initialization, hence const.
+static const std::string dir_oid_prefix = ".dir.";
+
+// Only forwards the context to the base class; the svc.* pointers are
+// assigned separately by init().
+RGWSI_BucketIndex_RADOS::RGWSI_BucketIndex_RADOS(CephContext *cct)
+  : RGWSI_BucketIndex(cct)
+{}
+
+// Store the service handles this service depends on. Nothing is opened or
+// validated here; the pointers are only recorded in svc.
+void RGWSI_BucketIndex_RADOS::init(RGWSI_Zone *zone_svc,
+                                   RGWSI_RADOS *rados_svc,
+                                   RGWSI_BILog_RADOS *bilog_svc,
+                                   RGWDataChangesLog *datalog_rados_svc)
+{
+  svc.datalog_rados = datalog_rados_svc;
+  svc.bilog = bilog_svc;
+  svc.rados = rados_svc;
+  svc.zone = zone_svc;
+}
+
+// Point *index_pool at the given pool and open it.
+// mostly_omap is forwarded to the open parameters unchanged.
+// Returns whatever Pool::open() returns.
+int RGWSI_BucketIndex_RADOS::open_pool(const DoutPrefixProvider *dpp,
+                                       const rgw_pool& pool,
+                                       RGWSI_RADOS::Pool *index_pool,
+                                       bool mostly_omap)
+{
+  *index_pool = svc.rados->pool(pool);
+
+  RGWSI_RADOS::OpenParams open_params;
+  open_params.set_mostly_omap(mostly_omap);
+  return index_pool->open(dpp, open_params);
+}
+
+// Open the pool holding the bucket's index objects.
+//
+// An explicit placement index pool on the bucket wins. Otherwise the
+// bucket's placement rule (or the zonegroup default when the rule is empty)
+// is looked up in the zone params' placement_pools.
+// Returns 0 on success, -EINVAL when the rule is unknown, or the negative
+// result of open_pool().
+int RGWSI_BucketIndex_RADOS::open_bucket_index_pool(const DoutPrefixProvider *dpp,
+                                                    const RGWBucketInfo& bucket_info,
+                                                    RGWSI_RADOS::Pool *index_pool)
+{
+  const rgw_pool& explicit_pool = bucket_info.bucket.explicit_placement.index_pool;
+  if (!explicit_pool.empty()) {
+    return open_pool(dpp, explicit_pool, index_pool, false);
+  }
+
+  auto& zonegroup = svc.zone->get_zonegroup();
+  auto& zone_params = svc.zone->get_zone_params();
+
+  // fall back to the zonegroup default when the bucket carries no rule
+  const rgw_placement_rule *rule = bucket_info.placement_rule.empty()
+    ? &zonegroup.default_placement
+    : &bucket_info.placement_rule;
+
+  const auto placement = zone_params.placement_pools.find(rule->name);
+  if (placement == zone_params.placement_pools.end()) {
+    ldpp_dout(dpp, 0) << "could not find placement rule " << *rule << " within zonegroup " << dendl;
+    return -EINVAL;
+  }
+
+  const int r = open_pool(dpp, placement->second.index_pool, index_pool, true);
+  return (r < 0) ? r : 0;
+}
+
+// Open the bucket's index pool and compute the base index object name
+// (dir_oid_prefix followed by the bucket id).
+// Returns 0 on success, the negative result of open_bucket_index_pool(),
+// or -EIO when the bucket id is empty.
+int RGWSI_BucketIndex_RADOS::open_bucket_index_base(const DoutPrefixProvider *dpp,
+                                                    const RGWBucketInfo& bucket_info,
+                                                    RGWSI_RADOS::Pool *index_pool,
+                                                    string *bucket_oid_base)
+{
+  const int r = open_bucket_index_pool(dpp, bucket_info, index_pool);
+  if (r < 0) {
+    return r;
+  }
+
+  const string& bucket_id = bucket_info.bucket.bucket_id;
+  if (bucket_id.empty()) {
+    ldpp_dout(dpp, 0) << "ERROR: empty bucket_id for bucket operation" << dendl;
+    return -EIO;
+  }
+
+  *bucket_oid_base = dir_oid_prefix + bucket_id;
+  return 0;
+}
+
+// Open the bucket's index pool and return the unsharded index object name
+// (dir_oid_prefix followed by the bucket id) in *bucket_oid.
+// Returns 0 on success, the negative result of open_bucket_index_pool()
+// (logged at level 20), or -EIO when the bucket id is empty.
+int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp,
+                                               const RGWBucketInfo& bucket_info,
+                                               RGWSI_RADOS::Pool *index_pool,
+                                               string *bucket_oid)
+{
+  const int r = open_bucket_index_pool(dpp, bucket_info, index_pool);
+  if (r < 0) {
+    ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned "
+                       << r << dendl;
+    return r;
+  }
+
+  const string& bucket_id = bucket_info.bucket.bucket_id;
+  if (bucket_id.empty()) {
+    ldpp_dout(dpp, 0) << "ERROR: empty bucket id for bucket operation" << dendl;
+    return -EIO;
+  }
+
+  *bucket_oid = dir_oid_prefix + bucket_id;
+  return 0;
+}
+
+// Format "<bucket_oid_base>.<gen_id>.<shard_id>" into buf (at most len bytes
+// including the terminating NUL).
+//
+// Returns snprintf()'s result: the length the full name needs, excluding the
+// NUL. The return type is int (it used to be char, which truncated any
+// length above CHAR_MAX). shard_id is cast to int so the argument matches
+// the "%d" conversion while producing the same text as before.
+static int bucket_obj_with_generation(char *buf, size_t len, const std::string& bucket_oid_base, uint64_t gen_id,
+                                      uint32_t shard_id)
+{
+  return snprintf(buf, len, "%s.%" PRIu64 ".%d", bucket_oid_base.c_str(), gen_id,
+                  static_cast<int>(shard_id));
+}
+
+// Format "<bucket_oid_base>.<shard_id>" into buf (at most len bytes including
+// the terminating NUL).
+//
+// Returns snprintf()'s result: the length the full name needs, excluding the
+// NUL. The return type is int (it used to be char, which truncated any
+// length above CHAR_MAX). shard_id is cast to int so the argument matches
+// the "%d" conversion while producing the same text as before.
+static int bucket_obj_without_generation(char *buf, size_t len, const std::string& bucket_oid_base, uint32_t shard_id)
+{
+  return snprintf(buf, len, "%s.%d", bucket_oid_base.c_str(), static_cast<int>(shard_id));
+}
+
+// Fill *_bucket_objects with "shard number -> index object name" entries.
+//
+//  - num_shards == 0: a single entry, key 0, named bucket_oid_base.
+//  - shard_id < 0:    one entry per shard in [0, num_shards).
+//  - otherwise:       only the entry for shard_id, provided it lies in
+//                     [0, num_shards); an out-of-range shard_id adds nothing.
+//
+// The range check rejects shard_id == num_shards as well; it must stay in
+// step with get_bucket_instance_ids() below, because callers compare the
+// sizes of the two resulting maps.
+static void get_bucket_index_objects(const string& bucket_oid_base,
+                                     uint32_t num_shards, uint64_t gen_id,
+                                     map<int, string> *_bucket_objects,
+                                     int shard_id = -1)
+{
+  auto& bucket_objects = *_bucket_objects;
+  if (!num_shards) {
+    bucket_objects[0] = bucket_oid_base;
+    return;
+  }
+
+  // Scratch space for one formatted name. A std::string replaces the former
+  // variable-length array, which is not standard C++ and put a
+  // caller-controlled amount of data on the stack.
+  string buf(bucket_oid_base.size() + 64, '\0');
+  auto shard_oid = [&](uint32_t shard) -> const char* {
+    if (gen_id) {
+      bucket_obj_with_generation(buf.data(), buf.size(), bucket_oid_base, gen_id, shard);
+    } else {
+      // for backward compatibility, gen_id(0) will not be added in the object name
+      bucket_obj_without_generation(buf.data(), buf.size(), bucket_oid_base, shard);
+    }
+    return buf.c_str();
+  };
+
+  if (shard_id < 0) {
+    for (uint32_t i = 0; i < num_shards; ++i) {
+      bucket_objects[i] = shard_oid(i);
+    }
+    return;
+  }
+
+  if (std::cmp_greater_equal(shard_id, num_shards)) {
+    return; // no such shard
+  }
+  bucket_objects[shard_id] = shard_oid(shard_id);
+}
+
+// Fill *result with "shard number -> bucket instance id" entries, where the
+// plain id is "<bucket name>:<bucket id>" and sharded ids append ":<shard>".
+//
+//  - num_shards == 0: a single entry, key 0, holding the plain id.
+//  - shard_id < 0:    one entry per shard in [0, num_shards).
+//  - otherwise:       only the entry for shard_id, provided it lies in
+//                     [0, num_shards); an out-of-range shard_id adds nothing.
+static void get_bucket_instance_ids(const RGWBucketInfo& bucket_info,
+                                    int num_shards, int shard_id,
+                                    map<int, string> *result)
+{
+  const rgw_bucket& bucket = bucket_info.bucket;
+  const string plain_id = bucket.name + ":" + bucket.bucket_id;
+
+  if (!num_shards) {
+    (*result)[0] = plain_id;
+    return;
+  }
+
+  if (shard_id < 0) {
+    for (int i = 0; i < num_shards; ++i) {
+      (*result)[i] = plain_id + ":" + std::to_string(i);
+    }
+    return;
+  }
+
+  if (shard_id >= num_shards) {
+    return; // no such shard
+  }
+  (*result)[shard_id] = plain_id + ":" + std::to_string(shard_id);
+}
+
+// Open the bucket's index pool and list the index objects of idx_layout.
+//
+// _shard_id selects a single shard; std::nullopt (or a negative value) means
+// all shards. *bucket_objs receives shard -> object name; when
+// bucket_instance_ids is non-null it receives shard -> bucket instance id.
+// Returns 0 on success or the negative result of open_bucket_index_base().
+int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp,
+                                               const RGWBucketInfo& bucket_info,
+                                               std::optional<int> _shard_id,
+                                               const rgw::bucket_index_layout_generation& idx_layout,
+                                               RGWSI_RADOS::Pool *index_pool,
+                                               map<int, string> *bucket_objs,
+                                               map<int, string> *bucket_instance_ids)
+{
+  const int shard_id = _shard_id.value_or(-1);
+  string bucket_oid_base;
+  const int ret = open_bucket_index_base(dpp, bucket_info, index_pool, &bucket_oid_base);
+  if (ret < 0) {
+    // name the call that actually failed (the message used to blame
+    // open_bucket_index_pool())
+    ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_base() returned "
+                       << ret << dendl;
+    return ret;
+  }
+
+  const auto num_shards = idx_layout.layout.normal.num_shards;
+  get_bucket_index_objects(bucket_oid_base, num_shards,
+                           idx_layout.gen, bucket_objs, shard_id);
+  if (bucket_instance_ids) {
+    get_bucket_instance_ids(bucket_info, num_shards,
+                            shard_id, bucket_instance_ids);
+  }
+  return 0;
+}
+
+// Compute the index object name for a given shard id.
+//
+// With no sharding (normal.num_shards == 0) the name is bucket_oid_base
+// itself. Otherwise it is "<base>.<gen_id>.<shard_id>", or
+// "<base>.<shard_id>" when gen_id is 0.
+void RGWSI_BucketIndex_RADOS::get_bucket_index_object(
+    const std::string& bucket_oid_base,
+    const rgw::bucket_index_normal_layout& normal,
+    uint64_t gen_id, int shard_id,
+    std::string* bucket_obj)
+{
+  if (!normal.num_shards) {
+    // By default with no sharding, we use the bucket oid as itself
+    (*bucket_obj) = bucket_oid_base;
+    return;
+  }
+
+  // Scratch space for the formatted name. A std::string replaces the former
+  // variable-length array, which is not standard C++ and put a
+  // caller-controlled amount of data on the stack.
+  std::string buf(bucket_oid_base.size() + 64, '\0');
+  if (gen_id) {
+    bucket_obj_with_generation(buf.data(), buf.size(), bucket_oid_base, gen_id, shard_id);
+    (*bucket_obj) = buf.c_str();
+    ldout(cct, 10) << "bucket_obj is " << (*bucket_obj) << dendl;
+  } else {
+    // for backward compatibility, gen_id(0) will not be added in the object name
+    bucket_obj_without_generation(buf.data(), buf.size(), bucket_oid_base, shard_id);
+    (*bucket_obj) = buf.c_str();
+  }
+}
+
+// Compute the index object name and shard id for a given object key.
+//
+// Only BucketHashType::Mod is handled; any other hash type yields -ENOTSUP.
+// With no sharding the name is bucket_oid_base and *shard_id is set to -1.
+// Otherwise the shard is bucket_shard_index(obj_key, num_shards) and the
+// name is "<base>.<gen_id>.<shard>", or "<base>.<shard>" when gen_id is 0.
+// shard_id may be null when the caller does not need it.
+// Returns 0 on success.
+int RGWSI_BucketIndex_RADOS::get_bucket_index_object(
+    const std::string& bucket_oid_base,
+    const rgw::bucket_index_normal_layout& normal,
+    uint64_t gen_id, const std::string& obj_key,
+    std::string* bucket_obj, int* shard_id)
+{
+  int r = 0;
+  switch (normal.hash_type) {
+    case rgw::BucketHashType::Mod:
+      if (!normal.num_shards) {
+        // By default with no sharding, we use the bucket oid as itself
+        (*bucket_obj) = bucket_oid_base;
+        if (shard_id) {
+          *shard_id = -1;
+        }
+      } else {
+        const uint32_t sid = bucket_shard_index(obj_key, normal.num_shards);
+        // Scratch space for the formatted name. A std::string replaces the
+        // former variable-length array, which is not standard C++ and put a
+        // caller-controlled amount of data on the stack.
+        std::string buf(bucket_oid_base.size() + 64, '\0');
+        if (gen_id) {
+          bucket_obj_with_generation(buf.data(), buf.size(), bucket_oid_base, gen_id, sid);
+        } else {
+          // gen_id 0 is left out of the object name
+          bucket_obj_without_generation(buf.data(), buf.size(), bucket_oid_base, sid);
+        }
+        (*bucket_obj) = buf.c_str();
+        if (shard_id) {
+          *shard_id = static_cast<int>(sid);
+        }
+      }
+      break;
+    default:
+      r = -ENOTSUP;
+  }
+  return r;
+}
+
+// Resolve the index shard object that covers obj_key in the bucket's
+// current index layout.
+//
+// On success *bucket_obj refers to that object in the index pool and, when
+// shard_id is non-null, *shard_id holds the shard number (see
+// get_bucket_index_object()). Returns 0 on success or the negative result
+// of the failing step.
+int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *dpp,
+                                                     const RGWBucketInfo& bucket_info,
+                                                     const string& obj_key,
+                                                     RGWSI_RADOS::Obj *bucket_obj,
+                                                     int *shard_id)
+{
+  string bucket_oid_base;
+
+  RGWSI_RADOS::Pool pool;
+
+  int ret = open_bucket_index_base(dpp, bucket_info, &pool, &bucket_oid_base);
+  if (ret < 0) {
+    // name the call that actually failed (the message used to blame
+    // open_bucket_index_pool())
+    ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_base() returned "
+                       << ret << dendl;
+    return ret;
+  }
+
+  string oid;
+
+  const auto& current_index = bucket_info.layout.current_index;
+  ret = get_bucket_index_object(bucket_oid_base, current_index.layout.normal,
+                                current_index.gen, obj_key, &oid, shard_id);
+  if (ret < 0) {
+    ldpp_dout(dpp, 10) << "get_bucket_index_object() returned ret=" << ret << dendl;
+    return ret;
+  }
+
+  *bucket_obj = svc.rados->obj(pool, oid);
+
+  return 0;
+}
+
+// Resolve the index object of the given shard in the given index layout.
+//
+// On success *bucket_obj refers to that object in the bucket's index pool.
+// Returns 0 on success or the negative result of open_bucket_index_base().
+int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *dpp,
+                                                     const RGWBucketInfo& bucket_info,
+                                                     const rgw::bucket_index_layout_generation& index,
+                                                     int shard_id,
+                                                     RGWSI_RADOS::Obj *bucket_obj)
+{
+  RGWSI_RADOS::Pool index_pool;
+  string bucket_oid_base;
+  int ret = open_bucket_index_base(dpp, bucket_info, &index_pool, &bucket_oid_base);
+  if (ret < 0) {
+    // name the call that actually failed (the message used to blame
+    // open_bucket_index_pool())
+    ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_base() returned "
+                       << ret << dendl;
+    return ret;
+  }
+
+  string oid;
+
+  get_bucket_index_object(bucket_oid_base, index.layout.normal,
+                          index.gen, shard_id, &oid);
+
+  *bucket_obj = svc.rados->obj(index_pool, oid);
+
+  return 0;
+}
+
+// Fetch the directory header of every selected index shard.
+//
+// The shards are chosen by open_bucket_index() from shard_id and idx_layout;
+// bucket_instance_ids is passed through to it and may be null. The headers
+// are appended to *headers in ascending shard order (map iteration order).
+// Returns 0 on success or the negative result of the failing step.
+int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp,
+                                             const RGWBucketInfo& bucket_info,
+                                             const rgw::bucket_index_layout_generation& idx_layout,
+                                             int shard_id,
+                                             vector<rgw_bucket_dir_header> *headers,
+                                             map<int, string> *bucket_instance_ids,
+                                             optional_yield y)
+{
+  RGWSI_RADOS::Pool index_pool;
+  map<int, string> oids;
+  int r = open_bucket_index(dpp, bucket_info, shard_id, idx_layout, &index_pool, &oids, bucket_instance_ids);
+  if (r < 0) {
+    return r;
+  }
+
+  // one (initially empty) result slot per shard object
+  map<int, struct rgw_cls_list_ret> list_results;
+  for (auto& shard_oid : oids) {
+    list_results.emplace(shard_oid.first, rgw_cls_list_ret());
+  }
+
+  r = CLSRGWIssueGetDirHeader(index_pool.ioctx(), oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
+  if (r < 0) {
+    return r;
+  }
+
+  for (auto& shard_result : list_results) {
+    headers->push_back(std::move(shard_result.second.dir.header));
+  }
+  return 0;
+}
+
+// Issue the bucket index init operation on every shard object of
+// idx_layout. Returns the negative result of open_bucket_index_pool() on
+// failure, otherwise the result of CLSRGWIssueBucketIndexInit.
+int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout)
+{
+  RGWSI_RADOS::Pool index_pool;
+  const int r = open_bucket_index_pool(dpp, bucket_info, &index_pool);
+  if (r < 0) {
+    return r;
+  }
+
+  // base object name: prefix + bucket id
+  const string dir_oid = dir_oid_prefix + bucket_info.bucket.bucket_id;
+
+  map<int, string> bucket_objs;
+  get_bucket_index_objects(dir_oid, idx_layout.layout.normal.num_shards, idx_layout.gen, &bucket_objs);
+
+  return CLSRGWIssueBucketIndexInit(index_pool.ioctx(),
+                                    bucket_objs,
+                                    cct->_conf->rgw_bucket_index_max_aio)();
+}
+
+// Issue the bucket index clean operation on every shard object of
+// idx_layout. Returns the negative result of open_bucket_index_pool() on
+// failure, otherwise the result of CLSRGWIssueBucketIndexClean.
+int RGWSI_BucketIndex_RADOS::clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw::bucket_index_layout_generation& idx_layout)
+{
+  RGWSI_RADOS::Pool index_pool;
+  const int r = open_bucket_index_pool(dpp, bucket_info, &index_pool);
+  if (r < 0) {
+    return r;
+  }
+
+  // base object name: prefix + bucket id
+  const std::string dir_oid = dir_oid_prefix + bucket_info.bucket.bucket_id;
+
+  std::map<int, std::string> bucket_objs;
+  get_bucket_index_objects(dir_oid, idx_layout.layout.normal.num_shards,
+                           idx_layout.gen, &bucket_objs);
+
+  return CLSRGWIssueBucketIndexClean(index_pool.ioctx(),
+                                     bucket_objs,
+                                     cct->_conf->rgw_bucket_index_max_aio)();
+}
+
+// Sum the RGWObjCategory::Main stats of every shard of the bucket's current
+// index into *result.
+//
+// result->bucket is set before the headers are read, so it is filled in
+// even when reading fails. count, size and size_rounded are reset and then
+// accumulated over all shard headers; placement_rule is copied from the
+// bucket info. Returns 0 on success or the negative result of
+// cls_bucket_head().
+int RGWSI_BucketIndex_RADOS::read_stats(const DoutPrefixProvider *dpp,
+                                        const RGWBucketInfo& bucket_info,
+                                        RGWBucketEnt *result,
+                                        optional_yield y)
+{
+  vector<rgw_bucket_dir_header> headers;
+
+  result->bucket = bucket_info.bucket;
+  int r = cls_bucket_head(dpp, bucket_info, bucket_info.layout.current_index, RGW_NO_SHARD, &headers, nullptr, y);
+  if (r < 0) {
+    return r;
+  }
+
+  result->count = 0;
+  result->size = 0;
+  result->size_rounded = 0;
+
+  for (const auto& header : headers) {
+    auto iter = header.stats.find(RGWObjCategory::Main);
+    if (iter != header.stats.end()) {
+      const rgw_bucket_category_stats& stats = iter->second;
+      result->count += stats.num_entries;
+      result->size += stats.total_size;
+      result->size_rounded += stats.total_size_rounded;
+    }
+  }
+
+  // bucket_info is const, so this is (and always was) a copy; the former
+  // std::move() here had no effect.
+  result->placement_rule = bucket_info.placement_rule;
+
+  return 0;
+}
+
+// Collect the resharding entry of every shard of the bucket's current index.
+//
+// One entry per shard object is appended to *status in ascending shard
+// order. A shard that answers -ENOENT contributes a default-constructed
+// entry; any other negative result aborts the scan and is returned.
+// Returns 0 on success.
+int RGWSI_BucketIndex_RADOS::get_reshard_status(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, list<cls_rgw_bucket_instance_entry> *status)
+{
+  map<int, string> bucket_objs;
+
+  RGWSI_RADOS::Pool index_pool;
+
+  int r = open_bucket_index(dpp, bucket_info,
+                            std::nullopt,
+                            bucket_info.layout.current_index,
+                            &index_pool,
+                            &bucket_objs,
+                            nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  // iterate by const reference: no per-shard copy of the (int, string) pair
+  for (const auto& shard_obj : bucket_objs) {
+    cls_rgw_bucket_instance_entry entry;
+
+    int ret = cls_rgw_get_bucket_resharding(index_pool.ioctx(), shard_obj.second, &entry);
+    if (ret < 0 && ret != -ENOENT) {
+      ldpp_dout(dpp, -1) << "ERROR: " << __func__ << ": cls_rgw_get_bucket_resharding() returned ret=" << ret << dendl;
+      return ret;
+    }
+
+    status->push_back(entry);
+  }
+
+  return 0;
+}
+
+// React to a bucket info overwrite that flips the datasync flag.
+//
+// Nothing is done (return 0) when the flag is unchanged or the bucket has
+// no log layouts. Only an InIndex log is supported for the newest log
+// layout (-ENOTSUP otherwise). The bilog is started or stopped on all
+// shards (shard_id -1) to match the new flag, then a datalog entry is added
+// for each shard; datalog failures are logged but not returned.
+int RGWSI_BucketIndex_RADOS::handle_overwrite(const DoutPrefixProvider *dpp,
+                                              const RGWBucketInfo& info,
+                                              const RGWBucketInfo& orig_info,
+                                              optional_yield y)
+{
+  const bool sync_now = info.datasync_flag_enabled();
+  const bool sync_before = orig_info.datasync_flag_enabled();
+  if (sync_now == sync_before) {
+    return 0; // datasync flag didn't change
+  }
+
+  if (info.layout.logs.empty()) {
+    return 0; // no bilog
+  }
+
+  const auto& bilog = info.layout.logs.back();
+  if (bilog.layout.type != rgw::BucketLogType::InIndex) {
+    return -ENOTSUP;
+  }
+  const int shards_num = rgw::num_shards(bilog.layout.in_index);
+
+  const int ret = sync_now ? svc.bilog->log_start(dpp, info, bilog, -1)
+                           : svc.bilog->log_stop(dpp, info, bilog, -1);
+  if (ret < 0) {
+    ldpp_dout(dpp, -1) << "ERROR: failed writing bilog (bucket=" << info.bucket << "); ret=" << ret << dendl;
+    return ret;
+  }
+
+  for (int shard = 0; shard < shards_num; ++shard) {
+    // datalog error is not fatal
+    if (svc.datalog_rados->add_entry(dpp, info, bilog, shard, y) < 0) {
+      ldpp_dout(dpp, -1) << "ERROR: failed writing data log (info.bucket=" << info.bucket << ", shard_id=" << shard << ")" << dendl;
+    }
+  }
+
+  return 0;
+}
diff --git a/src/rgw/services/svc_bi_rados.h b/src/rgw/services/svc_bi_rados.h
new file mode 100644
index 000000000..feba0cfcd
--- /dev/null
+++ b/src/rgw/services/svc_bi_rados.h
@@ -0,0 +1,166 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_datalog.h"
+#include "rgw_service.h"
+#include "rgw_tools.h"
+
+#include "svc_bi.h"
+#include "svc_rados.h"
+#include "svc_tier_rados.h"
+
+struct rgw_bucket_dir_header;
+
+class RGWSI_BILog_RADOS;
+
+#define RGW_NO_SHARD -1
+
+#define RGW_SHARDS_PRIME_0 7877
+#define RGW_SHARDS_PRIME_1 65521
+
+/*
+ * Defined Bucket Index Namespaces
+ */
+#define RGW_OBJ_NS_MULTIPART "multipart"
+#define RGW_OBJ_NS_SHADOW "shadow"
+
+/**
+ * RADOS-backed implementation of the bucket index service.
+ *
+ * Maps a bucket (and optionally a shard id or an object key) to index
+ * objects in the bucket's index pool and issues the index operations on
+ * them. RGWSI_BILog_RADOS is a friend and uses the private helpers.
+ */
+class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex
+{
+  friend class RGWSI_BILog_RADOS;
+
+  // bind *index_pool to `pool` and open it
+  int open_pool(const DoutPrefixProvider *dpp,
+                const rgw_pool& pool,
+                RGWSI_RADOS::Pool *index_pool,
+                bool mostly_omap);
+
+  // open the pool that holds the bucket's index objects
+  int open_bucket_index_pool(const DoutPrefixProvider *dpp,
+                             const RGWBucketInfo& bucket_info,
+                             RGWSI_RADOS::Pool *index_pool);
+
+  // open the index pool and compute the base index object name
+  int open_bucket_index_base(const DoutPrefixProvider *dpp,
+                             const RGWBucketInfo& bucket_info,
+                             RGWSI_RADOS::Pool *index_pool,
+                             std::string *bucket_oid_base);
+
+  // return the index oid for the given shard id
+  void get_bucket_index_object(const std::string& bucket_oid_base,
+                               const rgw::bucket_index_normal_layout& normal,
+                               uint64_t gen_id, int shard_id,
+                               std::string* bucket_obj);
+
+  // return the index oid and shard id for the given object name
+  int get_bucket_index_object(const std::string& bucket_oid_base,
+                              const rgw::bucket_index_normal_layout& normal,
+                              uint64_t gen_id, const std::string& obj_key,
+                              std::string* bucket_obj, int* shard_id);
+
+  // read the directory header of every selected index shard
+  int cls_bucket_head(const DoutPrefixProvider *dpp,
+                      const RGWBucketInfo& bucket_info,
+                      const rgw::bucket_index_layout_generation& idx_layout,
+                      int shard_id,
+                      std::vector<rgw_bucket_dir_header> *headers,
+                      std::map<int, std::string> *bucket_instance_ids,
+                      optional_yield y);
+
+public:
+
+  // services this one depends on; all null until init() is called
+  struct Svc {
+    RGWSI_Zone *zone{nullptr};
+    RGWSI_RADOS *rados{nullptr};
+    RGWSI_BILog_RADOS *bilog{nullptr};
+    RGWDataChangesLog *datalog_rados{nullptr};
+  } svc;
+
+  RGWSI_BucketIndex_RADOS(CephContext *cct);
+
+  void init(RGWSI_Zone *zone_svc,
+            RGWSI_RADOS *rados_svc,
+            RGWSI_BILog_RADOS *bilog_svc,
+            RGWDataChangesLog *datalog_rados_svc);
+
+  // upper bound on the shard count: RGW_SHARDS_PRIME_1
+  static int shards_max() {
+    return RGW_SHARDS_PRIME_1;
+  }
+
+  // thin wrapper around rgw_shard_id()
+  static int shard_id(const std::string& key, int max_shards) {
+    return rgw_shard_id(key, max_shards);
+  }
+
+  // shard number for a plain string key
+  static uint32_t bucket_shard_index(const std::string& key,
+                                     int num_shards) {
+    const uint32_t hash = ceph_str_hash_linux(key.c_str(), key.size());
+    // xor the low byte of the hash into its top byte before handing it to
+    // rgw_shards_mod()
+    const uint32_t mixed = hash ^ ((hash & 0xFF) << 24);
+    return rgw_shards_mod(mixed, num_shards);
+  }
+
+  // shard number for an object key; keys in the multipart namespace are
+  // sharded by the key RGWMPObj extracts from the meta name, all others by
+  // the object name
+  static uint32_t bucket_shard_index(const rgw_obj_key& obj_key,
+                                     int num_shards)
+  {
+    if (obj_key.ns != RGW_OBJ_NS_MULTIPART) {
+      return bucket_shard_index(obj_key.name, num_shards);
+    }
+
+    RGWMPObj mp;
+    mp.from_meta(obj_key.name);
+    const std::string sharding_key = mp.get_key();
+    return bucket_shard_index(sharding_key, num_shards);
+  }
+
+  int init_index(const DoutPrefixProvider *dpp,
+                 RGWBucketInfo& bucket_info,
+                 const rgw::bucket_index_layout_generation& idx_layout) override;
+  int clean_index(const DoutPrefixProvider *dpp,
+                  RGWBucketInfo& bucket_info,
+                  const rgw::bucket_index_layout_generation& idx_layout) override;
+
+  /* RADOS specific */
+
+  int read_stats(const DoutPrefixProvider *dpp,
+                 const RGWBucketInfo& bucket_info,
+                 RGWBucketEnt *stats,
+                 optional_yield y) override;
+
+  // collect the resharding entry of every shard of the current index
+  int get_reshard_status(const DoutPrefixProvider *dpp,
+                         const RGWBucketInfo& bucket_info,
+                         std::list<cls_rgw_bucket_instance_entry> *status);
+
+  int handle_overwrite(const DoutPrefixProvider *dpp,
+                       const RGWBucketInfo& info,
+                       const RGWBucketInfo& orig_info,
+                       optional_yield y) override;
+
+  // resolve the index shard object covering obj_key (current index layout)
+  int open_bucket_index_shard(const DoutPrefixProvider *dpp,
+                              const RGWBucketInfo& bucket_info,
+                              const std::string& obj_key,
+                              RGWSI_RADOS::Obj *bucket_obj,
+                              int *shard_id);
+
+  // resolve the index object of shard_id in the given index layout
+  int open_bucket_index_shard(const DoutPrefixProvider *dpp,
+                              const RGWBucketInfo& bucket_info,
+                              const rgw::bucket_index_layout_generation& index,
+                              int shard_id, RGWSI_RADOS::Obj *bucket_obj);
+
+  // open the index pool and return the unsharded index object name
+  int open_bucket_index(const DoutPrefixProvider *dpp,
+                        const RGWBucketInfo& bucket_info,
+                        RGWSI_RADOS::Pool *index_pool,
+                        std::string *bucket_oid);
+
+  // open the index pool and list shard -> object name (and, when
+  // bucket_instance_ids is non-null, shard -> bucket instance id)
+  int open_bucket_index(const DoutPrefixProvider *dpp,
+                        const RGWBucketInfo& bucket_info,
+                        std::optional<int> shard_id,
+                        const rgw::bucket_index_layout_generation& idx_layout,
+                        RGWSI_RADOS::Pool *index_pool,
+                        std::map<int, std::string> *bucket_objs,
+                        std::map<int, std::string> *bucket_instance_ids);
+};
+
+
diff --git a/src/rgw/services/svc_bilog_rados.cc b/src/rgw/services/svc_bilog_rados.cc
new file mode 100644
index 000000000..f4bb13ec1
--- /dev/null
+++ b/src/rgw/services/svc_bilog_rados.cc
@@ -0,0 +1,220 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_bilog_rados.h"
+#include "svc_bi_rados.h"
+
+#include "cls/rgw/cls_rgw_client.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Only forwards the context to the base class; svc.bi is assigned by init().
+RGWSI_BILog_RADOS::RGWSI_BILog_RADOS(CephContext *cct)
+  : RGWServiceInstance(cct)
+{}
+
+// Record the bucket index service this bilog service operates through.
+void RGWSI_BILog_RADOS::init(RGWSI_BucketIndex_RADOS *bi_rados_svc)
+{
+  this->svc.bi = bi_rados_svc;
+}
+
+// Trim the bucket index log between start_marker and end_marker on the
+// shard(s) selected by shard_id in the index layout derived from log_layout.
+// Returns the negative result of the first failing step, otherwise the
+// result of CLSRGWIssueBILogTrim.
+int RGWSI_BILog_RADOS::log_trim(const DoutPrefixProvider *dpp,
+                                const RGWBucketInfo& bucket_info,
+                                const rgw::bucket_log_layout_generation& log_layout,
+                                int shard_id,
+                                std::string_view start_marker,
+                                std::string_view end_marker)
+{
+  RGWSI_RADOS::Pool index_pool;
+  map<int, string> bucket_objs;
+
+  BucketIndexShardsManager start_marker_mgr;
+  BucketIndexShardsManager end_marker_mgr;
+
+  const auto& index_layout = rgw::log_to_index_layout(log_layout);
+  if (int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, index_layout, &index_pool, &bucket_objs, nullptr);
+      r < 0) {
+    return r;
+  }
+
+  // parse both markers into per-shard form
+  if (int r = start_marker_mgr.from_string(start_marker, shard_id); r < 0) {
+    return r;
+  }
+  if (int r = end_marker_mgr.from_string(end_marker, shard_id); r < 0) {
+    return r;
+  }
+
+  return CLSRGWIssueBILogTrim(index_pool.ioctx(), start_marker_mgr, end_marker_mgr, bucket_objs,
+                              cct->_conf->rgw_bucket_index_max_aio)();
+}
+
+// Issue CLSRGWIssueResyncBucketBILog on the shard(s) selected by shard_id in
+// the index layout derived from log_layout.
+// Returns the negative result of open_bucket_index() on failure, otherwise
+// the result of the issued operation.
+int RGWSI_BILog_RADOS::log_start(const DoutPrefixProvider *dpp,
+                                 const RGWBucketInfo& bucket_info,
+                                 const rgw::bucket_log_layout_generation& log_layout,
+                                 int shard_id)
+{
+  RGWSI_RADOS::Pool index_pool;
+  map<int, string> bucket_objs;
+
+  const auto& index_layout = rgw::log_to_index_layout(log_layout);
+  const int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, index_layout, &index_pool, &bucket_objs, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  return CLSRGWIssueResyncBucketBILog(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
+}
+
+// Issue CLSRGWIssueBucketBILogStop on the shard(s) selected by shard_id in
+// the index layout derived from log_layout.
+// Returns the negative result of open_bucket_index() on failure, otherwise
+// the result of the issued operation.
+int RGWSI_BILog_RADOS::log_stop(const DoutPrefixProvider *dpp,
+                                const RGWBucketInfo& bucket_info,
+                                const rgw::bucket_log_layout_generation& log_layout,
+                                int shard_id)
+{
+  RGWSI_RADOS::Pool index_pool;
+  map<int, string> bucket_objs;
+
+  const auto& index_layout = rgw::log_to_index_layout(log_layout);
+  const int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, index_layout, &index_pool, &bucket_objs, nullptr);
+  if (r < 0) {
+    return r;
+  }
+
+  return CLSRGWIssueBucketBILogStop(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)();
+}
+
+// Write "<shard_id_str><KEY_VALUE_SEPARATOR><shard_marker>" into *marker.
+// A null marker makes this a no-op.
+static void build_bucket_index_marker(const string& shard_id_str,
+                                      const string& shard_marker,
+                                      string *marker) {
+  if (!marker) {
+    return;
+  }
+
+  string& out = *marker;
+  out = shard_id_str;
+  out.append(BucketIndexShardsManager::KEY_VALUE_SEPARATOR);
+  out.append(shard_marker);
+}
+
+// List up to `max` bucket index log entries from the shard(s) selected by
+// shard_id in the index layout derived from log_layout.
+//
+// `marker` is both input (where to start) and output (where to continue).
+// `result` is cleared first and then filled by taking one entry per shard
+// per pass over the shards, until `max` entries were taken or all shards
+// are drained. `truncated` may be null; when given it is set to true if any
+// shard reported truncation or still has fetched entries left over.
+// Returns 0 on success or the negative result of the failing step.
+int RGWSI_BILog_RADOS::log_list(const DoutPrefixProvider *dpp,
+ const RGWBucketInfo& bucket_info,
+ const rgw::bucket_log_layout_generation& log_layout,
+ int shard_id, string& marker, uint32_t max,
+ std::list<rgw_bi_log_entry>& result, bool *truncated)
+{
+ ldpp_dout(dpp, 20) << __func__ << ": " << bucket_info.bucket << " marker " << marker << " shard_id=" << shard_id << " max " << max << dendl;
+ result.clear();
+
+ RGWSI_RADOS::Pool index_pool;
+ map<int, string> oids;
+ map<int, cls_rgw_bi_log_list_ret> bi_log_lists;
+ const auto& current_index = rgw::log_to_index_layout(log_layout);
+ int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, current_index, &index_pool, &oids, nullptr);
+ if (r < 0)
+ return r;
+
+ BucketIndexShardsManager marker_mgr;
+ // treated as sharded when several index objects were opened or a specific
+ // shard was requested
+ bool has_shards = (oids.size() > 1 || shard_id >= 0);
+ // If there are multiple shards for the bucket index object, the marker
+ // should have the pattern '{shard_id_1}#{shard_marker_1},{shard_id_2}#
+ // {shard_marker_2}...', if there is no sharding, the bi_log_list should
+ // only contain one record, and the key is the bucket instance id.
+ r = marker_mgr.from_string(marker, shard_id);
+ if (r < 0)
+ return r;
+
+ r = CLSRGWIssueBILogList(index_pool.ioctx(), marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)();
+ if (r < 0)
+ return r;
+
+ // per shard: cursor into that shard's fetched entries, and its end
+ map<int, list<rgw_bi_log_entry>::iterator> vcurrents;
+ map<int, list<rgw_bi_log_entry>::iterator> vends;
+ if (truncated) {
+ *truncated = false;
+ }
+ map<int, cls_rgw_bi_log_list_ret>::iterator miter = bi_log_lists.begin();
+ for (; miter != bi_log_lists.end(); ++miter) {
+ // note: this local shadows the shard_id parameter
+ int shard_id = miter->first;
+ vcurrents[shard_id] = miter->second.entries.begin();
+ vends[shard_id] = miter->second.entries.end();
+ if (truncated) {
+ *truncated = (*truncated || miter->second.truncated);
+ }
+ }
+
+ size_t total = 0;
+ bool has_more = true;
+ map<int, list<rgw_bi_log_entry>::iterator>::iterator viter;
+ map<int, list<rgw_bi_log_entry>::iterator>::iterator eiter;
+ // each pass of the outer loop takes at most one entry from every shard;
+ // it stops once `max` entries were taken or a full pass took nothing
+ while (total < max && has_more) {
+ has_more = false;
+
+ viter = vcurrents.begin();
+ eiter = vends.begin();
+
+ // vcurrents and vends have the same keys, so the two iterators advance
+ // in lockstep
+ for (; total < max && viter != vcurrents.end(); ++viter, ++eiter) {
+ assert (eiter != vends.end());
+
+ // note: this local shadows the shard_id parameter
+ int shard_id = viter->first;
+ list<rgw_bi_log_entry>::iterator& liter = viter->second;
+
+ // this shard has no fetched entries left
+ if (liter == eiter->second){
+ continue;
+ }
+ rgw_bi_log_entry& entry = *(liter);
+ if (has_shards) {
+ // rewrite the entry id in place to carry the shard number in front
+ // (see build_bucket_index_marker())
+ char buf[16];
+ snprintf(buf, sizeof(buf), "%d", shard_id);
+ string tmp_id;
+ build_bucket_index_marker(buf, entry.id, &tmp_id);
+ entry.id.swap(tmp_id);
+ }
+ marker_mgr.add(shard_id, entry.id);
+ result.push_back(entry);
+ total++;
+ has_more = true;
+ ++liter;
+ }
+ }
+
+ // fetched entries that were not handed out also count as truncation
+ if (truncated) {
+ for (viter = vcurrents.begin(), eiter = vends.begin(); viter != vcurrents.end(); ++viter, ++eiter) {
+ assert (eiter != vends.end());
+ *truncated = (*truncated || (viter->second != eiter->second));
+ }
+ }
+
+ // Refresh the marker: if there are multiple shards, the output will look
+ // like '{shard_oid_1}#{shard_marker_1},{shard_oid_2}#{shard_marker_2}...';
+ // if there is no sharding, the plain marker (without oid) is returned.
+ if (has_shards) {
+ marker_mgr.to_string(&marker);
+ } else {
+ if (!result.empty()) {
+ marker = result.rbegin()->id;
+ }
+ }
+
+ return 0;
+}
+
+// Report the max_marker of each selected index shard in *markers.
+//
+// When shard_id >= 0 every header is stored under shard_id; otherwise each
+// header is stored under the shard number of the matching bucket instance
+// id entry (headers and ids are walked in parallel). Asserts that both have
+// the same number of elements.
+// Returns 0 on success or the negative result of cls_bucket_head().
+int RGWSI_BILog_RADOS::get_log_status(const DoutPrefixProvider *dpp,
+                                      const RGWBucketInfo& bucket_info,
+                                      const rgw::bucket_log_layout_generation& log_layout,
+                                      int shard_id,
+                                      map<int, string> *markers,
+                                      optional_yield y)
+{
+  vector<rgw_bucket_dir_header> headers;
+  map<int, string> bucket_instance_ids;
+
+  const auto& index_layout = rgw::log_to_index_layout(log_layout);
+  const int r = svc.bi->cls_bucket_head(dpp, bucket_info, index_layout, shard_id, &headers, &bucket_instance_ids, y);
+  if (r < 0) {
+    return r;
+  }
+
+  ceph_assert(headers.size() == bucket_instance_ids.size());
+
+  auto id_iter = bucket_instance_ids.begin();
+  for (const auto& header : headers) {
+    const int key = (shard_id >= 0) ? shard_id : id_iter->first;
+    (*markers)[key] = header.max_marker;
+    ++id_iter;
+  }
+
+  return 0;
+}
+
diff --git a/src/rgw/services/svc_bilog_rados.h b/src/rgw/services/svc_bilog_rados.h
new file mode 100644
index 000000000..e9d5dbb5c
--- /dev/null
+++ b/src/rgw/services/svc_bilog_rados.h
@@ -0,0 +1,60 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+
+
+
+
+class RGWSI_BILog_RADOS : public RGWServiceInstance // service exposing start/stop/trim/list/status operations on a bucket's index log
+{
+public:
+ struct Svc { // other services this one calls into
+ RGWSI_BucketIndex_RADOS *bi{nullptr}; // null until provided (presumably by init() -- confirm in the .cc)
+ } svc;
+
+ RGWSI_BILog_RADOS(CephContext *cct);
+
+ void init(RGWSI_BucketIndex_RADOS *bi_rados_svc);
+
+ int log_start(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_log_layout_generation& log_layout, int shard_id);
+ int log_stop(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, const rgw::bucket_log_layout_generation& log_layout, int shard_id);
+
+ int log_trim(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
+ const rgw::bucket_log_layout_generation& log_layout,
+ int shard_id,
+ std::string_view start_marker,
+ std::string_view end_marker);
+ int log_list(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info,
+ const rgw::bucket_log_layout_generation& log_layout,
+ int shard_id,
+ std::string& marker, // in/out: rewritten on success so that it reflects the entries returned
+ uint32_t max,
+ std::list<rgw_bi_log_entry>& result,
+ bool *truncated); // optional: the implementation checks it for null before writing
+
+ int get_log_status(const DoutPrefixProvider *dpp,
+ const RGWBucketInfo& bucket_info,
+ const rgw::bucket_log_layout_generation& log_layout,
+ int shard_id, // >= 0: results are keyed by this id; negative: keyed by the ids reported by the index
+ std::map<int, std::string> *markers, // out: max_marker of each index header read; dereferenced unconditionally
+ optional_yield y);
+};
diff --git a/src/rgw/services/svc_bucket.cc b/src/rgw/services/svc_bucket.cc
new file mode 100644
index 000000000..b115990d2
--- /dev/null
+++ b/src/rgw/services/svc_bucket.cc
@@ -0,0 +1,25 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#include "svc_bucket.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+std::string RGWSI_Bucket::get_entrypoint_meta_key(const rgw_bucket& bucket)
+{
+  // The entrypoint key is the bucket's key computed with an empty bucket_id.
+  if (!bucket.bucket_id.empty()) {
+    // clear the id on a copy so the caller's bucket is left untouched
+    rgw_bucket without_id(bucket);
+    without_id.bucket_id.clear();
+    return without_id.get_key();
+  }
+  return bucket.get_key();
+}
+
+std::string RGWSI_Bucket::get_bi_meta_key(const rgw_bucket& bucket)
+{ // Bucket-instance metadata key: the bucket's own key, returned unmodified.
+ return bucket.get_key();
+}
+
diff --git a/src/rgw/services/svc_bucket.h b/src/rgw/services/svc_bucket.h
new file mode 100644
index 000000000..4a526e4f2
--- /dev/null
+++ b/src/rgw/services/svc_bucket.h
@@ -0,0 +1,111 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_bucket_types.h"
+
+class RGWSI_Bucket : public RGWServiceInstance // abstract interface for bucket entrypoint / bucket instance metadata and stats
+{
+public:
+ RGWSI_Bucket(CephContext *cct) : RGWServiceInstance(cct) {}
+ virtual ~RGWSI_Bucket() {}
+
+ static std::string get_entrypoint_meta_key(const rgw_bucket& bucket); // key of the bucket with its bucket_id cleared
+ static std::string get_bi_meta_key(const rgw_bucket& bucket); // key of the bucket as given (bucket.get_key())
+
+ virtual RGWSI_Bucket_BE_Handler& get_ep_be_handler() = 0; // backend handler used for entrypoints
+ virtual RGWSI_BucketInstance_BE_Handler& get_bi_be_handler() = 0; // backend handler used for bucket instances
+
+ virtual int read_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const std::string& key,
+ RGWBucketEntryPoint *entry_point, // out
+ RGWObjVersionTracker *objv_tracker,
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ rgw_cache_entry_info *cache_info = nullptr,
+ boost::optional<obj_version> refresh_version = boost::none) = 0;
+
+ virtual int store_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const std::string& key,
+ RGWBucketEntryPoint& info,
+ bool exclusive,
+ real_time mtime,
+ std::map<std::string, bufferlist> *pattrs,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int remove_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const std::string& key,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+ const std::string& key,
+ RGWBucketInfo *info, // out
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ rgw_cache_entry_info *cache_info = nullptr,
+ boost::optional<obj_version> refresh_version = boost::none) = 0;
+
+ virtual int read_bucket_info(RGWSI_Bucket_X_Ctx& ep_ctx,
+ const rgw_bucket& bucket,
+ RGWBucketInfo *info, // out
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ boost::optional<obj_version> refresh_version,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+ const std::string& key,
+ RGWBucketInfo& info,
+ std::optional<RGWBucketInfo *> orig_info, /* nullopt: orig_info was not fetched,
+ nullptr: orig_info was not found (new bucket instance) */
+ bool exclusive,
+ real_time mtime,
+ std::map<std::string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int remove_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+ const std::string& key,
+ const RGWBucketInfo& bucket_info,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int read_bucket_stats(RGWSI_Bucket_X_Ctx& ctx,
+ const rgw_bucket& bucket,
+ RGWBucketEnt *ent, // out
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int read_buckets_stats(RGWSI_Bucket_X_Ctx& ctx,
+ std::map<std::string, RGWBucketEnt>& m, // in/out: one entry per bucket to read stats for
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+};
+
diff --git a/src/rgw/services/svc_bucket_sobj.cc b/src/rgw/services/svc_bucket_sobj.cc
new file mode 100644
index 000000000..08a528015
--- /dev/null
+++ b/src/rgw/services/svc_bucket_sobj.cc
@@ -0,0 +1,644 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#include "svc_bucket_sobj.h"
+#include "svc_zone.h"
+#include "svc_sys_obj.h"
+#include "svc_sys_obj_cache.h"
+#include "svc_bi.h"
+#include "svc_meta.h"
+#include "svc_meta_be_sobj.h"
+#include "svc_sync_modules.h"
+
+#include "rgw_bucket.h"
+#include "rgw_tools.h"
+#include "rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
+
+using namespace std;
+
+class RGWSI_Bucket_SObj_Module : public RGWSI_MBSObj_Handler_Module {
+  RGWSI_Bucket_SObj::Svc& svc;
+
+  const string prefix; // never assigned, so it stays empty for this module
+public:
+  RGWSI_Bucket_SObj_Module(RGWSI_Bucket_SObj::Svc& _svc)
+    : RGWSI_MBSObj_Handler_Module("bucket"), svc(_svc) {}
+
+  void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override {
+    if (oid) {
+      *oid = key;
+    }
+    if (pool) {
+      *pool = svc.zone->get_zone_params().domain_root;
+    }
+  }
+
+  const string& get_oid_prefix() override {
+    return prefix;
+  }
+
+  bool is_valid_oid(const string& oid) override {
+    return !(oid.empty() || oid[0] == '.'); // rejects empty and dot-prefixed oids
+  }
+
+  string key_to_oid(const string& key) override {
+    return key;
+  }
+
+  string oid_to_key(const string& oid) override {
+    // callers are expected to have vetted the oid with is_valid_oid()
+    // already, which is why it is handed back unchecked
+    return oid;
+  }
+};
+
+class RGWSI_BucketInstance_SObj_Module : public RGWSI_MBSObj_Handler_Module {
+  RGWSI_Bucket_SObj::Svc& svc;
+
+  const string prefix; // RGW_BUCKET_INSTANCE_MD_PREFIX; prepended to every bucket instance oid
+public:
+  RGWSI_BucketInstance_SObj_Module(RGWSI_Bucket_SObj::Svc& _svc) : RGWSI_MBSObj_Handler_Module("bucket.instance"),
+      svc(_svc), prefix(RGW_BUCKET_INSTANCE_MD_PREFIX) {}
+
+  void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override {
+    if (pool) {
+      *pool = svc.zone->get_zone_params().domain_root;
+    }
+    if (oid) {
+      *oid = key_to_oid(key);
+    }
+  }
+
+  const string& get_oid_prefix() override {
+    return prefix;
+  }
+
+  bool is_valid_oid(const string& oid) override {
+    return (oid.compare(0, prefix.size(), RGW_BUCKET_INSTANCE_MD_PREFIX) == 0); // oid must start with the prefix
+  }
+
+// 'tenant/' is used in bucket instance keys for sync to avoid parsing ambiguity
+// with the existing instance[:shard] format. once we parse the shard, the / is
+// replaced with a : to match the [tenant:]instance format
+  string key_to_oid(const string& key) override {
+    string oid = prefix + key;
+
+    // replace tenant/ with tenant: (only the first '/' after the prefix)
+    auto c = oid.find('/', prefix.size());
+    if (c != string::npos) {
+      oid[c] = ':';
+    }
+
+    return oid;
+  }
+
+  // convert bucket instance oids back to the tenant/ format for metadata keys.
+  // it's safe to parse 'tenant:' only for oids, because they won't contain the
+  // optional :shard at the end
+  string oid_to_key(const string& oid) override {
+    /* this should have been called after oid was checked for validity */
+
+    if (oid.size() < prefix.size()) { /* just sanity check */
+      return string();
+    }
+
+    string key = oid.substr(prefix.size());
+
+    // find first : (could be tenant:bucket or bucket:instance)
+    auto c = key.find(':');
+    if (c != string::npos) {
+      // if we find another :, the first one was for tenant
+      if (key.find(':', c + 1) != string::npos) {
+        key[c] = '/';
+      }
+    }
+
+    return key;
+  }
+
+  /*
+   * hash entry for mdlog placement. Use the same hash key we'd have for the bucket entry
+   * point, so that the log entries end up at the same log shard, so that we process them
+   * in order
+   */
+  string get_hash_key(const string& key) override {
+    string k = "bucket:";
+    const auto pos = key.find(':'); // keep the size_t result; do not narrow it to int
+    if (pos == string::npos)
+      k.append(key);
+    else
+      k.append(key, 0, pos); // everything before the first ':'
+
+    return k;
+  }
+};
+
+RGWSI_Bucket_SObj::RGWSI_Bucket_SObj(CephContext *cct): RGWSI_Bucket(cct) { // only forwards cct to the base class; nothing else is set up here
+}
+
+RGWSI_Bucket_SObj::~RGWSI_Bucket_SObj() { // empty body; NOTE(review): presumably out-of-line so the unique_ptr members are destroyed where their types are complete -- confirm
+}
+
+void RGWSI_Bucket_SObj::init(RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc,
+                             RGWSI_SysObj_Cache *_cache_svc, RGWSI_BucketIndex *_bi,
+                             RGWSI_Meta *_meta_svc, RGWSI_MetaBackend *_meta_be_svc,
+                             RGWSI_SyncModules *_sync_modules_svc,
+                             RGWSI_Bucket_Sync *_bucket_sync_svc)
+{ // Records the service dependencies in svc; pointers are stored as given, unchecked.
+  svc.bucket_sync = _bucket_sync_svc;
+  svc.sync_modules = _sync_modules_svc;
+  svc.meta_be = _meta_be_svc;
+  svc.meta = _meta_svc;
+  svc.bi = _bi;
+  svc.cache = _cache_svc;
+  svc.sysobj = _sysobj_svc;
+  svc.zone = _zone_svc;
+  svc.bucket = this; // self reference, so holders of Svc can reach this service
+}
+
+int RGWSI_Bucket_SObj::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+  // Set up the bucket info cache, then one metadata backend handler each
+  // for bucket entrypoints and for bucket instances.
+  binfo_cache.reset(new RGWChainedCacheImpl<bucket_info_cache_entry>);
+  binfo_cache->init(svc.cache);
+
+  /* first backend handler: bucket entrypoints */
+  RGWSI_MetaBackend_Handler *entrypoint_be;
+  int ret = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ, &entrypoint_be);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to create be handler: r=" << ret << dendl;
+    return ret;
+  }
+
+  ep_be_handler = entrypoint_be;
+
+  // ep_be_module takes ownership of the module; the same raw pointer is
+  // also handed to the handler
+  auto *entrypoint_sobj = static_cast<RGWSI_MetaBackend_Handler_SObj *>(entrypoint_be);
+  auto *entrypoint_module = new RGWSI_Bucket_SObj_Module(svc);
+  ep_be_module.reset(entrypoint_module);
+  entrypoint_sobj->set_module(entrypoint_module);
+
+  /* second backend handler: bucket instances */
+  RGWSI_MetaBackend_Handler *instance_be;
+  ret = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ, &instance_be);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to create be handler: r=" << ret << dendl;
+    return ret;
+  }
+
+  bi_be_handler = instance_be;
+
+  // same ownership arrangement as for the entrypoint module above
+  auto *instance_sobj = static_cast<RGWSI_MetaBackend_Handler_SObj *>(instance_be);
+  auto *instance_module = new RGWSI_BucketInstance_SObj_Module(svc);
+  bi_be_module.reset(instance_module);
+  instance_sobj->set_module(instance_module);
+
+
+  return 0;
+}
+
+int RGWSI_Bucket_SObj::read_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+                                                   const string& key,
+                                                   RGWBucketEntryPoint *entry_point,
+                                                   RGWObjVersionTracker *objv_tracker,
+                                                   real_time *pmtime,
+                                                   map<string, bufferlist> *pattrs,
+                                                   optional_yield y,
+                                                   const DoutPrefixProvider *dpp,
+                                                   rgw_cache_entry_info *cache_info,
+                                                   boost::optional<obj_version> refresh_version)
+{
+  // Fetch the raw entry through the metadata backend, then decode it into *entry_point.
+  bufferlist bl;
+  RGWSI_MBSObj_GetParams params(&bl, pattrs, pmtime);
+  params.set_cache_info(cache_info);
+  params.set_refresh_version(refresh_version);
+  const int ret = svc.meta_be->get_entry(ctx.get(), key, params, objv_tracker, y, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  try {
+    auto iter = bl.cbegin();
+    decode(*entry_point, iter);
+  } catch (const buffer::error&) {
+    ldpp_dout(dpp, 0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl;
+    return -EIO; // a payload that cannot be decoded is reported as an I/O error
+  }
+  return 0;
+}
+
+int RGWSI_Bucket_SObj::store_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+                                                    const string& key,
+                                                    RGWBucketEntryPoint& info,
+                                                    bool exclusive,
+                                                    real_time mtime,
+                                                    map<string, bufferlist> *pattrs,
+                                                    RGWObjVersionTracker *objv_tracker,
+                                                    optional_yield y,
+                                                    const DoutPrefixProvider *dpp)
+{
+  // Encode the entrypoint and write it through the metadata backend.
+  // Whatever the backend's put() returns, success or error, is handed
+  // straight back to the caller.
+  bufferlist encoded;
+  encode(info, encoded);
+
+  RGWSI_MBSObj_PutParams put_params(encoded, pattrs, mtime, exclusive);
+
+  // attrs, mtime and the exclusive flag travel inside put_params; the
+  // version tracker is passed separately
+  return svc.meta_be->put(ctx.get(), key, put_params, objv_tracker, y, dpp);
+}
+
+int RGWSI_Bucket_SObj::remove_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const string& key,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp)
+{ // Removes the entrypoint entry for key via the metadata backend and returns the backend's result unchanged.
+ RGWSI_MBSObj_RemoveParams params; // default-constructed; no fields are set here
+ return svc.meta_be->remove(ctx.get(), key, params, objv_tracker, y, dpp); // NOTE(review): the instance variant calls remove_entry(dpp, ...) instead -- confirm the difference is intended
+}
+
+int RGWSI_Bucket_SObj::read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+                                                 const string& key,
+                                                 RGWBucketInfo *info,
+                                                 real_time *pmtime, map<string, bufferlist> *pattrs,
+                                                 optional_yield y,
+                                                 const DoutPrefixProvider *dpp,
+                                                 rgw_cache_entry_info *cache_info,
+                                                 boost::optional<obj_version> refresh_version)
+{ // Cached read of a bucket instance: serve from binfo_cache when possible, else read, publish and cache.
+  string cache_key("bi/");
+  cache_key.append(key);
+  // fast path: a cached entry is used unless refresh_version flags it as stale
+  if (auto e = binfo_cache->find(cache_key)) {
+    if (refresh_version &&
+        e->info.objv_tracker.read_version.compare(&(*refresh_version))) {
+      ldpp_dout(dpp, -1) << "WARNING: The bucket info cache is inconsistent. This is "
+                         << "a failure that should be debugged. I am a nice machine, "
+                         << "so I will try to recover." << dendl;
+      binfo_cache->invalidate(cache_key); // the entry lives under cache_key ("bi/" + key), not the bare key
+    } else {
+      *info = e->info;
+      if (pattrs)
+        *pattrs = e->attrs;
+      if (pmtime)
+        *pmtime = e->mtime;
+      return 0;
+    }
+  }
+  // slow path: read into a fresh cache entry so it can be published and cached
+  bucket_info_cache_entry e;
+  rgw_cache_entry_info ci;
+
+  int ret = do_read_bucket_instance_info(ctx, key,
+                                         &e.info, &e.mtime, &e.attrs,
+                                         &ci, refresh_version, y, dpp);
+  *info = e.info;
+  // note: *info is assigned even when the read failed (existing behavior, kept)
+  if (ret < 0) {
+    if (ret != -ENOENT) {
+      ldpp_dout(dpp, -1) << "ERROR: do_read_bucket_instance_info failed: " << ret << dendl;
+    } else {
+      ldpp_dout(dpp, 20) << "do_read_bucket_instance_info, bucket instance not found (key=" << key << ")" << dendl;
+    }
+    return ret;
+  }
+
+  if (pmtime) {
+    *pmtime = e.mtime;
+  }
+  if (pattrs) {
+    *pattrs = e.attrs;
+  }
+  if (cache_info) {
+    *cache_info = ci;
+  }
+
+  /* chain to only bucket instance and *not* bucket entrypoint */
+  if (!binfo_cache->put(dpp, svc.cache, cache_key, &e, {&ci})) {
+    ldpp_dout(dpp, 20) << "couldn't put binfo cache entry, might have raced with data changes" << dendl;
+  }
+
+  if (refresh_version &&
+      refresh_version->compare(&info->objv_tracker.read_version)) {
+    ldpp_dout(dpp, -1) << "WARNING: The OSD has the same version I have. Something may "
+                       << "have gone squirrelly. An administrator may have forced a "
+                       << "change; otherwise there is a problem somewhere." << dendl;
+  }
+
+  return 0;
+}
+
+int RGWSI_Bucket_SObj::do_read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+                                                    const string& key,
+                                                    RGWBucketInfo *info,
+                                                    real_time *pmtime, map<string, bufferlist> *pattrs,
+                                                    rgw_cache_entry_info *cache_info,
+                                                    boost::optional<obj_version> refresh_version,
+                                                    optional_yield y,
+                                                    const DoutPrefixProvider *dpp)
+{
+  // Read through the metadata backend (binfo_cache is not consulted here) and decode into *info.
+  bufferlist bl;
+  RGWObjVersionTracker ot;
+  RGWSI_MBSObj_GetParams params(&bl, pattrs, pmtime);
+  params.set_cache_info(cache_info);
+  params.set_refresh_version(refresh_version);
+  const int ret = svc.meta_be->get_entry(ctx.get(), key, params, &ot, y, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  try {
+    auto iter = bl.cbegin();
+    decode(*info, iter);
+  } catch (const buffer::error&) {
+    ldpp_dout(dpp, 0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl;
+    return -EIO;
+  }
+  info->objv_tracker = ot; // attach the tracker filled by get_entry() to the decoded info
+  return 0;
+}
+
+int RGWSI_Bucket_SObj::read_bucket_info(RGWSI_Bucket_X_Ctx& ctx,
+ const rgw_bucket& bucket,
+ RGWBucketInfo *info,
+ real_time *pmtime,
+ map<string, bufferlist> *pattrs,
+ boost::optional<obj_version> refresh_version,
+ optional_yield y,
+ const DoutPrefixProvider *dpp)
+{ // Resolve a bucket to its RGWBucketInfo: directly by instance when bucket_id is set, otherwise via the entrypoint.
+ rgw_cache_entry_info cache_info;
+
+ if (!bucket.bucket_id.empty()) { // instance id known: skip the entrypoint lookup entirely
+ return read_bucket_instance_info(ctx.bi, get_bi_meta_key(bucket),
+ info,
+ pmtime, pattrs,
+ y,
+ dpp,
+ &cache_info, refresh_version);
+ }
+
+ string bucket_entry = get_entrypoint_meta_key(bucket);
+ string cache_key("b/"); // entrypoint-resolved entries use "b/"; instance entries use "bi/"
+ cache_key.append(bucket_entry);
+
+ if (auto e = binfo_cache->find(cache_key)) {
+ bool found_version = (bucket.bucket_id.empty() || // NOTE(review): bucket_id is always empty here (non-empty ids returned above), so this is always true
+ bucket.bucket_id == e->info.bucket.bucket_id);
+
+ if (!found_version ||
+ (refresh_version &&
+ e->info.objv_tracker.read_version.compare(&(*refresh_version)))) {
+ ldpp_dout(dpp, -1) << "WARNING: The bucket info cache is inconsistent. This is "
+ << "a failure that should be debugged. I am a nice machine, "
+ << "so I will try to recover." << dendl;
+ binfo_cache->invalidate(cache_key);
+ } else {
+ *info = e->info;
+ if (pattrs)
+ *pattrs = e->attrs;
+ if (pmtime)
+ *pmtime = e->mtime;
+ return 0;
+ }
+ }
+ // cache miss (or entry just invalidated): read the entrypoint first
+ RGWBucketEntryPoint entry_point;
+ real_time ep_mtime;
+ RGWObjVersionTracker ot;
+ rgw_cache_entry_info entry_cache_info;
+ int ret = read_bucket_entrypoint_info(ctx.ep, bucket_entry,
+ &entry_point, &ot, &ep_mtime, pattrs,
+ y,
+ dpp,
+ &entry_cache_info, refresh_version);
+ if (ret < 0) {
+ /* only init these fields */
+ info->bucket = bucket;
+ return ret;
+ }
+
+ if (entry_point.has_bucket_info) { // entrypoint carries the bucket info inline: returned as-is, not cached, *pmtime left untouched
+ *info = entry_point.old_bucket_info;
+ info->bucket.tenant = bucket.tenant;
+ ldpp_dout(dpp, 20) << "rgw_get_bucket_info: old bucket info, bucket=" << info->bucket << " owner " << info->owner << dendl;
+ return 0;
+ }
+
+ /* data is in the bucket instance object, we need to get attributes from there, clear everything
+ * that we got
+ */
+ if (pattrs) {
+ pattrs->clear();
+ }
+
+ ldpp_dout(dpp, 20) << "rgw_get_bucket_info: bucket instance: " << entry_point.bucket << dendl;
+
+
+ /* read bucket instance info */
+
+ bucket_info_cache_entry e;
+
+ ret = read_bucket_instance_info(ctx.bi, get_bi_meta_key(entry_point.bucket),
+ &e.info, &e.mtime, &e.attrs,
+ y,
+ dpp,
+ &cache_info, refresh_version);
+ *info = e.info; // assigned before the error check, so *info is overwritten on failure too
+ if (ret < 0) {
+ ldpp_dout(dpp, -1) << "ERROR: read_bucket_instance_from_oid failed: " << ret << dendl; // message names read_bucket_instance_from_oid although the call above is read_bucket_instance_info
+ info->bucket = bucket;
+ // XXX and why return anything in case of an error anyway?
+ return ret;
+ }
+
+ if (pmtime)
+ *pmtime = e.mtime;
+ if (pattrs)
+ *pattrs = e.attrs;
+
+ /* chain to both bucket entry point and bucket instance */
+ if (!binfo_cache->put(dpp, svc.cache, cache_key, &e, {&entry_cache_info, &cache_info})) {
+ ldpp_dout(dpp, 20) << "couldn't put binfo cache entry, might have raced with data changes" << dendl;
+ }
+
+ if (refresh_version &&
+ refresh_version->compare(&info->objv_tracker.read_version)) {
+ ldpp_dout(dpp, -1) << "WARNING: The OSD has the same version I have. Something may "
+ << "have gone squirrelly. An administrator may have forced a "
+ << "change; otherwise there is a problem somewhere." << dendl;
+ }
+
+ return 0;
+}
+
+
+int RGWSI_Bucket_SObj::store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+ const string& key,
+ RGWBucketInfo& info,
+ std::optional<RGWBucketInfo *> orig_info,
+ bool exclusive,
+ real_time mtime,
+ map<string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp)
+{ // Writes bucket instance info; on non-exclusive writes the previous info (if any) is handed to the index service first.
+ bufferlist bl;
+ encode(info, bl); // encoded up front, before the overwrite handling below runs
+
+ /*
+ * we might need some special handling if overwriting
+ */
+ RGWBucketInfo shared_bucket_info; // storage for the fetched original; orig_info may point at it below
+ if (!orig_info && !exclusive) { /* if exclusive, we're going to fail when try
+ to overwrite, so the whole check here is moot */
+ /*
+ * we're here because orig_info wasn't passed in
+ * we don't have info about what was there before, so need to fetch first
+ */
+ int r = read_bucket_instance_info(ctx,
+ key,
+ &shared_bucket_info,
+ nullptr, nullptr,
+ y,
+ dpp,
+ nullptr, boost::none);
+ if (r < 0) {
+ if (r != -ENOENT) {
+ ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): read_bucket_instance_info() of key=" << key << " returned r=" << r << dendl;
+ return r;
+ }
+ } else { // on -ENOENT orig_info stays nullopt, so the overwrite handling below is skipped
+ orig_info = &shared_bucket_info;
+ }
+ }
+
+ if (orig_info && *orig_info && !exclusive) { // has a value AND that value is a non-null pointer
+ int r = svc.bi->handle_overwrite(dpp, info, *(orig_info.value()), y);
+ if (r < 0) {
+ ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): svc.bi->handle_overwrite() of key=" << key << " returned r=" << r << dendl;
+ return r;
+ }
+ }
+
+ RGWSI_MBSObj_PutParams params(bl, pattrs, mtime, exclusive);
+
+ int ret = svc.meta_be->put(ctx.get(), key, params, &info.objv_tracker, y, dpp);
+
+ if (ret >= 0) {
+ int r = svc.bucket_sync->handle_bi_update(dpp, info,
+ orig_info.value_or(nullptr),
+ y);
+ if (r < 0) { // returned without logging, although the put above already succeeded
+ return r;
+ }
+ } else if (ret == -EEXIST) {
+ /* well, if it's exclusive we shouldn't overwrite it, because we might race with another
+ * bucket operation on this specific bucket (e.g., being synced from the master), but
+ * since bucket instance meta object is unique for this specific bucket instance, we don't
+ * need to return an error.
+ * A scenario where we'd get -EEXIST here, is in a multi-zone config, we're not on the
+ * master, creating a bucket, sending bucket creation to the master, we create the bucket
+ * locally, while in the sync thread we sync the new bucket.
+ */
+ ret = 0;
+ }
+
+ if (ret < 0) { // redundant with the return below; both paths return ret
+ return ret;
+ }
+
+ return ret;
+}
+
+int RGWSI_Bucket_SObj::remove_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+                                                   const string& key,
+                                                   const RGWBucketInfo& info,
+                                                   RGWObjVersionTracker *objv_tracker,
+                                                   optional_yield y,
+                                                   const DoutPrefixProvider *dpp)
+{
+  // Drop the instance metadata entry, then let the sync service process the
+  // removal. Only a failed removal other than -ENOENT is reported.
+  RGWSI_MBSObj_RemoveParams params;
+  const int ret = svc.meta_be->remove_entry(dpp, ctx.get(), key, params, objv_tracker, y);
+  const bool removal_ok = (ret >= 0 || ret == -ENOENT);
+  if (!removal_ok) {
+    return ret;
+  }
+
+  const int r = svc.bucket_sync->handle_bi_removal(dpp, info, y);
+  if (r < 0) {
+    /* not fatal: the index only keeps hints, so at worst some extra hints
+     * stay behind while the bucket removal itself succeeded
+     */
+    ldpp_dout(dpp, 0) << "ERROR: failed to update bucket instance sync index: r=" << r << dendl;
+  }
+  return 0;
+}
+
+int RGWSI_Bucket_SObj::read_bucket_stats(const RGWBucketInfo& bucket_info,
+                                         RGWBucketEnt *ent,
+                                         optional_yield y,
+                                         const DoutPrefixProvider *dpp)
+{
+  // reset the counters first, so *ent is zeroed before the index service runs
+  ent->count = 0;
+  ent->size = 0;
+  ent->size_rounded = 0;
+
+  // the bucket index service is handed *ent and performs the actual stats read
+  const int r = svc.bi->read_stats(dpp, bucket_info, ent, y);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): read_stats returned r=" << r << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+int RGWSI_Bucket_SObj::read_bucket_stats(RGWSI_Bucket_X_Ctx& ctx,
+                                         const rgw_bucket& bucket,
+                                         RGWBucketEnt *ent,
+                                         optional_yield y,
+                                         const DoutPrefixProvider *dpp)
+{
+  // Resolve the bucket to its RGWBucketInfo, then read the stats for it.
+  // mtime and attrs are not needed here, hence the null out-pointers.
+  RGWBucketInfo resolved;
+  const int ret = read_bucket_info(ctx, bucket, &resolved, nullptr, nullptr, boost::none, y, dpp);
+
+  // a failed lookup is passed straight back to the caller
+  return (ret < 0) ? ret : read_bucket_stats(resolved, ent, y, dpp);
+}
+
+int RGWSI_Bucket_SObj::read_buckets_stats(RGWSI_Bucket_X_Ctx& ctx,
+                                          map<string, RGWBucketEnt>& m,
+                                          optional_yield y,
+                                          const DoutPrefixProvider *dpp)
+{
+  // Read stats for every entry of m, stopping at the first failure.
+  // On success the number of entries in m is returned.
+  for (auto& item : m) {
+    RGWBucketEnt& ent = item.second;
+    const int r = read_bucket_stats(ctx, ent.bucket, &ent, y, dpp);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): read_bucket_stats returned r=" << r << dendl;
+      return r;
+    }
+  }
+  return m.size();
+}
diff --git a/src/rgw/services/svc_bucket_sobj.h b/src/rgw/services/svc_bucket_sobj.h
new file mode 100644
index 000000000..8e9fe063c
--- /dev/null
+++ b/src/rgw/services/svc_bucket_sobj.h
@@ -0,0 +1,180 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_meta_be.h"
+#include "svc_bucket_types.h"
+#include "svc_bucket.h"
+#include "svc_bucket_sync.h"
+
+class RGWSI_Zone;
+class RGWSI_SysObj;
+class RGWSI_SysObj_Cache;
+class RGWSI_Meta;
+class RGWSI_SyncModules;
+
+struct rgw_cache_entry_info;
+
+template <class T>
+class RGWChainedCacheImpl;
+
+class RGWSI_Bucket_SObj : public RGWSI_Bucket // RGWSI_Bucket implementation that reads/writes through the metadata backend (svc.meta_be)
+{
+ struct bucket_info_cache_entry { // value type stored in binfo_cache
+ RGWBucketInfo info;
+ real_time mtime;
+ std::map<std::string, bufferlist> attrs;
+ };
+
+ using RGWChainedCacheImpl_bucket_info_cache_entry = RGWChainedCacheImpl<bucket_info_cache_entry>;
+ std::unique_ptr<RGWChainedCacheImpl_bucket_info_cache_entry> binfo_cache; // created in do_start()
+
+ RGWSI_Bucket_BE_Handler ep_be_handler; // entrypoint backend handler, assigned in do_start()
+ std::unique_ptr<RGWSI_MetaBackend::Module> ep_be_module; // owns the entrypoint module
+ RGWSI_BucketInstance_BE_Handler bi_be_handler; // bucket instance backend handler, assigned in do_start()
+ std::unique_ptr<RGWSI_MetaBackend::Module> bi_be_module; // owns the bucket instance module
+
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+ int do_read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, // read via the backend, without consulting binfo_cache
+ const std::string& key,
+ RGWBucketInfo *info,
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version> refresh_version,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+
+ int read_bucket_stats(const RGWBucketInfo& bucket_info, // private overload for an already-resolved RGWBucketInfo
+ RGWBucketEnt *ent,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+
+public:
+ struct Svc { // service dependencies; all null until init() is called
+ RGWSI_Bucket_SObj *bucket{nullptr}; // set to this by init()
+ RGWSI_BucketIndex *bi{nullptr};
+ RGWSI_Zone *zone{nullptr};
+ RGWSI_SysObj *sysobj{nullptr};
+ RGWSI_SysObj_Cache *cache{nullptr};
+ RGWSI_Meta *meta{nullptr};
+ RGWSI_MetaBackend *meta_be{nullptr};
+ RGWSI_SyncModules *sync_modules{nullptr};
+ RGWSI_Bucket_Sync *bucket_sync{nullptr};
+ } svc;
+
+ RGWSI_Bucket_SObj(CephContext *cct);
+ ~RGWSI_Bucket_SObj();
+
+ RGWSI_Bucket_BE_Handler& get_ep_be_handler() override {
+ return ep_be_handler;
+ }
+
+ RGWSI_BucketInstance_BE_Handler& get_bi_be_handler() override {
+ return bi_be_handler;
+ }
+
+ void init(RGWSI_Zone *_zone_svc, // stores the given service pointers into svc
+ RGWSI_SysObj *_sysobj_svc,
+ RGWSI_SysObj_Cache *_cache_svc,
+ RGWSI_BucketIndex *_bi,
+ RGWSI_Meta *_meta_svc,
+ RGWSI_MetaBackend *_meta_be_svc,
+ RGWSI_SyncModules *_sync_modules_svc,
+ RGWSI_Bucket_Sync *_bucket_sync_svc);
+
+
+ int read_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const std::string& key,
+ RGWBucketEntryPoint *entry_point, // out: decoded entrypoint; -EIO is returned when decoding fails
+ RGWObjVersionTracker *objv_tracker,
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ rgw_cache_entry_info *cache_info = nullptr,
+ boost::optional<obj_version> refresh_version = boost::none) override;
+
+ int store_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const std::string& key,
+ RGWBucketEntryPoint& info,
+ bool exclusive,
+ real_time mtime,
+ std::map<std::string, bufferlist> *pattrs,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+
+ int remove_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx,
+ const std::string& key,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+
+ int read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, // cached read; binfo_cache entries are keyed "bi/" + key
+ const std::string& key,
+ RGWBucketInfo *info,
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ rgw_cache_entry_info *cache_info = nullptr,
+ boost::optional<obj_version> refresh_version = boost::none) override;
+
+ int read_bucket_info(RGWSI_Bucket_X_Ctx& ep_ctx, // by instance when bucket.bucket_id is set, otherwise via the entrypoint
+ const rgw_bucket& bucket,
+ RGWBucketInfo *info,
+ real_time *pmtime,
+ std::map<std::string, bufferlist> *pattrs,
+ boost::optional<obj_version> refresh_version,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+
+ int store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx,
+ const std::string& key,
+ RGWBucketInfo& info,
+ std::optional<RGWBucketInfo *> orig_info, /* nullopt: orig_info was not fetched,
+ nullptr: orig_info was not found (new bucket instance) */
+ bool exclusive,
+ real_time mtime,
+ std::map<std::string, bufferlist> *pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+
+ int remove_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, // returns 0 when the entry was removed or already absent (-ENOENT)
+ const std::string& key,
+ const RGWBucketInfo& bucket_info,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+
+ int read_bucket_stats(RGWSI_Bucket_X_Ctx& ctx,
+ const rgw_bucket& bucket,
+ RGWBucketEnt *ent,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+
+ int read_buckets_stats(RGWSI_Bucket_X_Ctx& ctx, // returns m.size() on success, or the first negative error
+ std::map<std::string, RGWBucketEnt>& m,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) override;
+};
+
diff --git a/src/rgw/services/svc_bucket_sync.h b/src/rgw/services/svc_bucket_sync.h
new file mode 100644
index 000000000..7975e062b
--- /dev/null
+++ b/src/rgw/services/svc_bucket_sync.h
@@ -0,0 +1,55 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_bucket_types.h"
+
+class RGWBucketSyncPolicyHandler;
+using RGWBucketSyncPolicyHandlerRef = std::shared_ptr<RGWBucketSyncPolicyHandler>;
+
+
+class RGWSI_Bucket_Sync : public RGWServiceInstance // abstract interface; every operation below is pure virtual
+{
+public:
+ RGWSI_Bucket_Sync(CephContext *cct) : RGWServiceInstance(cct) {}
+
+ virtual int get_policy_handler(RGWSI_Bucket_X_Ctx& ctx,
+ std::optional<rgw_zone_id> zone,
+ std::optional<rgw_bucket> bucket,
+ RGWBucketSyncPolicyHandlerRef *handler, // out: shared_ptr to the policy handler
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int handle_bi_update(const DoutPrefixProvider *dpp,
+ RGWBucketInfo& bucket_info,
+ RGWBucketInfo *orig_bucket_info, // may be nullptr: RGWSI_Bucket_SObj passes orig_info.value_or(nullptr)
+ optional_yield y) = 0;
+ virtual int handle_bi_removal(const DoutPrefixProvider *dpp,
+ const RGWBucketInfo& bucket_info,
+ optional_yield y) = 0;
+
+ virtual int get_bucket_sync_hints(const DoutPrefixProvider *dpp,
+ const rgw_bucket& bucket,
+ std::set<rgw_bucket> *sources, // out (presumably): buckets hinted as sync sources -- confirm against the implementation
+ std::set<rgw_bucket> *dests, // out (presumably): buckets hinted as sync destinations -- confirm against the implementation
+ optional_yield y) = 0;
+};
+
+
diff --git a/src/rgw/services/svc_bucket_sync_sobj.cc b/src/rgw/services/svc_bucket_sync_sobj.cc
new file mode 100644
index 000000000..ea3398a3f
--- /dev/null
+++ b/src/rgw/services/svc_bucket_sync_sobj.cc
@@ -0,0 +1,903 @@
+#include "svc_bucket_sync_sobj.h"
+#include "svc_zone.h"
+#include "svc_sys_obj_cache.h"
+#include "svc_bucket_sobj.h"
+
+#include "rgw_bucket_sync.h"
+#include "rgw_zone.h"
+#include "rgw_sync_policy.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+ /* oid prefixes of the per-bucket hint index objects: one for sync sources, one for sync targets */
+ static string bucket_sync_sources_oid_prefix = "bucket.sync-source-hints";
+ static string bucket_sync_targets_oid_prefix = "bucket.sync-target-hints";
+
+ /*
+  * Maps buckets to their hint index objects (sources / dests) and applies
+  * added/removed hint sets to those objects.
+  */
+ class RGWSI_Bucket_Sync_SObj_HintIndexManager {
+   CephContext *cct;
+
+   struct {
+     RGWSI_Zone *zone;
+     RGWSI_SysObj *sysobj;
+   } svc;
+
+ public:
+   /* the ceph context is taken from the zone service */
+   RGWSI_Bucket_Sync_SObj_HintIndexManager(RGWSI_Zone *_zone_svc,
+                                           RGWSI_SysObj *_sysobj_svc)
+     : cct(_zone_svc->ctx()),
+       svc{_zone_svc, _sysobj_svc}
+   {
+   }
+
+   /* raw object holding the source hints of `bucket` */
+   rgw_raw_obj get_sources_obj(const rgw_bucket& bucket) const;
+   /* raw object holding the destination (target) hints of `bucket` */
+   rgw_raw_obj get_dests_obj(const rgw_bucket& bucket) const;
+
+   /*
+    * Apply the added/removed destinations and sources of bucket_info.bucket
+    * to its own hint indexes and to those of the related buckets.
+    */
+   template <typename C1, typename C2>
+   int update_hints(const DoutPrefixProvider *dpp,
+                    const RGWBucketInfo& bucket_info,
+                    C1& added_dests,
+                    C2& removed_dests,
+                    C1& added_sources,
+                    C2& removed_sources,
+                    optional_yield y);
+ };
+
+ /* nothing to set up beyond the base service; real wiring happens in init() */
+ RGWSI_Bucket_Sync_SObj::RGWSI_Bucket_Sync_SObj(CephContext *cct)
+   : RGWSI_Bucket_Sync(cct)
+ {
+ }
+
+ /* kept out of line so the unique_ptr members are destroyed here */
+ RGWSI_Bucket_Sync_SObj::~RGWSI_Bucket_Sync_SObj() = default;
+
+ /* Record the services this one depends on and create the hint index manager. */
+ void RGWSI_Bucket_Sync_SObj::init(RGWSI_Zone *_zone_svc,
+                                   RGWSI_SysObj *_sysobj_svc,
+                                   RGWSI_SysObj_Cache *_cache_svc,
+                                   RGWSI_Bucket_SObj *bucket_sobj_svc)
+ {
+   svc.bucket_sobj = bucket_sobj_svc;
+   svc.cache = _cache_svc;
+   svc.sysobj = _sysobj_svc;
+   svc.zone = _zone_svc;
+
+   hint_index_mgr = std::make_unique<RGWSI_Bucket_Sync_SObj_HintIndexManager>(_zone_svc, _sysobj_svc);
+ }
+
+ /* Create the sync policy cache and chain it to the sysobj cache service; always returns 0. */
+ int RGWSI_Bucket_Sync_SObj::do_start(optional_yield, const DoutPrefixProvider *dpp)
+ {
+   using policy_cache_t = RGWChainedCacheImpl<bucket_sync_policy_cache_entry>;
+
+   sync_policy_cache = std::make_unique<policy_cache_t>();
+   sync_policy_cache->init(svc.cache);
+
+   return 0;
+ }
+
+ /*
+  * Add to *hint_entities one (zone, bucket) entity for every combination of
+  * the given zones and those hinted buckets whose bucket info can be read.
+  * Buckets whose info cannot be read are logged and left out.
+  */
+ void RGWSI_Bucket_Sync_SObj::get_hint_entities(RGWSI_Bucket_X_Ctx& ctx,
+                                                const std::set<rgw_zone_id>& zones,
+                                                const std::set<rgw_bucket>& buckets,
+                                                std::set<rgw_sync_bucket_entity> *hint_entities,
+                                                optional_yield y, const DoutPrefixProvider *dpp)
+ {
+   vector<rgw_bucket> resolved;
+   resolved.reserve(buckets.size());
+
+   for (auto& candidate : buckets) {
+     RGWBucketInfo candidate_info;
+     const int ret = svc.bucket_sobj->read_bucket_info(ctx, candidate, &candidate_info,
+                                                       nullptr, nullptr, boost::none,
+                                                       y, dpp);
+     if (ret >= 0) {
+       /* keep the bucket as stored in the bucket info that was read */
+       resolved.emplace_back(std::move(candidate_info.bucket));
+     } else {
+       ldpp_dout(dpp, 20) << "could not init bucket info for hint bucket=" << candidate << " ... skipping" << dendl;
+     }
+   }
+
+   for (auto& zone_id : zones) {
+     for (auto& hinted : resolved) {
+       hint_entities->insert(rgw_sync_bucket_entity(zone_id, hinted));
+     }
+   }
+ }
+
+ /*
+  * Resolve the source/target hints of `handler` into concrete pipes.
+  *
+  * Each hinted (zone, bucket) entity gets its own policy handler (taken from
+  * temp_map when already there, otherwise obtained through
+  * do_get_policy_handler()), and the pipes it has towards self_entity are
+  * collected and stored on `handler`. Entities whose handler cannot be
+  * obtained are skipped. Always returns 0.
+  */
+ int RGWSI_Bucket_Sync_SObj::resolve_policy_hints(RGWSI_Bucket_X_Ctx& ctx,
+                                                  rgw_sync_bucket_entity& self_entity,
+                                                  RGWBucketSyncPolicyHandlerRef& handler,
+                                                  RGWBucketSyncPolicyHandlerRef& zone_policy_handler,
+                                                  std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map,
+                                                  optional_yield y,
+                                                  const DoutPrefixProvider *dpp)
+ {
+   set<rgw_zone_id> zones_from;
+   set<rgw_zone_id> zones_to;
+
+   zone_policy_handler->reflect(dpp, nullptr, nullptr,
+                                nullptr, nullptr,
+                                &zones_from,
+                                &zones_to,
+                                false); /* relaxed: also get all zones that we allow to sync to/from */
+
+   std::set<rgw_sync_bucket_entity> candidates;
+   get_hint_entities(ctx, zones_from, handler->get_source_hints(), &candidates, y, dpp);
+   get_hint_entities(ctx, zones_to, handler->get_target_hints(), &candidates, y, dpp);
+
+   std::set<rgw_sync_bucket_pipe> resolved_sources;
+   std::set<rgw_sync_bucket_pipe> resolved_dests;
+
+   for (auto& candidate : candidates) {
+     if (!(candidate.zone && candidate.bucket)) {
+       continue; /* shouldn't really happen */
+     }
+
+     auto& remote_zone = *candidate.zone;
+     auto& remote_bucket = *candidate.bucket;
+
+     RGWBucketSyncPolicyHandlerRef remote_handler;
+
+     auto known = temp_map.find(optional_zone_bucket(remote_zone, remote_bucket));
+     if (known == temp_map.end()) {
+       int r = do_get_policy_handler(ctx, remote_zone, remote_bucket, temp_map, &remote_handler, y, dpp);
+       if (r < 0) {
+         ldpp_dout(dpp, 20) << "could not get bucket sync policy handler for hint bucket=" << remote_bucket << " ... skipping" << dendl;
+         continue;
+       }
+     } else {
+       remote_handler = known->second;
+     }
+
+     /* dests and sources are swapped on purpose: the remote handler reports
+        them relative to the remote entity */
+     remote_handler->get_pipes(&resolved_dests,
+                               &resolved_sources,
+                               self_entity);
+   }
+
+   handler->set_resolved_hints(std::move(resolved_sources), std::move(resolved_dests));
+
+   return 0;
+ }
+
+ /*
+  * Find, or build and cache, the sync policy handler for a zone/bucket pair.
+  *
+  *  - no bucket: the zone-level handler of the zone service is returned.
+  *  - bucket without a bucket_id: the bucket entrypoint is read first to learn
+  *    the bucket instance.
+  *  - a handler cached under "bi/<zone key>/<bucket instance key>" is reused;
+  *    the zone key is left empty when no zone is given or it is the local zone.
+  *  - otherwise the bucket instance info is read, a child handler is allocated
+  *    from the zone policy handler, initialized, has its hints resolved, and is
+  *    put into the cache.
+  *
+  * temp_map holds the handlers built during this lookup; resolve_policy_hints()
+  * consults it before calling back into this function.
+  *
+  * Returns 0 on success and a negative error code otherwise (-ENOENT when the
+  * zone has no policy handler; read errors are passed through).
+  */
+ int RGWSI_Bucket_Sync_SObj::do_get_policy_handler(RGWSI_Bucket_X_Ctx& ctx,
+                                                   std::optional<rgw_zone_id> zone,
+                                                   std::optional<rgw_bucket> _bucket,
+                                                   std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map,
+                                                   RGWBucketSyncPolicyHandlerRef *handler,
+                                                   optional_yield y,
+                                                   const DoutPrefixProvider *dpp)
+ {
+   if (!_bucket) {
+     *handler = svc.zone->get_sync_policy_handler(zone);
+     return 0;
+   }
+
+   auto bucket = *_bucket;
+
+   if (bucket.bucket_id.empty()) {
+     /* only the bucket name is known: resolve the instance via the entrypoint */
+     RGWBucketEntryPoint ep_info;
+     int ret = svc.bucket_sobj->read_bucket_entrypoint_info(ctx.ep,
+                                                            RGWSI_Bucket::get_entrypoint_meta_key(bucket),
+                                                            &ep_info,
+                                                            nullptr, /* objv_tracker */
+                                                            nullptr, /* mtime */
+                                                            nullptr, /* attrs */
+                                                            y,
+                                                            dpp,
+                                                            nullptr, /* cache_info */
+                                                            boost::none /* refresh_version */);
+     if (ret < 0) {
+       if (ret != -ENOENT) {
+         ldpp_dout(dpp, 0) << "ERROR: svc.bucket->read_bucket_entrypoint_info(bucket=" << bucket << ") returned r=" << ret << dendl;
+       }
+       return ret;
+     }
+
+     bucket = ep_info.bucket;
+   }
+
+   string zone_key;
+   string bucket_key;
+
+   if (zone && *zone != svc.zone->zone_id()) {
+     zone_key = zone->id;
+   }
+
+   bucket_key = RGWSI_Bucket::get_bi_meta_key(bucket);
+
+   string cache_key("bi/" + zone_key + "/" + bucket_key);
+
+   if (auto e = sync_policy_cache->find(cache_key)) {
+     *handler = e->handler;
+     return 0;
+   }
+
+   bucket_sync_policy_cache_entry e;
+   rgw_cache_entry_info cache_info;
+
+   RGWBucketInfo bucket_info;
+   map<string, bufferlist> attrs;
+
+   int r = svc.bucket_sobj->read_bucket_instance_info(ctx.bi,
+                                                      bucket_key,
+                                                      &bucket_info,
+                                                      nullptr,
+                                                      &attrs,
+                                                      y,
+                                                      dpp,
+                                                      &cache_info);
+   if (r < 0) {
+     if (r != -ENOENT) {
+       ldpp_dout(dpp, 0) << "ERROR: svc.bucket->read_bucket_instance_info(key=" << bucket_key << ") returned r=" << r << dendl;
+     }
+     return r;
+   }
+
+   auto zone_policy_handler = svc.zone->get_sync_policy_handler(zone);
+   if (!zone_policy_handler) {
+     ldpp_dout(dpp, 20) << "ERROR: could not find policy handler for zone=" << zone << dendl;
+     return -ENOENT;
+   }
+
+   e.handler.reset(zone_policy_handler->alloc_child(bucket_info, std::move(attrs)));
+
+   r = e.handler->init(dpp, y);
+   if (r < 0) {
+     ldpp_dout(dpp, 20) << "ERROR: failed to init bucket sync policy handler: r=" << r << dendl;
+     return r;
+   }
+
+   /* register before resolving hints, so that lookups made while resolving
+      find this handler in temp_map */
+   temp_map.emplace(optional_zone_bucket{zone, bucket}, e.handler);
+
+   rgw_sync_bucket_entity self_entity(zone.value_or(svc.zone->zone_id()), bucket);
+
+   r = resolve_policy_hints(ctx, self_entity,
+                            e.handler,
+                            zone_policy_handler,
+                            temp_map, y, dpp);
+   if (r < 0) {
+     ldpp_dout(dpp, 20) << "ERROR: failed to resolve policy hints: bucket_key=" << bucket_key << ", r=" << r << dendl;
+     return r;
+   }
+
+   if (!sync_policy_cache->put(dpp, svc.cache, cache_key, &e, {&cache_info})) {
+     ldpp_dout(dpp, 20) << "couldn't put bucket_sync_policy cache entry, might have raced with data changes" << dendl;
+   }
+
+   *handler = e.handler;
+
+   return 0;
+ }
+
+ /*
+  * Public entry point for policy handler lookup: runs do_get_policy_handler()
+  * with a fresh handler map that lives only for this call.
+  */
+ int RGWSI_Bucket_Sync_SObj::get_policy_handler(RGWSI_Bucket_X_Ctx& ctx,
+                                                std::optional<rgw_zone_id> zone,
+                                                std::optional<rgw_bucket> _bucket,
+                                                RGWBucketSyncPolicyHandlerRef *handler,
+                                                optional_yield y,
+                                                const DoutPrefixProvider *dpp)
+ {
+   std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef> lookup_scratch;
+   const int r = do_get_policy_handler(ctx, zone, _bucket, lookup_scratch, handler, y, dpp);
+   return r;
+ }
+
+ /*
+  * Walk both ordered sets in parallel and append to *removed the buckets that
+  * are only in orig_set and to *added the buckets that are only in new_set.
+  * Returns true when *added or *removed is non-empty afterwards.
+  */
+ static bool diff_sets(std::set<rgw_bucket>& orig_set,
+                       std::set<rgw_bucket>& new_set,
+                       vector<rgw_bucket> *added,
+                       vector<rgw_bucket> *removed)
+ {
+   auto old_pos = orig_set.begin();
+   auto new_pos = new_set.begin();
+   const auto old_end = orig_set.end();
+   const auto new_end = new_set.end();
+
+   while (old_pos != old_end && new_pos != new_end) {
+     if (*old_pos == *new_pos) {
+       /* present on both sides */
+       ++old_pos;
+       ++new_pos;
+     } else if (*old_pos < *new_pos) {
+       removed->push_back(*old_pos);
+       ++old_pos;
+     } else {
+       added->push_back(*new_pos);
+       ++new_pos;
+     }
+   }
+
+   /* whatever is left on either side has no counterpart */
+   while (old_pos != old_end) {
+     removed->push_back(*old_pos);
+     ++old_pos;
+   }
+   while (new_pos != new_end) {
+     added->push_back(*new_pos);
+     ++new_pos;
+   }
+
+   return !removed->empty() || !added->empty();
+ }
+
+
+ /*
+ * In-memory view of one hint index system object, with the logic to read
+ * it, modify it and write it back.
+ */
+ class RGWSI_BS_SObj_HintIndexObj
+ {
+ friend class RGWSI_Bucket_Sync_SObj;
+
+ CephContext *cct;
+ struct {
+ RGWSI_SysObj *sysobj;
+ } svc;
+
+ rgw_raw_obj obj;
+ RGWSysObj sysobj;
+
+ /* version tracker filled by read() and handed to the write/remove in flush() */
+ RGWObjVersionTracker ot;
+
+ /* set once read() decoded the object; reset by invalidate() */
+ bool has_data{false};
+
+ public:
+ /* a hinted bucket together with the bucket infos (and their versions) that reported it */
+ struct bi_entry {
+ rgw_bucket bucket;
+ map<rgw_bucket /* info_source */, obj_version> sources;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(bucket, bl);
+ encode(sources, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(bucket, bl);
+ decode(sources, bl);
+ DECODE_FINISH(bl);
+ }
+
+ /*
+ * Record that info_source (at version info_source_ver) refers to this entry.
+ * Returns false when nothing changed: the same version is already stored, or
+ * the stored version has the same tag and a higher ver.
+ */
+ bool add(const rgw_bucket& info_source,
+ const obj_version& info_source_ver) {
+ /* operator[] creates a default version when info_source is new */
+ auto& ver = sources[info_source];
+
+ if (ver == info_source_ver) { /* already updated */
+ return false;
+ }
+
+ if (info_source_ver.tag == ver.tag &&
+ info_source_ver.ver < ver.ver) {
+ return false;
+ }
+
+ ver = info_source_ver;
+
+ return true;
+ }
+
+ /*
+ * Drop info_source from this entry. Returns false when it is not present or
+ * when the stored version has the same tag and a higher ver than the caller's.
+ */
+ bool remove(const rgw_bucket& info_source,
+ const obj_version& info_source_ver) {
+ auto iter = sources.find(info_source);
+ if (iter == sources.end()) {
+ return false;
+ }
+
+ auto& ver = iter->second;
+
+ if (info_source_ver.tag == ver.tag &&
+ info_source_ver.ver < ver.ver) {
+ return false;
+ }
+
+ sources.erase(info_source);
+ return true;
+ }
+
+ bool empty() const {
+ return sources.empty();
+ }
+ };
+
+ /* the hinted buckets of a single bucket instance, keyed by hinted bucket */
+ struct single_instance_info {
+ map<rgw_bucket, bi_entry> entries;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(entries, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(entries, bl);
+ DECODE_FINISH(bl);
+ }
+
+ /* Add (or refresh) the hint for `bucket`; returns whether the entry changed. */
+ bool add_entry(const rgw_bucket& info_source,
+ const obj_version& info_source_ver,
+ const rgw_bucket& bucket) {
+ auto& entry = entries[bucket];
+
+ if (!entry.add(info_source, info_source_ver)) {
+ return false;
+ }
+
+ entry.bucket = bucket;
+
+ return true;
+ }
+
+ /*
+ * Remove info_source from the hint for `bucket`; the hint itself is erased
+ * once no info source refers to it. Returns whether anything was removed.
+ */
+ bool remove_entry(const rgw_bucket& info_source,
+ const obj_version& info_source_ver,
+ const rgw_bucket& bucket) {
+ auto iter = entries.find(bucket);
+ if (iter == entries.end()) {
+ return false;
+ }
+
+ if (!iter->second.remove(info_source, info_source_ver)) {
+ return false;
+ }
+
+ if (iter->second.empty()) {
+ entries.erase(iter);
+ }
+
+ return true;
+ }
+
+ void clear() {
+ entries.clear();
+ }
+
+ bool empty() const {
+ return entries.empty();
+ }
+
+ /* insert every hinted bucket into *result */
+ void get_entities(std::set<rgw_bucket> *result) const {
+ for (auto& iter : entries) {
+ result->insert(iter.first);
+ }
+ }
+ };
+
+ /* full content of the index object: per-bucket single_instance_info */
+ struct info_map {
+ map<rgw_bucket, single_instance_info> instances;
+
+ void encode(bufferlist& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(instances, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator& bl) {
+ DECODE_START(1, bl);
+ decode(instances, bl);
+ DECODE_FINISH(bl);
+ }
+
+ bool empty() const {
+ return instances.empty();
+ }
+
+ void clear() {
+ instances.clear();
+ }
+
+ /* insert the buckets hinted for `bucket` into *result; no-op when `bucket` is unknown */
+ void get_entities(const rgw_bucket& bucket,
+ std::set<rgw_bucket> *result) const {
+ auto iter = instances.find(bucket);
+ if (iter == instances.end()) {
+ return;
+ }
+ iter->second.get_entities(result);
+ }
+ } info;
+
+ RGWSI_BS_SObj_HintIndexObj(RGWSI_SysObj *_sysobj_svc,
+ const rgw_raw_obj& _obj) : cct(_sysobj_svc->ctx()),
+ obj(_obj),
+ sysobj(_sysobj_svc->get_obj(obj))
+ {
+ svc.sysobj = _sysobj_svc;
+ }
+
+ /*
+ * Apply the `add` and `remove` bucket collections (either may be null) to the
+ * instance keyed by `entity`, on behalf of info_source, and write the object back.
+ */
+ template <typename C1, typename C2>
+ int update(const DoutPrefixProvider *dpp,
+ const rgw_bucket& entity,
+ const RGWBucketInfo& info_source,
+ C1 *add,
+ C2 *remove,
+ optional_yield y);
+
+ private:
+ /* in-memory part of update(): removals are applied before additions */
+ template <typename C1, typename C2>
+ void update_entries(const rgw_bucket& info_source,
+ const obj_version& info_source_ver,
+ C1 *add,
+ C2 *remove,
+ single_instance_info *instance);
+
+ /* load `info` (and the version tracker) from the object */
+ int read(const DoutPrefixProvider *dpp, optional_yield y);
+ /* write `info` to the object, or remove the object when `info` is empty */
+ int flush(const DoutPrefixProvider *dpp, optional_yield y);
+
+ /* forget the in-memory state so that the next update() attempt reads again */
+ void invalidate() {
+ has_data = false;
+ info.clear();
+ }
+
+ void get_entities(const rgw_bucket& bucket,
+ std::set<rgw_bucket> *result) const {
+ info.get_entities(bucket, result);
+ }
+ };
+ WRITE_CLASS_ENCODER(RGWSI_BS_SObj_HintIndexObj::bi_entry)
+ WRITE_CLASS_ENCODER(RGWSI_BS_SObj_HintIndexObj::single_instance_info)
+ WRITE_CLASS_ENCODER(RGWSI_BS_SObj_HintIndexObj::info_map)
+
+ /*
+  * Read-modify-write of the hint index object.
+  *
+  * The instance keyed by `entity` gets the buckets in *remove dropped and the
+  * buckets in *add recorded (either pointer may be null), both attributed to
+  * info_source at its read version. An instance left without entries is erased.
+  *
+  * When flush() reports -ECANCELED the in-memory state is discarded and the
+  * whole cycle is retried, up to max_retries times.
+  *
+  * Returns 0 on success, the read/flush error otherwise, or -EIO when the
+  * retries are exhausted.
+  */
+ template <typename C1, typename C2>
+ int RGWSI_BS_SObj_HintIndexObj::update(const DoutPrefixProvider *dpp,
+                                        const rgw_bucket& entity,
+                                        const RGWBucketInfo& info_source,
+                                        C1 *add,
+                                        C2 *remove,
+                                        optional_yield y)
+ {
+   /* scoped constant instead of a macro, so nothing leaks past this function */
+   constexpr int max_retries = 25;
+
+   int r = 0;
+
+   auto& info_source_ver = info_source.objv_tracker.read_version;
+
+   for (int i = 0; i < max_retries; ++i) {
+     if (!has_data) {
+       r = read(dpp, y);
+       if (r < 0) {
+         ldpp_dout(dpp, 0) << "ERROR: cannot update hint index: failed to read: r=" << r << dendl;
+         return r;
+       }
+     }
+
+     auto& instance = info.instances[entity];
+
+     update_entries(info_source.bucket,
+                    info_source_ver,
+                    add, remove,
+                    &instance);
+
+     if (instance.empty()) {
+       /* `instance` must not be used after this point */
+       info.instances.erase(entity);
+     }
+
+     r = flush(dpp, y);
+     if (r >= 0) {
+       return 0;
+     }
+
+     if (r != -ECANCELED) {
+       ldpp_dout(dpp, 0) << "ERROR: failed to flush hint index: obj=" << obj << " r=" << r << dendl;
+       return r;
+     }
+
+     /* -ECANCELED: drop our copy and start over from a fresh read */
+     invalidate();
+   }
+   ldpp_dout(dpp, 0) << "ERROR: failed to flush hint index: too many retries (obj=" << obj << "), likely a bug" << dendl;
+
+   return -EIO;
+ }
+
+ /*
+  * Apply the removals, then the additions, to *instance. A null `add` or
+  * `remove` pointer means there is nothing to do for that side.
+  */
+ template <typename C1, typename C2>
+ void RGWSI_BS_SObj_HintIndexObj::update_entries(const rgw_bucket& info_source,
+                                                 const obj_version& info_source_ver,
+                                                 C1 *add,
+                                                 C2 *remove,
+                                                 single_instance_info *instance)
+ {
+   if (remove != nullptr) {
+     for (auto& stale : *remove) {
+       instance->remove_entry(info_source, info_source_ver, stale);
+     }
+   }
+
+   if (add != nullptr) {
+     for (auto& fresh : *add) {
+       instance->add_entry(info_source, info_source_ver, fresh);
+     }
+   }
+ }
+
+ /*
+  * Load the index object into `info`.
+  *
+  * A missing object (-ENOENT) or undecodable content leaves `info` empty and
+  * still returns 0; has_data is set only after a successful decode. Any other
+  * read error is logged and returned. The version tracker of the read is kept
+  * in `ot`.
+  */
+ int RGWSI_BS_SObj_HintIndexObj::read(const DoutPrefixProvider *dpp, optional_yield y) {
+   RGWObjVersionTracker read_ot; /* forcing read of current version */
+   bufferlist raw;
+
+   const int r = sysobj.rop()
+                       .set_objv_tracker(&read_ot)
+                       .read(dpp, &raw, y);
+   if (r < 0 && r != -ENOENT) {
+     ldpp_dout(dpp, 0) << "ERROR: failed reading data (obj=" << obj << "), r=" << r << dendl;
+     return r;
+   }
+
+   ot = read_ot;
+
+   if (r < 0) {
+     /* the object does not exist */
+     info.clear();
+     return 0;
+   }
+
+   auto pos = raw.cbegin();
+   try {
+     decode(info, pos);
+     has_data = true;
+   } catch (buffer::error& err) {
+     ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to decode entries, ignoring" << dendl;
+     info.clear();
+   }
+
+   return 0;
+ }
+
+ /*
+  * Persist `info`: an empty index removes the object, anything else is
+  * encoded and written. Both operations are given the tracker `ot`.
+  * Returns 0 on success or the negative error of the operation.
+  */
+ int RGWSI_BS_SObj_HintIndexObj::flush(const DoutPrefixProvider *dpp, optional_yield y) {
+   int r;
+
+   if (info.empty()) {
+     /* nothing left to store */
+     r = sysobj.wop()
+               .set_objv_tracker(&ot)
+               .remove(dpp, y);
+   } else {
+     bufferlist encoded;
+     encode(info, encoded);
+
+     r = sysobj.wop()
+               .set_objv_tracker(&ot)
+               .write(dpp, encoded, y);
+   }
+
+   return (r < 0) ? r : 0;
+ }
+
+ /*
+  * Raw object of the source hints of `bucket`: it lives in the zone's log
+  * pool and its oid is built from the bucket key with the bucket_id cleared.
+  */
+ rgw_raw_obj RGWSI_Bucket_Sync_SObj_HintIndexManager::get_sources_obj(const rgw_bucket& bucket) const
+ {
+   rgw_bucket without_id(bucket);
+   without_id.bucket_id.clear();
+
+   const string oid = bucket_sync_sources_oid_prefix + "." + without_id.get_key();
+   return rgw_raw_obj(svc.zone->get_zone_params().log_pool, oid);
+ }
+
+ /*
+  * Raw object of the target hints of `bucket`: it lives in the zone's log
+  * pool and its oid is built from the bucket key with the bucket_id cleared.
+  */
+ rgw_raw_obj RGWSI_Bucket_Sync_SObj_HintIndexManager::get_dests_obj(const rgw_bucket& bucket) const
+ {
+   rgw_bucket without_id(bucket);
+   without_id.bucket_id.clear();
+
+   const string oid = bucket_sync_targets_oid_prefix + "." + without_id.get_key();
+   return rgw_raw_obj(svc.zone->get_zone_params().log_pool, oid);
+ }
+
+ /*
+  * Propagate a change of a bucket's potential sync relations to the hint
+  * index objects.
+  *
+  * Destinations: the bucket's own targets index gets added_dests /
+  * removed_dests applied; then the sources index of every added destination
+  * gains this bucket and the sources index of every removed destination
+  * loses it.
+  *
+  * Sources: handled symmetrically, with the bucket's own sources index and
+  * the targets index of every added/removed source.
+  *
+  * Each side is skipped when both of its collections are empty. Processing
+  * stops at the first failing index update and its error is returned;
+  * returns 0 otherwise.
+  */
+ template <typename C1, typename C2>
+ int RGWSI_Bucket_Sync_SObj_HintIndexManager::update_hints(const DoutPrefixProvider *dpp,
+                                                           const RGWBucketInfo& bucket_info,
+                                                           C1& added_dests,
+                                                           C2& removed_dests,
+                                                           C1& added_sources,
+                                                           C2& removed_sources,
+                                                           optional_yield y)
+ {
+   /* single-element collection used when updating the related buckets' indexes */
+   C1 self_entity = { bucket_info.bucket };
+
+   if (!added_dests.empty() ||
+       !removed_dests.empty()) {
+     /* update our dests */
+     RGWSI_BS_SObj_HintIndexObj index(svc.sysobj,
+                                      get_dests_obj(bucket_info.bucket));
+     int r = index.update(dpp, bucket_info.bucket,
+                          bucket_info,
+                          &added_dests,
+                          &removed_dests,
+                          y);
+     if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << bucket_info.bucket << " r=" << r << dendl;
+       return r;
+     }
+
+     /* update added dest buckets: we become one of their sources */
+     for (auto& dest_bucket : added_dests) {
+       RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj,
+                                            get_sources_obj(dest_bucket));
+       r = dep_index.update(dpp, dest_bucket,
+                            bucket_info,
+                            &self_entity,
+                            static_cast<C2 *>(nullptr),
+                            y);
+       if (r < 0) {
+         ldpp_dout(dpp, 0) << "ERROR: failed to update sources index for bucket=" << dest_bucket << " r=" << r << dendl;
+         return r;
+       }
+     }
+     /* update removed dest buckets: we stop being one of their sources */
+     for (auto& dest_bucket : removed_dests) {
+       RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj,
+                                            get_sources_obj(dest_bucket));
+       r = dep_index.update(dpp, dest_bucket,
+                            bucket_info,
+                            static_cast<C1 *>(nullptr),
+                            &self_entity,
+                            y);
+       if (r < 0) {
+         ldpp_dout(dpp, 0) << "ERROR: failed to update sources index for bucket=" << dest_bucket << " r=" << r << dendl;
+         return r;
+       }
+     }
+   }
+
+   if (!added_sources.empty() ||
+       !removed_sources.empty()) {
+     RGWSI_BS_SObj_HintIndexObj index(svc.sysobj,
+                                      get_sources_obj(bucket_info.bucket));
+     /* update our sources */
+     int r = index.update(dpp, bucket_info.bucket,
+                          bucket_info,
+                          &added_sources,
+                          &removed_sources,
+                          y);
+     if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: failed to update sources index for bucket=" << bucket_info.bucket << " r=" << r << dendl;
+       return r;
+     }
+
+     /* update added source buckets: we become one of their targets */
+     for (auto& source_bucket : added_sources) {
+       RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj,
+                                            get_dests_obj(source_bucket));
+       r = dep_index.update(dpp, source_bucket,
+                            bucket_info,
+                            &self_entity,
+                            static_cast<C2 *>(nullptr),
+                            y);
+       if (r < 0) {
+         ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << source_bucket << " r=" << r << dendl;
+         return r;
+       }
+     }
+     /* update removed source buckets: we stop being one of their targets */
+     for (auto& source_bucket : removed_sources) {
+       RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj,
+                                            get_dests_obj(source_bucket));
+       r = dep_index.update(dpp, source_bucket,
+                            bucket_info,
+                            static_cast<C1 *>(nullptr),
+                            &self_entity,
+                            y);
+       if (r < 0) {
+         ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << source_bucket << " r=" << r << dendl;
+         return r;
+       }
+     }
+   }
+
+   return 0;
+ }
+
+ /*
+  * A bucket instance is going away: every bucket its sync policy (if any)
+  * could relate to is reported to the hint indexes as removed, and nothing
+  * is reported as added.
+  */
+ int RGWSI_Bucket_Sync_SObj::handle_bi_removal(const DoutPrefixProvider *dpp,
+                                               const RGWBucketInfo& bucket_info,
+                                               optional_yield y)
+ {
+   std::set<rgw_bucket> related_sources;
+   std::set<rgw_bucket> related_dests;
+
+   if (bucket_info.sync_policy) {
+     bucket_info.sync_policy->get_potential_related_buckets(bucket_info.bucket,
+                                                            &related_sources,
+                                                            &related_dests);
+   }
+
+   /* update_hints() is called with vectors here, so copy the sets over */
+   std::vector<rgw_bucket> removed_sources(related_sources.begin(), related_sources.end());
+   std::vector<rgw_bucket> removed_dests(related_dests.begin(), related_dests.end());
+
+   std::vector<rgw_bucket> added_sources;
+   std::vector<rgw_bucket> added_dests;
+
+   return hint_index_mgr->update_hints(dpp, bucket_info,
+                                       added_dests,
+                                       removed_dests,
+                                       added_sources,
+                                       removed_sources,
+                                       y);
+ }
+
+ /*
+  * A bucket instance info changed: compare the buckets its old and new sync
+  * policies could relate to and, when they differ, update the hint indexes.
+  *
+  * orig_bucket_info may be null (no previous state); a missing sync policy on
+  * either side counts as "no related buckets".
+  *
+  * Returns 0 when nothing changed, otherwise the result of update_hints().
+  */
+ int RGWSI_Bucket_Sync_SObj::handle_bi_update(const DoutPrefixProvider *dpp,
+                                              RGWBucketInfo& bucket_info,
+                                              RGWBucketInfo *orig_bucket_info,
+                                              optional_yield y)
+ {
+   std::set<rgw_bucket> orig_sources;
+   std::set<rgw_bucket> orig_dests;
+
+   if (orig_bucket_info &&
+       orig_bucket_info->sync_policy) {
+     orig_bucket_info->sync_policy->get_potential_related_buckets(bucket_info.bucket,
+                                                                  &orig_sources,
+                                                                  &orig_dests);
+   }
+
+   std::set<rgw_bucket> sources;
+   std::set<rgw_bucket> dests;
+   if (bucket_info.sync_policy) {
+     bucket_info.sync_policy->get_potential_related_buckets(bucket_info.bucket,
+                                                            &sources,
+                                                            &dests);
+   }
+
+   std::vector<rgw_bucket> removed_sources;
+   std::vector<rgw_bucket> added_sources;
+   const bool sources_changed = diff_sets(orig_sources, sources, &added_sources, &removed_sources);
+   ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": orig_sources=" << orig_sources << " new_sources=" << sources << dendl;
+   ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": potential sources added=" << added_sources << " removed=" << removed_sources << dendl;
+
+   std::vector<rgw_bucket> removed_dests;
+   std::vector<rgw_bucket> added_dests;
+   /* diff_sets() is what fills added_dests/removed_dests, so it must run even
+      when the sources already differ -- do not fold it into a short-circuiting
+      `||`, or removed dests would never reach update_hints() */
+   const bool dests_changed = diff_sets(orig_dests, dests, &added_dests, &removed_dests);
+
+   ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": orig_dests=" << orig_dests << " new_dests=" << dests << dendl;
+   ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": potential dests added=" << added_dests << " removed=" << removed_dests << dendl;
+
+   if (!sources_changed && !dests_changed) {
+     return 0;
+   }
+
+   return hint_index_mgr->update_hints(dpp, bucket_info,
+                                       dests, /* set all dests, not just the ones that were added */
+                                       removed_dests,
+                                       sources, /* set all sources, not just the ones that were added */
+                                       removed_sources,
+                                       y);
+ }
+
+ /*
+  * Collect the hinted sync sources and/or destinations of `bucket`.
+  *
+  * For each non-null output set the matching hint index object is read and
+  * the buckets recorded for `bucket` are inserted. When `bucket` carries a
+  * bucket_id, the buckets recorded under the same bucket without a bucket_id
+  * are inserted as well.
+  *
+  * Returns 0 on success (also when both outputs are null) or the error of
+  * the failing index read.
+  */
+ int RGWSI_Bucket_Sync_SObj::get_bucket_sync_hints(const DoutPrefixProvider *dpp,
+                                                   const rgw_bucket& bucket,
+                                                   std::set<rgw_bucket> *sources,
+                                                   std::set<rgw_bucket> *dests,
+                                                   optional_yield y)
+ {
+   if (!sources && !dests) {
+     return 0;
+   }
+
+   if (sources) {
+     RGWSI_BS_SObj_HintIndexObj index(svc.sysobj,
+                                      hint_index_mgr->get_sources_obj(bucket));
+     int r = index.read(dpp, y);
+     if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: failed to read sources index for bucket=" << bucket << " r=" << r << dendl;
+       return r;
+     }
+
+     index.get_entities(bucket, sources);
+
+     if (!bucket.bucket_id.empty()) {
+       /* also pick up hints stored under the bucket without an instance id */
+       rgw_bucket b = bucket;
+       b.bucket_id.clear();
+       index.get_entities(b, sources);
+     }
+   }
+
+   if (dests) {
+     RGWSI_BS_SObj_HintIndexObj index(svc.sysobj,
+                                      hint_index_mgr->get_dests_obj(bucket));
+     int r = index.read(dpp, y);
+     if (r < 0) {
+       ldpp_dout(dpp, 0) << "ERROR: failed to read targets index for bucket=" << bucket << " r=" << r << dendl;
+       return r;
+     }
+
+     index.get_entities(bucket, dests);
+
+     if (!bucket.bucket_id.empty()) {
+       /* also pick up hints stored under the bucket without an instance id */
+       rgw_bucket b = bucket;
+       b.bucket_id.clear();
+       index.get_entities(b, dests);
+     }
+   }
+
+   return 0;
+ }
diff --git a/src/rgw/services/svc_bucket_sync_sobj.h b/src/rgw/services/svc_bucket_sync_sobj.h
new file mode 100644
index 000000000..779df7b99
--- /dev/null
+++ b/src/rgw/services/svc_bucket_sync_sobj.h
@@ -0,0 +1,123 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_meta_be.h"
+#include "svc_bucket_sync.h"
+
+class RGWSI_Zone;
+class RGWSI_SysObj_Cache;
+class RGWSI_Bucket_SObj;
+
+template <class T>
+class RGWChainedCacheImpl;
+
+class RGWSI_Bucket_Sync_SObj_HintIndexManager;
+
+struct rgw_sync_bucket_entity;
+
+ /*
+  * System-object backed implementation of RGWSI_Bucket_Sync. Policy handlers
+  * are kept in a chained cache and bucket sync hints are maintained through
+  * the hint index manager.
+  */
+ class RGWSI_Bucket_Sync_SObj : public RGWSI_Bucket_Sync
+ {
+   /* value stored in the sync policy cache */
+   struct bucket_sync_policy_cache_entry {
+     std::shared_ptr<RGWBucketSyncPolicyHandler> handler;
+   };
+
+   std::unique_ptr<RGWChainedCacheImpl<bucket_sync_policy_cache_entry> > sync_policy_cache;
+
+   std::unique_ptr<RGWSI_Bucket_Sync_SObj_HintIndexManager> hint_index_mgr;
+
+   int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+   /* (optional zone, optional bucket) pair, ordered by zone and then by bucket,
+      so that it can be used as a map key */
+   struct optional_zone_bucket {
+     std::optional<rgw_zone_id> zone;
+     std::optional<rgw_bucket> bucket;
+
+     optional_zone_bucket(const std::optional<rgw_zone_id>& _zone,
+                          const std::optional<rgw_bucket>& _bucket) : zone(_zone), bucket(_bucket) {}
+
+     bool operator<(const optional_zone_bucket& ozb) const {
+       if (zone < ozb.zone) {
+         return true;
+       }
+       if (zone > ozb.zone) {
+         return false;
+       }
+       return bucket < ozb.bucket;
+     }
+   };
+
+   /* expand hinted buckets into (zone, bucket) entities for the given zones */
+   void get_hint_entities(RGWSI_Bucket_X_Ctx& ctx,
+                          const std::set<rgw_zone_id>& zone_names,
+                          const std::set<rgw_bucket>& buckets,
+                          std::set<rgw_sync_bucket_entity> *hint_entities,
+                          optional_yield y, const DoutPrefixProvider *);
+   /* turn the hints of `handler` into resolved pipes */
+   int resolve_policy_hints(RGWSI_Bucket_X_Ctx& ctx,
+                            rgw_sync_bucket_entity& self_entity,
+                            RGWBucketSyncPolicyHandlerRef& handler,
+                            RGWBucketSyncPolicyHandlerRef& zone_policy_handler,
+                            std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map,
+                            optional_yield y,
+                            const DoutPrefixProvider *dpp);
+   /* worker behind get_policy_handler(); temp_map carries the handlers built
+      during one top-level lookup */
+   int do_get_policy_handler(RGWSI_Bucket_X_Ctx& ctx,
+                             std::optional<rgw_zone_id> zone,
+                             std::optional<rgw_bucket> _bucket,
+                             std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map,
+                             RGWBucketSyncPolicyHandlerRef *handler,
+                             optional_yield y,
+                             const DoutPrefixProvider *dpp);
+ public:
+   /* services this one depends on; set by init() */
+   struct Svc {
+     RGWSI_Zone *zone{nullptr};
+     RGWSI_SysObj *sysobj{nullptr};
+     RGWSI_SysObj_Cache *cache{nullptr};
+     RGWSI_Bucket_SObj *bucket_sobj{nullptr};
+   } svc;
+
+   RGWSI_Bucket_Sync_SObj(CephContext *cct);
+   ~RGWSI_Bucket_Sync_SObj();
+
+   void init(RGWSI_Zone *_zone_svc,
+             RGWSI_SysObj *_sysobj_svc,
+             RGWSI_SysObj_Cache *_cache_svc,
+             RGWSI_Bucket_SObj *_bucket_sobj_svc);
+
+
+   /* marked override like the other RGWSI_Bucket_Sync virtuals, so a signature
+      drift against the base class is caught at compile time */
+   int get_policy_handler(RGWSI_Bucket_X_Ctx& ctx,
+                          std::optional<rgw_zone_id> zone,
+                          std::optional<rgw_bucket> bucket,
+                          RGWBucketSyncPolicyHandlerRef *handler,
+                          optional_yield y,
+                          const DoutPrefixProvider *dpp) override;
+
+   int handle_bi_update(const DoutPrefixProvider *dpp,
+                        RGWBucketInfo& bucket_info,
+                        RGWBucketInfo *orig_bucket_info,
+                        optional_yield y) override;
+   int handle_bi_removal(const DoutPrefixProvider *dpp,
+                         const RGWBucketInfo& bucket_info,
+                         optional_yield y) override;
+
+   int get_bucket_sync_hints(const DoutPrefixProvider *dpp,
+                             const rgw_bucket& bucket,
+                             std::set<rgw_bucket> *sources,
+                             std::set<rgw_bucket> *dests,
+                             optional_yield y) override;
+ };
+
diff --git a/src/rgw/services/svc_bucket_types.h b/src/rgw/services/svc_bucket_types.h
new file mode 100644
index 000000000..30e5309d5
--- /dev/null
+++ b/src/rgw/services/svc_bucket_types.h
@@ -0,0 +1,38 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "common/ptr_wrapper.h"
+
+#include "svc_meta_be.h"
+#include "svc_meta_be_types.h"
+
+class RGWSI_MetaBackend_Handler;
+
+ /*
+ * Metadata backend handler wrappers, tagged BUCKET (bucket entrypoint) and
+ * BI (bucket instance). NOTE(review): presumably the tag makes the two
+ * aliases distinct types -- confirm in common/ptr_wrapper.h.
+ */
+ using RGWSI_Bucket_BE_Handler = ptr_wrapper<RGWSI_MetaBackend_Handler, RGWSI_META_BE_TYPES::BUCKET>;
+ using RGWSI_BucketInstance_BE_Handler = ptr_wrapper<RGWSI_MetaBackend_Handler, RGWSI_META_BE_TYPES::BI>;
+
+
+ /* Metadata backend context wrappers, tagged the same way as the handlers. */
+ using RGWSI_Bucket_EP_Ctx = ptr_wrapper<RGWSI_MetaBackend::Context, RGWSI_META_BE_TYPES::BUCKET>;
+ using RGWSI_Bucket_BI_Ctx = ptr_wrapper<RGWSI_MetaBackend::Context, RGWSI_META_BE_TYPES::BI>;
+
+ /* Bundles the two backend contexts that bucket operations take together. */
+ struct RGWSI_Bucket_X_Ctx {
+ RGWSI_Bucket_EP_Ctx ep; /* BUCKET-tagged (entrypoint) context */
+ RGWSI_Bucket_BI_Ctx bi; /* BI-tagged (bucket instance) context */
+ };
+
diff --git a/src/rgw/services/svc_cls.cc b/src/rgw/services/svc_cls.cc
new file mode 100644
index 000000000..342146bfe
--- /dev/null
+++ b/src/rgw/services/svc_cls.cc
@@ -0,0 +1,478 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#include "svc_cls.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw_zone.h"
+
+#include "cls/otp/cls_otp_client.h"
+#include "cls/log/cls_log_client.h"
+#include "cls/lock/cls_lock_client.h"
+
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+static string log_lock_name = "rgw_log_lock";
+
+/// Start the service by starting the `mfa` sub-service.
+/// Returns 0 on success, or the negative error reported by `mfa.do_start()`.
+int RGWSI_Cls::do_start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  const int ret = mfa.do_start(y, dpp);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 0) << "ERROR: failed to start mfa service" << dendl;
+  return ret;
+}
+
+/// Build and open the rados object that holds the OTP data of `user`.
+///
+/// The object lives in the zone's `otp_pool` under the oid returned by
+/// get_mfa_oid(). On return `*obj` holds the (possibly unopened) handle.
+/// Returns 0 on success or the negative error from open().
+int RGWSI_Cls::MFA::get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj)
+{
+  string mfa_oid = get_mfa_oid(user);
+  rgw_raw_obj raw_obj(zone_svc->get_zone_params().otp_pool, mfa_oid);
+
+  auto& handle = obj->emplace(rados_svc->obj(raw_obj));
+  const int ret = handle.open(dpp);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 4) << "failed to open rados context for " << raw_obj << dendl;
+  return ret;
+}
+
+/// Open the OTP object of `user` and copy its rados reference into `*ref`.
+/// Returns 0 on success or the negative error from get_mfa_obj().
+int RGWSI_Cls::MFA::get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref)
+{
+  std::optional<RGWSI_RADOS::Obj> mfa_obj;
+  if (const int ret = get_mfa_obj(dpp, user, &mfa_obj); ret < 0) {
+    return ret;
+  }
+
+  *ref = mfa_obj->get_ref();
+  return 0;
+}
+
+/// Verify `pin` against the OTP entry `otp_id` stored for `user`.
+///
+/// Returns 0 when the cls check reports OTP_CHECK_SUCCESS, -EACCES for any
+/// other check result, or a negative error if the object could not be
+/// opened or the check call itself failed.
+int RGWSI_Cls::MFA::check_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& otp_id, const string& pin, optional_yield y)
+{
+  rgw_rados_ref mfa_ref;
+  int ret = get_mfa_ref(dpp, user, &mfa_ref);
+  if (ret < 0) {
+    return ret;
+  }
+
+  rados::cls::otp::otp_check_t check;
+  ret = rados::cls::otp::OTP::check(cct, mfa_ref.pool.ioctx(), mfa_ref.obj.oid, otp_id, pin, &check);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ldpp_dout(dpp, 20) << "OTP check, otp_id=" << otp_id << " result=" << static_cast<int>(check.result) << dendl;
+
+  if (check.result != rados::cls::otp::OTP_CHECK_SUCCESS) {
+    return -EACCES;
+  }
+  return 0;
+}
+
+/// Add version-tracking and mtime steps to the write operation `op`.
+///
+/// Works on a local copy of `*objv_tracker` (or a default tracker when the
+/// pointer is null); the caller's tracker is not modified. If the copy has no
+/// write version tag, one is derived: a fresh version when there is no read
+/// version either, otherwise the read version with `ver` incremented.
+void RGWSI_Cls::MFA::prepare_mfa_write(librados::ObjectWriteOperation *op,
+                                       RGWObjVersionTracker *objv_tracker,
+                                       const ceph::real_time& mtime)
+{
+  RGWObjVersionTracker tracker;
+  if (objv_tracker != nullptr) {
+    tracker = *objv_tracker;
+  }
+
+  const bool needs_write_version = tracker.write_version.tag.empty();
+  if (needs_write_version && tracker.read_version.tag.empty()) {
+    tracker.generate_new_write_ver(cct);
+  } else if (needs_write_version) {
+    tracker.write_version = tracker.read_version;
+    tracker.write_version.ver++;
+  }
+
+  tracker.prepare_op_for_write(op);
+
+  struct timespec ts = real_clock::to_timespec(mtime);
+  op->mtime2(&ts);
+}
+
+/// Create the OTP entry described by `config` on the OTP object of `user`.
+///
+/// The write carries the version/mtime steps from prepare_mfa_write().
+/// Returns 0 on success or a negative error code.
+int RGWSI_Cls::MFA::create_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const rados::cls::otp::otp_info_t& config,
+                               RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime, optional_yield y)
+{
+  std::optional<RGWSI_RADOS::Obj> mfa_obj;
+  int ret = get_mfa_obj(dpp, user, &mfa_obj);
+  if (ret < 0) {
+    return ret;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  prepare_mfa_write(&write_op, objv_tracker, mtime);
+  rados::cls::otp::OTP::create(&write_op, config);
+
+  ret = mfa_obj->operate(dpp, &write_op, y);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 20) << "OTP create, otp_id=" << config.id << " result=" << static_cast<int>(ret) << dendl;
+  return ret;
+}
+
+/// Remove the OTP entry `id` from the OTP object of `user`.
+///
+/// The write carries the version/mtime steps from prepare_mfa_write().
+/// Returns 0 on success or a negative error code.
+int RGWSI_Cls::MFA::remove_mfa(const DoutPrefixProvider *dpp,
+                               const rgw_user& user, const string& id,
+                               RGWObjVersionTracker *objv_tracker,
+                               const ceph::real_time& mtime,
+                               optional_yield y)
+{
+  std::optional<RGWSI_RADOS::Obj> mfa_obj;
+  int ret = get_mfa_obj(dpp, user, &mfa_obj);
+  if (ret < 0) {
+    return ret;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  prepare_mfa_write(&write_op, objv_tracker, mtime);
+  rados::cls::otp::OTP::remove(&write_op, id);
+
+  ret = mfa_obj->operate(dpp, &write_op, y);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 20) << "OTP remove, otp_id=" << id << " result=" << static_cast<int>(ret) << dendl;
+  return ret;
+}
+
+/// Fetch the OTP entry `id` of `user` into `*result`.
+/// Returns 0 on success or a negative error code. `y` is not used here.
+int RGWSI_Cls::MFA::get_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result,
+                            optional_yield y)
+{
+  rgw_rados_ref mfa_ref;
+  if (const int open_ret = get_mfa_ref(dpp, user, &mfa_ref); open_ret < 0) {
+    return open_ret;
+  }
+
+  const int get_ret = rados::cls::otp::OTP::get(nullptr, mfa_ref.pool.ioctx(), mfa_ref.obj.oid, id, result);
+  return get_ret < 0 ? get_ret : 0;
+}
+
+/// Fetch every OTP entry stored for `user` into `*result`.
+/// Returns 0 on success or a negative error code. `y` is not used here.
+int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, list<rados::cls::otp::otp_info_t> *result,
+                             optional_yield y)
+{
+  rgw_rados_ref mfa_ref;
+  if (const int open_ret = get_mfa_ref(dpp, user, &mfa_ref); open_ret < 0) {
+    return open_ret;
+  }
+
+  const int list_ret = rados::cls::otp::OTP::get_all(nullptr, mfa_ref.pool.ioctx(), mfa_ref.obj.oid, result);
+  return list_ret < 0 ? list_ret : 0;
+}
+
+/// Query the current time through the OTP class on the OTP object of `user`
+/// and store it in `*result`.
+/// Returns 0 on success or a negative error code. `y` is not used here.
+int RGWSI_Cls::MFA::otp_get_current_time(const DoutPrefixProvider *dpp, const rgw_user& user, ceph::real_time *result,
+                                         optional_yield y)
+{
+  rgw_rados_ref mfa_ref;
+  if (const int open_ret = get_mfa_ref(dpp, user, &mfa_ref); open_ret < 0) {
+    return open_ret;
+  }
+
+  const int time_ret = rados::cls::otp::OTP::get_current_time(mfa_ref.pool.ioctx(), mfa_ref.obj.oid, result);
+  return time_ret < 0 ? time_ret : 0;
+}
+
+/// Write `entries` to the OTP object `oid` in the zone's `otp_pool`.
+///
+/// When `reset_obj` is set, the operation first removes the object (with
+/// LIBRADOS_OP_FLAG_FAILOK on that step) and re-creates it non-exclusively
+/// before the entries are set. Returns 0 on success or a negative error code.
+int RGWSI_Cls::MFA::set_mfa(const DoutPrefixProvider *dpp, const string& oid, const list<rados::cls::otp::otp_info_t>& entries,
+                            bool reset_obj, RGWObjVersionTracker *objv_tracker,
+                            const real_time& mtime,
+                            optional_yield y)
+{
+  rgw_raw_obj raw_obj(zone_svc->get_zone_params().otp_pool, oid);
+  auto handle = rados_svc->obj(raw_obj);
+
+  int ret = handle.open(dpp);
+  if (ret < 0) {
+    ldpp_dout(dpp, 4) << "failed to open rados context for " << raw_obj << dendl;
+    return ret;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  if (reset_obj) {
+    // step order matters: remove (failure tolerated), then create
+    write_op.remove();
+    write_op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK);
+    write_op.create(false);
+  }
+  prepare_mfa_write(&write_op, objv_tracker, mtime);
+  rados::cls::otp::OTP::set(&write_op, entries);
+
+  ret = handle.operate(dpp, &write_op, y);
+  if (ret >= 0) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 20) << "OTP set entries.size()=" << entries.size() << " result=" << static_cast<int>(ret) << dendl;
+  return ret;
+}
+
+/// Read every OTP entry from the object `oid` in the zone's `otp_pool`.
+///
+/// @param result        receives the entries
+/// @param objv_tracker  optional; when non-null its read-version steps are
+///                      added to the read operation
+/// @param pmtime        optional; when non-null receives the object's mtime
+/// @return 0 on success or a negative error code
+///
+/// `y` is not used here.
+int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const string& oid, list<rados::cls::otp::otp_info_t> *result,
+                             RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime,
+                             optional_yield y)
+{
+  rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid);
+  auto obj = rados_svc->obj(o);
+  int r = obj.open(dpp);
+  if (r < 0) {
+    ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl;
+    return r;
+  }
+  auto& ref = obj.get_ref();
+  librados::ObjectReadOperation op;
+  // value-initialised so it never holds an indeterminate value
+  struct timespec mtime_ts{};
+  if (pmtime) {
+    op.stat2(nullptr, &mtime_ts, nullptr);
+  }
+  // the tracker is optional, as in prepare_mfa_write(); do not dereference
+  // a null pointer
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_read(&op);
+  }
+  r = rados::cls::otp::OTP::get_all(&op, ref.pool.ioctx(), ref.obj.oid, result);
+  if (r < 0) {
+    return r;
+  }
+  if (pmtime) {
+    *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+  }
+
+  return 0;
+}
+
+/// Fill `entry` from the given timestamp, section, key and payload by
+/// delegating to cls_log_add_prepare_entry().
+void RGWSI_Cls::TimeLog::prepare_entry(cls_log_entry& entry,
+                                       const real_time& ut,
+                                       const string& section,
+                                       const string& key,
+                                       bufferlist& bl)
+{
+  const utime_t timestamp(ut);
+  cls_log_add_prepare_entry(entry, timestamp, section, key, bl);
+}
+
+/// Point `obj` at the object `oid` in the zone's `log_pool` and open it.
+/// Returns the result of open().
+int RGWSI_Cls::TimeLog::init_obj(const DoutPrefixProvider *dpp, const string& oid, RGWSI_RADOS::Obj& obj)
+{
+  rgw_raw_obj raw_obj(zone_svc->get_zone_params().log_pool, oid);
+  obj = rados_svc->obj(raw_obj);
+
+  return obj.open(dpp);
+}
+/// Append a single entry (timestamp, section, key, payload) to the log
+/// object `oid`. Returns the result of the write, or the negative error from
+/// opening the object.
+int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp,
+                            const string& oid,
+                            const real_time& ut,
+                            const string& section,
+                            const string& key,
+                            bufferlist& bl,
+                            optional_yield y)
+{
+  RGWSI_RADOS::Obj log_obj;
+  if (const int ret = init_obj(dpp, oid, log_obj); ret < 0) {
+    return ret;
+  }
+
+  utime_t stamp(ut);
+  librados::ObjectWriteOperation write_op;
+  cls_log_add(write_op, stamp, section, key, bl);
+
+  return log_obj.operate(dpp, &write_op, y);
+}
+
+/// Append `entries` to the log object `oid`.
+///
+/// With a non-null `completion` the write is submitted via aio_operate();
+/// otherwise it is issued through operate() with `y`. Returns the result of
+/// whichever call was made, or the negative error from opening the object.
+int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp,
+                            const string& oid,
+                            std::list<cls_log_entry>& entries,
+                            librados::AioCompletion *completion,
+                            bool monotonic_inc,
+                            optional_yield y)
+{
+  RGWSI_RADOS::Obj log_obj;
+  if (const int ret = init_obj(dpp, oid, log_obj); ret < 0) {
+    return ret;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  cls_log_add(write_op, entries, monotonic_inc);
+
+  if (completion) {
+    return log_obj.aio_operate(completion, &write_op);
+  }
+  return log_obj.operate(dpp, &write_op, y);
+}
+
+/// List entries of the log object `oid` between `start_time` and `end_time`,
+/// starting from `marker`, requesting at most `max_entries`.
+///
+/// Results are delivered through `entries`, `out_marker` and `truncated`.
+/// Returns 0 on success or a negative error code.
+int RGWSI_Cls::TimeLog::list(const DoutPrefixProvider *dpp,
+                             const string& oid,
+                             const real_time& start_time,
+                             const real_time& end_time,
+                             int max_entries, std::list<cls_log_entry>& entries,
+                             const string& marker,
+                             string *out_marker,
+                             bool *truncated,
+                             optional_yield y)
+{
+  RGWSI_RADOS::Obj log_obj;
+  if (const int ret = init_obj(dpp, oid, log_obj); ret < 0) {
+    return ret;
+  }
+
+  utime_t from(start_time);
+  utime_t to(end_time);
+
+  librados::ObjectReadOperation read_op;
+  cls_log_list(read_op, from, to, marker, max_entries, entries,
+               out_marker, truncated);
+
+  bufferlist out_bl;
+  const int ret = log_obj.operate(dpp, &read_op, &out_bl, y);
+  return ret < 0 ? ret : 0;
+}
+
+/// Read the log header of the object `oid` into `*header`.
+/// Returns 0 on success or a negative error code.
+int RGWSI_Cls::TimeLog::info(const DoutPrefixProvider *dpp,
+                             const string& oid,
+                             cls_log_header *header,
+                             optional_yield y)
+{
+  RGWSI_RADOS::Obj log_obj;
+  if (const int ret = init_obj(dpp, oid, log_obj); ret < 0) {
+    return ret;
+  }
+
+  librados::ObjectReadOperation read_op;
+  cls_log_info(read_op, header);
+
+  bufferlist out_bl;
+  const int ret = log_obj.operate(dpp, &read_op, &out_bl, y);
+  return ret < 0 ? ret : 0;
+}
+
+/// Asynchronous variant of info(): opens `oid` into the caller-supplied
+/// `obj` and submits the header read with `completion`.
+/// Returns 0 once the request is submitted, or a negative error code.
+int RGWSI_Cls::TimeLog::info_async(const DoutPrefixProvider *dpp,
+                                   RGWSI_RADOS::Obj& obj,
+                                   const string& oid,
+                                   cls_log_header *header,
+                                   librados::AioCompletion *completion)
+{
+  if (const int ret = init_obj(dpp, oid, obj); ret < 0) {
+    return ret;
+  }
+
+  librados::ObjectReadOperation read_op;
+  cls_log_info(read_op, header);
+
+  const int ret = obj.aio_operate(completion, &read_op, nullptr);
+  return ret < 0 ? ret : 0;
+}
+
+/// Trim the log object `oid` using the given time range and marker range.
+///
+/// With a non-null `completion` the write is submitted via aio_operate();
+/// otherwise it is issued through operate() with `y`. Returns the result of
+/// whichever call was made, or the negative error from opening the object.
+int RGWSI_Cls::TimeLog::trim(const DoutPrefixProvider *dpp,
+                             const string& oid,
+                             const real_time& start_time,
+                             const real_time& end_time,
+                             const string& from_marker,
+                             const string& to_marker,
+                             librados::AioCompletion *completion,
+                             optional_yield y)
+{
+  RGWSI_RADOS::Obj log_obj;
+  if (const int ret = init_obj(dpp, oid, log_obj); ret < 0) {
+    return ret;
+  }
+
+  utime_t from(start_time);
+  utime_t to(end_time);
+
+  librados::ObjectWriteOperation write_op;
+  cls_log_trim(write_op, from, to, from_marker, to_marker);
+
+  if (completion) {
+    return log_obj.aio_operate(completion, &write_op);
+  }
+  return log_obj.operate(dpp, &write_op, y);
+}
+
+/// Take an exclusive, renewable cls lock on object `oid` in `pool`.
+///
+/// @param duration   lock duration; converted to whole milliseconds
+/// @param zone_id    used as the lock tag
+/// @param owner_id   used as the lock cookie
+/// @param lock_name  lock name; defaults to `log_lock_name` when not given
+/// @return result of the cls lock call, or the negative error from opening
+///         the pool
+int RGWSI_Cls::Lock::lock_exclusive(const DoutPrefixProvider *dpp,
+                                    const rgw_pool& pool,
+                                    const string& oid,
+                                    timespan& duration,
+                                    string& zone_id,
+                                    string& owner_id,
+                                    std::optional<string> lock_name)
+{
+  auto p = rados_svc->pool(pool);
+  int r = p.open(dpp);
+  if (r < 0) {
+    return r;
+  }
+
+  uint64_t msec = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
+  // utime_t takes (seconds, nanoseconds): scale the millisecond remainder to
+  // nanoseconds so the sub-second part of the duration is not lost
+  utime_t ut(msec / 1000, (msec % 1000) * 1000000);
+
+  rados::cls::lock::Lock l(lock_name.value_or(log_lock_name));
+  l.set_duration(ut);
+  l.set_cookie(owner_id);
+  l.set_tag(zone_id);
+  l.set_may_renew(true);
+
+  return l.lock_exclusive(&p.ioctx(), oid);
+}
+
+/// Release the cls lock on object `oid` in `pool`.
+///
+/// `zone_id` is used as the lock tag and `owner_id` as the cookie; the lock
+/// name defaults to `log_lock_name` when `lock_name` is not given.
+/// Returns the result of the cls unlock call, or the negative error from
+/// opening the pool.
+int RGWSI_Cls::Lock::unlock(const DoutPrefixProvider *dpp,
+                            const rgw_pool& pool,
+                            const string& oid,
+                            string& zone_id,
+                            string& owner_id,
+                            std::optional<string> lock_name)
+{
+  auto pool_handle = rados_svc->pool(pool);
+  if (const int ret = pool_handle.open(dpp); ret < 0) {
+    return ret;
+  }
+
+  rados::cls::lock::Lock cls_lock(lock_name.value_or(log_lock_name));
+  cls_lock.set_tag(zone_id);
+  cls_lock.set_cookie(owner_id);
+
+  return cls_lock.unlock(&pool_handle.ioctx(), oid);
+}
+
diff --git a/src/rgw/services/svc_cls.h b/src/rgw/services/svc_cls.h
new file mode 100644
index 000000000..d1d1d659b
--- /dev/null
+++ b/src/rgw/services/svc_cls.h
@@ -0,0 +1,166 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "cls/otp/cls_otp_types.h"
+#include "cls/log/cls_log_types.h"
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+
+
+/// Service exposing three cls-backed sub-services as public members:
+/// `mfa` (OTP objects), `timelog` (cls_log objects) and `lock` (cls_lock).
+/// init() must be called to hand the zone and rados services to all of them.
+class RGWSI_Cls : public RGWServiceInstance
+{
+  RGWSI_Zone *zone_svc{nullptr};
+  RGWSI_RADOS *rados_svc{nullptr};
+
+  /// Common base of the sub-services: stores the owning RGWSI_Cls and the
+  /// zone/rados services they operate through.
+  class ClsSubService : public RGWServiceInstance {
+    friend class RGWSI_Cls;
+
+    RGWSI_Cls *cls_svc{nullptr};
+    RGWSI_Zone *zone_svc{nullptr};
+    RGWSI_RADOS *rados_svc{nullptr};
+
+    /// Store the service pointers handed in by RGWSI_Cls::init().
+    void init(RGWSI_Cls *_cls_svc, RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) {
+      cls_svc = _cls_svc;
+      zone_svc = _zone_svc;
+      rados_svc = _rados_svc;
+    }
+
+  public:
+    ClsSubService(CephContext *cct) : RGWServiceInstance(cct) {}
+  };
+
+public:
+  /// OTP (multi-factor auth) operations on objects in the zone's otp_pool.
+  class MFA : public ClsSubService {
+    int get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj);
+    int get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref);
+
+    void prepare_mfa_write(librados::ObjectWriteOperation *op,
+                           RGWObjVersionTracker *objv_tracker,
+                           const ceph::real_time& mtime);
+
+  public:
+    MFA(CephContext *cct): ClsSubService(cct) {}
+
+    /// oid of the OTP object of `user`: "user:" followed by user.to_str()
+    std::string get_mfa_oid(const rgw_user& user) {
+      return std::string("user:") + user.to_str();
+    }
+
+    int check_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const std::string& otp_id, const std::string& pin, optional_yield y);
+    int create_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const rados::cls::otp::otp_info_t& config,
+                   RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime, optional_yield y);
+    int remove_mfa(const DoutPrefixProvider *dpp,
+                   const rgw_user& user, const std::string& id,
+                   RGWObjVersionTracker *objv_tracker,
+                   const ceph::real_time& mtime,
+                   optional_yield y);
+    int get_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const std::string& id, rados::cls::otp::otp_info_t *result, optional_yield y);
+    int list_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, std::list<rados::cls::otp::otp_info_t> *result, optional_yield y);
+    int otp_get_current_time(const DoutPrefixProvider *dpp, const rgw_user& user, ceph::real_time *result, optional_yield y);
+    int set_mfa(const DoutPrefixProvider *dpp, const std::string& oid, const std::list<rados::cls::otp::otp_info_t>& entries,
+                bool reset_obj, RGWObjVersionTracker *objv_tracker,
+                const real_time& mtime, optional_yield y);
+    int list_mfa(const DoutPrefixProvider *dpp, const std::string& oid, std::list<rados::cls::otp::otp_info_t> *result,
+                 RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime, optional_yield y);
+  } mfa;
+
+  /// cls_log operations on objects in the zone's log_pool.
+  class TimeLog : public ClsSubService {
+    int init_obj(const DoutPrefixProvider *dpp, const std::string& oid, RGWSI_RADOS::Obj& obj);
+  public:
+    TimeLog(CephContext *cct): ClsSubService(cct) {}
+
+    void prepare_entry(cls_log_entry& entry,
+                       const real_time& ut,
+                       const std::string& section,
+                       const std::string& key,
+                       bufferlist& bl);
+    int add(const DoutPrefixProvider *dpp,
+            const std::string& oid,
+            const real_time& ut,
+            const std::string& section,
+            const std::string& key,
+            bufferlist& bl,
+            optional_yield y);
+    int add(const DoutPrefixProvider *dpp,
+            const std::string& oid,
+            std::list<cls_log_entry>& entries,
+            librados::AioCompletion *completion,
+            bool monotonic_inc,
+            optional_yield y);
+    int list(const DoutPrefixProvider *dpp,
+             const std::string& oid,
+             const real_time& start_time,
+             const real_time& end_time,
+             int max_entries, std::list<cls_log_entry>& entries,
+             const std::string& marker,
+             std::string *out_marker,
+             bool *truncated,
+             optional_yield y);
+    int info(const DoutPrefixProvider *dpp,
+             const std::string& oid,
+             cls_log_header *header,
+             optional_yield y);
+    int info_async(const DoutPrefixProvider *dpp,
+                   RGWSI_RADOS::Obj& obj,
+                   const std::string& oid,
+                   cls_log_header *header,
+                   librados::AioCompletion *completion);
+    int trim(const DoutPrefixProvider *dpp,
+             const std::string& oid,
+             const real_time& start_time,
+             const real_time& end_time,
+             const std::string& from_marker,
+             const std::string& to_marker,
+             librados::AioCompletion *completion,
+             optional_yield y);
+  } timelog;
+
+  /// cls_lock operations on an object in a caller-supplied pool.
+  class Lock : public ClsSubService {
+  public:
+    Lock(CephContext *cct): ClsSubService(cct) {}
+    int lock_exclusive(const DoutPrefixProvider *dpp,
+                       const rgw_pool& pool,
+                       const std::string& oid,
+                       timespan& duration,
+                       std::string& zone_id,
+                       std::string& owner_id,
+                       std::optional<std::string> lock_name = std::nullopt);
+    int unlock(const DoutPrefixProvider *dpp,
+               const rgw_pool& pool,
+               const std::string& oid,
+               std::string& zone_id,
+               std::string& owner_id,
+               std::optional<std::string> lock_name = std::nullopt);
+  } lock;
+
+  RGWSI_Cls(CephContext *cct): RGWServiceInstance(cct), mfa(cct), timelog(cct), lock(cct) {}
+
+  /// Record the zone and rados services and pass them on to every
+  /// sub-service.
+  void init(RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) {
+    rados_svc = _rados_svc;
+    zone_svc = _zone_svc;
+
+    mfa.init(this, zone_svc, rados_svc);
+    timelog.init(this, zone_svc, rados_svc);
+    lock.init(this, zone_svc, rados_svc);
+  }
+
+  int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+};
+
diff --git a/src/rgw/services/svc_config_key.h b/src/rgw/services/svc_config_key.h
new file mode 100644
index 000000000..1c068b795
--- /dev/null
+++ b/src/rgw/services/svc_config_key.h
@@ -0,0 +1,31 @@
+
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+/// Abstract service interface for fetching a value by config key.
+class RGWSI_ConfigKey : public RGWServiceInstance
+{
+public:
+ RGWSI_ConfigKey(CephContext *cct) : RGWServiceInstance(cct) {}
+ virtual ~RGWSI_ConfigKey() {}
+
+ /// Fetch the value stored under `key` into `*result`.
+ /// The meaning of `secure` is defined by the implementation.
+ virtual int get(const std::string& key, bool secure, bufferlist *result) = 0;
+};
+
diff --git a/src/rgw/services/svc_config_key_rados.cc b/src/rgw/services/svc_config_key_rados.cc
new file mode 100644
index 000000000..5edb02ea7
--- /dev/null
+++ b/src/rgw/services/svc_config_key_rados.cc
@@ -0,0 +1,50 @@
+
+#include "svc_rados.h"
+#include "svc_config_key_rados.h"
+
+using namespace std;
+
+// out-of-line destructor; nothing to release explicitly
+RGWSI_ConfigKey_RADOS::~RGWSI_ConfigKey_RADOS() = default;
+
+/// Record whether the monitor connection may be insecure, as reported by
+/// the rados service. Always returns 0.
+int RGWSI_ConfigKey_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+  const bool secure_conn = svc.rados->check_secure_mon_conn(dpp);
+  maybe_insecure_mon_conn = !secure_conn;
+
+  return 0;
+}
+
+void RGWSI_ConfigKey_RADOS::warn_if_insecure()
+{
+ if (!maybe_insecure_mon_conn ||
+ warned_insecure.test_and_set()) {
+ return;
+ }
+
+ string s = "rgw is configured to optionally allow insecure connections to the monitors (auth_supported, ms_mon_client_mode), ssl certificates stored at the monitor configuration could leak";
+
+ svc.rados->clog_warn(s);
+
+ lderr(ctx()) << __func__ << "(): WARNING: " << s << dendl;
+}
+
+/// Fetch `key` with a "config-key get" monitor command; the command output
+/// is stored in `*result`.
+///
+/// When `secure` is set and the command succeeded, warn_if_insecure() is
+/// invoked. Returns 0 on success or the negative error of the mon command.
+///
+/// NOTE(review): `key` is spliced into the JSON command text without any
+/// escaping -- confirm callers never pass keys containing quotes.
+int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, bufferlist *result)
+{
+  string cmd = "{\"prefix\": \"config-key get\", \"key\": \"" + key + "\"}";
+
+  bufferlist inbl;
+  auto rados_handle = svc.rados->handle();
+  const int ret = rados_handle.mon_command(cmd, inbl, result, nullptr);
+  if (ret < 0) {
+    return ret;
+  }
+
+  if (secure) {
+    warn_if_insecure();
+  }
+  return 0;
+}
diff --git a/src/rgw/services/svc_config_key_rados.h b/src/rgw/services/svc_config_key_rados.h
new file mode 100644
index 000000000..b3b995ac7
--- /dev/null
+++ b/src/rgw/services/svc_config_key_rados.h
@@ -0,0 +1,54 @@
+
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include <atomic>
+
+#include "rgw_service.h"
+
+#include "svc_config_key.h"
+
+class RGWSI_RADOS;
+
+/// RGWSI_ConfigKey implementation that goes through the rados service.
+class RGWSI_ConfigKey_RADOS : public RGWSI_ConfigKey
+{
+ bool maybe_insecure_mon_conn{false}; // assigned in do_start()
+ std::atomic_flag warned_insecure = ATOMIC_FLAG_INIT; // tested and set by warn_if_insecure()
+
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+ void warn_if_insecure();
+
+public:
+ // services this one depends on
+ struct Svc {
+ RGWSI_RADOS *rados{nullptr};
+ } svc;
+
+ // store the rados service pointer in `svc`
+ void init(RGWSI_RADOS *rados_svc) {
+ svc.rados = rados_svc;
+ }
+
+ RGWSI_ConfigKey_RADOS(CephContext *cct) : RGWSI_ConfigKey(cct) {}
+
+ virtual ~RGWSI_ConfigKey_RADOS() override;
+
+ int get(const std::string& key, bool secure, bufferlist *result) override;
+};
+
+
diff --git a/src/rgw/services/svc_finisher.cc b/src/rgw/services/svc_finisher.cc
new file mode 100644
index 000000000..4883c7c50
--- /dev/null
+++ b/src/rgw/services/svc_finisher.cc
@@ -0,0 +1,58 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "common/Finisher.h"
+
+#include "svc_finisher.h"
+
+using namespace std;
+
+/// Allocate the Finisher and start it. Always returns 0.
+/// The object is deleted in shutdown().
+int RGWSI_Finisher::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+ finisher = new Finisher(cct);
+ finisher->start();
+
+ return 0;
+}
+
+/// Stop the finisher, invoke every registered shutdown callback, and destroy
+/// the finisher. Later calls are no-ops because of the `finalized` flag.
+void RGWSI_Finisher::shutdown()
+{
+  if (finalized) {
+    return;
+  }
+
+  if (finisher) {
+    finisher->stop();
+
+    map<int, ShutdownCB *> cbs;
+    cbs.swap(shutdown_cbs); /* move cbs out, in case caller unregisters */
+    for (auto& iter : cbs) {
+      iter.second->call();
+    }
+    delete finisher;
+    // do not leave a dangling pointer to the destroyed Finisher
+    finisher = nullptr;
+  }
+
+  finalized = true;
+}
+
+/// Runs shutdown(), which returns immediately if it has already completed.
+RGWSI_Finisher::~RGWSI_Finisher()
+{
+ shutdown();
+}
+
+/// Register `cb` to be called during shutdown(). The handle written to
+/// `*phandle` is the incremented `handles_counter` value and is the key
+/// accepted by unregister_caller().
+void RGWSI_Finisher::register_caller(ShutdownCB *cb, int *phandle)
+{
+  const int handle = ++handles_counter;
+  *phandle = handle;
+  shutdown_cbs[handle] = cb;
+}
+
+/// Drop the shutdown callback registered under `handle`, if any.
+void RGWSI_Finisher::unregister_caller(int handle)
+{
+ shutdown_cbs.erase(handle);
+}
+
+/// Queue `c` on the finisher.
+/// `finisher` is dereferenced without a null check here.
+void RGWSI_Finisher::schedule_context(Context *c)
+{
+ finisher->queue(c);
+}
+
diff --git a/src/rgw/services/svc_finisher.h b/src/rgw/services/svc_finisher.h
new file mode 100644
index 000000000..911b48f2b
--- /dev/null
+++ b/src/rgw/services/svc_finisher.h
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+class Context;
+class Finisher;
+
+/// Service wrapping a Finisher and a registry of callbacks that are invoked
+/// when the service shuts down.
+class RGWSI_Finisher : public RGWServiceInstance
+{
+ friend struct RGWServices_Def;
+public:
+ class ShutdownCB;
+
+private:
+ Finisher *finisher{nullptr}; // created in do_start(), deleted in shutdown()
+ bool finalized{false}; // set once shutdown() has completed
+
+ void shutdown() override;
+
+ std::map<int, ShutdownCB *> shutdown_cbs; // handle -> callback
+ std::atomic<int> handles_counter{0}; // incremented to produce each new handle
+
+protected:
+ void init() {}
+ int do_start(optional_yield y, const DoutPrefixProvider *dpp) override;
+
+public:
+ RGWSI_Finisher(CephContext *cct): RGWServiceInstance(cct) {}
+ ~RGWSI_Finisher();
+
+ /// Callback interface invoked from shutdown().
+ class ShutdownCB {
+ public:
+ virtual ~ShutdownCB() {}
+ virtual void call() = 0;
+ };
+
+ /// Register `cb`; the handle for unregister_caller() is written to `*phandle`.
+ void register_caller(ShutdownCB *cb, int *phandle);
+ void unregister_caller(int handle);
+
+ /// Queue `c` on the finisher.
+ void schedule_context(Context *c);
+};
diff --git a/src/rgw/services/svc_mdlog.cc b/src/rgw/services/svc_mdlog.cc
new file mode 100644
index 000000000..09a68d3d7
--- /dev/null
+++ b/src/rgw/services/svc_mdlog.cc
@@ -0,0 +1,549 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_mdlog.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+#include "svc_sys_obj.h"
+
+#include "rgw_tools.h"
+#include "rgw_mdlog.h"
+#include "rgw_coroutine.h"
+#include "rgw_cr_rados.h"
+#include "rgw_zone.h"
+
+#include "common/errno.h"
+
+#include <boost/asio/yield.hpp>
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+using Svc = RGWSI_MDLog::Svc;
+using Cursor = RGWPeriodHistory::Cursor;
+
+/// `_run_sync` is stored in `run_sync`, which do_start() consults before
+/// initializing the oldest log period.
+RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync)
+  : RGWServiceInstance(cct),
+    run_sync(_run_sync)
+{
+}
+
+// out-of-line destructor; members clean up after themselves
+RGWSI_MDLog::~RGWSI_MDLog() = default;
+
+/// Record the services this one depends on in `svc`; `svc.mdlog` points back
+/// at this instance. Always returns 0.
+int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc)
+{
+  svc.rados = _rados_svc;
+  svc.zone = _zone_svc;
+  svc.sysobj = _sysobj_svc;
+  svc.cls = _cls_svc;
+  svc.mdlog = this;
+
+  return 0;
+}
+
+/// Set up the log for the zone's current period, the period puller and the
+/// period history. When sync is enabled and the zone needs to sync, also
+/// initialize the oldest log period. Always returns 0.
+int RGWSI_MDLog::do_start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  auto& period = svc.zone->get_current_period();
+
+  current_log = get_log(period.get_id());
+
+  period_puller.reset(new RGWPeriodPuller(svc.zone, svc.sysobj));
+  period_history.reset(new RGWPeriodHistory(cct, period_puller.get(),
+                                            period));
+
+  const bool sync_wanted = run_sync && svc.zone->need_to_sync();
+  if (sync_wanted) {
+    // initialize the log period history; the result is not checked here
+    svc.mdlog->init_oldest_log_period(y, dpp);
+  }
+  return 0;
+}
+
+/// Read and decode the mdlog history object from the zone's log_pool.
+///
+/// Returns 0 on success, the negative read error, -EIO when decoding fails,
+/// or -ENOENT when the object was empty (in which case it is removed first;
+/// a failure to remove it is returned instead).
+int RGWSI_MDLog::read_history(RGWMetadataLogHistory *state,
+                              RGWObjVersionTracker *objv_tracker,
+                              optional_yield y,
+                              const DoutPrefixProvider *dpp) const
+{
+  auto& pool = svc.zone->get_zone_params().log_pool;
+  const auto& oid = RGWMetadataLogHistory::oid;
+
+  bufferlist bl;
+  int ret = rgw_get_system_obj(svc.sysobj, pool, oid, bl, objv_tracker, nullptr, y, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  if (bl.length() != 0) {
+    try {
+      auto it = bl.cbegin();
+      state->decode(it);
+    } catch (buffer::error& e) {
+      ldpp_dout(dpp, 1) << "failed to decode the mdlog history: "
+                        << e.what() << dendl;
+      return -EIO;
+    }
+    return 0;
+  }
+
+  /* bad history object, remove it */
+  rgw_raw_obj raw_obj(pool, oid);
+  auto sysobj = svc.sysobj->get_obj(raw_obj);
+  ret = sysobj.wop().remove(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl;
+    return ret;
+  }
+  return -ENOENT;
+}
+
+/// Encode `state` and write it to the mdlog history object in the zone's
+/// log_pool, passing `exclusive` and `objv_tracker` through to the put.
+/// Returns the result of rgw_put_system_obj().
+int RGWSI_MDLog::write_history(const DoutPrefixProvider *dpp,
+                               const RGWMetadataLogHistory& state,
+                               RGWObjVersionTracker *objv_tracker,
+                               optional_yield y, bool exclusive)
+{
+  bufferlist encoded;
+  state.encode(encoded);
+
+  const auto& history_oid = RGWMetadataLogHistory::oid;
+  auto& log_pool = svc.zone->get_zone_params().log_pool;
+
+  return rgw_put_system_obj(dpp, svc.sysobj, log_pool, history_oid, encoded,
+                            exclusive, objv_tracker, real_time{}, y);
+}
+
+namespace mdlog {
+
+using Cursor = RGWPeriodHistory::Cursor;
+
+namespace {
+/// Coroutine that reads a system object through the async rados processor
+/// and decodes it into `*result`.
+///
+/// An empty buffer decodes to a default-constructed T. When
+/// `empty_on_enoent` is set, -ENOENT is treated the same way. After a
+/// successful read/decode, handle_data() is called with the result.
+template <class T>
+class SysObjReadCR : public RGWSimpleCoroutine {
+  const DoutPrefixProvider *dpp;
+  RGWAsyncRadosProcessor *async_rados;
+  RGWSI_SysObj *svc;
+
+  rgw_raw_obj obj;
+  T *result;
+  /// on ENOENT, call handle_data() with an empty object instead of failing
+  const bool empty_on_enoent;
+  RGWObjVersionTracker *objv_tracker;
+  RGWAsyncGetSystemObj *req{nullptr};
+
+public:
+  SysObjReadCR(const DoutPrefixProvider *_dpp,
+               RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc,
+               const rgw_raw_obj& _obj,
+               T *_result, bool empty_on_enoent = true,
+               RGWObjVersionTracker *objv_tracker = nullptr)
+    : RGWSimpleCoroutine(_svc->ctx()), dpp(_dpp), async_rados(_async_rados), svc(_svc),
+      obj(_obj), result(_result),
+      empty_on_enoent(empty_on_enoent), objv_tracker(objv_tracker) {}
+
+  ~SysObjReadCR() override {
+    try {
+      request_cleanup();
+    } catch (const boost::container::length_error_t& e) {
+      ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+        ": reference counted object mismatched, \"" << e.what() <<
+        "\"" << dendl;
+    }
+  }
+
+  /// Finish the outstanding request, if any, and forget it.
+  void request_cleanup() override {
+    if (req) {
+      req->finish();
+      req = nullptr;
+    }
+  }
+
+  /// Create the async read request and queue it. Always returns 0.
+  int send_request(const DoutPrefixProvider *dpp) override {
+    req = new RGWAsyncGetSystemObj(dpp, this, stack->create_completion_notifier(), svc,
+                                   objv_tracker, obj, false, false);
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// Collect the request status, decode the payload and hand it to
+  /// handle_data(). Returns a negative error on read or decode failure.
+  int request_complete() override {
+    int ret = req->get_ret_status();
+    retcode = ret;
+    if (ret == -ENOENT && empty_on_enoent) {
+      *result = T();
+    } else {
+      if (ret < 0) {
+        return ret;
+      }
+      if (objv_tracker) { // copy the updated version
+        *objv_tracker = req->objv_tracker;
+      }
+      try {
+        auto iter = req->bl.cbegin();
+        if (iter.end()) {
+          // allow successful reads with empty buffers. ReadSyncStatus
+          // coroutines depend on this to be able to read without
+          // locking, because the cls lock from InitSyncStatus will
+          // create an empty object if it didn't exist
+          *result = T();
+        } else {
+          decode(*result, iter);
+        }
+      } catch (buffer::error& err) {
+        return -EIO;
+      }
+    }
+    return handle_data(*result);
+  }
+
+  /// Hook for subclasses; the default accepts the data and returns 0.
+  virtual int handle_data(T& data) {
+    return 0;
+  }
+};
+
+/// Coroutine that encodes a value of type T at construction time and writes
+/// it to a system object through the async rados processor.
+template <class T>
+class SysObjWriteCR : public RGWSimpleCoroutine {
+  const DoutPrefixProvider *dpp;
+  RGWAsyncRadosProcessor *async_rados;
+  RGWSI_SysObj *svc;
+  bufferlist bl;
+  rgw_raw_obj obj;
+  RGWObjVersionTracker *objv_tracker;
+  bool exclusive;
+  RGWAsyncPutSystemObj *req{nullptr};
+
+public:
+  SysObjWriteCR(const DoutPrefixProvider *_dpp,
+                RGWAsyncRadosProcessor *_async_rados, RGWSI_SysObj *_svc,
+                const rgw_raw_obj& _obj, const T& _data,
+                RGWObjVersionTracker *objv_tracker = nullptr,
+                bool exclusive = false)
+    : RGWSimpleCoroutine(_svc->ctx()),
+      dpp(_dpp),
+      async_rados(_async_rados),
+      svc(_svc),
+      obj(_obj),
+      objv_tracker(objv_tracker),
+      exclusive(exclusive)
+  {
+    // the payload is serialized once, up front
+    encode(_data, bl);
+  }
+
+  ~SysObjWriteCR() override {
+    try {
+      request_cleanup();
+    } catch (const boost::container::length_error_t& e) {
+      ldpp_dout(dpp, 0) << "ERROR: " << __func__ <<
+        ": reference counted object mismatched, \"" << e.what() <<
+        "\"" << dendl;
+    }
+  }
+
+  /// Finish the outstanding request, if any, and forget it.
+  void request_cleanup() override {
+    if (!req) {
+      return;
+    }
+    req->finish();
+    req = nullptr;
+  }
+
+  /// Create the async write request (moving the encoded payload into it)
+  /// and queue it. Always returns 0.
+  int send_request(const DoutPrefixProvider *dpp) override {
+    req = new RGWAsyncPutSystemObj(dpp, this, stack->create_completion_notifier(),
+                                   svc, objv_tracker, obj, exclusive, std::move(bl));
+    async_rados->queue(req);
+    return 0;
+  }
+
+  /// Copy back the updated version (when tracking) and return the request
+  /// status.
+  int request_complete() override {
+    if (objv_tracker != nullptr) {
+      *objv_tracker = req->objv_tracker;
+    }
+    return req->get_ret_status();
+  }
+};
+}
+
+/// read the mdlog history and use it to initialize the given cursor
+class ReadHistoryCR : public RGWCoroutine {
+ const DoutPrefixProvider *dpp;
+ Svc svc;
+ Cursor *cursor; // output: assigned from the period history lookup
+ RGWObjVersionTracker *objv_tracker; // passed through to the read coroutine
+ RGWMetadataLogHistory state; // receives the decoded history object
+ RGWAsyncRadosProcessor *async_processor;
+
+ public:
+ ReadHistoryCR(const DoutPrefixProvider *dpp,
+ const Svc& svc,
+ Cursor *cursor,
+ RGWObjVersionTracker *objv_tracker)
+ : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
+ cursor(cursor),
+ objv_tracker(objv_tracker),
+ async_processor(svc.rados->get_async_processor())
+ {}
+
+ // reenter/yield come from <boost/asio/yield.hpp>, included by this file
+ int operate(const DoutPrefixProvider *dpp) {
+ reenter(this) {
+ yield {
+ rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
+ RGWMetadataLogHistory::oid};
+ // a missing history object is reported as an error, not as empty
+ constexpr bool empty_on_enoent = false;
+
+ using ReadCR = SysObjReadCR<RGWMetadataLogHistory>;
+ call(new ReadCR(dpp, async_processor, svc.sysobj, obj,
+ &state, empty_on_enoent, objv_tracker));
+ }
+ if (retcode < 0) {
+ ldpp_dout(dpp, 1) << "failed to read mdlog history: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+ // map the stored realm epoch to a cursor in the period history
+ *cursor = svc.mdlog->period_history->lookup(state.oldest_realm_epoch);
+ if (!*cursor) {
+ return set_cr_error(cursor->get_error());
+ }
+
+ ldpp_dout(dpp, 10) << "read mdlog history with oldest period id="
+ << state.oldest_period_id << " realm_epoch="
+ << state.oldest_realm_epoch << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/// write the given cursor to the mdlog history
+class WriteHistoryCR : public RGWCoroutine {
+ const DoutPrefixProvider *dpp;
+ Svc svc;
+ Cursor cursor; // period to record as the oldest log period
+ RGWObjVersionTracker *objv; // passed through to the write coroutine
+ RGWMetadataLogHistory state; // filled from `cursor` before writing
+ RGWAsyncRadosProcessor *async_processor;
+
+ public:
+ WriteHistoryCR(const DoutPrefixProvider *dpp,
+ Svc& svc,
+ const Cursor& cursor,
+ RGWObjVersionTracker *objv)
+ : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
+ cursor(cursor), objv(objv),
+ async_processor(svc.rados->get_async_processor())
+ {}
+
+ // reenter/yield come from <boost/asio/yield.hpp>, included by this file
+ int operate(const DoutPrefixProvider *dpp) {
+ reenter(this) {
+ // capture the cursor's period id and epoch in the history record
+ state.oldest_period_id = cursor.get_period().get_id();
+ state.oldest_realm_epoch = cursor.get_epoch();
+
+ yield {
+ rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
+ RGWMetadataLogHistory::oid};
+
+ using WriteCR = SysObjWriteCR<RGWMetadataLogHistory>;
+ call(new WriteCR(dpp, async_processor, svc.sysobj, obj, state, objv));
+ }
+ if (retcode < 0) {
+ ldpp_dout(dpp, 1) << "failed to write mdlog history: "
+ << cpp_strerror(retcode) << dendl;
+ return set_cr_error(retcode);
+ }
+
+ ldpp_dout(dpp, 10) << "wrote mdlog history with oldest period id="
+ << state.oldest_period_id << " realm_epoch="
+ << state.oldest_realm_epoch << dendl;
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+/// update the mdlog history to reflect trimmed logs
+///
+/// Coroutine that reads the stored history (ReadHistoryCR), refuses the trim
+/// with -ECANCELED when the trimmed period's epoch is older than the stored
+/// one, and otherwise writes the period following the trimmed one as the new
+/// oldest log period (WriteHistoryCR). The same objv tracker is passed to the
+/// read and to the write.
+class TrimHistoryCR : public RGWCoroutine {
+ const DoutPrefixProvider *dpp;
+ Svc svc;
+ const Cursor cursor; //< cursor to trimmed period
+ RGWObjVersionTracker *objv; //< to prevent racing updates
+ Cursor next; //< target cursor for oldest log period
+ Cursor existing; //< existing cursor read from disk
+
+ public:
+ // next starts as a copy of cursor and is advanced by one period here.
+ TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv)
+ : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
+ cursor(cursor), objv(objv), next(cursor) {
+ next.next(); // advance past cursor
+ }
+
+ // Note: the dpp parameter shadows the dpp member inside this function.
+ int operate(const DoutPrefixProvider *dpp) {
+ reenter(this) {
+ // read an existing history, and write the new history if it's newer
+ yield call(new ReadHistoryCR(dpp, svc, &existing, objv));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ // reject older trims with ECANCELED
+ if (cursor.get_epoch() < existing.get_epoch()) {
+ ldpp_dout(dpp, 4) << "found oldest log epoch=" << existing.get_epoch()
+ << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
+ return set_cr_error(-ECANCELED);
+ }
+ // overwrite with updated history
+ yield call(new WriteHistoryCR(dpp, svc, next, objv));
+ if (retcode < 0) {
+ return set_cr_error(retcode);
+ }
+ return set_cr_done();
+ }
+ return 0;
+ }
+};
+
+} // mdlog namespace
+
+// Walk backwards from the current period towards the start of the period
+// history, pulling and inserting every predecessor that is not attached yet.
+// Returns a cursor to the first period, the oldest period reached when a pull
+// fails, or the (invalid) cursor produced by a failed insert.
+Cursor RGWSI_MDLog::find_oldest_period(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  auto cursor = period_history->get_current();
+
+  for (; cursor; cursor.prev()) {
+    if (cursor.has_prev()) {
+      // predecessor is already attached; the loop step moves back to it
+      continue;
+    }
+
+    const auto& predecessor_id = cursor.get_period().get_predecessor();
+    if (predecessor_id.empty()) {
+      // this is the first period, so our logs must start here
+      ldpp_dout(dpp, 10) << "find_oldest_period returning first "
+          "period " << cursor.get_period().get_id() << dendl;
+      return cursor;
+    }
+
+    // pull the predecessor; if that fails, settle for the period we are on
+    RGWPeriod pulled;
+    const int r = period_puller->pull(dpp, predecessor_id, pulled, y);
+    if (r < 0) {
+      return cursor;
+    }
+
+    // add it to our history, handing back the failed cursor on error
+    auto inserted = period_history->insert(std::move(pulled));
+    if (!inserted) {
+      return inserted;
+    }
+    ldpp_dout(dpp, 20) << "find_oldest_period advancing to "
+        "predecessor period " << predecessor_id << dendl;
+    ceph_assert(cursor.has_prev());
+  }
+  ldpp_dout(dpp, 10) << "find_oldest_period returning empty cursor" << dendl;
+  return cursor;
+}
+
+// Determine the oldest log period and make sure the stored mdlog history
+// agrees with it.
+//
+// - If no history object exists (-ENOENT), the oldest period is searched with
+//   find_oldest_period() and written exclusively; -EEXIST from that write is
+//   tolerated.
+// - If the stored epoch is already part of the period history, its cursor is
+//   returned unchanged.
+// - Otherwise the oldest period is searched again and the history object is
+//   rewritten; -ECANCELED from that write is tolerated.
+//
+// Returns a valid cursor on success, or a cursor carrying the error.
+Cursor RGWSI_MDLog::init_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  // read the mdlog history
+  RGWMetadataLogHistory state;
+  RGWObjVersionTracker objv;
+  int ret = read_history(&state, &objv, y, dpp);
+
+  if (ret == -ENOENT) {
+    // initialize the mdlog history and write it
+    ldpp_dout(dpp, 10) << "initializing mdlog history" << dendl;
+    auto cursor = find_oldest_period(dpp, y);
+    if (!cursor) {
+      return cursor;
+    }
+    // write the initial history
+    state.oldest_realm_epoch = cursor.get_epoch();
+    state.oldest_period_id = cursor.get_period().get_id();
+
+    constexpr bool exclusive = true; // don't overwrite
+    ret = write_history(dpp, state, &objv, y, exclusive);
+    if (ret < 0 && ret != -EEXIST) {
+      ldpp_dout(dpp, 1) << "failed to write mdlog history: "
+          << cpp_strerror(ret) << dendl;
+      return Cursor{ret};
+    }
+    return cursor;
+  }
+  if (ret < 0) {
+    ldpp_dout(dpp, 1) << "failed to read mdlog history: "
+        << cpp_strerror(ret) << dendl;
+    return Cursor{ret};
+  }
+
+  // if it's already in the history, return it
+  auto cursor = period_history->lookup(state.oldest_realm_epoch);
+  if (cursor) {
+    return cursor;
+  }
+
+  // the stored epoch is unknown to us: recompute the oldest period
+  cursor = find_oldest_period(dpp, y);
+  if (!cursor) {
+    // an invalid cursor has no period to read; hand the failure back as the
+    // -ENOENT branch above does
+    return cursor;
+  }
+  state.oldest_realm_epoch = cursor.get_epoch();
+  state.oldest_period_id = cursor.get_period().get_id();
+  ldpp_dout(dpp, 10) << "rewriting mdlog history" << dendl;
+  ret = write_history(dpp, state, &objv, y);
+  if (ret < 0 && ret != -ECANCELED) {
+    ldpp_dout(dpp, 1) << "failed to write mdlog history: "
+        << cpp_strerror(ret) << dendl;
+    return Cursor{ret};
+  }
+  return cursor;
+}
+
+// Read the stored mdlog history and look its realm epoch up in the period
+// history. A failed read yields a cursor carrying the error code.
+Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) const
+{
+  RGWMetadataLogHistory history;
+  const int r = read_history(&history, nullptr, y, dpp);
+  if (r >= 0) {
+    ldpp_dout(dpp, 10) << "read mdlog history with oldest period id="
+        << history.oldest_period_id << " realm_epoch="
+        << history.oldest_realm_epoch << dendl;
+
+    return period_history->lookup(history.oldest_realm_epoch);
+  }
+
+  ldpp_dout(dpp, 1) << "failed to read mdlog history: "
+      << cpp_strerror(r) << dendl;
+  return Cursor{r};
+}
+
+// Allocate a coroutine that reads the mdlog history into *period. The caller
+// receives the raw pointer.
+RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(const DoutPrefixProvider *dpp,
+    Cursor *period, RGWObjVersionTracker *objv) const
+{
+  RGWCoroutine *reader = new mdlog::ReadHistoryCR(dpp, svc, period, objv);
+  return reader;
+}
+
+// Allocate a coroutine that updates the mdlog history after the given period
+// was trimmed. The caller receives the raw pointer.
+RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(const DoutPrefixProvider *dpp,
+    Cursor period, RGWObjVersionTracker *objv) const
+{
+  RGWCoroutine *trimmer = new mdlog::TrimHistoryCR(dpp, svc, period, objv);
+  return trimmer;
+}
+
+// Return the metadata log for the given period id, creating the map entry in
+// place when it is not there yet.
+RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period)
+{
+  // emplace leaves an existing entry alone and returns an iterator to it
+  auto result = md_logs.emplace(
+      std::piecewise_construct,
+      std::forward_as_tuple(period),
+      std::forward_as_tuple(cct, svc.zone, svc.cls, period));
+  auto& entry = *result.first;
+  return &entry.second;
+}
+
+// Append an entry to the current period's metadata log.
+int RGWSI_MDLog::add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl)
+{
+  // must have called init()
+  ceph_assert(current_log);
+  const int r = current_log->add_entry(dpp, hash_key, section, key, bl);
+  return r;
+}
+
+// Ask the current period's metadata log which shard a hash key maps to.
+int RGWSI_MDLog::get_shard_id(const string& hash_key, int *shard_id)
+{
+  // must have called init()
+  ceph_assert(current_log);
+  const int r = current_log->get_shard_id(hash_key, shard_id);
+  return r;
+}
+
+// Fetch the period with the given id through the period puller.
+int RGWSI_MDLog::pull_period(const DoutPrefixProvider *dpp, const std::string& period_id, RGWPeriod& period,
+                             optional_yield y)
+{
+  const int r = period_puller->pull(dpp, period_id, period, y);
+  return r;
+}
+
diff --git a/src/rgw/services/svc_mdlog.h b/src/rgw/services/svc_mdlog.h
new file mode 100644
index 000000000..703d6f605
--- /dev/null
+++ b/src/rgw/services/svc_mdlog.h
@@ -0,0 +1,118 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+#include "rgw_period_history.h"
+#include "rgw_period_puller.h"
+
+#include "svc_meta_be.h"
+
+
+class RGWMetadataLog;
+class RGWMetadataLogHistory;
+class RGWCoroutine;
+
+class RGWSI_Zone;
+class RGWSI_SysObj;
+class RGWSI_RADOS;
+
+namespace mdlog {
+ class ReadHistoryCR;
+ class WriteHistoryCR;
+}
+
+// Service that owns the per-period metadata logs, the period history and the
+// period puller, and exposes helpers to read and update the stored record of
+// the oldest log period.
+class RGWSI_MDLog : public RGWServiceInstance
+{
+ friend class mdlog::ReadHistoryCR;
+ friend class mdlog::WriteHistoryCR;
+
+ // maintain a separate metadata log for each period
+ std::map<std::string, RGWMetadataLog> md_logs;
+
+ // use the current period's log for mutating operations
+ RGWMetadataLog* current_log{nullptr};
+
+ // value passed to the constructor; its use is not visible in this header
+ bool run_sync;
+
+ // pulls missing periods for period_history
+ std::unique_ptr<RGWPeriodPuller> period_puller;
+ // maintains a connected history of periods
+ std::unique_ptr<RGWPeriodHistory> period_history;
+
+public:
+ RGWSI_MDLog(CephContext *cct, bool run_sync);
+ virtual ~RGWSI_MDLog();
+
+ // Non-owning pointers to the services this one depends on.
+ struct Svc {
+ RGWSI_RADOS *rados{nullptr};
+ RGWSI_Zone *zone{nullptr};
+ RGWSI_SysObj *sysobj{nullptr};
+ RGWSI_MDLog *mdlog{nullptr};
+ RGWSI_Cls *cls{nullptr};
+ } svc;
+
+ int init(RGWSI_RADOS *_rados_svc,
+ RGWSI_Zone *_zone_svc,
+ RGWSI_SysObj *_sysobj_svc,
+ RGWSI_Cls *_cls_svc);
+
+ int do_start(optional_yield y, const DoutPrefixProvider *dpp) override;
+
+ // traverse all the way back to the beginning of the period history, and
+ // return a cursor to the first period in a fully attached history
+ RGWPeriodHistory::Cursor find_oldest_period(const DoutPrefixProvider *dpp, optional_yield y);
+
+ /// initialize the oldest log period if it doesn't exist, and attach it to
+ /// our current history
+ RGWPeriodHistory::Cursor init_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp);
+
+ /// read the oldest log period, and return a cursor to it in our existing
+ /// period history
+ RGWPeriodHistory::Cursor read_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) const;
+
+ /// read the oldest log period asynchronously and write its result to the
+ /// given cursor pointer
+ RGWCoroutine* read_oldest_log_period_cr(const DoutPrefixProvider *dpp,
+ RGWPeriodHistory::Cursor *period,
+ RGWObjVersionTracker *objv) const;
+
+ /// try to advance the oldest log period when the given period is trimmed.
+ /// NOTE(review): the coroutine created for this in svc_mdlog.cc passes objv
+ /// to its history read and write to prevent racing updates; no rados lock
+ /// is visible there - confirm how atomicity is provided
+ RGWCoroutine* trim_log_period_cr(const DoutPrefixProvider *dpp,
+ RGWPeriodHistory::Cursor period,
+ RGWObjVersionTracker *objv) const;
+ /// synchronously read the stored mdlog history into *state
+ int read_history(RGWMetadataLogHistory *state, RGWObjVersionTracker *objv_tracker,optional_yield y, const DoutPrefixProvider *dpp) const;
+ /// synchronously write the mdlog history; init_oldest_log_period() passes
+ /// exclusive=true with the note "don't overwrite"
+ int write_history(const DoutPrefixProvider *dpp,
+ const RGWMetadataLogHistory& state,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y, bool exclusive = false);
+
+ /// add an entry to the current period's log; requires current_log to be set
+ int add_entry(const DoutPrefixProvider *dpp, const std::string& hash_key, const std::string& section, const std::string& key, bufferlist& bl);
+
+ /// map a hash key to a shard id using the current period's log
+ int get_shard_id(const std::string& hash_key, int *shard_id);
+
+ /// non-owning access to the period history
+ RGWPeriodHistory *get_period_history() {
+ return period_history.get();
+ }
+
+ /// pull the period with the given id through period_puller
+ int pull_period(const DoutPrefixProvider *dpp, const std::string& period_id, RGWPeriod& period, optional_yield y);
+
+ /// find or create the metadata log for the given period
+ RGWMetadataLog* get_log(const std::string& period);
+};
+
diff --git a/src/rgw/services/svc_meta.cc b/src/rgw/services/svc_meta.cc
new file mode 100644
index 000000000..735c39f85
--- /dev/null
+++ b/src/rgw/services/svc_meta.cc
@@ -0,0 +1,46 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#include "svc_meta.h"
+
+#include "rgw_metadata.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Construction only forwards the CephContext to the service base class.
+RGWSI_Meta::RGWSI_Meta(CephContext *cct)
+  : RGWServiceInstance(cct)
+{
+}
+
+// Nothing to release explicitly.
+RGWSI_Meta::~RGWSI_Meta()
+{
+}
+
+// Remember the collaborating services and index the given backends by their
+// type. A later backend of the same type replaces an earlier one.
+void RGWSI_Meta::init(RGWSI_SysObj *_sysobj_svc,
+                      RGWSI_MDLog *_mdlog_svc,
+                      vector<RGWSI_MetaBackend *>& _be_svc)
+{
+  mdlog_svc = _mdlog_svc;
+  sysobj_svc = _sysobj_svc;
+
+  for (RGWSI_MetaBackend *backend : _be_svc) {
+    be_svc[backend->get_type()] = backend;
+  }
+}
+
+// Create a handler for the backend registered under be_type.
+//
+// The handler is owned by this service (kept in be_handlers); *phandler
+// receives a non-owning pointer to it. Returns 0 on success, or -EINVAL when
+// no backend of that type was registered.
+int RGWSI_Meta::create_be_handler(RGWSI_MetaBackend::Type be_type,
+                                  RGWSI_MetaBackend_Handler **phandler)
+{
+  auto iter = be_svc.find(be_type);
+  if (iter == be_svc.end()) {
+    ldout(cct, 0) << __func__ << "(): ERROR: backend type not found" << dendl;
+    return -EINVAL;
+  }
+
+  // take ownership before touching the vector: handing a raw pointer to
+  // emplace_back() would leak it if growing the vector throws
+  std::unique_ptr<RGWSI_MetaBackend_Handler> handler{iter->second->alloc_be_handler()};
+  RGWSI_MetaBackend_Handler *raw = handler.get();
+  be_handlers.push_back(std::move(handler));
+  *phandler = raw;
+
+  return 0;
+}
+
diff --git a/src/rgw/services/svc_meta.h b/src/rgw/services/svc_meta.h
new file mode 100644
index 000000000..b398e27fd
--- /dev/null
+++ b/src/rgw/services/svc_meta.h
@@ -0,0 +1,48 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "svc_meta_be.h"
+
+#include "rgw_service.h"
+
+
+class RGWMetadataLog;
+class RGWCoroutine;
+
+
+// Service that indexes the metadata backends by type and creates (and owns)
+// backend handlers on request.
+class RGWSI_Meta : public RGWServiceInstance
+{
+ // non-owning pointers set by init()
+ RGWSI_SysObj *sysobj_svc{nullptr};
+ RGWSI_MDLog *mdlog_svc{nullptr};
+
+ // registered backends, keyed by their get_type() value (non-owning)
+ std::map<RGWSI_MetaBackend::Type, RGWSI_MetaBackend *> be_svc;
+
+ // handlers created through create_be_handler(); owned here
+ std::vector<std::unique_ptr<RGWSI_MetaBackend_Handler> > be_handlers;
+
+public:
+ RGWSI_Meta(CephContext *cct);
+ ~RGWSI_Meta();
+
+ // store the service pointers and register every backend in _be_svc
+ void init(RGWSI_SysObj *_sysobj_svc,
+ RGWSI_MDLog *_mdlog_svc,
+ std::vector<RGWSI_MetaBackend *>& _be_svc);
+
+ // allocate a handler for the backend of the given type; returns -EINVAL
+ // when that type is not registered, otherwise 0 with *phandler set
+ int create_be_handler(RGWSI_MetaBackend::Type be_type,
+ RGWSI_MetaBackend_Handler **phandler);
+};
+
diff --git a/src/rgw/services/svc_meta_be.cc b/src/rgw/services/svc_meta_be.cc
new file mode 100644
index 000000000..2cb0365c8
--- /dev/null
+++ b/src/rgw/services/svc_meta_be.cc
@@ -0,0 +1,193 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#include "svc_meta_be.h"
+
+#include "rgw_mdlog.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Out-of-line destructor definitions for the nested helper types. A
+// destructor declared pure virtual still needs a definition, because the
+// destructors of derived classes call it.
+RGWSI_MetaBackend::Context::~Context()
+{
+}
+
+RGWSI_MetaBackend::Module::~Module()
+{
+}
+
+RGWSI_MetaBackend::PutParams::~PutParams()
+{
+}
+
+RGWSI_MetaBackend::GetParams::~GetParams()
+{
+}
+
+RGWSI_MetaBackend::RemoveParams::~RemoveParams()
+{
+}
+
+// Default pre-modification hook: when a tracker is supplied that carries a
+// read version but no write version yet, derive the write version from the
+// read version (incremented by one) so that it can be logged. Always
+// returns 0.
+int RGWSI_MetaBackend::pre_modify(const DoutPrefixProvider *dpp,
+                                  RGWSI_MetaBackend::Context *ctx,
+                                  const string& key,
+                                  RGWMetadataLogData& log_data,
+                                  RGWObjVersionTracker *objv_tracker,
+                                  RGWMDLogStatus op_type,
+                                  optional_yield y)
+{
+  if (!objv_tracker) {
+    return 0;
+  }
+
+  if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) {
+    objv_tracker->write_version = objv_tracker->read_version;
+    ++objv_tracker->write_version.ver;
+  }
+  return 0;
+}
+
+// Default post-modification hook: does nothing and hands the result of the
+// modification back unchanged.
+int RGWSI_MetaBackend::post_modify(const DoutPrefixProvider *dpp,
+                                   RGWSI_MetaBackend::Context *ctx,
+                                   const string& key,
+                                   RGWMetadataLogData& log_data,
+                                   RGWObjVersionTracker *objv_tracker, int ret,
+                                   optional_yield y)
+{
+  const int passthrough = ret;
+  return passthrough;
+}
+
+// Generic preparation for a mutation: read the current entry (a missing entry
+// is not an error) and, if the tracker has no write version tag yet, either
+// generate a fresh write version or derive it from the read version.
+//
+// Returns a negative error from the read (other than -ENOENT), otherwise 0.
+// A null objv_tracker is accepted, as in pre_modify(); the version
+// bookkeeping is then skipped.
+int RGWSI_MetaBackend::prepare_mutate(RGWSI_MetaBackend::Context *ctx,
+                                      const string& key,
+                                      const real_time& mtime,
+                                      RGWObjVersionTracker *objv_tracker,
+                                      optional_yield y,
+                                      const DoutPrefixProvider *dpp)
+{
+  real_time orig_mtime;
+
+  int ret = call_with_get_params(&orig_mtime, [&](GetParams& params) {
+    return get_entry(ctx, key, params, objv_tracker, y, dpp);
+  });
+  if (ret < 0 && ret != -ENOENT) {
+    return ret;
+  }
+
+  if (!objv_tracker) {
+    // nothing to version without a tracker
+    return 0;
+  }
+
+  if (objv_tracker->write_version.tag.empty()) {
+    if (objv_tracker->read_version.tag.empty()) {
+      // no version known at all: start a new one
+      objv_tracker->generate_new_write_ver(cct);
+    } else {
+      // continue from the version that was just read
+      objv_tracker->write_version = objv_tracker->read_version;
+      objv_tracker->write_version.ver++;
+    }
+  }
+  return 0;
+}
+
+// Run a mutation wrapped in the backend hooks: optional generic preparation,
+// pre_modify(), the operation f itself, then post_modify() which receives the
+// operation's result. Returns the first negative result, STATUS_NO_APPLY from
+// the preparation, or 0.
+int RGWSI_MetaBackend::do_mutate(RGWSI_MetaBackend::Context *ctx,
+                                 const string& key,
+                                 const ceph::real_time& mtime,
+                                 RGWObjVersionTracker *objv_tracker,
+                                 RGWMDLogStatus op_type,
+                                 optional_yield y,
+                                 std::function<int()> f,
+                                 bool generic_prepare,
+                                 const DoutPrefixProvider *dpp)
+{
+  if (generic_prepare) {
+    const int prep = prepare_mutate(ctx, key, mtime, objv_tracker, y, dpp);
+    if (prep < 0 || prep == STATUS_NO_APPLY) {
+      return prep;
+    }
+  }
+
+  RGWMetadataLogData log_data;
+  const int pre = pre_modify(dpp, ctx, key, log_data, objv_tracker, op_type, y);
+  if (pre < 0) {
+    return pre;
+  }
+
+  /* cascading the operation's result into post_modify() */
+  const int op_ret = f();
+  const int post = post_modify(dpp, ctx, key, log_data, objv_tracker, op_ret, y);
+
+  // only errors are reported; any non-negative result becomes 0
+  return post < 0 ? post : 0;
+}
+
+// Higher-level read: forwards straight to the backend's get_entry().
+int RGWSI_MetaBackend::get(Context *ctx,
+                           const string& key,
+                           GetParams& params,
+                           RGWObjVersionTracker *objv_tracker,
+                           optional_yield y,
+                           const DoutPrefixProvider *dpp,
+                           bool get_raw_attrs)
+{
+  const int r = get_entry(ctx, key, params, objv_tracker, y, dpp, get_raw_attrs);
+  return r;
+}
+
+// Higher-level write: runs put_entry() through do_mutate() with the
+// MDLOG_STATUS_WRITE op type and without the generic preparation step.
+int RGWSI_MetaBackend::put(Context *ctx,
+                           const string& key,
+                           PutParams& params,
+                           RGWObjVersionTracker *objv_tracker,
+                           optional_yield y,
+                           const DoutPrefixProvider *dpp)
+{
+  // the write itself, deferred so that do_mutate() can wrap it in its hooks
+  auto write_entry = [&]() {
+    return put_entry(dpp, ctx, key, params, objv_tracker, y);
+  };
+  constexpr bool generic_prepare = false;
+
+  return do_mutate(ctx, key, params.mtime, objv_tracker,
+                   MDLOG_STATUS_WRITE, y, write_entry, generic_prepare, dpp);
+}
+
+// Higher-level removal: runs remove_entry() through do_mutate() with the
+// MDLOG_STATUS_REMOVE op type and without the generic preparation step.
+int RGWSI_MetaBackend::remove(Context *ctx,
+                              const string& key,
+                              RemoveParams& params,
+                              RGWObjVersionTracker *objv_tracker,
+                              optional_yield y,
+                              const DoutPrefixProvider *dpp)
+{
+  // the removal itself, deferred so that do_mutate() can wrap it in its hooks
+  auto remove_one = [&]() {
+    return remove_entry(dpp, ctx, key, params, objv_tracker, y);
+  };
+  constexpr bool generic_prepare = false;
+
+  return do_mutate(ctx, key, params.mtime, objv_tracker,
+                   MDLOG_STATUS_REMOVE, y, remove_one, generic_prepare, dpp);
+}
+
+// Higher-level generic mutation: runs the caller-supplied operation f through
+// do_mutate() with the op type and mtime taken from params, without the
+// generic preparation step.
+int RGWSI_MetaBackend::mutate(Context *ctx,
+                              const std::string& key,
+                              MutateParams& params,
+                              RGWObjVersionTracker *objv_tracker,
+                              optional_yield y,
+                              std::function<int()> f,
+                              const DoutPrefixProvider *dpp)
+{
+  constexpr bool generic_prepare = false;
+
+  return do_mutate(ctx, key, params.mtime, objv_tracker, params.op_type, y,
+                   f, generic_prepare, dpp);
+}
+
+// Let the backend provide a context, bind it to this handler, and invoke f
+// with an Op that operates on that context. Returns whatever the backend's
+// call() returns.
+int RGWSI_MetaBackend_Handler::call(std::optional<RGWSI_MetaBackend_CtxParams> bectx_params,
+                                    std::function<int(Op *)> f)
+{
+  auto run_with_ctx = [&](RGWSI_MetaBackend::Context *ctx) {
+    ctx->init(this);
+    Op op(be, ctx);
+    return f(&op);
+  };
+  return be->call(bectx_params, run_with_ctx);
+}
+
+// Build an Op around a context freshly allocated by the handler's backend.
+// The context is owned by this object (pctx) and initialised with the
+// handler.
+RGWSI_MetaBackend_Handler::Op_ManagedCtx::Op_ManagedCtx(RGWSI_MetaBackend_Handler *handler)
+  : Op(handler->be, handler->be->alloc_ctx())
+{
+  // take ownership before calling init(), so the context is released even if
+  // init() throws
+  pctx.reset(ctx());
+  pctx->init(handler);
+}
+
diff --git a/src/rgw/services/svc_meta_be.h b/src/rgw/services/svc_meta_be.h
new file mode 100644
index 000000000..97267a4e7
--- /dev/null
+++ b/src/rgw/services/svc_meta_be.h
@@ -0,0 +1,294 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "svc_meta_be_params.h"
+
+#include "rgw_service.h"
+#include "rgw_mdlog_types.h"
+
+class RGWMetadataLogData;
+
+class RGWSI_MDLog;
+class RGWSI_Meta;
+class RGWObjVersionTracker;
+class RGWSI_MetaBackend_Handler;
+
+// Abstract base of the metadata backends. Concrete backends implement the
+// *_entry(), list_*(), call() and get_shard_id() primitives; the base class
+// layers get()/put()/remove()/mutate() on top of them, wrapping mutations in
+// the pre_modify()/post_modify() hooks.
+class RGWSI_MetaBackend : public RGWServiceInstance
+{
+ friend class RGWSI_Meta;
+public:
+ class Module;
+ class Context;
+protected:
+ // non-owning; set through base_init()
+ RGWSI_MDLog *mdlog_svc{nullptr};
+
+ void base_init(RGWSI_MDLog *_mdlog_svc) {
+ mdlog_svc = _mdlog_svc;
+ }
+
+ // read the existing entry (a missing entry is tolerated) and fill in the
+ // tracker's write version when it has no tag yet
+ int prepare_mutate(RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ const ceph::real_time& mtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+
+ // run f between pre_modify() and post_modify(); prepare_mutate() is run
+ // first when generic_prepare is set
+ virtual int do_mutate(Context *ctx,
+ const std::string& key,
+ const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker,
+ RGWMDLogStatus op_type,
+ optional_yield y,
+ std::function<int()> f,
+ bool generic_prepare,
+ const DoutPrefixProvider *dpp);
+
+ // hook run before the modification; the base version only derives the
+ // tracker's write version from its read version when needed
+ virtual int pre_modify(const DoutPrefixProvider *dpp,
+ Context *ctx,
+ const std::string& key,
+ RGWMetadataLogData& log_data,
+ RGWObjVersionTracker *objv_tracker,
+ RGWMDLogStatus op_type,
+ optional_yield y);
+ // hook run after the modification with its result in ret; the base version
+ // returns ret unchanged
+ virtual int post_modify(const DoutPrefixProvider *dpp,
+ Context *ctx,
+ const std::string& key,
+ RGWMetadataLogData& log_data,
+ RGWObjVersionTracker *objv_tracker, int ret,
+ optional_yield y);
+public:
+ class Module {
+ /*
+ * Backend specialization module
+ */
+ public:
+ virtual ~Module() = 0;
+ };
+
+ using ModuleRef = std::shared_ptr<Module>;
+
+ struct Context { /*
+ * A single metadata operation context. Will be holding info about
+ * backend and operation itself; operation might span multiple backend
+ * calls.
+ */
+ virtual ~Context() = 0;
+
+ // bind the context to the handler it is used through
+ virtual void init(RGWSI_MetaBackend_Handler *h) = 0;
+ };
+
+ // allocate a backend-specific context; the caller receives a raw pointer
+ virtual Context *alloc_ctx() = 0;
+
+ // parameters of a put; backends derive from this to add their payload
+ struct PutParams {
+ ceph::real_time mtime;
+
+ PutParams() {}
+ PutParams(const ceph::real_time& _mtime) : mtime(_mtime) {}
+ virtual ~PutParams() = 0;
+ };
+
+ // parameters of a get; pmtime optionally points at where the mtime goes
+ struct GetParams {
+ GetParams() {}
+ GetParams(ceph::real_time *_pmtime) : pmtime(_pmtime) {}
+ virtual ~GetParams();
+
+ ceph::real_time *pmtime{nullptr};
+ };
+
+ // parameters of a remove
+ struct RemoveParams {
+ virtual ~RemoveParams() = 0;
+
+ ceph::real_time mtime;
+ };
+
+ // parameters of a generic mutate: the mtime plus the op type handed to
+ // do_mutate()
+ struct MutateParams {
+ ceph::real_time mtime;
+ RGWMDLogStatus op_type;
+
+ // NOTE(review): the default constructor leaves op_type uninitialized
+ MutateParams() {}
+ MutateParams(const ceph::real_time& _mtime,
+ RGWMDLogStatus _op_type) : mtime(_mtime), op_type(_op_type) {}
+ virtual ~MutateParams() {}
+ };
+
+ // identifies the concrete backend; used as the registry key in RGWSI_Meta
+ enum Type {
+ MDBE_SOBJ = 0,
+ MDBE_OTP = 1,
+ };
+
+ RGWSI_MetaBackend(CephContext *cct) : RGWServiceInstance(cct) {}
+ virtual ~RGWSI_MetaBackend() {}
+
+ virtual Type get_type() = 0;
+
+ // allocate a handler bound to this backend; the caller receives a raw
+ // pointer
+ virtual RGWSI_MetaBackend_Handler *alloc_be_handler() = 0;
+ // build backend-specific GetParams (with pmtime set) and invoke the
+ // callback with them
+ virtual int call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)>) = 0;
+
+ /* these should be implemented by backends */
+ virtual int get_entry(RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWSI_MetaBackend::GetParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ bool get_raw_attrs=false) = 0;
+ virtual int put_entry(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWSI_MetaBackend::PutParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y) = 0;
+ virtual int remove_entry(const DoutPrefixProvider *dpp,
+ Context *ctx,
+ const std::string& key,
+ RGWSI_MetaBackend::RemoveParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y) = 0;
+
+ // key listing: start at marker, fetch up to max keys per call, and report
+ // the marker to resume from
+ virtual int list_init(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, const std::string& marker) = 0;
+ virtual int list_next(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ int max, std::list<std::string> *keys,
+ bool *truncated) = 0;
+ virtual int list_get_marker(RGWSI_MetaBackend::Context *ctx,
+ std::string *marker) = 0;
+
+ // convenience overload: call f with a context and no explicit parameters
+ int call(std::function<int(RGWSI_MetaBackend::Context *)> f) {
+ return call(std::nullopt, f);
+ }
+
+ // invoke f with a backend-provided context, optionally configured by opt
+ virtual int call(std::optional<RGWSI_MetaBackend_CtxParams> opt,
+ std::function<int(RGWSI_MetaBackend::Context *)> f) = 0;
+
+ virtual int get_shard_id(RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ int *shard_id) = 0;
+
+ /* higher level */
+ // forwards to get_entry()
+ virtual int get(Context *ctx,
+ const std::string& key,
+ GetParams &params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ bool get_raw_attrs=false);
+
+ // runs put_entry() through do_mutate() as MDLOG_STATUS_WRITE
+ virtual int put(Context *ctx,
+ const std::string& key,
+ PutParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+
+ // runs remove_entry() through do_mutate() as MDLOG_STATUS_REMOVE
+ virtual int remove(Context *ctx,
+ const std::string& key,
+ RemoveParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+
+ // runs the caller's f through do_mutate() with params.op_type
+ virtual int mutate(Context *ctx,
+ const std::string& key,
+ MutateParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ std::function<int()> f,
+ const DoutPrefixProvider *dpp);
+};
+
+// Front end to a metadata backend. Operations are issued through an Op,
+// which pairs the backend with one operation context and forwards every call
+// to the backend with that context.
+class RGWSI_MetaBackend_Handler {
+ // non-owning pointer to the backend this handler works on
+ RGWSI_MetaBackend *be{nullptr};
+
+public:
+ class Op {
+ friend class RGWSI_MetaBackend_Handler;
+
+ RGWSI_MetaBackend *be;
+ RGWSI_MetaBackend::Context *be_ctx;
+
+ // private: only the handler (a friend) and derived classes create Ops
+ Op(RGWSI_MetaBackend *_be,
+ RGWSI_MetaBackend::Context *_ctx) : be(_be), be_ctx(_ctx) {}
+
+ public:
+ // the context this Op operates on (not owned by Op itself)
+ RGWSI_MetaBackend::Context *ctx() {
+ return be_ctx;
+ }
+
+ // the following members forward to the backend method of the same name,
+ // supplying be_ctx as the context
+ int get(const std::string& key,
+ RGWSI_MetaBackend::GetParams &params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y, const DoutPrefixProvider *dpp) {
+ return be->get(be_ctx, key, params, objv_tracker, y, dpp);
+ }
+
+ int put(const std::string& key,
+ RGWSI_MetaBackend::PutParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y, const DoutPrefixProvider *dpp) {
+ return be->put(be_ctx, key, params, objv_tracker, y, dpp);
+ }
+
+ int remove(const std::string& key,
+ RGWSI_MetaBackend::RemoveParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y, const DoutPrefixProvider *dpp) {
+ return be->remove(be_ctx, key, params, objv_tracker, y, dpp);
+ }
+
+ int mutate(const std::string& key,
+ RGWSI_MetaBackend::MutateParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ std::function<int()> f,
+ const DoutPrefixProvider *dpp) {
+ return be->mutate(be_ctx, key, params, objv_tracker, y, f, dpp);
+ }
+
+ int list_init(const DoutPrefixProvider *dpp, const std::string& marker) {
+ return be->list_init(dpp, be_ctx, marker);
+ }
+ int list_next(const DoutPrefixProvider *dpp, int max, std::list<std::string> *keys,
+ bool *truncated) {
+ return be->list_next(dpp, be_ctx, max, keys, truncated);
+ }
+ int list_get_marker(std::string *marker) {
+ return be->list_get_marker(be_ctx, marker);
+ }
+
+ int get_shard_id(const std::string& key, int *shard_id) {
+ return be->get_shard_id(be_ctx, key, shard_id);
+ }
+ };
+
+ // An Op that allocates its own context from the handler's backend and
+ // owns it through pctx.
+ class Op_ManagedCtx : public Op {
+ std::unique_ptr<RGWSI_MetaBackend::Context> pctx;
+ public:
+ Op_ManagedCtx(RGWSI_MetaBackend_Handler *handler);
+ };
+
+ RGWSI_MetaBackend_Handler(RGWSI_MetaBackend *_be) : be(_be) {}
+ virtual ~RGWSI_MetaBackend_Handler() {}
+
+ // convenience overload: run f without explicit context parameters
+ int call(std::function<int(Op *)> f) {
+ return call(std::nullopt, f);
+ }
+
+ // have the backend provide a context, initialise it with this handler and
+ // run f with an Op bound to it
+ virtual int call(std::optional<RGWSI_MetaBackend_CtxParams> bectx_params,
+ std::function<int(Op *)> f);
+};
+
diff --git a/src/rgw/services/svc_meta_be_otp.cc b/src/rgw/services/svc_meta_be_otp.cc
new file mode 100644
index 000000000..3cabeb9d0
--- /dev/null
+++ b/src/rgw/services/svc_meta_be_otp.cc
@@ -0,0 +1,73 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_meta_be_otp.h"
+
+#include "rgw_tools.h"
+#include "rgw_metadata.h"
+#include "rgw_mdlog.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Construction only forwards the CephContext to the SObj backend base.
+RGWSI_MetaBackend_OTP::RGWSI_MetaBackend_OTP(CephContext *cct)
+  : RGWSI_MetaBackend_SObj(cct)
+{
+}
+
+// Nothing to release explicitly.
+RGWSI_MetaBackend_OTP::~RGWSI_MetaBackend_OTP()
+{
+}
+
+// Build the metadata key for a user's OTP entry: the fixed "otp:user:" prefix
+// followed by the user's string form.
+string RGWSI_MetaBackend_OTP::get_meta_key(const rgw_user& user)
+{
+  string meta_key("otp:user:");
+  meta_key += user.to_str();
+  return meta_key;
+}
+
+// Allocate a handler bound to this backend; the caller receives the raw
+// pointer.
+RGWSI_MetaBackend_Handler *RGWSI_MetaBackend_OTP::alloc_be_handler()
+{
+  RGWSI_MetaBackend_Handler *handler = new RGWSI_MetaBackend_Handler_OTP(this);
+  return handler;
+}
+
+// Allocate an OTP operation context; the caller receives the raw pointer.
+RGWSI_MetaBackend::Context *RGWSI_MetaBackend_OTP::alloc_ctx()
+{
+  RGWSI_MetaBackend::Context *otp_ctx = new Context_OTP;
+  return otp_ctx;
+}
+
+// Invoke cb with OTP get-parameters that point at a scratch device list and
+// at the caller's mtime slot. The device list only lives for the duration of
+// this call.
+int RGWSI_MetaBackend_OTP::call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb)
+{
+  otp_devices_list_t scratch_devices;
+
+  RGWSI_MBOTP_GetParams get_params;
+  get_params.pmtime = pmtime;
+  get_params.pdevices = &scratch_devices;
+
+  return cb(get_params);
+}
+
+// Read the OTP device list stored under key through the cls MFA service.
+// _params must actually be an RGWSI_MBOTP_GetParams. Returns the negative
+// error of the listing, otherwise 0.
+int RGWSI_MetaBackend_OTP::get_entry(RGWSI_MetaBackend::Context *_ctx,
+                                     const string& key,
+                                     RGWSI_MetaBackend::GetParams& _params,
+                                     RGWObjVersionTracker *objv_tracker,
+                                     optional_yield y,
+                                     const DoutPrefixProvider *dpp,
+                                     bool get_raw_attrs)
+{
+  auto& otp_params = static_cast<RGWSI_MBOTP_GetParams&>(_params);
+
+  const int r = cls_svc->mfa.list_mfa(dpp, key, otp_params.pdevices,
+                                      objv_tracker, otp_params.pmtime, y);
+  // non-negative results are all reported as plain success
+  return r < 0 ? r : 0;
+}
+
+// Store the OTP device list carried by _params under key through the cls MFA
+// service. _params must actually be an RGWSI_MBOTP_PutParams.
+int RGWSI_MetaBackend_OTP::put_entry(const DoutPrefixProvider *dpp,
+                                     RGWSI_MetaBackend::Context *_ctx,
+                                     const string& key,
+                                     RGWSI_MetaBackend::PutParams& _params,
+                                     RGWObjVersionTracker *objv_tracker,
+                                     optional_yield y)
+{
+  auto& otp_params = static_cast<RGWSI_MBOTP_PutParams&>(_params);
+
+  const int r = cls_svc->mfa.set_mfa(dpp, key, otp_params.devices, true,
+                                     objv_tracker, otp_params.mtime, y);
+  return r;
+}
+
diff --git a/src/rgw/services/svc_meta_be_otp.h b/src/rgw/services/svc_meta_be_otp.h
new file mode 100644
index 000000000..7bd9cf652
--- /dev/null
+++ b/src/rgw/services/svc_meta_be_otp.h
@@ -0,0 +1,89 @@
+
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_cls.h"
+#include "svc_meta_be.h"
+#include "svc_meta_be_sobj.h"
+#include "svc_sys_obj.h"
+
+
+using RGWSI_MBOTP_Handler_Module = RGWSI_MBSObj_Handler_Module;
+using RGWSI_MetaBackend_Handler_OTP = RGWSI_MetaBackend_Handler_SObj;
+
+using otp_devices_list_t = std::list<rados::cls::otp::otp_info_t>;
+
+// Get-parameters of the OTP backend: adds the destination for the device
+// list that a read fills in (non-owning pointer).
+struct RGWSI_MBOTP_GetParams : public RGWSI_MetaBackend::GetParams {
+ otp_devices_list_t *pdevices{nullptr};
+};
+
+// Put-parameters of the OTP backend: carries the device list to store.
+struct RGWSI_MBOTP_PutParams : public RGWSI_MetaBackend::PutParams {
+ otp_devices_list_t devices;
+};
+
+using RGWSI_MBOTP_RemoveParams = RGWSI_MBSObj_RemoveParams;
+
+// Metadata backend for OTP device lists. It builds on the SObj backend and
+// replaces entry reads and writes with calls into the cls MFA service.
+class RGWSI_MetaBackend_OTP : public RGWSI_MetaBackend_SObj
+{
+  // non-owning; set by init()
+  RGWSI_Cls *cls_svc{nullptr};
+
+public:
+  // operation context of this backend: the SObj context plus a device list
+  struct Context_OTP : public RGWSI_MetaBackend_SObj::Context_SObj {
+    otp_devices_list_t devices;
+  };
+
+  RGWSI_MetaBackend_OTP(CephContext *cct);
+  virtual ~RGWSI_MetaBackend_OTP();
+
+  // registry key under which RGWSI_Meta files this backend
+  RGWSI_MetaBackend::Type get_type() override {
+    return MDBE_OTP;
+  }
+
+  // metadata key of a user's OTP entry ("otp:user:" + user)
+  static std::string get_meta_key(const rgw_user& user);
+
+  // initialise the SObj base and remember the cls service
+  void init(RGWSI_SysObj *_sysobj_svc,
+            RGWSI_MDLog *_mdlog_svc,
+            RGWSI_Cls *_cls_svc) {
+    RGWSI_MetaBackend_SObj::init(_sysobj_svc, _mdlog_svc);
+    cls_svc = _cls_svc;
+  }
+
+  RGWSI_MetaBackend_Handler *alloc_be_handler() override;
+  RGWSI_MetaBackend::Context *alloc_ctx() override;
+
+  // invoke cb with RGWSI_MBOTP_GetParams pointing at a scratch device list
+  int call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb) override;
+
+  // read the device list for key; _params must be an RGWSI_MBOTP_GetParams
+  int get_entry(RGWSI_MetaBackend::Context *ctx,
+                const std::string& key,
+                RGWSI_MetaBackend::GetParams& _params,
+                RGWObjVersionTracker *objv_tracker,
+                optional_yield y,
+                const DoutPrefixProvider *dpp,
+                bool get_raw_attrs=false) override;
+  // store the device list for key; _params must be an RGWSI_MBOTP_PutParams
+  int put_entry(const DoutPrefixProvider *dpp,
+                RGWSI_MetaBackend::Context *ctx,
+                const std::string& key,
+                RGWSI_MetaBackend::PutParams& _params,
+                RGWObjVersionTracker *objv_tracker,
+                optional_yield y) override;
+};
+
+
diff --git a/src/rgw/services/svc_meta_be_params.h b/src/rgw/services/svc_meta_be_params.h
new file mode 100644
index 000000000..445f6e188
--- /dev/null
+++ b/src/rgw/services/svc_meta_be_params.h
@@ -0,0 +1,25 @@
+
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include <variant>
+
+// Context parameters of the SObj backend; currently carries no data.
+struct RGWSI_MetaBackend_CtxParams_SObj {};
+
+// Variant over the per-backend context parameter types. It has a single
+// alternative at present.
+using RGWSI_MetaBackend_CtxParams = std::variant<RGWSI_MetaBackend_CtxParams_SObj>;
diff --git a/src/rgw/services/svc_meta_be_sobj.cc b/src/rgw/services/svc_meta_be_sobj.cc
new file mode 100644
index 000000000..c0ff402fc
--- /dev/null
+++ b/src/rgw/services/svc_meta_be_sobj.cc
@@ -0,0 +1,246 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_meta_be_sobj.h"
+#include "svc_meta_be_params.h"
+#include "svc_mdlog.h"
+
+#include "rgw_tools.h"
+#include "rgw_metadata.h"
+#include "rgw_mdlog.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Forwards the CephContext to the RGWSI_MetaBackend base; service pointers
+// are supplied later through init().
+RGWSI_MetaBackend_SObj::RGWSI_MetaBackend_SObj(CephContext *cct) : RGWSI_MetaBackend(cct) {
+}
+
+// Empty destructor body; nothing is released explicitly here.
+RGWSI_MetaBackend_SObj::~RGWSI_MetaBackend_SObj() {
+}
+
+// Allocates a new sysobj handler bound to this backend.
+// NOTE(review): returned with plain `new`; presumably the caller takes
+// ownership -- confirm against the call site.
+RGWSI_MetaBackend_Handler *RGWSI_MetaBackend_SObj::alloc_be_handler()
+{
+ return new RGWSI_MetaBackend_Handler_SObj(this);
+}
+
+// Allocates a fresh, uninitialised Context_SObj (its module pointer is null
+// until Context_SObj::init() is called).
+// NOTE(review): returned with plain `new`; presumably the caller takes
+// ownership -- confirm against the call site.
+RGWSI_MetaBackend::Context *RGWSI_MetaBackend_SObj::alloc_ctx()
+{
+ return new Context_SObj;
+}
+
+/*
+ * Record the start of a metadata modification in the mdlog.
+ *
+ * Runs the base-class pre_modify() first and bails out on failure.  Then the
+ * log record is filled in -- read/write versions are copied from the tracker
+ * when one is supplied, and the status is set to op_type -- encoded, and
+ * appended to the mdlog under the module's hash key and section.
+ *
+ * Returns a negative error code on failure, 0 otherwise.
+ */
+int RGWSI_MetaBackend_SObj::pre_modify(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *_ctx,
+                                       const string& key,
+                                       RGWMetadataLogData& log_data,
+                                       RGWObjVersionTracker *objv_tracker,
+                                       RGWMDLogStatus op_type,
+                                       optional_yield y)
+{
+  auto *sobj_ctx = static_cast<Context_SObj *>(_ctx);
+
+  const int base_ret = RGWSI_MetaBackend::pre_modify(dpp, sobj_ctx, key, log_data,
+                                                     objv_tracker, op_type, y);
+  if (base_ret < 0) {
+    return base_ret;
+  }
+
+  /* carry the tracker's versions into the log record so they get logged */
+  if (objv_tracker != nullptr) {
+    log_data.read_version = objv_tracker->read_version;
+    log_data.write_version = objv_tracker->write_version;
+  }
+  log_data.status = op_type;
+
+  bufferlist logbl;
+  encode(log_data, logbl);
+
+  const int log_ret = mdlog_svc->add_entry(dpp,
+                                           sobj_ctx->module->get_hash_key(key),
+                                           sobj_ctx->module->get_section(),
+                                           key, logbl);
+  return (log_ret < 0) ? log_ret : 0;
+}
+
+/*
+ * Record the outcome of a metadata modification in the mdlog.
+ *
+ * `ret` is the result of the modification itself: a non-negative value logs
+ * MDLOG_STATUS_COMPLETE, a negative one logs MDLOG_STATUS_ABORT.  The log
+ * entry is written in both cases.  A failed modification takes precedence
+ * over a failed log write when choosing the return value; only when both
+ * succeeded is the base-class post_modify() invoked.
+ */
+int RGWSI_MetaBackend_SObj::post_modify(const DoutPrefixProvider *dpp,
+                                        RGWSI_MetaBackend::Context *_ctx,
+                                        const string& key,
+                                        RGWMetadataLogData& log_data,
+                                        RGWObjVersionTracker *objv_tracker, int ret,
+                                        optional_yield y)
+{
+  auto *sobj_ctx = static_cast<Context_SObj *>(_ctx);
+
+  if (ret < 0) {
+    log_data.status = MDLOG_STATUS_ABORT;
+  } else {
+    log_data.status = MDLOG_STATUS_COMPLETE;
+  }
+
+  bufferlist logbl;
+  encode(log_data, logbl);
+
+  const int log_ret = mdlog_svc->add_entry(dpp,
+                                           sobj_ctx->module->get_hash_key(key),
+                                           sobj_ctx->module->get_section(),
+                                           key, logbl);
+
+  /* the original operation's error wins over the logging error */
+  if (ret < 0) {
+    return ret;
+  }
+  if (log_ret < 0) {
+    return log_ret;
+  }
+
+  return RGWSI_MetaBackend::post_modify(dpp, sobj_ctx, key, log_data, objv_tracker, ret, y);
+}
+
+/*
+ * Look up the mdlog shard that entries for `key` are placed in.
+ *
+ * The key is mapped to the module's hash key and handed to the mdlog service
+ * together with the caller's out-pointer.  The mdlog service writes the shard
+ * number through `shard_id`; its return value is propagated as this
+ * function's status.
+ *
+ * The return value must not be stored into *shard_id: that would overwrite
+ * the shard number that was just written through the very same pointer.
+ * NOTE(review): assumes the int returned by mdlog_svc->get_shard_id() is a
+ * status code (0 / negative errno) -- confirm against RGWSI_MDLog.
+ */
+int RGWSI_MetaBackend_SObj::get_shard_id(RGWSI_MetaBackend::Context *_ctx,
+                                         const std::string& key,
+                                         int *shard_id)
+{
+  auto ctx = static_cast<Context_SObj *>(_ctx);
+  return mdlog_svc->get_shard_id(ctx->module->get_hash_key(key), shard_id);
+}
+
+// Runs `f` with a temporary, stack-allocated Context_SObj and returns f's
+// result.  The context only lives for the duration of the call, and `opt`
+// is not consulted by this backend.
+int RGWSI_MetaBackend_SObj::call(std::optional<RGWSI_MetaBackend_CtxParams> opt,
+ std::function<int(RGWSI_MetaBackend::Context *)> f)
+{
+ RGWSI_MetaBackend_SObj::Context_SObj ctx;
+ return f(&ctx);
+}
+
+// Binds this context to the module carried by the given handler.  The handler
+// is downcast to RGWSI_MetaBackend_Handler_SObj without a runtime check.
+void RGWSI_MetaBackend_SObj::Context_SObj::init(RGWSI_MetaBackend_Handler *h)
+{
+  module = static_cast<RGWSI_MetaBackend_Handler_SObj *>(h)->module;
+}
+
+// Builds sysobj get-params whose data buffer is a local bufferlist and whose
+// mtime output is `pmtime`, then invokes `cb` with them and returns its
+// result.  The buffer is destroyed when this function returns, so `cb` must
+// not keep params.pbl beyond the call.
+int RGWSI_MetaBackend_SObj::call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb)
+{
+ bufferlist bl;
+ RGWSI_MBSObj_GetParams params;
+ params.pmtime = pmtime;
+ params.pbl = &bl;
+ return cb(params);
+}
+
+/*
+ * Read the system object that backs metadata entry `key`.
+ *
+ * The context's module translates the key into a pool/oid pair; the read
+ * itself is delegated to rgw_get_system_obj(), whose result is returned
+ * unchanged.  `_params` must really be an RGWSI_MBSObj_GetParams (it is
+ * downcast without a runtime check) and its pbl is dereferenced.
+ */
+int RGWSI_MetaBackend_SObj::get_entry(RGWSI_MetaBackend::Context *_ctx,
+                                      const string& key,
+                                      GetParams& _params,
+                                      RGWObjVersionTracker *objv_tracker,
+                                      optional_yield y,
+                                      const DoutPrefixProvider *dpp,
+                                      bool get_raw_attrs)
+{
+  auto *sobj_ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+  auto& sobj_params = static_cast<RGWSI_MBSObj_GetParams&>(_params);
+
+  rgw_pool pool;
+  string oid;
+  sobj_ctx->module->get_pool_and_oid(key, &pool, &oid);
+
+  return rgw_get_system_obj(sysobj_svc, pool, oid, *sobj_params.pbl,
+                            objv_tracker, sobj_params.pmtime,
+                            y, dpp,
+                            sobj_params.pattrs, sobj_params.cache_info,
+                            sobj_params.refresh_version, get_raw_attrs);
+}
+
+/*
+ * Write the system object that backs metadata entry `key`.
+ *
+ * The context's module translates the key into a pool/oid pair; the write is
+ * delegated to rgw_put_system_obj() using the payload, exclusive flag, mtime
+ * and attrs carried by the put-params.  `_params` must really be an
+ * RGWSI_MBSObj_PutParams (it is downcast without a runtime check).
+ */
+int RGWSI_MetaBackend_SObj::put_entry(const DoutPrefixProvider *dpp,
+                                      RGWSI_MetaBackend::Context *_ctx,
+                                      const string& key,
+                                      PutParams& _params,
+                                      RGWObjVersionTracker *objv_tracker,
+                                      optional_yield y)
+{
+  auto *sobj_ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+  auto& sobj_params = static_cast<RGWSI_MBSObj_PutParams&>(_params);
+
+  rgw_pool pool;
+  string oid;
+  sobj_ctx->module->get_pool_and_oid(key, &pool, &oid);
+
+  return rgw_put_system_obj(dpp, sysobj_svc, pool, oid,
+                            sobj_params.bl, sobj_params.exclusive,
+                            objv_tracker, sobj_params.mtime, y,
+                            sobj_params.pattrs);
+}
+
+/*
+ * Remove the system object that backs metadata entry `key`.
+ *
+ * The context's module translates the key into a pool/oid pair; the removal
+ * goes through the sysobj write-op with the caller's version tracker
+ * attached.  `params` is not consulted.
+ */
+int RGWSI_MetaBackend_SObj::remove_entry(const DoutPrefixProvider *dpp,
+                                         RGWSI_MetaBackend::Context *_ctx,
+                                         const string& key,
+                                         RemoveParams& params,
+                                         RGWObjVersionTracker *objv_tracker,
+                                         optional_yield y)
+{
+  auto *sobj_ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+
+  rgw_pool pool;
+  string oid;
+  sobj_ctx->module->get_pool_and_oid(key, &pool, &oid);
+
+  rgw_raw_obj raw_obj(pool, oid);
+  auto sysobj = sysobj_svc->get_obj(raw_obj);
+
+  return sysobj.wop().set_objv_tracker(objv_tracker).remove(dpp, y);
+}
+
+// Prepares the context for listing keys, starting at `marker`.
+// Stores a pool handle and a list op in ctx->list and initialises the op
+// with the module's oid prefix.  Always returns 0.
+// NOTE(review): the result of list.op->init() is discarded -- confirm that
+// it cannot fail in a way callers need to see.
+int RGWSI_MetaBackend_SObj::list_init(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *_ctx,
+ const string& marker)
+{
+ RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+
+ rgw_pool pool;
+
+ // only the pool is needed here: pass an empty key and no oid output
+ string no_key;
+ ctx->module->get_pool_and_oid(no_key, &pool, nullptr);
+
+ ctx->list.pool = sysobj_svc->get_pool(pool);
+ ctx->list.op.emplace(ctx->list.pool->op());
+
+ string prefix = ctx->module->get_oid_prefix();
+ ctx->list.op->init(dpp, marker, prefix);
+
+ return 0;
+}
+
+/*
+ * Fetch the next batch (up to `max` oids) of a listing started by list_init().
+ *
+ * `keys` is cleared first.  -ENOENT from the list op is treated as an empty,
+ * non-truncated result; any other negative value is returned as an error.
+ * Oids the module does not consider valid are skipped, the rest are
+ * converted to keys and appended to `keys`.
+ */
+int RGWSI_MetaBackend_SObj::list_next(const DoutPrefixProvider *dpp,
+                                      RGWSI_MetaBackend::Context *_ctx,
+                                      int max, list<string> *keys,
+                                      bool *truncated)
+{
+  auto *sobj_ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+
+  keys->clear();
+
+  vector<string> oids;
+  const int ret = sobj_ctx->list.op->get_next(dpp, max, &oids, truncated);
+  if (ret == -ENOENT) {
+    /* nothing to list: report an empty, complete result */
+    if (truncated) {
+      *truncated = false;
+    }
+    return 0;
+  }
+  if (ret < 0) {
+    return ret;
+  }
+
+  auto *mod = sobj_ctx->module;
+  for (auto& oid : oids) {
+    if (mod->is_valid_oid(oid)) {
+      keys->emplace_back(mod->oid_to_key(oid));
+    }
+  }
+
+  return 0;
+}
+
+// Returns the current marker of the listing held in the context by
+// delegating to the list op.  ctx->list.op is dereferenced unconditionally,
+// so list_init() must have been called on this context first.
+int RGWSI_MetaBackend_SObj::list_get_marker(RGWSI_MetaBackend::Context *_ctx,
+ string *marker)
+{
+ RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+
+ return ctx->list.op->get_marker(marker);
+}
+
diff --git a/src/rgw/services/svc_meta_be_sobj.h b/src/rgw/services/svc_meta_be_sobj.h
new file mode 100644
index 000000000..304afc8bf
--- /dev/null
+++ b/src/rgw/services/svc_meta_be_sobj.h
@@ -0,0 +1,194 @@
+
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_meta_be.h"
+#include "svc_sys_obj.h"
+
+
+// Abstract per-section module used by the sysobj metadata backend to map
+// metadata keys to pool/oid pairs and back.
+class RGWSI_MBSObj_Handler_Module : public RGWSI_MetaBackend::Module {
+protected:
+ std::string section;
+public:
+ RGWSI_MBSObj_Handler_Module(const std::string& _section) : section(_section) {}
+ // Resolve the pool and object id that store `key`.
+ virtual void get_pool_and_oid(const std::string& key, rgw_pool *pool, std::string *oid) = 0;
+ // Prefix passed to the list op when enumerating this section's objects.
+ virtual const std::string& get_oid_prefix() = 0;
+ virtual std::string key_to_oid(const std::string& key) = 0;
+ // Listing skips oids for which this returns false.
+ virtual bool is_valid_oid(const std::string& oid) = 0;
+ virtual std::string oid_to_key(const std::string& oid) = 0;
+
+ const std::string& get_section() {
+ return section;
+ }
+
+ /* key to use for hashing entries for log shard placement */
+ virtual std::string get_hash_key(const std::string& key) {
+ return section + ":" + key;
+ }
+};
+
+// Read parameters for the sysobj metadata backend.  All pointer members are
+// optional outputs/inputs that default to null; get_entry() dereferences pbl.
+struct RGWSI_MBSObj_GetParams : public RGWSI_MetaBackend::GetParams {
+ bufferlist *pbl{nullptr};
+ std::map<std::string, bufferlist> *pattrs{nullptr};
+ rgw_cache_entry_info *cache_info{nullptr};
+ boost::optional<obj_version> refresh_version;
+
+ RGWSI_MBSObj_GetParams() {}
+ RGWSI_MBSObj_GetParams(bufferlist *_pbl,
+ std::map<std::string, bufferlist> *_pattrs,
+ ceph::real_time *_pmtime) : RGWSI_MetaBackend::GetParams(_pmtime),
+ pbl(_pbl),
+ pattrs(_pattrs) {}
+
+ // Chainable setters: each returns *this.
+ RGWSI_MBSObj_GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) {
+ cache_info = _cache_info;
+ return *this;
+ }
+ RGWSI_MBSObj_GetParams& set_refresh_version(boost::optional<obj_version>& _refresh_version) {
+ refresh_version = _refresh_version;
+ return *this;
+ }
+};
+
+// Write parameters for the sysobj metadata backend: the payload (held by
+// value), optional attrs, and the `exclusive` flag that put_entry() forwards
+// to rgw_put_system_obj().
+struct RGWSI_MBSObj_PutParams : public RGWSI_MetaBackend::PutParams {
+ bufferlist bl;
+ std::map<std::string, bufferlist> *pattrs{nullptr};
+ bool exclusive{false};
+
+ RGWSI_MBSObj_PutParams() {}
+ // Leaves `bl` empty and `exclusive` false.
+ RGWSI_MBSObj_PutParams(std::map<std::string, bufferlist> *_pattrs,
+ const ceph::real_time& _mtime) : RGWSI_MetaBackend::PutParams(_mtime),
+ pattrs(_pattrs) {}
+ // Copies `_bl` into the params.
+ RGWSI_MBSObj_PutParams(bufferlist& _bl,
+ std::map<std::string, bufferlist> *_pattrs,
+ const ceph::real_time& _mtime,
+ bool _exclusive) : RGWSI_MetaBackend::PutParams(_mtime),
+ bl(_bl),
+ pattrs(_pattrs),
+ exclusive(_exclusive) {}
+};
+
+struct RGWSI_MBSObj_RemoveParams : public RGWSI_MetaBackend::RemoveParams {
+};
+
+// Metadata backend that stores each entry as a system object and records
+// modifications in the mdlog.
+class RGWSI_MetaBackend_SObj : public RGWSI_MetaBackend
+{
+protected:
+ RGWSI_SysObj *sysobj_svc{nullptr};
+
+public:
+ // Per-operation context: the module used for key/oid mapping plus the
+ // state of an in-progress listing.
+ struct Context_SObj : public RGWSI_MetaBackend::Context {
+ RGWSI_MBSObj_Handler_Module *module{nullptr};
+ struct _list {
+ std::optional<RGWSI_SysObj::Pool> pool;
+ std::optional<RGWSI_SysObj::Pool::Op> op;
+ } list;
+
+ void init(RGWSI_MetaBackend_Handler *h) override;
+ };
+
+ RGWSI_MetaBackend_SObj(CephContext *cct);
+ virtual ~RGWSI_MetaBackend_SObj();
+
+ RGWSI_MetaBackend::Type get_type() {
+ return MDBE_SOBJ;
+ }
+
+ // Wires in the mdlog service (via base_init) and the sysobj service.
+ void init(RGWSI_SysObj *_sysobj_svc,
+ RGWSI_MDLog *_mdlog_svc) {
+ base_init(_mdlog_svc);
+ sysobj_svc = _sysobj_svc;
+ }
+
+ RGWSI_MetaBackend_Handler *alloc_be_handler() override;
+ RGWSI_MetaBackend::Context *alloc_ctx() override;
+
+
+ int call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb) override;
+
+ // mdlog bookkeeping around a modification: pre_modify logs the entry with
+ // status op_type, post_modify logs COMPLETE or ABORT depending on `ret`.
+ int pre_modify(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWMetadataLogData& log_data,
+ RGWObjVersionTracker *objv_tracker,
+ RGWMDLogStatus op_type,
+ optional_yield y);
+ int post_modify(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWMetadataLogData& log_data,
+ RGWObjVersionTracker *objv_tracker, int ret,
+ optional_yield y);
+
+ // Entry access; the params arguments are downcast to the RGWSI_MBSObj_*
+ // parameter types by the implementations.
+ int get_entry(RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWSI_MetaBackend::GetParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp,
+ bool get_raw_attrs=false) override;
+ int put_entry(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWSI_MetaBackend::PutParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y) override;
+ int remove_entry(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ RGWSI_MetaBackend::RemoveParams& params,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y) override;
+
+ // Key listing: list_init() must be called before list_next() and
+ // list_get_marker(), which dereference the list op it creates.
+ int list_init(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *_ctx, const std::string& marker) override;
+ int list_next(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *_ctx,
+ int max, std::list<std::string> *keys,
+ bool *truncated) override;
+ int list_get_marker(RGWSI_MetaBackend::Context *ctx,
+ std::string *marker) override;
+
+ int get_shard_id(RGWSI_MetaBackend::Context *ctx,
+ const std::string& key,
+ int *shard_id) override;
+
+ // Runs `f` with a temporary context; `opt` is accepted for interface
+ // compatibility.
+ int call(std::optional<RGWSI_MetaBackend_CtxParams> opt,
+ std::function<int(RGWSI_MetaBackend::Context *)> f) override;
+};
+
+
+// Handler for the sysobj backend.  It carries the (non-owning) module pointer
+// that Context_SObj::init() copies into each context; the module is null
+// until set_module() is called.
+class RGWSI_MetaBackend_Handler_SObj : public RGWSI_MetaBackend_Handler {
+ friend class RGWSI_MetaBackend_SObj::Context_SObj;
+
+ RGWSI_MBSObj_Handler_Module *module{nullptr};
+
+public:
+ RGWSI_MetaBackend_Handler_SObj(RGWSI_MetaBackend *be) :
+ RGWSI_MetaBackend_Handler(be) {}
+
+ void set_module(RGWSI_MBSObj_Handler_Module *_module) {
+ module = _module;
+ }
+
+ RGWSI_MBSObj_Handler_Module *get_module() {
+ return module;
+ }
+};
diff --git a/src/rgw/services/svc_meta_be_types.h b/src/rgw/services/svc_meta_be_types.h
new file mode 100644
index 000000000..4a88a8e0b
--- /dev/null
+++ b/src/rgw/services/svc_meta_be_types.h
@@ -0,0 +1,26 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+// Numeric tags for metadata backend kinds.  This is an unscoped enum, so the
+// enumerator names (SOBJ, OTP, ...) are visible in the enclosing scope.
+enum RGWSI_META_BE_TYPES {
+ SOBJ = 1,
+ OTP = 2,
+ BUCKET = 3,
+ BI = 4,
+ USER = 5,
+};
+
diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc
new file mode 100644
index 000000000..43f84ed0a
--- /dev/null
+++ b/src/rgw/services/svc_notify.cc
@@ -0,0 +1,515 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "include/random.h"
+#include "include/Context.h"
+#include "common/errno.h"
+
+#include "rgw_cache.h"
+#include "svc_notify.h"
+#include "svc_finisher.h"
+#include "svc_zone.h"
+#include "svc_rados.h"
+
+#include "rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+static string notify_oid_prefix = "notify";
+
+// Tears the service down on destruction; shutdown() is a no-op when it has
+// already run (guarded by `finalized`).
+RGWSI_Notify::~RGWSI_Notify()
+{
+ shutdown();
+}
+
+
+/*
+ * Watch context for a single notify control object.
+ *
+ * One instance is created per control object by RGWSI_Notify::init_watch().
+ * Incoming notifications are forwarded to the service's watch callback and
+ * then acknowledged; watch errors remove the watcher from the service's
+ * active set and schedule a re-registration through the service.
+ *
+ * Registration can be done synchronously (register_watch) or in two steps
+ * (register_watch_async followed by register_watch_finish).  The pending
+ * AioCompletion of the two-step form is owned by this object: it is released
+ * exactly once and the pointer is cleared afterwards, because
+ * AioCompletion::release() invalidates the completion.
+ */
+class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 {
+  CephContext *cct;
+  RGWSI_Notify *svc;
+  int index;
+  RGWSI_RADOS::Obj obj;
+  uint64_t watch_handle;
+  int register_ret{0};
+  bool unregister_done{false};
+  librados::AioCompletion *register_completion{nullptr};
+
+  // Context scheduled via the service to re-establish the watch after an error.
+  class C_ReinitWatch : public Context {
+    RGWWatcher *watcher;
+    public:
+      explicit C_ReinitWatch(RGWWatcher *_watcher) : watcher(_watcher) {}
+      void finish(int r) override {
+        watcher->reinit();
+      }
+  };
+
+  CephContext *get_cct() const override { return cct; }
+  unsigned get_subsys() const override { return dout_subsys; }
+  std::ostream& gen_prefix(std::ostream& out) const override {
+    return out << "rgw watcher librados: ";
+  }
+
+  // Release the pending registration completion (if any) and forget it, so
+  // that it can never be released or waited on a second time.
+  void release_register_completion() {
+    if (register_completion) {
+      register_completion->release();
+      register_completion = nullptr;
+    }
+  }
+
+public:
+  RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, RGWSI_RADOS::Obj& o) : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {}
+
+  // Drop a completion that was started by register_watch_async() but never
+  // consumed by register_watch_finish(); otherwise it would leak.
+  ~RGWWatcher() {
+    release_register_completion();
+  }
+
+  // librados callback: a notification arrived on the watched object.
+  void handle_notify(uint64_t notify_id,
+                     uint64_t cookie,
+                     uint64_t notifier_id,
+                     bufferlist& bl) override {
+    ldpp_dout(this, 10) << "RGWWatcher::handle_notify() "
+                        << " notify_id " << notify_id
+                        << " cookie " << cookie
+                        << " notifier " << notifier_id
+                        << " bl.length()=" << bl.length() << dendl;
+
+    // Fault injection: drop the notification (without acking it) with the
+    // configured probability.
+    if (unlikely(svc->inject_notify_timeout_probability == 1) ||
+        (svc->inject_notify_timeout_probability > 0 &&
+         (svc->inject_notify_timeout_probability >
+          ceph::util::generate_random_number(0.0, 1.0)))) {
+      ldpp_dout(this, 0)
+        << "RGWWatcher::handle_notify() dropping notification! "
+        << "If this isn't what you want, set "
+        << "rgw_inject_notify_timeout_probability to zero!" << dendl;
+      return;
+    }
+
+    svc->watch_cb(this, notify_id, cookie, notifier_id, bl);
+
+    bufferlist reply_bl; // empty reply payload
+    obj.notify_ack(notify_id, cookie, reply_bl);
+  }
+
+  // librados callback: the watch broke.  Mark the watcher as gone and
+  // schedule a reinit through the service.
+  void handle_error(uint64_t cookie, int err) override {
+    ldpp_dout(this, -1) << "RGWWatcher::handle_error cookie " << cookie
+                        << " err " << cpp_strerror(err) << dendl;
+    svc->remove_watcher(index);
+    svc->schedule_context(new C_ReinitWatch(this));
+  }
+
+  // Unregister (if not already done) and register again; on registration
+  // failure another reinit is scheduled.
+  void reinit() {
+    if(!unregister_done) {
+      int ret = unregister_watch();
+      if (ret < 0) {
+        ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl;
+      }
+    }
+    int ret = register_watch();
+    if (ret < 0) {
+      ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl;
+      svc->schedule_context(new C_ReinitWatch(this));
+      return;
+    }
+  }
+
+  // Stop watching.  unregister_done is set even when unwatch fails, so the
+  // unwatch is not retried by reinit().
+  int unregister_watch() {
+    int r = svc->unwatch(obj, watch_handle);
+    unregister_done = true;
+    if (r < 0) {
+      return r;
+    }
+    svc->remove_watcher(index);
+    return 0;
+  }
+
+  // Step 1 of asynchronous registration: start the watch.  The outcome is
+  // collected by register_watch_finish().
+  int register_watch_async() {
+    release_register_completion();
+    register_completion = librados::Rados::aio_create_completion(nullptr, nullptr);
+    register_ret = obj.aio_watch(register_completion, &watch_handle, this);
+    if (register_ret < 0) {
+      // release AND clear, so no later call touches the stale pointer
+      release_register_completion();
+      return register_ret;
+    }
+    return 0;
+  }
+
+  // Step 2 of asynchronous registration: wait for the watch to be
+  // established and, on success, add this watcher to the service's set.
+  int register_watch_finish() {
+    if (register_ret < 0) {
+      return register_ret;
+    }
+    if (!register_completion) {
+      return -EINVAL;
+    }
+    register_completion->wait_for_complete();
+    int r = register_completion->get_return_value();
+    release_register_completion();
+    if (r < 0) {
+      return r;
+    }
+    svc->add_watcher(index);
+    unregister_done = false;
+    return 0;
+  }
+
+  // Synchronous registration.
+  int register_watch() {
+    int r = obj.watch(&watch_handle, this);
+    if (r < 0) {
+      return r;
+    }
+    svc->add_watcher(index);
+    unregister_done = false;
+    return 0;
+  }
+};
+
+
+// Shutdown hook registered with the finisher service in do_start(); when
+// called it shuts the notify service down.
+class RGWSI_Notify_ShutdownCB : public RGWSI_Finisher::ShutdownCB
+{
+ RGWSI_Notify *svc;
+public:
+ RGWSI_Notify_ShutdownCB(RGWSI_Notify *_svc) : svc(_svc) {}
+ void call() override {
+ svc->shutdown();
+ }
+};
+
+// Builds the name of the i-th control object: "<notify_oid_prefix>.<i>".
+// Plain string concatenation is used instead of snprintf into a
+// variable-length array, which is not standard C++.
+string RGWSI_Notify::get_control_oid(int i)
+{
+  return notify_oid_prefix + "." + std::to_string(i);
+}
+
+// Selects the control object for `key` by hashing the key modulo
+// num_watchers.
+// Do not call pick_control_obj() before init_watch(): num_watchers is still
+// 0 then, and the modulo below would divide by zero.
+RGWSI_RADOS::Obj RGWSI_Notify::pick_control_obj(const string& key)
+{
+ uint32_t r = ceph_str_hash_linux(key.c_str(), key.size());
+
+ int i = r % num_watchers;
+ return notify_objs[i];
+}
+
+/*
+ * Create the control objects and register a watch on each of them.
+ *
+ * The number of control objects comes from rgw_num_control_oids; a value of
+ * 0 selects the compat mode with a single object named after the bare
+ * prefix, and any value <= 0 is treated as one watcher.
+ *
+ * Registration is done in two passes: all watches are started
+ * asynchronously, then each one is waited for.  A failure to open or create
+ * a control object aborts immediately; registration failures are logged, the
+ * remaining watchers are still processed, and the last error is returned.
+ */
+int RGWSI_Notify::init_watch(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  num_watchers = cct->_conf->rgw_num_control_oids;
+
+  bool compat_oid = (num_watchers == 0);
+
+  if (num_watchers <= 0)
+    num_watchers = 1;
+
+  // Value-initialise the array so every slot starts out as nullptr.  If we
+  // bail out early below, finalize_watch() still walks all num_watchers
+  // slots and deletes them; deleting nullptr is harmless, deleting an
+  // uninitialised pointer is not.
+  watchers = new RGWWatcher *[num_watchers]();
+
+  int error = 0;
+
+  notify_objs.resize(num_watchers);
+
+  for (int i=0; i < num_watchers; i++) {
+    string notify_oid;
+
+    if (!compat_oid) {
+      notify_oid = get_control_oid(i);
+    } else {
+      notify_oid = notify_oid_prefix;
+    }
+
+    notify_objs[i] = rados_svc->handle().obj({control_pool, notify_oid});
+    auto& notify_obj = notify_objs[i];
+
+    int r = notify_obj.open(dpp);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "ERROR: notify_obj.open() returned r=" << r << dendl;
+      return r;
+    }
+
+    // make sure the control object exists; an already existing one is fine
+    librados::ObjectWriteOperation op;
+    op.create(false);
+    r = notify_obj.operate(dpp, &op, y);
+    if (r < 0 && r != -EEXIST) {
+      ldpp_dout(dpp, 0) << "ERROR: notify_obj.operate() returned r=" << r << dendl;
+      return r;
+    }
+
+    RGWWatcher *watcher = new RGWWatcher(cct, this, i, notify_obj);
+    watchers[i] = watcher;
+
+    r = watcher->register_watch_async();
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: register_watch_async() returned " << r << dendl;
+      error = r;
+      continue;
+    }
+  }
+
+  // second pass: collect the results of the asynchronous registrations
+  for (int i = 0; i < num_watchers; ++i) {
+    int r = watchers[i]->register_watch_finish();
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: async watch returned " << r << dendl;
+      error = r;
+    }
+  }
+
+  if (error < 0) {
+    return error;
+  }
+
+  return 0;
+}
+
+// Tears down all watchers: those still present in watchers_set are
+// unregistered first, then every watcher object and the array holding them
+// are freed.
+void RGWSI_Notify::finalize_watch()
+{
+  for (int idx = 0; idx < num_watchers; ++idx) {
+    RGWWatcher *w = watchers[idx];
+    const bool still_registered = (watchers_set.count(idx) > 0);
+    if (still_registered) {
+      w->unregister_watch();
+    }
+    delete w;
+  }
+
+  delete[] watchers;
+}
+
+// Starts the notify service: brings up the zone, rados and finisher services
+// it depends on (in that order), reads its config options, sets up the
+// watches on the control pool and finally registers a shutdown callback with
+// the finisher service.  Returns the first negative error encountered, or 0.
+int RGWSI_Notify::do_start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+ int r = zone_svc->start(y, dpp);
+ if (r < 0) {
+ return r;
+ }
+
+ assert(zone_svc->is_started()); /* otherwise there's an ordering problem */
+
+ r = rados_svc->start(y, dpp);
+ if (r < 0) {
+ return r;
+ }
+ r = finisher_svc->start(y, dpp);
+ if (r < 0) {
+ return r;
+ }
+
+ inject_notify_timeout_probability =
+ cct->_conf.get_val<double>("rgw_inject_notify_timeout_probability");
+ max_notify_retries = cct->_conf.get_val<uint64_t>("rgw_max_notify_retries");
+
+ // the control objects live in the zone's control pool
+ control_pool = zone_svc->get_zone_params().control_pool;
+
+ int ret = init_watch(dpp, y);
+ if (ret < 0) {
+ ldpp_dout(dpp, -1) << "ERROR: failed to initialize watch: " << cpp_strerror(-ret) << dendl;
+ return ret;
+ }
+
+ // remember the registration handle so shutdown() can unregister the callback
+ shutdown_cb = new RGWSI_Notify_ShutdownCB(this);
+ int handle;
+ finisher_svc->register_caller(shutdown_cb, &handle);
+ finisher_handle = handle;
+
+ return 0;
+}
+
+// Shuts the service down once: unregisters the finisher callback (if one was
+// registered), tears down the watchers and frees the callback object.
+// Subsequent calls return immediately because `finalized` is set.
+void RGWSI_Notify::shutdown()
+{
+ if (finalized) {
+ return;
+ }
+
+ if (finisher_handle) {
+ finisher_svc->unregister_caller(*finisher_handle);
+ }
+ finalize_watch();
+
+ delete shutdown_cb;
+
+ finalized = true;
+}
+
+// Cancels the watch identified by `watch_handle` on `obj` and then flushes
+// pending watch callbacks.  Returns the first negative error, or 0.
+int RGWSI_Notify::unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle)
+{
+  if (int r = obj.unwatch(watch_handle); r < 0) {
+    ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << r << dendl;
+    return r;
+  }
+
+  if (int r = rados_svc->handle().watch_flush(); r < 0) {
+    ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << r << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
+// Marks watcher `i` as registered.  Once every watcher is in the set, the
+// registered callback is told that notifications are enabled.
+void RGWSI_Notify::add_watcher(int i)
+{
+ ldout(cct, 20) << "add_watcher() i=" << i << dendl;
+ std::unique_lock l{watchers_lock};
+ watchers_set.insert(i);
+ if (watchers_set.size() == (size_t)num_watchers) {
+ ldout(cct, 2) << "all " << num_watchers << " watchers are set, enabling cache" << dendl;
+ _set_enabled(true);
+ }
+}
+
+// Marks watcher `i` as no longer registered.  If the set was complete before
+// and this call actually removed an element, the registered callback is told
+// that notifications are disabled.
+void RGWSI_Notify::remove_watcher(int i)
+{
+  ldout(cct, 20) << "remove_watcher() i=" << i << dendl;
+  std::unique_lock l{watchers_lock};
+
+  const bool all_were_set = (watchers_set.size() == (size_t)num_watchers);
+  const bool erased = (watchers_set.erase(i) > 0);
+
+  if (all_were_set && erased) { /* actually removed */
+    ldout(cct, 2) << "removed watcher, disabling cache" << dendl;
+    _set_enabled(false);
+  }
+}
+
+// Forwards an incoming notification to the registered callback while holding
+// watchers_lock shared.  Returns the callback's result, or 0 when no
+// callback is registered.
+int RGWSI_Notify::watch_cb(const DoutPrefixProvider *dpp,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl)
+{
+ std::shared_lock l{watchers_lock};
+ if (cb) {
+ return cb->watch_cb(dpp, notify_id, cookie, notifier_id, bl);
+ }
+ return 0;
+}
+
+// Locking wrapper around _set_enabled(): takes watchers_lock exclusively.
+void RGWSI_Notify::set_enabled(bool status)
+{
+ std::unique_lock l{watchers_lock};
+ _set_enabled(status);
+}
+
+// Records the enabled state and passes it on to the registered callback, if
+// any.  Takes no lock itself; the callers in this file hold watchers_lock.
+void RGWSI_Notify::_set_enabled(bool status)
+{
+ enabled = status;
+ if (cb) {
+ cb->set_enabled(status);
+ }
+}
+
+/*
+ * Send a cache notification for `key` to the control object chosen for it.
+ *
+ * Returns 0 without doing anything while no watchers are set up, otherwise
+ * the result of robust_notify().
+ */
+int RGWSI_Notify::distribute(const DoutPrefixProvider *dpp, const string& key,
+                             const RGWCacheNotifyInfo& cni,
+                             optional_yield y)
+{
+  /* The RGW uses the control pool to store the watch notify objects.
+     RGWSI_Notify::do_start calls zone_svc->start first and init_watch() later.
+     The first time RGW starts in a cluster it tries to create the zone and
+     zonegroup system objects, so it can end up here before init_watch() ran.
+     In that case num_watchers is still 0 and pick_control_obj would divide
+     by zero, so there is nothing to distribute to yet.
+  */
+  if (!(num_watchers > 0)) {
+    return 0;
+  }
+
+  RGWSI_RADOS::Obj notify_obj = pick_control_obj(key);
+
+  ldpp_dout(dpp, 10) << "distributing notification oid=" << notify_obj.get_ref().obj
+                     << " cni=" << cni << dendl;
+  return robust_notify(dpp, notify_obj, cni, y);
+}
+
+namespace librados {
+
+// Prints a notify timeout as "<notifier_id>:<cookie>"; used when logging the
+// list of watchers that did not respond.
+static std::ostream& operator<<(std::ostream& out, const notify_timeout_t& t)
+{
+ return out << t.notifier_id << ':' << t.cookie;
+}
+
+} // namespace librados
+
+using timeout_vector = std::vector<librados::notify_timeout_t>;
+
+// Extracts the list of timed-out watchers from a notify reply buffer.
+// Layout as decoded below: a u32 ack count, then per ack an id pair
+// (u64, u64) and a u32-length-prefixed payload; after that a u32 timeout
+// count followed by one id pair per timeout.  Acks are skipped, timeouts are
+// returned.
+// NOTE(review): decoding presumably throws buffer::error on a short or
+// malformed buffer (the caller in this file catches that type) -- confirm.
+static timeout_vector decode_timeouts(const bufferlist& bl)
+{
+ using ceph::decode;
+ auto p = bl.begin();
+
+ // decode and discard the acks
+ uint32_t num_acks;
+ decode(num_acks, p);
+ for (auto i = 0u; i < num_acks; ++i) {
+ std::pair<uint64_t, uint64_t> id;
+ decode(id, p);
+ // discard the payload
+ uint32_t blen;
+ decode(blen, p);
+ p += blen;
+ }
+
+ // decode and return the timeouts
+ uint32_t num_timeouts;
+ decode(num_timeouts, p);
+
+ timeout_vector timeouts;
+ for (auto i = 0u; i < num_timeouts; ++i) {
+ std::pair<uint64_t, uint64_t> id;
+ decode(id, p);
+ timeouts.push_back({id.first, id.second});
+ }
+ return timeouts;
+}
+
+// Sends `cni` to `notify_obj`.  Any failure is logged together with the
+// watchers that did not respond (when the reply can be decoded).  If the
+// notify timed out, the notification is replaced by an INVALIDATE_OBJ for the
+// same object and retried up to max_notify_retries times, as long as the
+// result keeps being -ETIMEDOUT.  Returns the result of the last notify
+// attempt.
+int RGWSI_Notify::robust_notify(const DoutPrefixProvider *dpp,
+ RGWSI_RADOS::Obj& notify_obj,
+ const RGWCacheNotifyInfo& cni,
+ optional_yield y)
+{
+ bufferlist bl, rbl;
+ encode(cni, bl);
+
+ // First, try to send, without being fancy about it.
+ auto r = notify_obj.notify(dpp, bl, 0, &rbl, y);
+
+ if (r < 0) {
+ timeout_vector timeouts;
+ try {
+ timeouts = decode_timeouts(rbl);
+ } catch (const buffer::error& e) {
+ // a reply that cannot be decoded only costs us the watcher list in the log
+ ldpp_dout(dpp, 0) << "robust_notify failed to decode notify response: "
+ << e.what() << dendl;
+ }
+
+ ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " Watchers " << timeouts << " did not respond."
+ << " Notify failed on object " << cni.obj << ": "
+ << cpp_strerror(-r) << dendl;
+ }
+
+ // If we timed out, get serious.
+ if (r == -ETIMEDOUT) {
+ // retry with an invalidation of the object instead of the original payload
+ RGWCacheNotifyInfo info;
+ info.op = INVALIDATE_OBJ;
+ info.obj = cni.obj;
+ bufferlist retrybl;
+ encode(info, retrybl);
+
+ for (auto tries = 0u;
+ r == -ETIMEDOUT && tries < max_notify_retries;
+ ++tries) {
+ ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " Invalidating obj=" << info.obj << " tries="
+ << tries << dendl;
+ r = notify_obj.notify(dpp, retrybl, 0, &rbl, y);
+ if (r < 0) {
+ timeout_vector timeouts;
+ try {
+ timeouts = decode_timeouts(rbl);
+ } catch (const buffer::error& e) {
+ ldpp_dout(dpp, 0) << "robust_notify failed to decode notify response: "
+ << e.what() << dendl;
+ }
+
+ ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__
+ << " Watchers " << timeouts << " did not respond."
+ << " Invalidation attempt " << tries << " failed: "
+ << cpp_strerror(-r) << dendl;
+ }
+ }
+ }
+ return r;
+}
+
+// Installs the notification callback and immediately replays the current
+// enabled state to it.
+void RGWSI_Notify::register_watch_cb(CB *_cb)
+{
+ std::unique_lock l{watchers_lock};
+ cb = _cb;
+ _set_enabled(enabled);
+}
+
+// Hands the context over to the finisher service for execution.
+void RGWSI_Notify::schedule_context(Context *c)
+{
+ finisher_svc->schedule_context(c);
+}
diff --git a/src/rgw/services/svc_notify.h b/src/rgw/services/svc_notify.h
new file mode 100644
index 000000000..f7329136e
--- /dev/null
+++ b/src/rgw/services/svc_notify.h
@@ -0,0 +1,106 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+
+
+class Context;
+
+class RGWSI_Zone;
+class RGWSI_Finisher;
+
+class RGWWatcher;
+class RGWSI_Notify_ShutdownCB;
+struct RGWCacheNotifyInfo;
+
+// Service that distributes cache notifications through watch/notify on a set
+// of control objects in the zone's control pool.
+class RGWSI_Notify : public RGWServiceInstance
+{
+ friend class RGWWatcher;
+ friend class RGWSI_Notify_ShutdownCB;
+ friend class RGWServices_Def;
+
+public:
+ class CB;
+
+private:
+ RGWSI_Zone *zone_svc{nullptr};
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_Finisher *finisher_svc{nullptr};
+
+ // guards watchers_set, cb and enabled
+ ceph::shared_mutex watchers_lock = ceph::make_shared_mutex("watchers_lock");
+ rgw_pool control_pool;
+
+ // num_watchers stays 0 until init_watch() has run
+ int num_watchers{0};
+ RGWWatcher **watchers{nullptr};
+ // indices of the watchers that are currently registered
+ std::set<int> watchers_set;
+ std::vector<RGWSI_RADOS::Obj> notify_objs;
+
+ bool enabled{false};
+
+ double inject_notify_timeout_probability{0};
+ uint64_t max_notify_retries = 10;
+
+ std::string get_control_oid(int i);
+ RGWSI_RADOS::Obj pick_control_obj(const std::string& key);
+
+ CB *cb{nullptr};
+
+ // set once the shutdown callback has been registered with the finisher
+ std::optional<int> finisher_handle;
+ RGWSI_Notify_ShutdownCB *shutdown_cb{nullptr};
+
+ // makes shutdown() idempotent
+ bool finalized{false};
+
+ int init_watch(const DoutPrefixProvider *dpp, optional_yield y);
+ void finalize_watch();
+
+ void init(RGWSI_Zone *_zone_svc,
+ RGWSI_RADOS *_rados_svc,
+ RGWSI_Finisher *_finisher_svc) {
+ zone_svc = _zone_svc;
+ rados_svc = _rados_svc;
+ finisher_svc = _finisher_svc;
+ }
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+ void shutdown() override;
+
+ int unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle);
+ void add_watcher(int i);
+ void remove_watcher(int i);
+
+ int watch_cb(const DoutPrefixProvider *dpp,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl);
+ // _set_enabled() takes no lock itself; set_enabled() is the locking wrapper
+ void _set_enabled(bool status);
+ void set_enabled(bool status);
+
+ int robust_notify(const DoutPrefixProvider *dpp, RGWSI_RADOS::Obj& notify_obj,
+ const RGWCacheNotifyInfo& bl, optional_yield y);
+
+ void schedule_context(Context *c);
+public:
+ RGWSI_Notify(CephContext *cct): RGWServiceInstance(cct) {}
+
+ virtual ~RGWSI_Notify() override;
+
+ // Interface implemented by the consumer of notifications.
+ class CB {
+ public:
+ virtual ~CB() {}
+ virtual int watch_cb(const DoutPrefixProvider *dpp,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl) = 0;
+ virtual void set_enabled(bool status) = 0;
+ };
+
+ // Sends a notification for `key`; a no-op returning 0 while no watchers
+ // are set up.
+ int distribute(const DoutPrefixProvider *dpp, const std::string& key, const RGWCacheNotifyInfo& bl,
+ optional_yield y);
+
+ void register_watch_cb(CB *cb);
+};
diff --git a/src/rgw/services/svc_otp.cc b/src/rgw/services/svc_otp.cc
new file mode 100644
index 000000000..81d8d5711
--- /dev/null
+++ b/src/rgw/services/svc_otp.cc
@@ -0,0 +1,186 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_otp.h"
+#include "svc_zone.h"
+#include "svc_meta.h"
+#include "svc_meta_be_sobj.h"
+
+#include "rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Backend module for the "otp" section.  Objects live in the zone's
+// otp_pool and keys map to oids one-to-one: the oid is the key, the oid
+// prefix is empty and every oid is considered valid.
+class RGW_MB_Handler_Module_OTP : public RGWSI_MBSObj_Handler_Module {
+ RGWSI_Zone *zone_svc;
+ string prefix;
+public:
+ RGW_MB_Handler_Module_OTP(RGWSI_Zone *_zone_svc) : RGWSI_MBSObj_Handler_Module("otp"),
+ zone_svc(_zone_svc) {}
+
+ // Either output pointer may be null, in which case it is left alone.
+ void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override {
+ if (pool) {
+ *pool = zone_svc->get_zone_params().otp_pool;
+ }
+
+ if (oid) {
+ *oid = key;
+ }
+ }
+
+ // `prefix` is never assigned in this class, so this is the empty string.
+ const string& get_oid_prefix() override {
+ return prefix;
+ }
+
+ bool is_valid_oid(const string& oid) override {
+ return true;
+ }
+
+ string key_to_oid(const string& key) override {
+ return key;
+ }
+
+ string oid_to_key(const string& oid) override {
+ return oid;
+ }
+};
+
+// Forwards the CephContext to RGWServiceInstance; dependencies are supplied
+// later through init().
+RGWSI_OTP::RGWSI_OTP(CephContext *cct): RGWServiceInstance(cct) {
+}
+
+// Empty destructor body; members clean up after themselves.
+RGWSI_OTP::~RGWSI_OTP() {
+}
+
+// Records the services this one depends on in `svc` (including a pointer to
+// itself as svc.otp).
+void RGWSI_OTP::init(RGWSI_Zone *_zone_svc,
+ RGWSI_Meta *_meta_svc,
+ RGWSI_MetaBackend *_meta_be_svc)
+{
+ svc.otp = this;
+ svc.zone = _zone_svc;
+ svc.meta = _meta_svc;
+ svc.meta_be = _meta_be_svc;
+}
+
+// Creates the OTP metadata backend handler and attaches the OTP module to it.
+// The module is owned by be_module; the handler only receives a raw pointer.
+// Returns a negative error if the handler cannot be created, 0 otherwise.
+int RGWSI_OTP::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+ /* create the backend handler for OTP entries */
+
+ RGWSI_MetaBackend_Handler *_otp_be_handler;
+
+ int r = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_OTP, &_otp_be_handler);
+ if (r < 0) {
+ ldout(ctx(), 0) << "ERROR: failed to create be handler: r=" << r << dendl;
+ return r;
+ }
+
+ be_handler = _otp_be_handler;
+
+ RGWSI_MetaBackend_Handler_OTP *otp_be_handler = static_cast<RGWSI_MetaBackend_Handler_OTP *>(_otp_be_handler);
+
+ auto otp_be_module = new RGW_MB_Handler_Module_OTP(svc.zone);
+ be_module.reset(otp_be_module);
+ otp_be_handler->set_module(otp_be_module);
+
+ return 0;
+}
+
+// Reads all OTP devices stored under `key` into `devices` (and the mtime
+// into `pmtime`) through the metadata backend.  Returns the backend's
+// negative error code on failure, 0 otherwise.
+int RGWSI_OTP::read_all(RGWSI_OTP_BE_Ctx& ctx,
+                        const string& key,
+                        otp_devices_list_t *devices,
+                        real_time *pmtime,
+                        RGWObjVersionTracker *objv_tracker,
+                        optional_yield y, const DoutPrefixProvider *dpp)
+{
+  RGWSI_MBOTP_GetParams get_params;
+  get_params.pdevices = devices;
+  get_params.pmtime = pmtime;
+
+  const int ret = svc.meta_be->get_entry(ctx.get(), key, get_params, objv_tracker, y, dpp);
+  return (ret < 0) ? ret : 0;
+}
+
+// Convenience overload: uses the user's string form (uid.to_str()) as the key.
+int RGWSI_OTP::read_all(RGWSI_OTP_BE_Ctx& ctx,
+ const rgw_user& uid,
+ otp_devices_list_t *devices,
+ real_time *pmtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp)
+{
+ return read_all(ctx,
+ uid.to_str(),
+ devices,
+ pmtime,
+ objv_tracker,
+ y,
+ dpp);
+}
+
+// Stores `devices` as the complete OTP device list under `key`, with the
+// given mtime, through the metadata backend.  Returns the backend's negative
+// error code on failure, 0 otherwise.
+int RGWSI_OTP::store_all(const DoutPrefixProvider *dpp,
+                         RGWSI_OTP_BE_Ctx& ctx,
+                         const string& key,
+                         const otp_devices_list_t& devices,
+                         real_time mtime,
+                         RGWObjVersionTracker *objv_tracker,
+                         optional_yield y)
+{
+  RGWSI_MBOTP_PutParams put_params;
+  put_params.mtime = mtime;
+  put_params.devices = devices;
+
+  const int ret = svc.meta_be->put_entry(dpp, ctx.get(), key, put_params, objv_tracker, y);
+  return (ret < 0) ? ret : 0;
+}
+
+// Convenience overload: uses the user's string form (uid.to_str()) as the key.
+int RGWSI_OTP::store_all(const DoutPrefixProvider *dpp,
+ RGWSI_OTP_BE_Ctx& ctx,
+ const rgw_user& uid,
+ const otp_devices_list_t& devices,
+ real_time mtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y)
+{
+ return store_all(dpp, ctx,
+ uid.to_str(),
+ devices,
+ mtime,
+ objv_tracker,
+ y);
+}
+
+// Removes the OTP entry stored under `key` through the metadata backend.
+// Returns the backend's negative error code on failure, 0 otherwise.
+int RGWSI_OTP::remove_all(const DoutPrefixProvider *dpp,
+                          RGWSI_OTP_BE_Ctx& ctx,
+                          const string& key,
+                          RGWObjVersionTracker *objv_tracker,
+                          optional_yield y)
+{
+  RGWSI_MBOTP_RemoveParams remove_params;
+
+  const int ret = svc.meta_be->remove_entry(dpp, ctx.get(), key, remove_params, objv_tracker, y);
+  return (ret < 0) ? ret : 0;
+}
+
+// Convenience overload: uses the user's string form (uid.to_str()) as the key.
+int RGWSI_OTP::remove_all(const DoutPrefixProvider *dpp,
+ RGWSI_OTP_BE_Ctx& ctx,
+ const rgw_user& uid,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y)
+{
+ return remove_all(dpp,ctx,
+ uid.to_str(),
+ objv_tracker,
+ y);
+}
diff --git a/src/rgw/services/svc_otp.h b/src/rgw/services/svc_otp.h
new file mode 100644
index 000000000..e639c2c92
--- /dev/null
+++ b/src/rgw/services/svc_otp.h
@@ -0,0 +1,95 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "cls/otp/cls_otp_types.h"
+
+#include "rgw_service.h"
+
+#include "svc_otp_types.h"
+#include "svc_meta_be_otp.h"
+
+class RGWSI_Zone;
+
+// Service that reads, stores and removes lists of OTP devices through the
+// metadata backend referenced by svc.meta_be. Each operation exists in two
+// flavours: one addressed by a string key and one addressed by an rgw_user,
+// the latter forwarding to the former with uid.to_str() as the key.
+class RGWSI_OTP : public RGWServiceInstance
+{
+ // Typed wrapper around the metadata backend handler used for OTP entries.
+ RGWSI_OTP_BE_Handler be_handler;
+ // Backend module owned by this service.
+ std::unique_ptr<RGWSI_MetaBackend::Module> be_module;
+
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+public:
+ // Non-owning pointers to the services this one collaborates with; all
+ // start out null.
+ struct Svc {
+ RGWSI_OTP *otp{nullptr};
+ RGWSI_Zone *zone{nullptr};
+ RGWSI_Meta *meta{nullptr};
+ RGWSI_MetaBackend *meta_be{nullptr};
+ } svc;
+
+ RGWSI_OTP(CephContext *cct);
+ ~RGWSI_OTP();
+
+ // Gives access to the OTP backend handler wrapper.
+ RGWSI_OTP_BE_Handler& get_be_handler() {
+ return be_handler;
+ }
+
+ // Supplies the zone, meta and meta-backend services this service depends on.
+ void init(RGWSI_Zone *_zone_svc,
+ RGWSI_Meta *_meta_svc,
+ RGWSI_MetaBackend *_meta_be_svc);
+
+ // Reads the device list (and optionally mtime) stored under `key` via the
+ // backend's get_entry(); returns a negative error code on failure, else 0.
+ int read_all(RGWSI_OTP_BE_Ctx& ctx,
+ const std::string& key,
+ otp_devices_list_t *devices,
+ real_time *pmtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+ // Same as above, keyed by uid.to_str().
+ int read_all(RGWSI_OTP_BE_Ctx& ctx,
+ const rgw_user& uid,
+ otp_devices_list_t *devices,
+ real_time *pmtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+ // Stores `devices` and `mtime` under `key` via the backend's put_entry();
+ // returns a negative error code on failure, else 0.
+ int store_all(const DoutPrefixProvider *dpp,
+ RGWSI_OTP_BE_Ctx& ctx,
+ const std::string& key,
+ const otp_devices_list_t& devices,
+ real_time mtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+ // Same as above, keyed by uid.to_str().
+ int store_all(const DoutPrefixProvider *dpp,
+ RGWSI_OTP_BE_Ctx& ctx,
+ const rgw_user& uid,
+ const otp_devices_list_t& devices,
+ real_time mtime,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+ // Removes the entry stored under `key` via the backend's remove_entry();
+ // returns a negative error code on failure, else 0.
+ int remove_all(const DoutPrefixProvider *dpp,
+ RGWSI_OTP_BE_Ctx& ctx,
+ const std::string& key,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+ // Same as above, keyed by uid.to_str().
+ int remove_all(const DoutPrefixProvider *dpp,
+ RGWSI_OTP_BE_Ctx& ctx,
+ const rgw_user& uid,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+};
+
+
diff --git a/src/rgw/services/svc_otp_types.h b/src/rgw/services/svc_otp_types.h
new file mode 100644
index 000000000..60e2a79d6
--- /dev/null
+++ b/src/rgw/services/svc_otp_types.h
@@ -0,0 +1,29 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "common/ptr_wrapper.h"
+
+#include "svc_meta_be.h"
+#include "svc_meta_be_types.h"
+
+class RGWSI_MetaBackend_Handler;
+
+// OTP-specific handle types: ptr_wrapper instantiations over the generic
+// metadata backend handler and context, both tagged with
+// RGWSI_META_BE_TYPES::OTP.
+// NOTE(review): presumably the tag makes these distinct from the wrappers of
+// other backends -- confirm against common/ptr_wrapper.h.
+using RGWSI_OTP_BE_Handler = ptr_wrapper<RGWSI_MetaBackend_Handler, RGWSI_META_BE_TYPES::OTP>;
+using RGWSI_OTP_BE_Ctx = ptr_wrapper<RGWSI_MetaBackend::Context, RGWSI_META_BE_TYPES::OTP>;
+
diff --git a/src/rgw/services/svc_quota.cc b/src/rgw/services/svc_quota.cc
new file mode 100644
index 000000000..3108a1173
--- /dev/null
+++ b/src/rgw/services/svc_quota.cc
@@ -0,0 +1,18 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_quota.h"
+#include "svc_zone.h"
+
+#include "rgw_zone.h"
+
+// Returns the bucket quota found in the configuration of the zone service's
+// current period.
+const RGWQuotaInfo& RGWSI_Quota::get_bucket_quota() const
+{
+  const auto& quota_config = zone_svc->get_current_period().get_config().quota;
+  return quota_config.bucket_quota;
+}
+
+// Returns the user quota found in the configuration of the zone service's
+// current period.
+const RGWQuotaInfo& RGWSI_Quota::get_user_quota() const
+{
+  const auto& quota_config = zone_svc->get_current_period().get_config().quota;
+  return quota_config.user_quota;
+}
+
diff --git a/src/rgw/services/svc_quota.h b/src/rgw/services/svc_quota.h
new file mode 100644
index 000000000..81aa0e1bd
--- /dev/null
+++ b/src/rgw/services/svc_quota.h
@@ -0,0 +1,22 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+
+// Service exposing the bucket and user quota settings taken from the zone
+// service's current period configuration.
+class RGWSI_Quota : public RGWServiceInstance
+{
+ // Non-owning; null until init() is called. The getters dereference it
+ // without checking.
+ RGWSI_Zone *zone_svc{nullptr};
+
+public:
+ RGWSI_Quota(CephContext *cct): RGWServiceInstance(cct) {}
+
+ // Records the zone service the quota getters read from.
+ void init(RGWSI_Zone *_zone_svc) {
+ zone_svc = _zone_svc;
+ }
+
+ // Both return references into the current period's configuration.
+ const RGWQuotaInfo& get_bucket_quota() const;
+ const RGWQuotaInfo& get_user_quota() const;
+};
diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc
new file mode 100644
index 000000000..99f400f42
--- /dev/null
+++ b/src/rgw/services/svc_rados.cc
@@ -0,0 +1,445 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_rados.h"
+
+#include "include/rados/librados.hpp"
+#include "common/errno.h"
+#include "osd/osd_types.h"
+#include "rgw_tools.h"
+#include "rgw_cr_rados.h"
+
+#include "auth/AuthRegistry.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Only hands the context to the base class; the cluster connection itself is
+// set up later in do_start().
+RGWSI_RADOS::RGWSI_RADOS(CephContext *cct)
+  : RGWServiceInstance(cct)
+{}
+
+// Nothing to do explicitly; members clean up after themselves.
+RGWSI_RADOS::~RGWSI_RADOS() = default;
+
+// Initialises the librados handle from the service's CephContext, connects
+// it, then creates and starts the async processor sized by
+// rgw_num_async_rados_threads.
+//
+// Returns the negative error code of the first step that fails, else 0.
+int RGWSI_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+  int rc = rados.init_with_context(cct);
+  if (rc >= 0) {
+    rc = rados.connect();
+  }
+  if (rc < 0) {
+    return rc;
+  }
+
+  const auto num_threads = cct->_conf->rgw_num_async_rados_threads;
+  async_processor.reset(new RGWAsyncRadosProcessor(cct, num_threads));
+  async_processor->start();
+
+  return 0;
+}
+
+// Stops the async processor (when one was created) and then shuts the
+// librados handle down.
+void RGWSI_RADOS::shutdown()
+{
+  if (RGWAsyncRadosProcessor *processor = async_processor.get()) {
+    processor->stop();
+  }
+  rados.shutdown();
+}
+
+// Stops the async processor; a no-op when none has been created yet.
+void RGWSI_RADOS::stop_processor()
+{
+  if (!async_processor) {
+    return;
+  }
+  async_processor->stop();
+}
+
+// Returns a pointer to the librados handle owned by this service.
+librados::Rados* RGWSI_RADOS::get_rados_handle()
+{
+  librados::Rados *handle = &rados;
+  return handle;
+}
+
+// Asks librados for the cluster fsid. The call's status is deliberately
+// discarded, so on failure the string is returned as librados left it.
+std::string RGWSI_RADOS::cluster_fsid()
+{
+  std::string result;
+  librados::Rados *handle = get_rados_handle();
+  (void) handle->cluster_fsid(&result);
+  return result;
+}
+
+// Returns the instance id reported by the librados handle.
+uint64_t RGWSI_RADOS::instance_id()
+{
+  librados::Rados *handle = get_rados_handle();
+  return handle->get_instance_id();
+}
+
+// Opens `io_ctx` on `pool` by delegating to rgw_init_ioctx(), passing along
+// the create / mostly_omap choices carried in `params`.
+int RGWSI_RADOS::open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx,
+                               const OpenParams& params)
+{
+  librados::Rados *handle = get_rados_handle();
+  return rgw_init_ioctx(dpp, handle, pool, io_ctx, params.create, params.mostly_omap);
+}
+
+// Advances `iter` over at most `num` objects of `io_ctx` and appends one
+// entry per accepted object to `objs`.
+//
+// dpp          - logging prefix provider
+// io_ctx       - pool context the iterator belongs to
+// iter         - listing position; left pointing past the last object visited
+// num          - maximum number of objects to visit (objects rejected by the
+//                filter still count towards this limit)
+// objs         - output; accepted entries are appended, existing ones kept
+// filter       - optional; an object is kept only when filter() returns true.
+//                The oid is passed as both name and key, so a filter that
+//                rewrites its key argument changes the stored key.
+// is_truncated - optional; set to whether objects remain after this call
+//
+// Returns -ENOENT when the iterator is already at the end, otherwise the
+// total size of `objs` (including entries that were present on entry).
+int RGWSI_RADOS::pool_iterate(const DoutPrefixProvider *dpp,
+                              librados::IoCtx& io_ctx,
+                              librados::NObjectIterator& iter,
+                              uint32_t num, vector<rgw_bucket_dir_entry>& objs,
+                              RGWAccessListFilter *filter,
+                              bool *is_truncated)
+{
+  if (iter == io_ctx.nobjects_end())
+    return -ENOENT;
+
+  uint32_t i;
+
+  for (i = 0; i < num && iter != io_ctx.nobjects_end(); ++i, ++iter) {
+    string oid = iter->get_oid();
+    ldpp_dout(dpp, 20) << "RGWRados::pool_iterate: got " << oid << dendl;
+
+    if (filter && !filter->filter(oid, oid))
+      continue;
+
+    // Build the entry in place, and only for objects that passed the filter,
+    // instead of constructing one per object and copying it into the vector.
+    objs.emplace_back();
+    objs.back().key = oid;
+  }
+
+  if (is_truncated)
+    *is_truncated = (iter != io_ctx.nobjects_end());
+
+  return objs.size();
+}
+
+// Builds an object reference for `oid` inside an existing pool wrapper. The
+// pool wrapper is copied into the reference and its owning service is reused.
+RGWSI_RADOS::Obj::Obj(Pool& pool, const string& oid)
+  : rados_svc(pool.rados_svc)
+{
+  const rgw_pool& raw_pool = pool.get_pool();
+  ref.pool = pool;
+  ref.obj = rgw_raw_obj(raw_pool, oid);
+}
+
+// Points this reference at `obj`: a fresh pool wrapper is created for
+// obj.pool and the raw object is stored alongside it.
+void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj)
+{
+  RGWSI_RADOS::Pool target_pool(rados_svc, obj.pool);
+  ref.pool = target_pool;
+  ref.obj = obj;
+}
+
+// Opens the underlying pool and, on success, sets the object's locator key
+// on the pool's IoCtx.
+//
+// Returns the negative error code from opening the pool, else 0.
+int RGWSI_RADOS::Obj::open(const DoutPrefixProvider *dpp)
+{
+  const int rc = ref.pool.open(dpp);
+  if (rc < 0) {
+    return rc;
+  }
+
+  librados::IoCtx& io = ref.pool.ioctx();
+  io.locator_set_key(ref.obj.loc);
+  return 0;
+}
+
+// Runs a write operation against this object via rgw_rados_operate().
+int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op,
+                              optional_yield y, int flags)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return rgw_rados_operate(dpp, io, ref.obj.oid, op, y, flags);
+}
+
+// Runs a read operation against this object via rgw_rados_operate(); `pbl`
+// is handed through as the output buffer.
+int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op,
+                              bufferlist *pbl, optional_yield y, int flags)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return rgw_rados_operate(dpp, io, ref.obj.oid, op, pbl, y, flags);
+}
+
+// Submits an asynchronous write operation on this object; completion is
+// reported through `c`.
+int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return io.aio_operate(ref.obj.oid, c, op);
+}
+
+// Submits an asynchronous read operation on this object; completion is
+// reported through `c` and `pbl` is handed through as the output buffer.
+int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op,
+                                  bufferlist *pbl)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return io.aio_operate(ref.obj.oid, c, op, pbl);
+}
+
+// Registers `ctx` as a watcher on this object via IoCtx::watch2(); the watch
+// handle is written to `*handle`.
+int RGWSI_RADOS::Obj::watch(uint64_t *handle, librados::WatchCtx2 *ctx)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return io.watch2(ref.obj.oid, handle, ctx);
+}
+
+// Asynchronous variant of watch(): forwards to IoCtx::aio_watch() with
+// completion `c`.
+int RGWSI_RADOS::Obj::aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return io.aio_watch(ref.obj.oid, c, handle, ctx);
+}
+
+// Drops the watch identified by `handle` via IoCtx::unwatch2().
+int RGWSI_RADOS::Obj::unwatch(uint64_t handle)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return io.unwatch2(handle);
+}
+
+// Sends a notification carrying `bl` on this object through
+// rgw_rados_notify(); `timeout_ms` and the reply buffer `pbl` are passed
+// through unchanged.
+int RGWSI_RADOS::Obj::notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms,
+                             bufferlist *pbl, optional_yield y)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return rgw_rados_notify(dpp, io, ref.obj.oid, bl, timeout_ms, pbl, y);
+}
+
+// Acknowledges the notification `notify_id` for watch `cookie` on this
+// object, sending `bl` as the reply payload.
+void RGWSI_RADOS::Obj::notify_ack(uint64_t notify_id,
+                                  uint64_t cookie,
+                                  bufferlist& bl)
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  io.notify_ack(ref.obj.oid, notify_id, cookie, bl);
+}
+
+// Returns the last version reported by the pool's IoCtx.
+uint64_t RGWSI_RADOS::Obj::get_last_version()
+{
+  librados::IoCtx& io = ref.pool.ioctx();
+  return io.get_last_version();
+}
+
+// Creates this pool, opens a temporary IoCtx on it and enables the RGW
+// application on the pool.
+//
+// Returns the negative error code of the first failing step (each failure is
+// logged at level 0), else 0. -EOPNOTSUPP from application_enable() is not
+// treated as a failure, matching the batch overload of create().
+int RGWSI_RADOS::Pool::create(const DoutPrefixProvider *dpp)
+{
+  librados::Rados *rad = rados_svc->get_rados_handle();
+  int r = rad->pool_create(pool.name.c_str());
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "WARNING: pool_create returned " << r << dendl;
+    return r;
+  }
+  librados::IoCtx io_ctx;
+  r = rad->ioctx_create(pool.name.c_str(), io_ctx);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "WARNING: ioctx_create returned " << r << dendl;
+    return r;
+  }
+  r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false);
+  // The pool itself was created; a cluster that cannot tag applications is
+  // tolerated here just as it is when pools are created in bulk.
+  if (r < 0 && r != -EOPNOTSUPP) {
+    ldpp_dout(dpp, 0) << "WARNING: application_enable returned " << r << dendl;
+    return r;
+  }
+  return 0;
+}
+
+// Creates several pools in bulk, in three phases:
+//   1. issue an asynchronous pool_create for every pool and wait for all,
+//   2. open an IoCtx on every pool,
+//   3. asynchronously enable the RGW application on every pool and wait.
+//
+// pools    - pools to create
+// retcodes - output, must not be null; receives one code per pool for the
+//            last phase that ran. It is cleared on entry so it always holds
+//            exactly pools.size() entries on return.
+//
+// Always returns 0: per-pool outcomes are reported only through `retcodes`.
+// When any pool fails in a phase, the remaining phases are skipped.
+// -EOPNOTSUPP from application_enable is recorded as success.
+int RGWSI_RADOS::Pool::create(const DoutPrefixProvider *dpp, const vector<rgw_pool>& pools, vector<int> *retcodes)
+{
+  vector<librados::PoolAsyncCompletion *> completions;
+  vector<int> rets;
+  completions.reserve(pools.size());
+  rets.reserve(pools.size());
+
+  librados::Rados *rad = rados_svc->get_rados_handle();
+  // Iterate by reference: copying each rgw_pool only to read its name is waste.
+  for (const auto& p : pools) {
+    librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion();
+    completions.push_back(c);
+    int ret = rad->pool_create_async(p.name.c_str(), c);
+    rets.push_back(ret);
+  }
+
+  vector<int>::iterator riter;
+  vector<librados::PoolAsyncCompletion *>::iterator citer;
+
+  bool error = false;
+  ceph_assert(rets.size() == completions.size());
+  // Later phases clear retcodes before refilling it; do the same here so the
+  // early-exit path does not leave stale entries in front of the results.
+  retcodes->clear();
+  for (riter = rets.begin(), citer = completions.begin(); riter != rets.end(); ++riter, ++citer) {
+    int r = *riter;
+    librados::PoolAsyncCompletion *c = *citer;
+    // Only wait on completions whose request was actually submitted.
+    if (r == 0) {
+      c->wait();
+      r = c->get_return_value();
+      if (r < 0) {
+        ldpp_dout(dpp, 0) << "WARNING: async pool_create returned " << r << dendl;
+        error = true;
+      }
+    }
+    c->release();
+    retcodes->push_back(r);
+  }
+  if (error) {
+    return 0;
+  }
+
+  std::vector<librados::IoCtx> io_ctxs;
+  io_ctxs.reserve(pools.size());
+  retcodes->clear();
+  for (const auto& p : pools) {
+    io_ctxs.emplace_back();
+    int ret = rad->ioctx_create(p.name.c_str(), io_ctxs.back());
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: ioctx_create returned " << ret << dendl;
+      error = true;
+    }
+    retcodes->push_back(ret);
+  }
+  if (error) {
+    return 0;
+  }
+
+  completions.clear();
+  for (auto &io_ctx : io_ctxs) {
+    librados::PoolAsyncCompletion *c =
+      librados::Rados::pool_async_create_completion();
+    completions.push_back(c);
+    int ret = io_ctx.application_enable_async(pg_pool_t::APPLICATION_NAME_RGW,
+                                              false, c);
+    ceph_assert(ret == 0);
+  }
+
+  retcodes->clear();
+  for (auto c : completions) {
+    c->wait();
+    int ret = c->get_return_value();
+    if (ret == -EOPNOTSUPP) {
+      ret = 0;
+    } else if (ret < 0) {
+      // Nothing runs after this phase, so the failure is only logged and
+      // recorded in retcodes.
+      ldpp_dout(dpp, 0) << "WARNING: async application_enable returned " << ret
+                        << dendl;
+    }
+    c->release();
+    retcodes->push_back(ret);
+  }
+  return 0;
+}
+
+// Looks this pool up by name in the cluster.
+//
+// Returns the negative error code from pool_lookup(), or 0 for any
+// non-negative result (the looked-up value itself is not returned).
+int RGWSI_RADOS::Pool::lookup()
+{
+  librados::Rados *handle = rados_svc->get_rados_handle();
+  const int rc = handle->pool_lookup(pool.name.c_str());
+  return (rc < 0) ? rc : 0;
+}
+
+// Opens this wrapper's IoCtx on its pool through the owning service,
+// honouring the options in `params`.
+int RGWSI_RADOS::Pool::open(const DoutPrefixProvider *dpp, const OpenParams& params)
+{
+  librados::IoCtx& io = state.ioctx;
+  return rados_svc->open_pool_ctx(dpp, pool, io, params);
+}
+
+// Prepares this listing: opens an IoCtx on the pool, parses `marker` into an
+// object cursor and positions the iterator there. `filter` is remembered for
+// later get_next() calls.
+//
+// Returns -EINVAL when the listing is already initialised, has no pool, or
+// the marker cannot be parsed; the error from opening the pool; the negated
+// error code of a std::system_error thrown by nobjects_begin(); -EIO for any
+// other std::exception; 0 on success.
+int RGWSI_RADOS::Pool::List::init(const DoutPrefixProvider *dpp, const string& marker, RGWAccessListFilter *filter)
+{
+  if (ctx.initialized || !pool) {
+    return -EINVAL;
+  }
+
+  int rc = pool->rados_svc->open_pool_ctx(dpp, pool->pool, ctx.ioctx);
+  if (rc < 0) {
+    return rc;
+  }
+
+  librados::ObjectCursor cursor;
+  const bool parsed = cursor.from_str(marker);
+  if (!parsed) {
+    ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl;
+    return -EINVAL;
+  }
+
+  try {
+    ctx.iter = ctx.ioctx.nobjects_begin(cursor);
+    ctx.filter = filter;
+    ctx.initialized = true;
+    return 0;
+  } catch (const std::system_error& sys_err) {
+    rc = -sys_err.code().value();
+    ldpp_dout(dpp, 10) << "nobjects_begin threw " << sys_err.what()
+                       << ", returning " << rc << dendl;
+    return rc;
+  } catch (const std::exception& err) {
+    ldpp_dout(dpp, 10) << "nobjects_begin threw " << err.what()
+                       << ", returning -5" << dendl;
+    return -EIO;
+  }
+}
+
+// Fetches the next batch of up to `max` objects via pool_iterate() and
+// appends their key names to `oids`.
+//
+// Returns -EINVAL when init() has not succeeded, the negative error from
+// pool_iterate() (logged unless it is -ENOENT), otherwise the total size of
+// `oids` after appending.
+int RGWSI_RADOS::Pool::List::get_next(const DoutPrefixProvider *dpp,
+                                      int max,
+                                      std::vector<string> *oids,
+                                      bool *is_truncated)
+{
+  if (!ctx.initialized) {
+    return -EINVAL;
+  }
+
+  vector<rgw_bucket_dir_entry> entries;
+  const int rc = pool->rados_svc->pool_iterate(dpp, ctx.ioctx, ctx.iter, max, entries, ctx.filter, is_truncated);
+  if (rc == -ENOENT) {
+    return rc;
+  }
+  if (rc < 0) {
+    ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << rc << dendl;
+    return rc;
+  }
+
+  for (const auto& entry : entries) {
+    oids->push_back(entry.key.name);
+  }
+  return oids->size();
+}
+
+// Creates an object reference for `o` bound to this handle's service.
+RGWSI_RADOS::Obj RGWSI_RADOS::Handle::obj(const rgw_raw_obj& o)
+{
+  return Obj(rados_svc, o);
+}
+// Forwards to librados' watch_flush() on the service's cluster handle.
+int RGWSI_RADOS::Handle::watch_flush()
+{
+  return rados_svc->get_rados_handle()->watch_flush();
+}
+
+// Sends `cmd` with input `inbl` as a monitor command on the service's
+// cluster handle; `outbl` and `outs` are handed through for the reply.
+int RGWSI_RADOS::Handle::mon_command(std::string cmd,
+                                     const bufferlist& inbl,
+                                     bufferlist *outbl,
+                                     std::string *outs)
+{
+  librados::Rados *cluster = rados_svc->get_rados_handle();
+  const int rc = cluster->mon_command(cmd, inbl, outbl, outs);
+  return rc;
+}
+
+// Writes the string form of the iterator's current cursor to `*marker`.
+//
+// Returns -EINVAL when the listing has not been initialised, else 0.
+int RGWSI_RADOS::Pool::List::get_marker(string *marker)
+{
+  if (ctx.initialized) {
+    const librados::ObjectCursor cursor = ctx.iter.get_cursor();
+    *marker = cursor.to_str();
+    return 0;
+  }
+  return -EINVAL;
+}
+
+// Sends `msg` to the monitors as a "log" command at level "warn".
+//
+// The command is a JSON document built by hand, so `msg` is escaped first:
+// a quote, backslash or control character in the message would otherwise
+// yield malformed JSON or change the command's structure.
+//
+// Returns the result of the monitor command.
+int RGWSI_RADOS::clog_warn(const string& msg)
+{
+  static const char hex_digits[] = "0123456789abcdef";
+
+  string escaped;
+  escaped.reserve(msg.size());
+  for (const char c : msg) {
+    const unsigned char uc = static_cast<unsigned char>(c);
+    switch (c) {
+    case '"':
+      escaped += "\\\"";
+      break;
+    case '\\':
+      escaped += "\\\\";
+      break;
+    case '\n':
+      escaped += "\\n";
+      break;
+    case '\r':
+      escaped += "\\r";
+      break;
+    case '\t':
+      escaped += "\\t";
+      break;
+    default:
+      if (uc < 0x20) {
+        // Remaining control characters must use the \u00XX form in JSON.
+        escaped += "\\u00";
+        escaped += hex_digits[(uc >> 4) & 0xf];
+        escaped += hex_digits[uc & 0xf];
+      } else {
+        escaped += c;
+      }
+      break;
+    }
+  }
+
+  string cmd =
+    "{"
+      "\"prefix\": \"log\", "
+      "\"level\": \"warn\", "
+      "\"logtext\": [\"" + escaped + "\"]"
+    "}";
+
+  bufferlist inbl;
+  auto h = handle();
+  return h.mon_command(cmd, inbl, nullptr, nullptr);
+}
+
+// Reports whether every auth method and every connection mode that a freshly
+// refreshed AuthRegistry lists for monitor connections is considered secure.
+//
+// Returns false (after logging the offender at level 20) on the first
+// insecure method or mode; methods are checked before modes. Returns true
+// otherwise.
+bool RGWSI_RADOS::check_secure_mon_conn(const DoutPrefixProvider *dpp) const
+{
+  AuthRegistry registry(cct);
+  registry.refresh_config();
+
+  std::vector<uint32_t> methods;
+  std::vector<uint32_t> modes;
+  registry.get_supported_methods(CEPH_ENTITY_TYPE_MON, &methods, &modes);
+  ldpp_dout(dpp, 20) << __func__ << "(): auth registy supported: methods=" << methods << " modes=" << modes << dendl;
+
+  for (size_t idx = 0; idx < methods.size(); ++idx) {
+    const auto method = methods[idx];
+    if (registry.is_secure_method(method)) {
+      continue;
+    }
+    ldpp_dout(dpp, 20) << __func__ << "(): method " << method << " is insecure" << dendl;
+    return false;
+  }
+
+  for (size_t idx = 0; idx < modes.size(); ++idx) {
+    const auto mode = modes[idx];
+    if (registry.is_secure_mode(mode)) {
+      continue;
+    }
+    ldpp_dout(dpp, 20) << __func__ << "(): mode " << mode << " is insecure" << dendl;
+    return false;
+  }
+
+  return true;
+}
+
diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h
new file mode 100644
index 000000000..ede029aa8
--- /dev/null
+++ b/src/rgw/services/svc_rados.h
@@ -0,0 +1,252 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "include/rados/librados.hpp"
+#include "common/async/yield_context.h"
+
+class RGWAsyncRadosProcessor;
+
+// Interface for deciding which objects a pool listing keeps.
+class RGWAccessListFilter {
+public:
+ virtual ~RGWAccessListFilter() {}
+ // Returns true to keep the object. `key` is a mutable reference, so an
+ // implementation is able to rewrite it; RGWSI_RADOS::pool_iterate() passes
+ // the same oid string for both arguments.
+ virtual bool filter(const std::string& name, std::string& key) = 0;
+};
+
+// Filter that keeps only objects whose key starts with a fixed prefix.
+struct RGWAccessListFilterPrefix : public RGWAccessListFilter {
+  std::string prefix;
+
+  explicit RGWAccessListFilterPrefix(const std::string& _prefix) : prefix(_prefix) {}
+
+  // Returns true when `key` begins with `prefix` (always true for an empty
+  // prefix). `name` is not consulted and `key` is never modified.
+  bool filter(const std::string& name, std::string& key) override {
+    // Compare in place rather than allocating a substring on every call.
+    // compare() clamps the length to key.size(), so a key shorter than the
+    // prefix simply compares unequal.
+    return key.compare(0, prefix.size(), prefix) == 0;
+  }
+};
+
+// Service wrapping the librados cluster handle. It hands out lightweight
+// Pool, Obj and Handle helpers that route their work through this service.
+class RGWSI_RADOS : public RGWServiceInstance
+{
+ // Cluster handle; initialised and connected in do_start().
+ librados::Rados rados;
+ // Created and started in do_start(); stopped by shutdown()/stop_processor().
+ std::unique_ptr<RGWAsyncRadosProcessor> async_processor;
+
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+public:
+ // Options for opening a pool context; forwarded to rgw_init_ioctx() by
+ // open_pool_ctx(). The setters return *this so calls can be chained.
+ struct OpenParams {
+ bool create{true};
+ bool mostly_omap{false};
+
+ OpenParams() {}
+
+ OpenParams& set_create(bool _create) {
+ create = _create;
+ return *this;
+ }
+ OpenParams& set_mostly_omap(bool _mostly_omap) {
+ mostly_omap = _mostly_omap;
+ return *this;
+ }
+ };
+
+private:
+ // Opens `io_ctx` on `pool` using the options in `params`.
+ int open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx,
+ const OpenParams& params = {});
+ // Visits up to `num` objects from `iter`, appending those accepted by
+ // `filter` (when given) to `objs`.
+ int pool_iterate(const DoutPrefixProvider *dpp,
+ librados::IoCtx& ioctx,
+ librados::NObjectIterator& iter,
+ uint32_t num, std::vector<rgw_bucket_dir_entry>& objs,
+ RGWAccessListFilter *filter,
+ bool *is_truncated);
+
+public:
+ RGWSI_RADOS(CephContext *cct);
+ ~RGWSI_RADOS();
+ librados::Rados* get_rados_handle();
+
+ void init() {}
+ void shutdown() override;
+ void stop_processor();
+
+ std::string cluster_fsid();
+ uint64_t instance_id();
+ bool check_secure_mon_conn(const DoutPrefixProvider *dpp) const;
+
+ // Null until do_start() has created the processor.
+ RGWAsyncRadosProcessor *get_async_processor() {
+ return async_processor.get();
+ }
+
+ // Sends `msg` to the monitors as a warn-level "log" command.
+ int clog_warn(const std::string& msg);
+
+ class Handle;
+
+ // A pool name paired with its own IoCtx. The constructors that bind a
+ // service are private; instances come from RGWSI_RADOS::pool() or
+ // Handle::pool().
+ class Pool {
+ friend class RGWSI_RADOS;
+ friend Handle;
+ friend class Obj;
+
+ RGWSI_RADOS *rados_svc{nullptr};
+ rgw_pool pool;
+
+ struct State {
+ librados::IoCtx ioctx;
+ } state;
+
+ Pool(RGWSI_RADOS *_rados_svc,
+ const rgw_pool& _pool) : rados_svc(_rados_svc),
+ pool(_pool) {}
+
+ Pool(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {}
+ public:
+ Pool() {}
+
+ // Creates this pool and enables the RGW application on it.
+ int create(const DoutPrefixProvider *dpp);
+ // Bulk creation of `pools`; per-pool results are reported in `retcodes`.
+ int create(const DoutPrefixProvider *dpp, const std::vector<rgw_pool>& pools, std::vector<int> *retcodes);
+ int lookup();
+ int open(const DoutPrefixProvider *dpp, const OpenParams& params = {});
+
+ const rgw_pool& get_pool() {
+ return pool;
+ }
+
+ // Lvalue access: a reference to the IoCtx held by this wrapper.
+ librados::IoCtx& ioctx() & {
+ return state.ioctx;
+ }
+
+ // Rvalue access: lets the IoCtx be moved out of a temporary Pool.
+ librados::IoCtx&& ioctx() && {
+ return std::move(state.ioctx);
+ }
+
+ // Incremental object listing over a pool: init() once, then get_next()
+ // repeatedly; get_marker() yields the current position as a string.
+ struct List {
+ Pool *pool{nullptr};
+
+ struct Ctx {
+ bool initialized{false};
+ librados::IoCtx ioctx;
+ librados::NObjectIterator iter;
+ RGWAccessListFilter *filter{nullptr};
+ } ctx;
+
+ List() {}
+ List(Pool *_pool) : pool(_pool) {}
+
+ // Fails with -EINVAL when already initialised or when no pool is set.
+ int init(const DoutPrefixProvider *dpp, const std::string& marker, RGWAccessListFilter *filter = nullptr);
+ int get_next(const DoutPrefixProvider *dpp, int max,
+ std::vector<std::string> *oids,
+ bool *is_truncated);
+
+ int get_marker(std::string *marker);
+ };
+
+ // Starts a listing operation bound to this pool.
+ List op() {
+ return List(this);
+ }
+
+ friend List;
+ };
+
+
+ // A pool wrapper together with the raw object it addresses.
+ struct rados_ref {
+ RGWSI_RADOS::Pool pool;
+ rgw_raw_obj obj;
+ };
+
+ // Reference to a single RADOS object; the operations below act on
+ // ref.obj.oid through the IoCtx of ref.pool.
+ class Obj {
+ friend class RGWSI_RADOS;
+ friend class Handle;
+
+ RGWSI_RADOS *rados_svc{nullptr};
+ rados_ref ref;
+
+ void init(const rgw_raw_obj& obj);
+
+ Obj(RGWSI_RADOS *_rados_svc, const rgw_raw_obj& _obj)
+ : rados_svc(_rados_svc) {
+ init(_obj);
+ }
+
+ Obj(Pool& pool, const std::string& oid);
+
+ public:
+ Obj() {}
+
+ // Opens the pool and sets the object's locator key on its IoCtx.
+ int open(const DoutPrefixProvider *dpp);
+
+ int operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op, optional_yield y,
+ int flags = 0);
+ int operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op, bufferlist *pbl,
+ optional_yield y, int flags = 0);
+ int aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op);
+ int aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op,
+ bufferlist *pbl);
+
+ int watch(uint64_t *handle, librados::WatchCtx2 *ctx);
+ int aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx);
+ int unwatch(uint64_t handle);
+ int notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms,
+ bufferlist *pbl, optional_yield y);
+ void notify_ack(uint64_t notify_id,
+ uint64_t cookie,
+ bufferlist& bl);
+
+ uint64_t get_last_version();
+
+ rados_ref& get_ref() { return ref; }
+ const rados_ref& get_ref() const { return ref; }
+
+ const rgw_raw_obj& get_raw_obj() const {
+ return ref.obj;
+ }
+ };
+
+ // Small value type bound to the service; a factory for Obj and Pool
+ // wrappers plus cluster-wide calls (watch_flush, mon_command).
+ class Handle {
+ friend class RGWSI_RADOS;
+
+ RGWSI_RADOS *rados_svc{nullptr};
+
+ Handle(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {}
+ public:
+ Obj obj(const rgw_raw_obj& o);
+
+ Pool pool(const rgw_pool& p) {
+ return Pool(rados_svc, p);
+ }
+
+ int watch_flush();
+
+ int mon_command(std::string cmd,
+ const bufferlist& inbl,
+ bufferlist *outbl,
+ std::string *outs);
+ };
+
+ Handle handle() {
+ return Handle(this);
+ }
+
+ Obj obj(const rgw_raw_obj& o) {
+ return Obj(this, o);
+ }
+
+ Obj obj(Pool& pool, const std::string& oid) {
+ return Obj(pool, oid);
+ }
+
+ // Pool wrapper bound to this service but with no pool name set.
+ Pool pool() {
+ return Pool(this);
+ }
+
+ Pool pool(const rgw_pool& p) {
+ return Pool(this, p);
+ }
+
+ friend Obj;
+ friend Pool;
+ friend Pool::List;
+};
+
+// Namespace-scope shorthand for the (pool wrapper, raw object) pair held by
+// RGWSI_RADOS::Obj.
+using rgw_rados_ref = RGWSI_RADOS::rados_ref;
+
+// Streams an Obj by printing the raw object it refers to.
+inline std::ostream& operator<<(std::ostream& out, const RGWSI_RADOS::Obj& obj) {
+  const rgw_raw_obj& raw = obj.get_raw_obj();
+  return out << raw;
+}
diff --git a/src/rgw/services/svc_role_rados.cc b/src/rgw/services/svc_role_rados.cc
new file mode 100644
index 000000000..a84022497
--- /dev/null
+++ b/src/rgw/services/svc_role_rados.cc
@@ -0,0 +1,82 @@
+#include "svc_role_rados.h"
+#include "svc_meta_be_sobj.h"
+#include "svc_meta.h"
+#include "rgw_role.h"
+#include "rgw_zone.h"
+#include "svc_zone.h"
+#include "rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Backend module for the "roles" section: translates between role metadata
+// keys and object ids by adding or stripping role_oid_prefix, and reports the
+// zone's roles pool as the storage location.
+class RGWSI_Role_Module : public RGWSI_MBSObj_Handler_Module {
+  RGWSI_Role_RADOS::Svc& svc;
+  const std::string prefix;
+public:
+  RGWSI_Role_Module(RGWSI_Role_RADOS::Svc& _svc)
+    : RGWSI_MBSObj_Handler_Module("roles"),
+      svc(_svc),
+      prefix(role_oid_prefix) {}
+
+  // Fills whichever of `pool` / `oid` is non-null: the pool is the zone's
+  // roles pool, the oid is the prefixed key.
+  void get_pool_and_oid(const std::string& key,
+                        rgw_pool *pool,
+                        std::string *oid) override
+  {
+    if (pool != nullptr) {
+      const auto& zone_params = svc.zone->get_zone_params();
+      *pool = zone_params.roles_pool;
+    }
+
+    if (oid != nullptr) {
+      *oid = key_to_oid(key);
+    }
+  }
+
+  // An oid belongs to this module when it starts with the role prefix.
+  bool is_valid_oid(const std::string& oid) override {
+    const bool has_prefix = boost::algorithm::starts_with(oid, prefix);
+    return has_prefix;
+  }
+
+  std::string key_to_oid(const std::string& key) override {
+    std::string oid(prefix);
+    oid += key;
+    return oid;
+  }
+
+  // Callers are expected to have checked is_valid_oid() first; the prefix
+  // length is stripped without re-validating it.
+  std::string oid_to_key(const std::string& oid) override {
+    const auto prefix_len = prefix.size();
+    return oid.substr(prefix_len);
+  }
+
+  const std::string& get_oid_prefix() {
+    return prefix;
+  }
+};
+
+// Returns the backend handler pointer that do_start() obtains from the meta
+// service.
+RGWSI_MetaBackend_Handler* RGWSI_Role_RADOS::get_be_handler()
+{
+  RGWSI_MetaBackend_Handler *handler = be_handler;
+  return handler;
+}
+
+// Records the services this one depends on; nothing is started here.
+void RGWSI_Role_RADOS::init(RGWSI_Zone *_zone_svc,
+                            RGWSI_Meta *_meta_svc,
+                            RGWSI_MetaBackend *_meta_be_svc,
+                            RGWSI_SysObj *_sysobj_svc)
+{
+  svc.sysobj = _sysobj_svc;
+  svc.meta_be = _meta_be_svc;
+  svc.meta = _meta_svc;
+  svc.zone = _zone_svc;
+}
+
+// Creates the SOBJ metadata backend handler for roles and attaches a
+// RGWSI_Role_Module to it.
+//
+// Returns the negative error code from create_be_handler() (logged at level
+// 0 through `dpp`), else 0.
+int RGWSI_Role_RADOS::do_start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+
+  int r = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ,
+                                      &be_handler);
+  if (r < 0) {
+    // Log through the supplied prefix provider, as the other services'
+    // do_start() implementations do, rather than through the bare context.
+    ldpp_dout(dpp, 0) << "ERROR: failed to create be_handler for Roles: r="
+                      << r <<dendl;
+    return r;
+  }
+
+  // Hold the module in a unique_ptr from the moment it exists; the handler
+  // only receives a non-owning pointer, ownership ends up in be_module.
+  auto module = std::make_unique<RGWSI_Role_Module>(svc);
+  RGWSI_MetaBackend_Handler_SObj* bh= static_cast<RGWSI_MetaBackend_Handler_SObj *>(be_handler);
+  bh->set_module(module.get());
+  be_module = std::move(module);
+  return 0;
+}
diff --git a/src/rgw/services/svc_role_rados.h b/src/rgw/services/svc_role_rados.h
new file mode 100644
index 000000000..d4d3530c2
--- /dev/null
+++ b/src/rgw/services/svc_role_rados.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 SUSE LLC
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#pragma once
+
+#include "rgw_service.h"
+#include "rgw_role.h"
+#include "svc_meta_be.h"
+
+// Service that wires role metadata into the metadata backend: do_start()
+// creates the backend handler and attaches the role module to it.
+class RGWSI_Role_RADOS: public RGWServiceInstance
+{
+ public:
+  // Non-owning pointers to collaborating services; filled in by init().
+  struct Svc {
+    RGWSI_Zone *zone{nullptr};
+    RGWSI_Meta *meta{nullptr};
+    RGWSI_MetaBackend *meta_be{nullptr};
+    RGWSI_SysObj *sysobj{nullptr};
+  } svc;
+
+  RGWSI_Role_RADOS(CephContext *cct) : RGWServiceInstance(cct) {}
+  ~RGWSI_Role_RADOS() {}
+
+  void init(RGWSI_Zone *_zone_svc,
+            RGWSI_Meta *_meta_svc,
+            RGWSI_MetaBackend *_meta_be_svc,
+            RGWSI_SysObj *_sysobj_svc);
+
+  // Null until do_start() has succeeded.
+  RGWSI_MetaBackend_Handler * get_be_handler();
+  int do_start(optional_yield y, const DoutPrefixProvider *dpp) override;
+
+private:
+  // Initialised to null so that get_be_handler() never returns an
+  // indeterminate pointer when called before do_start().
+  RGWSI_MetaBackend_Handler *be_handler{nullptr};
+  // Owns the role module handed to the backend handler in do_start().
+  std::unique_ptr<RGWSI_MetaBackend::Module> be_module;
+};
+
+// Object-id prefixes for role-related objects. role_oid_prefix is the one
+// RGWSI_Role_Module prepends to a role key to form its oid. Being `static`
+// in a header, each translation unit that includes it gets its own copy.
+static const std::string role_name_oid_prefix = "role_names.";
+static const std::string role_oid_prefix = "roles.";
+static const std::string role_path_oid_prefix = "role_paths.";
diff --git a/src/rgw/services/svc_sync_modules.cc b/src/rgw/services/svc_sync_modules.cc
new file mode 100644
index 000000000..ba9e7d172
--- /dev/null
+++ b/src/rgw/services/svc_sync_modules.cc
@@ -0,0 +1,44 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_sync_modules.h"
+#include "svc_zone.h"
+
+#include "rgw_sync_module.h"
+#include "rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+// Stores the zone service, allocates the sync modules manager (released in
+// the destructor) and registers the sync modules with it.
+void RGWSI_SyncModules::init(RGWSI_Zone *zone_svc)
+{
+  svc.zone = zone_svc;
+
+  auto *manager = new RGWSyncModulesManager();
+  sync_modules_manager = manager;
+  rgw_register_sync_modules(manager);
+}
+
+// Instantiates the sync module named by the zone's tier type, configured
+// with the zone params' tier_config, and stores it in sync_module.
+//
+// Returns 0 on success. On failure the error is logged and returned; for
+// -ENOENT the list of registered module names is logged as well.
+int RGWSI_SyncModules::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+  auto& zone_public_config = svc.zone->get_zone();
+
+  const int rc = sync_modules_manager->create_instance(dpp, cct, zone_public_config.tier_type, svc.zone->get_zone_params().tier_config, &sync_module);
+  if (rc >= 0) {
+    ldpp_dout(dpp, 20) << "started sync module instance, tier type = " << zone_public_config.tier_type << dendl;
+    return 0;
+  }
+
+  ldpp_dout(dpp, -1) << "ERROR: failed to start sync module instance, ret=" << rc << dendl;
+  if (rc == -ENOENT) {
+    ldpp_dout(dpp, -1) << "ERROR: " << zone_public_config.tier_type
+                       << " sync module does not exist. valid sync modules: "
+                       << sync_modules_manager->get_registered_module_names()
+                       << dendl;
+  }
+  return rc;
+}
+
+// Releases the manager allocated in init(); the pointer is null, and nothing
+// is deleted, when init() never ran.
+RGWSI_SyncModules::~RGWSI_SyncModules()
+{
+  RGWSyncModulesManager *manager = sync_modules_manager;
+  delete manager;
+}
+
diff --git a/src/rgw/services/svc_sync_modules.h b/src/rgw/services/svc_sync_modules.h
new file mode 100644
index 000000000..ea78f5817
--- /dev/null
+++ b/src/rgw/services/svc_sync_modules.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+#include "rgw_sync_module.h"
+
+class RGWSI_Zone;
+
+class RGWSyncModulesManager;
+
+// Service owning the sync modules manager and the sync module instance
+// selected for the current zone.
+class RGWSI_SyncModules : public RGWServiceInstance
+{
+ // Allocated in init() and deleted in the destructor.
+ RGWSyncModulesManager *sync_modules_manager{nullptr};
+ // Filled in by do_start() through the manager's create_instance().
+ RGWSyncModuleInstanceRef sync_module;
+
+ struct Svc {
+ RGWSI_Zone *zone{nullptr};
+ } svc;
+
+public:
+ RGWSI_SyncModules(CephContext *cct): RGWServiceInstance(cct) {}
+ ~RGWSI_SyncModules();
+
+ // Null until init() has been called.
+ RGWSyncModulesManager *get_manager() {
+ return sync_modules_manager;
+ }
+
+ void init(RGWSI_Zone *zone_svc);
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+ RGWSyncModuleInstanceRef& get_sync_module() { return sync_module; }
+};
diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc
new file mode 100644
index 000000000..310e60514
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj.cc
@@ -0,0 +1,183 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_sys_obj.h"
+#include "svc_sys_obj_core.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw_zone.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Wraps `obj` in an Obj bound to this service's core backend.
+RGWSI_SysObj::Obj RGWSI_SysObj::get_obj(const rgw_raw_obj& obj)
+{
+  Obj wrapped(core_svc, obj);
+  return wrapped;
+}
+
+// Binds the read operation to `_source` and constructs the core backend's
+// GetObjState inside the inline state storage.
+RGWSI_SysObj::Obj::ROp::ROp(Obj& _source)
+  : source(_source)
+{
+  state.emplace<RGWSI_SysObj_Core::GetObjState>();
+}
+
+// Stats the source object through the core service, handing over whichever
+// output pointers (attrs, lastmod, obj_size, objv_tracker) were set on this
+// operation.
+int RGWSI_SysObj::Obj::ROp::stat(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+
+  return core->stat(*state, target, attrs, raw_attrs,
+                    lastmod, obj_size, objv_tracker, y, dpp);
+}
+
+// Reads the range [ofs, end] of the source object into `bl` through the core
+// service, passing along every option configured on this operation.
+int RGWSI_SysObj::Obj::ROp::read(const DoutPrefixProvider *dpp,
+                                 int64_t ofs, int64_t end, bufferlist *bl,
+                                 optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+
+  return core->read(dpp, *state, objv_tracker,
+                    target, bl, ofs, end,
+                    lastmod, obj_size,
+                    attrs, raw_attrs,
+                    cache_info, refresh_version, y);
+}
+
+// Fetches the attribute `name` of the source object into `dest` through the
+// core service.
+int RGWSI_SysObj::Obj::ROp::get_attr(const DoutPrefixProvider *dpp,
+                                     const char *name, bufferlist *dest,
+                                     optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+  return core->get_attr(dpp, target, name, dest, y);
+}
+
+// Removes the source object through the core service, using this
+// operation's version tracker.
+int RGWSI_SysObj::Obj::WOp::remove(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+  return core->remove(dpp, objv_tracker, target, y);
+}
+
+// Writes `bl` to the source object through the core service together with
+// the attrs, mtime, exclusivity and version tracker set on this operation.
+int RGWSI_SysObj::Obj::WOp::write(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+
+  return core->write(dpp, target, pmtime, attrs, exclusive,
+                     bl, objv_tracker, mtime, y);
+}
+
+// Writes only the data payload `bl` to the source object through the core
+// service.
+int RGWSI_SysObj::Obj::WOp::write_data(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+  return core->write_data(dpp, target, bl, exclusive, objv_tracker, y);
+}
+
+// Applies the attribute map configured on this operation to the source
+// object via the core service's set_attrs(); no removal map is supplied.
+int RGWSI_SysObj::Obj::WOp::write_attrs(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+  return core->set_attrs(dpp, target, attrs, nullptr, objv_tracker, exclusive, y);
+}
+
+// Sets the single attribute `name` to `bl` on the source object by building
+// a one-entry attribute map and passing it to the core service's
+// set_attrs().
+int RGWSI_SysObj::Obj::WOp::write_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& bl,
+                                       optional_yield y)
+{
+  map<string, bufferlist> single_attr;
+  single_attr.emplace(name, bl);
+
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.get_obj();
+  return core->set_attrs(dpp, target, single_attr, nullptr, objv_tracker, exclusive, y);
+}
+
+// Hands `prefix` and the callback `cb` to the core service's
+// pool_list_prefixed_objs() for this pool.
+int RGWSI_SysObj::Pool::list_prefixed_objs(const DoutPrefixProvider *dpp, const string& prefix, std::function<void(const string&)> cb)
+{
+  const int rc = core_svc->pool_list_prefixed_objs(dpp, pool, prefix, cb);
+  return rc;
+}
+
+// Starts a listing of the source pool from `marker`, restricted to `prefix`;
+// the listing state is kept in this operation's ctx.
+int RGWSI_SysObj::Pool::Op::init(const DoutPrefixProvider *dpp, const string& marker, const string& prefix)
+{
+  auto *core = source.core_svc;
+  return core->pool_list_objects_init(dpp, source.pool, marker, prefix, &ctx);
+}
+
+// Retrieves up to `max` further object ids of the listing into `oids`;
+// `is_truncated` is handed through to the core service.
+int RGWSI_SysObj::Pool::Op::get_next(const DoutPrefixProvider *dpp, int max, vector<string> *oids, bool *is_truncated)
+{
+  auto *core = source.core_svc;
+  return core->pool_list_objects_next(dpp, ctx, max, oids, is_truncated);
+}
+
+// Asks the core service for the listing's current marker.
+int RGWSI_SysObj::Pool::Op::get_marker(string *marker)
+{
+  auto *core = source.core_svc;
+  return core->pool_list_objects_get_marker(ctx, marker);
+}
+
+// Reads all omap entries of the source object into `m` via the core
+// service.
+int RGWSI_SysObj::Obj::OmapOp::get_all(const DoutPrefixProvider *dpp, std::map<string, bufferlist> *m,
+                                       optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+  return core->omap_get_all(dpp, target, m, y);
+}
+
+// Reads up to `count` omap entries of the source object, starting from
+// `marker`, into `m` via the core service; `pmore` is handed through.
+int RGWSI_SysObj::Obj::OmapOp::get_vals(const DoutPrefixProvider *dpp,
+                                        const string& marker, uint64_t count,
+                                        std::map<string, bufferlist> *m,
+                                        bool *pmore, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+  return core->omap_get_vals(dpp, target, marker, count, m, pmore, y);
+}
+
+// Sets the omap entry `key` to `bl` on the source object via the core
+// service, passing this operation's must_exist flag.
+int RGWSI_SysObj::Obj::OmapOp::set(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& bl,
+                                   optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+  return core->omap_set(dpp, target, key, bl, must_exist, y);
+}
+
+// Sets every omap entry in `m` on the source object via the core service,
+// passing this operation's must_exist flag.
+int RGWSI_SysObj::Obj::OmapOp::set(const DoutPrefixProvider *dpp, const map<std::string, bufferlist>& m,
+                                   optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+  return core->omap_set(dpp, target, m, must_exist, y);
+}
+
+// Deletes the omap entry `key` from the source object via the core service.
+int RGWSI_SysObj::Obj::OmapOp::del(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+  return core->omap_del(dpp, target, key, y);
+}
+
+// Sends a notification carrying `bl` on the source object via the core
+// service; `timeout_ms` and the reply buffer `pbl` are passed through.
+int RGWSI_SysObj::Obj::WNOp::notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms,
+                                    bufferlist *pbl, optional_yield y)
+{
+  RGWSI_SysObj_Core *core = source.core_svc;
+  rgw_raw_obj& target = source.obj;
+  return core->notify(dpp, target, bl, timeout_ms, pbl, y);
+}
+
+// Returns the zone service known to the core backend.
+RGWSI_Zone *RGWSI_SysObj::get_zone_svc()
+{
+  RGWSI_Zone *zone = core_svc->get_zone_svc();
+  return zone;
+}
diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h
new file mode 100644
index 000000000..f3e217dbd
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj.h
@@ -0,0 +1,270 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include <utility>
+
+#include "common/static_ptr.h"
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+#include "svc_sys_obj_types.h"
+#include "svc_sys_obj_core_types.h"
+
+
+class RGWSI_Zone;
+class RGWSI_SysObj;
+
+struct rgw_cache_entry_info;
+
+/**
+ * Service front end for system objects.
+ *
+ * Hands out Obj and Pool handles that carry a pointer to the
+ * RGWSI_SysObj_Core backend installed through init(). The nested *Op types
+ * are small builder-style wrappers around a handle: their setters return
+ * *this so calls can be chained.
+ */
+class RGWSI_SysObj : public RGWServiceInstance
+{
+  friend struct RGWServices_Def;
+
+public:
+  /// Handle binding one raw object to the core service.
+  class Obj {
+    friend class ROp;
+
+    RGWSI_SysObj_Core *core_svc;
+    rgw_raw_obj obj;
+
+  public:
+    Obj(RGWSI_SysObj_Core *_core_svc, const rgw_raw_obj& _obj)
+      : core_svc(_core_svc), obj(_obj) {}
+
+    rgw_raw_obj& get_obj() {
+      return obj;
+    }
+
+    /// Read-side operation on the source object. Setters only record the
+    /// given pointers/values; the out-of-line methods consume them.
+    struct ROp {
+      Obj& source;
+
+      // In-place storage for the read state, sized for the core
+      // implementation's state type.
+      ceph::static_ptr<RGWSI_SysObj_Obj_GetObjState, sizeof(RGWSI_SysObj_Core_GetObjState)> state;
+
+      RGWObjVersionTracker *objv_tracker{nullptr};
+      std::map<std::string, bufferlist> *attrs{nullptr};
+      bool raw_attrs{false};
+      boost::optional<obj_version> refresh_version{boost::none};
+      ceph::real_time *lastmod{nullptr};
+      uint64_t *obj_size{nullptr};
+      rgw_cache_entry_info *cache_info{nullptr};
+
+      ROp& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+        objv_tracker = _objv_tracker;
+        return *this;
+      }
+
+      ROp& set_last_mod(ceph::real_time *_lastmod) {
+        lastmod = _lastmod;
+        return *this;
+      }
+
+      ROp& set_obj_size(uint64_t *_obj_size) {
+        obj_size = _obj_size;
+        return *this;
+      }
+
+      ROp& set_attrs(std::map<std::string, bufferlist> *_attrs) {
+        attrs = _attrs;
+        return *this;
+      }
+
+      ROp& set_raw_attrs(bool ra) {
+        raw_attrs = ra;
+        return *this;
+      }
+
+      // The argument is only copied, so accept it by const reference; this
+      // still binds to every lvalue the previous non-const signature took.
+      ROp& set_refresh_version(const boost::optional<obj_version>& rf) {
+        refresh_version = rf;
+        return *this;
+      }
+
+      ROp& set_cache_info(rgw_cache_entry_info *ci) {
+        cache_info = ci;
+        return *this;
+      }
+
+      ROp(Obj& _source);
+
+      int stat(optional_yield y, const DoutPrefixProvider *dpp);
+      int read(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, bufferlist *pbl, optional_yield y);
+      /// Convenience overload: calls the ranged read() with ofs=0, end=-1.
+      int read(const DoutPrefixProvider *dpp, bufferlist *pbl, optional_yield y) {
+        return read(dpp, 0, -1, pbl, y);
+      }
+      int get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist *dest, optional_yield y);
+    };
+
+    /// Write-side operation on the source object.
+    struct WOp {
+      Obj& source;
+
+      RGWObjVersionTracker *objv_tracker{nullptr};
+      std::map<std::string, bufferlist> attrs;
+      ceph::real_time mtime;
+      ceph::real_time *pmtime{nullptr};
+      bool exclusive{false};
+
+      WOp& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) {
+        objv_tracker = _objv_tracker;
+        return *this;
+      }
+
+      /// Copying overload: the caller keeps its map.
+      WOp& set_attrs(std::map<std::string, bufferlist>& _attrs) {
+        attrs = _attrs;
+        return *this;
+      }
+
+      /// Sink overload: a named rvalue reference is an lvalue, so it must be
+      /// moved explicitly or the whole map is copied.
+      WOp& set_attrs(std::map<std::string, bufferlist>&& _attrs) {
+        attrs = std::move(_attrs);
+        return *this;
+      }
+
+      WOp& set_mtime(const ceph::real_time& _mtime) {
+        mtime = _mtime;
+        return *this;
+      }
+
+      WOp& set_pmtime(ceph::real_time *_pmtime) {
+        pmtime = _pmtime;
+        return *this;
+      }
+
+      WOp& set_exclusive(bool _exclusive = true) {
+        exclusive = _exclusive;
+        return *this;
+      }
+
+      WOp(Obj& _source) : source(_source) {}
+
+      int remove(const DoutPrefixProvider *dpp, optional_yield y);
+      int write(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y);
+
+      int write_data(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y); /* write data only */
+      int write_attrs(const DoutPrefixProvider *dpp, optional_yield y); /* write attrs only */
+      int write_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& bl,
+                     optional_yield y); /* write attrs only */
+    };
+
+    /// Omap operation on the source object; must_exist is forwarded to the
+    /// core service by the set() overloads.
+    struct OmapOp {
+      Obj& source;
+
+      bool must_exist{false};
+
+      OmapOp& set_must_exist(bool _must_exist = true) {
+        must_exist = _must_exist;
+        return *this;
+      }
+
+      OmapOp(Obj& _source) : source(_source) {}
+
+      int get_all(const DoutPrefixProvider *dpp, std::map<std::string, bufferlist> *m, optional_yield y);
+      int get_vals(const DoutPrefixProvider *dpp, const std::string& marker, uint64_t count,
+                   std::map<std::string, bufferlist> *m,
+                   bool *pmore, optional_yield y);
+      int set(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& bl, optional_yield y);
+      int set(const DoutPrefixProvider *dpp, const std::map<std::string, bufferlist>& m, optional_yield y);
+      int del(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y);
+    };
+
+    /// Notify operation on the source object.
+    struct WNOp {
+      Obj& source;
+
+      WNOp(Obj& _source) : source(_source) {}
+
+      int notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, bufferlist *pbl,
+                 optional_yield y);
+    };
+
+    // Factories: each returned op holds a reference to this Obj, so the Obj
+    // must outlive the op.
+    ROp rop() {
+      return ROp(*this);
+    }
+
+    WOp wop() {
+      return WOp(*this);
+    }
+
+    OmapOp omap() {
+      return OmapOp(*this);
+    }
+
+    WNOp wn() {
+      return WNOp(*this);
+    }
+  };
+
+  /// Handle binding one pool to the core service.
+  class Pool {
+    friend class Op;
+    friend class RGWSI_SysObj_Core;
+
+    RGWSI_SysObj_Core *core_svc;
+    rgw_pool pool;
+
+  protected:
+    using ListImplInfo = RGWSI_SysObj_Pool_ListInfo;
+
+    struct ListCtx {
+      ceph::static_ptr<ListImplInfo, sizeof(RGWSI_SysObj_Core_PoolListImplInfo)> impl; /* update this if creating new backend types */
+    };
+
+  public:
+    Pool(RGWSI_SysObj_Core *_core_svc,
+         const rgw_pool& _pool) : core_svc(_core_svc),
+                                  pool(_pool) {}
+
+    rgw_pool& get_pool() {
+      return pool;
+    }
+
+    /// Listing operation over the source pool; ctx carries the listing
+    /// state between init(), get_next() and get_marker().
+    struct Op {
+      Pool& source;
+      ListCtx ctx;
+
+      Op(Pool& _source) : source(_source) {}
+
+      int init(const DoutPrefixProvider *dpp, const std::string& marker, const std::string& prefix);
+      int get_next(const DoutPrefixProvider *dpp, int max, std::vector<std::string> *oids, bool *is_truncated);
+      int get_marker(std::string *marker);
+    };
+
+    int list_prefixed_objs(const DoutPrefixProvider *dpp, const std::string& prefix, std::function<void(const std::string&)> cb);
+
+    /// Convenience overload: appends every value reported by the callback
+    /// overload to *result via push_back().
+    template <typename Container>
+    int list_prefixed_objs(const DoutPrefixProvider *dpp, const std::string& prefix,
+                           Container *result) {
+      return list_prefixed_objs(dpp, prefix, [&](const std::string& val) {
+        result->push_back(val);
+      });
+    }
+
+    Op op() {
+      return Op(*this);
+    }
+  };
+
+  friend class Obj;
+  friend class Obj::ROp;
+  friend class Obj::WOp;
+  friend class Pool;
+  friend class Pool::Op;
+
+protected:
+  RGWSI_RADOS *rados_svc{nullptr};
+  RGWSI_SysObj_Core *core_svc{nullptr};
+
+  /// Record the services this front end depends on.
+  void init(RGWSI_RADOS *_rados_svc,
+            RGWSI_SysObj_Core *_core_svc) {
+    rados_svc = _rados_svc;
+    core_svc = _core_svc;
+  }
+
+public:
+  RGWSI_SysObj(CephContext *cct): RGWServiceInstance(cct) {}
+
+  Obj get_obj(const rgw_raw_obj& obj);
+
+  /// Build a Pool handle bound to the configured core service.
+  Pool get_pool(const rgw_pool& pool) {
+    return Pool(core_svc, pool);
+  }
+
+  RGWSI_Zone *get_zone_svc();
+};
+
+// Shorthand alias for the system-object handle type.
+using RGWSysObj = RGWSI_SysObj::Obj;
diff --git a/src/rgw/services/svc_sys_obj_cache.cc b/src/rgw/services/svc_sys_obj_cache.cc
new file mode 100644
index 000000000..d1b7a3dbb
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_cache.cc
@@ -0,0 +1,670 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "common/admin_socket.h"
+
+#include "svc_sys_obj_cache.h"
+#include "svc_zone.h"
+#include "svc_notify.h"
+
+#include "rgw_zone.h"
+#include "rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+// Adapter handed to the notify service: every callback is relayed to the
+// cache service that created it.
+class RGWSI_SysObj_Cache_CB : public RGWSI_Notify::CB
+{
+  RGWSI_SysObj_Cache *svc;
+
+public:
+  RGWSI_SysObj_Cache_CB(RGWSI_SysObj_Cache *cache_svc) : svc{cache_svc} {}
+
+  // Relay an incoming notification to the cache service.
+  int watch_cb(const DoutPrefixProvider *dpp,
+               uint64_t notify_id,
+               uint64_t cookie,
+               uint64_t notifier_id,
+               bufferlist& bl)
+  {
+    const int ret = svc->watch_cb(dpp, notify_id, cookie, notifier_id, bl);
+    return ret;
+  }
+
+  // Relay an enable/disable request to the cache service.
+  void set_enabled(bool status)
+  {
+    svc->set_enabled(status);
+  }
+};
+
+// Start-up sequence: admin socket hook, then the core service, then the
+// notify service; finally register our callback with the notify service.
+// The first failing step aborts start-up and its error code is returned.
+int RGWSI_SysObj_Cache::do_start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  if (int r = asocket.start(); r < 0) {
+    return r;
+  }
+
+  if (int r = RGWSI_SysObj_Core::do_start(y, dpp); r < 0) {
+    return r;
+  }
+
+  if (int r = notify_svc->start(y, dpp); r < 0) {
+    return r;
+  }
+
+  assert(notify_svc->is_started());
+
+  // cb keeps the callback object alive; the notify service only receives
+  // the raw pointer.
+  cb = std::make_shared<RGWSI_SysObj_Cache_CB>(this);
+  notify_svc->register_watch_cb(cb.get());
+
+  return 0;
+}
+
+// Tear down in two steps: shut down the admin socket handler first, then
+// the core service.
+void RGWSI_SysObj_Cache::shutdown()
+{
+ asocket.shutdown();
+ RGWSI_SysObj_Core::shutdown();
+}
+
+// Build the cache key for an object: "<pool name>+<pool ns>+<oid>".
+// The pool is only read, so it is taken by const reference (existing callers
+// passing a non-const lvalue still bind).
+static string normal_name(const rgw_pool& pool, const std::string& oid) {
+  std::string buf;
+  // Two extra bytes for the '+' separators.
+  buf.reserve(pool.name.size() + pool.ns.size() + oid.size() + 2);
+  buf.append(pool.name).append("+").append(pool.ns).append("+").append(oid);
+  return buf;
+}
+
+// Produce the (pool, oid) pair used for cache keys.
+// A non-empty src_obj is passed through with its pool unchanged. An empty
+// src_obj is mapped to the zone's domain_root pool, using the source pool's
+// name as the object id.
+void RGWSI_SysObj_Cache::normalize_pool_and_obj(const rgw_pool& src_pool, const string& src_obj, rgw_pool& dst_pool, string& dst_obj)
+{
+  if (src_obj.empty()) {
+    dst_pool = zone_svc->get_zone_params().domain_root;
+    dst_obj = src_pool.name;
+    return;
+  }
+
+  dst_pool = src_pool;
+  dst_obj = src_obj;
+}
+
+
+// Remove an object: drop the local cache entry, distribute an
+// INVALIDATE_OBJ notification, then delegate the removal to the core
+// service. A failed distribution is logged but does not stop the removal.
+int RGWSI_SysObj_Cache::remove(const DoutPrefixProvider *dpp,
+                               RGWObjVersionTracker *objv_tracker,
+                               const rgw_raw_obj& obj,
+                               optional_yield y)
+
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  string cache_key = normal_name(norm_pool, norm_oid);
+  cache.invalidate_remove(dpp, cache_key);
+
+  // The notification carries a default-constructed info object.
+  ObjectCacheInfo empty_info;
+  const int dist_ret = distribute_cache(dpp, cache_key, obj, empty_info, INVALIDATE_OBJ, y);
+  if (dist_ret < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to distribute cache: r=" << dist_ret << dendl;
+  }
+
+  return RGWSI_SysObj_Core::remove(dpp, objv_tracker, obj, y);
+}
+
+/*
+ * Read an object, serving it from the object cache when possible.
+ *
+ * Reads with a non-zero offset bypass the cache and go straight to the core
+ * service. Otherwise the cache is consulted with a flag mask built from the
+ * outputs the caller requested; on a usable hit the outputs are filled from
+ * the cached entry. On a miss the core service is read and the result is
+ * stored in the cache (including a negative entry when the read fails with
+ * -ENOENT).
+ *
+ * Returns the number of bytes placed in *obl on a cache hit, the core
+ * read's return value on a miss, or a negative error code.
+ */
+int RGWSI_SysObj_Cache::read(const DoutPrefixProvider *dpp,
+                             RGWSI_SysObj_Obj_GetObjState& read_state,
+                             RGWObjVersionTracker *objv_tracker,
+                             const rgw_raw_obj& obj,
+                             bufferlist *obl, off_t ofs, off_t end,
+                             ceph::real_time* pmtime, uint64_t* psize,
+                             map<string, bufferlist> *attrs,
+                             bool raw_attrs,
+                             rgw_cache_entry_info *cache_info,
+                             boost::optional<obj_version> refresh_version,
+                             optional_yield y)
+{
+  rgw_pool pool;
+  string oid;
+  if (ofs != 0) {
+    return RGWSI_SysObj_Core::read(dpp, read_state, objv_tracker, obj, obl,
+                                   ofs, end, pmtime, psize, attrs, raw_attrs,
+                                   cache_info, refresh_version, y);
+  }
+
+  normalize_pool_and_obj(obj.pool, obj.oid, pool, oid);
+  string name = normal_name(pool, oid);
+
+  ObjectCacheInfo info;
+
+  // Ask the cache only for the pieces the caller actually wants.
+  uint32_t flags = (end != 0 ? CACHE_FLAG_DATA : 0);
+  if (objv_tracker)
+    flags |= CACHE_FLAG_OBJV;
+  if (pmtime || psize)
+    flags |= CACHE_FLAG_META;
+  if (attrs)
+    flags |= CACHE_FLAG_XATTRS;
+
+  int r = cache.get(dpp, name, info, flags, cache_info);
+  // A hit is only usable when no refresh was requested or the cached
+  // version's compare() against the requested one returns 0.
+  if (r == 0 &&
+      (!refresh_version || !info.version.compare(&(*refresh_version)))) {
+    if (info.status < 0)
+      return info.status;
+
+    bufferlist& bl = info.data;
+
+    bufferlist::iterator i = bl.begin();
+
+    obl->clear();
+
+    i.copy_all(*obl);
+    if (objv_tracker)
+      objv_tracker->read_version = info.version;
+    if (pmtime) {
+      *pmtime = info.meta.mtime;
+    }
+    if (psize) {
+      *psize = info.meta.size;
+    }
+    if (attrs) {
+      if (raw_attrs) {
+        *attrs = info.xattrs;
+      } else {
+        rgw_filter_attrset(info.xattrs, RGW_ATTR_PREFIX, attrs);
+      }
+    }
+    return obl->length();
+  }
+  // NOTE(review): -ENODATA from cache.get() presumably marks a cached
+  // negative entry; it is reported to the caller as -ENOENT.
+  if (r == -ENODATA)
+    return -ENOENT;
+
+  // if we only ask for one of mtime or size, ask for the other too so we can
+  // satisfy CACHE_FLAG_META
+  uint64_t size = 0;
+  real_time mtime;
+  if (pmtime) {
+    if (!psize) {
+      psize = &size;
+    }
+  } else if (psize) {
+    pmtime = &mtime;
+  }
+
+  map<string, bufferlist> unfiltered_attrset;
+  r = RGWSI_SysObj_Core::read(dpp, read_state, objv_tracker,
+                              obj, obl, ofs, end, pmtime, psize,
+                              (attrs ? &unfiltered_attrset : nullptr),
+                              true, /* cache unfiltered attrs */
+                              cache_info,
+                              refresh_version, y);
+  if (r < 0) {
+    if (r == -ENOENT) { // only update ENOENT, we'd rather retry other errors
+      info.status = r;
+      cache.put(dpp, name, info, cache_info);
+    }
+    return r;
+  }
+
+  if (obl->length() == end + 1) {
+    /* in this case, most likely object contains more data, we can't cache it */
+    flags &= ~CACHE_FLAG_DATA;
+  } else {
+    // Copy the payload into the cache entry.
+    bufferlist& bl = info.data;
+    bl.clear();
+    bufferlist::iterator o = obl->begin();
+    o.copy_all(bl);
+  }
+
+  info.status = 0;
+  info.flags = flags;
+  if (objv_tracker) {
+    info.version = objv_tracker->read_version;
+  }
+  if (pmtime) {
+    info.meta.mtime = *pmtime;
+  }
+  if (psize) {
+    info.meta.size = *psize;
+  }
+  if (attrs) {
+    // The cache always stores the unfiltered set; the caller gets it raw or
+    // filtered depending on raw_attrs.
+    info.xattrs = std::move(unfiltered_attrset);
+    if (raw_attrs) {
+      *attrs = info.xattrs;
+    } else {
+      rgw_filter_attrset(info.xattrs, RGW_ATTR_PREFIX, attrs);
+    }
+  }
+  cache.put(dpp, name, info, cache_info);
+  return r;
+}
+
+// Look up a single xattr. A cached entry with xattrs answers the request
+// directly (-ENODATA when the attribute is not in the cached set, otherwise
+// the attribute's length). -ENODATA from the cache lookup itself is reported
+// as -ENOENT. Any other lookup result falls through to the core service,
+// and that result is not cached.
+int RGWSI_SysObj_Cache::get_attr(const DoutPrefixProvider *dpp,
+                                 const rgw_raw_obj& obj,
+                                 const char *attr_name,
+                                 bufferlist *dest,
+                                 optional_yield y)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+  string cache_key = normal_name(norm_pool, norm_oid);
+
+  ObjectCacheInfo cached;
+  uint32_t mask = CACHE_FLAG_XATTRS;
+
+  const int lookup = cache.get(dpp, cache_key, cached, mask, nullptr);
+  if (lookup == -ENODATA) {
+    return -ENOENT;
+  }
+  if (lookup != 0) {
+    /* don't try to cache this one */
+    return RGWSI_SysObj_Core::get_attr(dpp, obj, attr_name, dest, y);
+  }
+
+  if (cached.status < 0) {
+    return cached.status;
+  }
+
+  auto found = cached.xattrs.find(attr_name);
+  if (found == cached.xattrs.end()) {
+    return -ENODATA;
+  }
+
+  *dest = found->second;
+  return dest->length();
+}
+
+// Set/remove xattrs through the core service and keep the cache in step:
+// on success the change is merged into the local cache and distributed as
+// UPDATE_OBJ; on failure the local entry is dropped. The core service's
+// return value is passed back either way.
+int RGWSI_SysObj_Cache::set_attrs(const DoutPrefixProvider *dpp,
+                                  const rgw_raw_obj& obj,
+                                  map<string, bufferlist>& attrs,
+                                  map<string, bufferlist> *rmattrs,
+                                  RGWObjVersionTracker *objv_tracker,
+                                  bool exclusive, optional_yield y)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  // Snapshot the requested changes before the core call, which receives the
+  // maps by non-const reference/pointer.
+  ObjectCacheInfo update;
+  update.xattrs = attrs;
+  if (rmattrs) {
+    update.rm_xattrs = *rmattrs;
+  }
+  update.status = 0;
+  update.flags = CACHE_FLAG_MODIFY_XATTRS;
+
+  const int ret = RGWSI_SysObj_Core::set_attrs(dpp, obj, attrs, rmattrs, objv_tracker, exclusive, y);
+  string cache_key = normal_name(norm_pool, norm_oid);
+
+  if (ret < 0) {
+    cache.invalidate_remove(dpp, cache_key);
+    return ret;
+  }
+
+  if (objv_tracker && objv_tracker->read_version.ver) {
+    update.version = objv_tracker->read_version;
+    update.flags |= CACHE_FLAG_OBJV;
+  }
+  cache.put(dpp, cache_key, update, NULL);
+  if (distribute_cache(dpp, cache_key, obj, update, UPDATE_OBJ, y) < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << obj << dendl;
+  }
+
+  return ret;
+}
+
+// Full write (data + attrs) through the core service. On success the new
+// contents are cached locally and distributed as UPDATE_OBJ; on failure the
+// local cache entry is dropped. *pmtime, when given, is assigned from the
+// core call's mtime output regardless of the outcome.
+int RGWSI_SysObj_Cache::write(const DoutPrefixProvider *dpp,
+                              const rgw_raw_obj& obj,
+                              real_time *pmtime,
+                              map<std::string, bufferlist>& attrs,
+                              bool exclusive,
+                              const bufferlist& data,
+                              RGWObjVersionTracker *objv_tracker,
+                              real_time set_mtime,
+                              optional_yield y)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  // Snapshot attrs before the core call, which takes them by non-const
+  // reference.
+  ObjectCacheInfo entry;
+  entry.xattrs = attrs;
+  entry.status = 0;
+  entry.data = data;
+  entry.flags = CACHE_FLAG_XATTRS | CACHE_FLAG_DATA | CACHE_FLAG_META;
+
+  ceph::real_time written_mtime;
+  const int ret = RGWSI_SysObj_Core::write(dpp, obj, &written_mtime, attrs,
+                                           exclusive, data,
+                                           objv_tracker, set_mtime, y);
+  if (pmtime) {
+    *pmtime = written_mtime;
+  }
+
+  string cache_key = normal_name(norm_pool, norm_oid);
+  if (ret < 0) {
+    cache.invalidate_remove(dpp, cache_key);
+    return ret;
+  }
+
+  if (objv_tracker && objv_tracker->read_version.ver) {
+    entry.version = objv_tracker->read_version;
+    entry.flags |= CACHE_FLAG_OBJV;
+  }
+  entry.meta.mtime = written_mtime;
+  entry.meta.size = data.length();
+
+  cache.put(dpp, cache_key, entry, NULL);
+  if (distribute_cache(dpp, cache_key, obj, entry, UPDATE_OBJ, y) < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << obj << dendl;
+  }
+
+  return ret;
+}
+
+// Data-only write through the core service. On success the data is cached
+// locally and distributed as UPDATE_OBJ; on failure the local cache entry is
+// dropped. Returns the core service's result.
+int RGWSI_SysObj_Cache::write_data(const DoutPrefixProvider *dpp,
+                                   const rgw_raw_obj& obj,
+                                   const bufferlist& data,
+                                   bool exclusive,
+                                   RGWObjVersionTracker *objv_tracker,
+                                   optional_yield y)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  ObjectCacheInfo entry;
+  entry.data = data;
+  entry.meta.size = data.length();
+  entry.status = 0;
+  entry.flags = CACHE_FLAG_DATA;
+
+  const int ret = RGWSI_SysObj_Core::write_data(dpp, obj, data, exclusive, objv_tracker, y);
+  string cache_key = normal_name(norm_pool, norm_oid);
+
+  if (ret < 0) {
+    cache.invalidate_remove(dpp, cache_key);
+    return ret;
+  }
+
+  if (objv_tracker && objv_tracker->read_version.ver) {
+    entry.version = objv_tracker->read_version;
+    entry.flags |= CACHE_FLAG_OBJV;
+  }
+  cache.put(dpp, cache_key, entry, NULL);
+  if (distribute_cache(dpp, cache_key, obj, entry, UPDATE_OBJ, y) < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << obj << dendl;
+  }
+
+  return ret;
+}
+
+// Stat an object (size, mtime, xattrs, optionally version), answering from
+// the cache when it holds metadata and xattrs for the object. On a miss the
+// core service is queried and the answer is cached; a -ENOENT answer is
+// cached as a negative entry. Returns 0 on success or a negative error.
+int RGWSI_SysObj_Cache::raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+                                 uint64_t *psize, real_time *pmtime,
+                                 map<string, bufferlist> *attrs,
+                                 RGWObjVersionTracker *objv_tracker,
+                                 optional_yield y)
+{
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(obj.pool, obj.oid, norm_pool, norm_oid);
+
+  string cache_key = normal_name(norm_pool, norm_oid);
+
+  uint64_t size;
+  real_time mtime;
+
+  ObjectCacheInfo info;
+  uint32_t flags = CACHE_FLAG_META | CACHE_FLAG_XATTRS;
+  if (objv_tracker) {
+    flags |= CACHE_FLAG_OBJV;
+  }
+
+  int r = cache.get(dpp, cache_key, info, flags, NULL);
+  if (r == -ENODATA) {
+    return -ENOENT;
+  }
+
+  if (r == 0) {
+    // Cache hit: take everything from the cached entry.
+    if (info.status < 0) {
+      return info.status;
+    }
+    size = info.meta.size;
+    mtime = info.meta.mtime;
+    if (objv_tracker) {
+      objv_tracker->read_version = info.version;
+    }
+  } else {
+    // Cache miss: query the core service and remember the answer.
+    r = RGWSI_SysObj_Core::raw_stat(dpp, obj, &size, &mtime, &info.xattrs,
+                                    objv_tracker, y);
+    if (r < 0) {
+      if (r == -ENOENT) {
+        info.status = r;
+        cache.put(dpp, cache_key, info, NULL);
+      }
+      return r;
+    }
+    info.status = 0;
+    info.meta.mtime = mtime;
+    info.meta.size = size;
+    // Same bits as requested above: META | XATTRS, plus OBJV with a tracker.
+    info.flags = flags;
+    if (objv_tracker) {
+      info.version = objv_tracker->read_version;
+    }
+    cache.put(dpp, cache_key, info, NULL);
+  }
+
+  if (psize) {
+    *psize = size;
+  }
+  if (pmtime) {
+    *pmtime = mtime;
+  }
+  if (attrs) {
+    *attrs = info.xattrs;
+  }
+  return 0;
+}
+
+// Package (op, object, cache info) into a notification and hand it to the
+// notify service for distribution under the given cache key.
+int RGWSI_SysObj_Cache::distribute_cache(const DoutPrefixProvider *dpp,
+                                         const string& normal_name,
+                                         const rgw_raw_obj& obj,
+                                         ObjectCacheInfo& obj_info, int op,
+                                         optional_yield y)
+{
+  RGWCacheNotifyInfo notification;
+  notification.op = op;
+  notification.obj_info = obj_info;
+  notification.obj = obj;
+
+  return notify_svc->distribute(dpp, normal_name, notification, y);
+}
+
+// Handle a notification: decode it and apply the requested operation to the
+// local cache. Returns -EIO when the payload cannot be decoded, -EINVAL for
+// an unknown operation, 0 otherwise.
+int RGWSI_SysObj_Cache::watch_cb(const DoutPrefixProvider *dpp,
+                                 uint64_t notify_id,
+                                 uint64_t cookie,
+                                 uint64_t notifier_id,
+                                 bufferlist& bl)
+{
+  RGWCacheNotifyInfo notification;
+
+  try {
+    auto pos = bl.cbegin();
+    decode(notification, pos);
+  } catch (const buffer::end_of_buffer&) {
+    ldpp_dout(dpp, 0) << "ERROR: got bad notification" << dendl;
+    return -EIO;
+  } catch (const buffer::error&) {
+    ldpp_dout(dpp, 0) << "ERROR: buffer::error" << dendl;
+    return -EIO;
+  }
+
+  rgw_pool norm_pool;
+  string norm_oid;
+  normalize_pool_and_obj(notification.obj.pool, notification.obj.oid, norm_pool, norm_oid);
+  string cache_key = normal_name(norm_pool, norm_oid);
+
+  if (notification.op == UPDATE_OBJ) {
+    cache.put(dpp, cache_key, notification.obj_info, NULL);
+  } else if (notification.op == INVALIDATE_OBJ) {
+    cache.invalidate_remove(dpp, cache_key);
+  } else {
+    ldpp_dout(dpp, 0) << "WARNING: got unknown notification op: " << notification.op << dendl;
+    return -EINVAL;
+  }
+
+  return 0;
+}
+
+// Pass the enabled/disabled state through to the underlying object cache.
+void RGWSI_SysObj_Cache::set_enabled(bool status)
+{
+ cache.set_enabled(status);
+}
+
+// Forward a chaining request to the underlying object cache and report its
+// result.
+bool RGWSI_SysObj_Cache::chain_cache_entry(const DoutPrefixProvider *dpp,
+                                           std::initializer_list<rgw_cache_entry_info *> cache_info_entries,
+                                           RGWChainedCache::Entry *chained_entry)
+{
+  const bool chained = cache.chain_cache_entry(dpp, cache_info_entries, chained_entry);
+  return chained;
+}
+
+// Attach a chained cache to the underlying object cache.
+void RGWSI_SysObj_Cache::register_chained_cache(RGWChainedCache *cc)
+{
+ cache.chain_cache(cc);
+}
+
+// Detach a chained cache from the underlying object cache.
+void RGWSI_SysObj_Cache::unregister_chained_cache(RGWChainedCache *cc)
+{
+ cache.unchain_cache(cc);
+}
+
+// Emit the three fields of one cache-list record ("name", "mtime" as
+// ISO 8601 text, "size") on the given formatter. The caller decides what
+// section, if any, surrounds them.
+static void cache_list_dump_helper(Formatter* f,
+                                   const std::string& name,
+                                   const ceph::real_time mtime,
+                                   const std::uint64_t size)
+{
+  const auto mtime_text = ceph::to_iso_8601(mtime);
+
+  f->dump_string("name", name);
+  f->dump_string("mtime", mtime_text);
+  f->dump_unsigned("size", size);
+}
+
+// Admin socket hook exposing the "cache ..." commands of the sysobj cache
+// service. start() registers the commands, call() dispatches them.
+class RGWSI_SysObj_Cache_ASocketHook : public AdminSocketHook {
+ RGWSI_SysObj_Cache *svc;
+
+ // Each row is { command descriptor, help text }, in the order passed to
+ // register_command() by start().
+ static constexpr std::string_view admin_commands[][2] = {
+ { "cache list name=filter,type=CephString,req=false",
+ "cache list [filter_str]: list object cache, possibly matching substrings" },
+ { "cache inspect name=target,type=CephString,req=true",
+ "cache inspect target: print cache element" },
+ { "cache erase name=target,type=CephString,req=true",
+ "cache erase target: erase element from cache" },
+ { "cache zap",
+ "cache zap: erase all elements from cache" }
+ };
+
+public:
+ RGWSI_SysObj_Cache_ASocketHook(RGWSI_SysObj_Cache *_svc) : svc(_svc) {}
+
+ // Register / unregister the commands above with the admin socket.
+ int start();
+ void shutdown();
+
+ // Dispatch one admin command; returns 0 on success or a negative errno.
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ const bufferlist&,
+ Formatter *f,
+ std::ostream& ss,
+ bufferlist& out) override;
+};
+
+// Register every entry of admin_commands with the admin socket. Stops at the
+// first failure, logs it and returns that error; returns 0 when all
+// commands were registered.
+int RGWSI_SysObj_Cache_ASocketHook::start()
+{
+  auto admin_socket = svc->ctx()->get_admin_socket();
+  for (const auto& [descriptor, help] : admin_commands) {
+    const int r = admin_socket->register_command(descriptor, this, help);
+    if (r >= 0) {
+      continue;
+    }
+    ldout(svc->ctx(), 0) << "ERROR: fail to register admin socket command (r=" << r
+                         << ")" << dendl;
+    return r;
+  }
+  return 0;
+}
+
+// Unregister all commands that were registered with this hook.
+void RGWSI_SysObj_Cache_ASocketHook::shutdown()
+{
+  svc->ctx()->get_admin_socket()->unregister_commands(this);
+}
+
+// Dispatch one admin socket command to the cache service's asocket handler.
+// Returns 0 on success, -ENOENT when the inspect/erase handler reports
+// failure for the target, and -ENOSYS for a command this hook does not
+// know.
+int RGWSI_SysObj_Cache_ASocketHook::call(
+  std::string_view command, const cmdmap_t& cmdmap,
+  const bufferlist&,
+  Formatter *f,
+  std::ostream& ss,
+  bufferlist& out)
+{
+  if (command == "cache list"sv) {
+    // The filter argument is optional.
+    std::optional<std::string> filter;
+    const auto arg = cmdmap.find("filter");
+    if (arg != cmdmap.cend()) {
+      filter = boost::get<std::string>(arg->second);
+    }
+    f->open_array_section("cache_entries");
+    svc->asocket.call_list(filter, f);
+    f->close_section();
+    return 0;
+  }
+
+  if (command == "cache inspect"sv) {
+    const auto& target = boost::get<std::string>(cmdmap.at("target"));
+    if (!svc->asocket.call_inspect(target, f)) {
+      ss << "Unable to find entry "s + target + ".\n";
+      return -ENOENT;
+    }
+    return 0;
+  }
+
+  if (command == "cache erase"sv) {
+    const auto& target = boost::get<std::string>(cmdmap.at("target"));
+    if (!svc->asocket.call_erase(target)) {
+      ss << "Unable to find entry "s + target + ".\n";
+      return -ENOENT;
+    }
+    return 0;
+  }
+
+  if (command == "cache zap"sv) {
+    svc->asocket.call_zap();
+    return 0;
+  }
+
+  return -ENOSYS;
+}
+
+// Remember the log prefix provider and owning service, and create the admin
+// socket hook bound to that service.
+RGWSI_SysObj_Cache::ASocketHandler::ASocketHandler(const DoutPrefixProvider *_dpp, RGWSI_SysObj_Cache *_svc) : dpp(_dpp), svc(_svc)
+{
+  hook = std::make_unique<RGWSI_SysObj_Cache_ASocketHook>(_svc);
+}
+
+// Defined in this translation unit, where the hook class is a complete
+// type, so the unique_ptr member can be destroyed.
+RGWSI_SysObj_Cache::ASocketHandler::~ASocketHandler() = default;
+
+// Start the admin socket hook and report its result.
+int RGWSI_SysObj_Cache::ASocketHandler::start()
+{
+  const int ret = hook->start();
+  return ret;
+}
+
+// Shut down the admin socket hook.
+void RGWSI_SysObj_Cache::ASocketHandler::shutdown()
+{
+  hook->shutdown();
+}
+
+// Dump every cache entry whose name contains `filter` (or every entry when
+// no filter is given) to the formatter.
+void RGWSI_SysObj_Cache::ASocketHandler::call_list(const std::optional<std::string>& filter, Formatter* f)
+{
+  const auto dump_if_matching =
+    [&filter, f] (const string& name, const ObjectCacheEntry& entry) {
+      const bool matches = !filter || name.find(*filter) != name.npos;
+      if (!matches) {
+        return;
+      }
+      cache_list_dump_helper(f, name, entry.info.meta.mtime,
+                             entry.info.meta.size);
+    };
+
+  svc->cache.for_each(dump_if_matching);
+}
+
+// Look up `target` in the cache. When found, dump it inside a "cache_entry"
+// section and return true; otherwise return false. (The declared return type
+// is int, so callers see 1 / 0.)
+int RGWSI_SysObj_Cache::ASocketHandler::call_inspect(const std::string& target, Formatter* f)
+{
+  const auto entry = svc->cache.get(dpp, target);
+  if (!entry) {
+    return false;
+  }
+
+  f->open_object_section("cache_entry");
+  f->dump_string("name", target.c_str());
+  entry->dump(f);
+  f->close_section();
+  return true;
+}
+
+// Drop `target` from the cache and report what the cache's
+// invalidate_remove() returned.
+int RGWSI_SysObj_Cache::ASocketHandler::call_erase(const std::string& target)
+{
+  const auto removed = svc->cache.invalidate_remove(dpp, target);
+  return removed;
+}
+
+// Invalidate every entry of the cache. Always returns 0.
+int RGWSI_SysObj_Cache::ASocketHandler::call_zap()
+{
+ svc->cache.invalidate_all();
+ return 0;
+}
diff --git a/src/rgw/services/svc_sys_obj_cache.h b/src/rgw/services/svc_sys_obj_cache.h
new file mode 100644
index 000000000..f7950843f
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_cache.h
@@ -0,0 +1,222 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "common/RWLock.h"
+#include "rgw_service.h"
+#include "rgw_cache.h"
+
+#include "svc_sys_obj_core.h"
+
+class RGWSI_Notify;
+
+class RGWSI_SysObj_Cache_CB;
+class RGWSI_SysObj_Cache_ASocketHook;
+
+// Caching layer over RGWSI_SysObj_Core: the overridden object operations
+// consult and maintain an ObjectCache, and changes are announced through the
+// notify service (see distribute_cache() / watch_cb()).
+class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core
+{
+ friend class RGWSI_SysObj_Cache_CB;
+ friend class RGWServices_Def;
+ friend class ASocketHandler;
+
+ RGWSI_Notify *notify_svc{nullptr};
+ ObjectCache cache;
+
+ // Callback object registered with the notify service in do_start().
+ std::shared_ptr<RGWSI_SysObj_Cache_CB> cb;
+
+ // Map (pool, oid) to the pair used to build cache keys.
+ void normalize_pool_and_obj(const rgw_pool& src_pool, const std::string& src_obj, rgw_pool& dst_pool, std::string& dst_obj);
+protected:
+ // Record service dependencies; rados/zone go to the core base class.
+ void init(RGWSI_RADOS *_rados_svc,
+ RGWSI_Zone *_zone_svc,
+ RGWSI_Notify *_notify_svc) {
+ core_init(_rados_svc, _zone_svc);
+ notify_svc = _notify_svc;
+ }
+
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+ void shutdown() override;
+
+ int raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+ uint64_t *psize, real_time *pmtime,
+ std::map<std::string, bufferlist> *attrs,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y) override;
+
+ int read(const DoutPrefixProvider *dpp,
+ RGWSI_SysObj_Obj_GetObjState& read_state,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ bufferlist *bl, off_t ofs, off_t end,
+ ceph::real_time* pmtime, uint64_t* psize,
+ std::map<std::string, bufferlist> *attrs,
+ bool raw_attrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version>,
+ optional_yield y) override;
+
+ int get_attr(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const char *name, bufferlist *dest,
+ optional_yield y) override;
+
+ int set_attrs(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj,
+ std::map<std::string, bufferlist>& attrs,
+ std::map<std::string, bufferlist> *rmattrs,
+ RGWObjVersionTracker *objv_tracker,
+ bool exclusive, optional_yield y) override;
+
+ int remove(const DoutPrefixProvider *dpp,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ optional_yield y) override;
+
+ int write(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj,
+ real_time *pmtime,
+ std::map<std::string, bufferlist>& attrs,
+ bool exclusive,
+ const bufferlist& data,
+ RGWObjVersionTracker *objv_tracker,
+ real_time set_mtime,
+ optional_yield y) override;
+
+ // NOTE(review): unlike its siblings this is not marked `override`;
+ // confirm against the base class whether that is intentional.
+ int write_data(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj,
+ const bufferlist& bl,
+ bool exclusive,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+
+ // Send a cache update/invalidation for `obj` through the notify service.
+ int distribute_cache(const DoutPrefixProvider *dpp, const std::string& normal_name, const rgw_raw_obj& obj,
+ ObjectCacheInfo& obj_info, int op,
+ optional_yield y);
+
+ // Apply a received cache notification to the local cache.
+ int watch_cb(const DoutPrefixProvider *dpp,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl);
+
+ void set_enabled(bool status);
+
+public:
+ RGWSI_SysObj_Cache(const DoutPrefixProvider *dpp, CephContext *cct) : RGWSI_SysObj_Core(cct), asocket(dpp, this) {
+ cache.set_ctx(cct);
+ }
+
+ // Thin forwards to the underlying ObjectCache's chaining interface.
+ bool chain_cache_entry(const DoutPrefixProvider *dpp,
+ std::initializer_list<rgw_cache_entry_info *> cache_info_entries,
+ RGWChainedCache::Entry *chained_entry);
+ void register_chained_cache(RGWChainedCache *cc);
+ void unregister_chained_cache(RGWChainedCache *cc);
+
+ // Implements the admin socket "cache ..." commands on behalf of the hook.
+ class ASocketHandler {
+ const DoutPrefixProvider *dpp;
+ RGWSI_SysObj_Cache *svc;
+
+ std::unique_ptr<RGWSI_SysObj_Cache_ASocketHook> hook;
+
+ public:
+ ASocketHandler(const DoutPrefixProvider *dpp, RGWSI_SysObj_Cache *_svc);
+ ~ASocketHandler();
+
+ int start();
+ void shutdown();
+
+ // `call_list` must iterate over all cache entries and call
+ // `cache_list_dump_helper` with the supplied Formatter on any that
+ // include `filter` as a substring.
+ //
+ void call_list(const std::optional<std::string>& filter, Formatter* f);
+
+ // `call_inspect` must look up the requested target and, if found,
+ // dump it to the supplied Formatter and return true. If not found,
+ // it must return false.
+ //
+ int call_inspect(const std::string& target, Formatter* f);
+
+ // `call_erase` must erase the requested target and return true. If
+ // the requested target does not exist, it should return false.
+ int call_erase(const std::string& target);
+
+ // `call_zap` must erase the cache.
+ int call_zap();
+ } asocket;
+};
+
+/**
+ * Typed chained cache: a string-keyed map of T values with insertion
+ * timestamps, registered with an RGWSI_SysObj_Cache through init().
+ *
+ * Entries are inserted via put(), which asks the service cache to call back
+ * into chain_cb(). The service cache drives invalidate()/invalidate_all().
+ * All access to `entries` is guarded by `lock`.
+ */
+template <class T>
+class RGWChainedCacheImpl : public RGWChainedCache {
+  RGWSI_SysObj_Cache *svc{nullptr};
+  // Zero disables expiry (see find()). Value-initialized so find() and
+  // chain_cb() never read an indeterminate duration when init() was not
+  // called or returned early.
+  ceph::timespan expiry{};
+  RWLock lock;
+
+  // key -> (value, time the value was stored by chain_cb())
+  std::unordered_map<std::string, std::pair<T, ceph::coarse_mono_time>> entries;
+
+public:
+  RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
+  ~RGWChainedCacheImpl() {
+    // Nothing to undo when never registered, or already unregistered.
+    if (!svc) {
+      return;
+    }
+    svc->unregister_chained_cache(this);
+  }
+
+  /// Forget the service so the destructor does not unregister again.
+  void unregistered() override {
+    svc = nullptr;
+  }
+
+  /// Register with the service cache and load the expiry interval from the
+  /// "rgw_cache_expiry_interval" option. A null service is ignored.
+  void init(RGWSI_SysObj_Cache *_svc) {
+    if (!_svc) {
+      return;
+    }
+    svc = _svc;
+    svc->register_chained_cache(this);
+    expiry = std::chrono::seconds(svc->ctx()->_conf.get_val<uint64_t>(
+                                    "rgw_cache_expiry_interval"));
+  }
+
+  /// Return a copy of the value stored under `key`, or boost::none when the
+  /// key is absent or the entry is older than the expiry interval.
+  boost::optional<T> find(const std::string& key) {
+    std::shared_lock rl{lock};
+    auto iter = entries.find(key);
+    if (iter == entries.end()) {
+      return boost::none;
+    }
+    if (expiry.count() &&
+        (ceph::coarse_mono_clock::now() - iter->second.second) > expiry) {
+      return boost::none;
+    }
+
+    return iter->second.first;
+  }
+
+  /// Ask `svc` to chain `entry` under `key`. Returns false when `svc` is
+  /// null, otherwise the service cache's result. Note that the `svc`
+  /// parameter shadows the member of the same name.
+  bool put(const DoutPrefixProvider *dpp, RGWSI_SysObj_Cache *svc, const std::string& key, T *entry,
+           std::initializer_list<rgw_cache_entry_info *> cache_info_entries) {
+    if (!svc) {
+      return false;
+    }
+
+    Entry chain_entry(this, key, entry);
+
+    /* we need the svc cache to call us under its lock to maintain lock ordering */
+    return svc->chain_cache_entry(dpp, cache_info_entries, &chain_entry);
+  }
+
+  /// Store a copy of *data (a T) under `key`, stamping it with the current
+  /// time when expiry is enabled.
+  void chain_cb(const std::string& key, void *data) override {
+    T *entry = static_cast<T *>(data);
+    std::unique_lock wl{lock};
+    // Look the slot up once instead of once per field.
+    auto& slot = entries[key];
+    slot.first = *entry;
+    if (expiry.count() > 0) {
+      slot.second = ceph::coarse_mono_clock::now();
+    }
+  }
+
+  void invalidate(const std::string& key) override {
+    std::unique_lock wl{lock};
+    entries.erase(key);
+  }
+
+  void invalidate_all() override {
+    std::unique_lock wl{lock};
+    entries.clear();
+  }
+}; /* RGWChainedCacheImpl */
diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc
new file mode 100644
index 000000000..303089691
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_core.cc
@@ -0,0 +1,666 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_sys_obj_core.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+
+/**
+ * Hand out the RADOS handle cached in this state, opening it on first use.
+ * Returns 0 with *pobj set, -EINVAL for an empty oid, or the error from open().
+ */
+int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp,
+                                                 RGWSI_RADOS *rados_svc,
+                                                 RGWSI_Zone *zone_svc,
+                                                 const rgw_raw_obj& obj,
+                                                 RGWSI_RADOS::Obj **pobj)
+{
+  // Fast path: a previous call already opened the handle.
+  if (has_rados_obj) {
+    *pobj = &rados_obj;
+    return 0;
+  }
+
+  if (obj.oid.empty()) {
+    ldpp_dout(dpp, 0) << "ERROR: obj.oid is empty" << dendl;
+    return -EINVAL;
+  }
+
+  rados_obj = rados_svc->obj(obj);
+  const int open_ret = rados_obj.open(dpp);
+  if (open_ret < 0) {
+    return open_ret;
+  }
+
+  has_rados_obj = true;
+  *pobj = &rados_obj;
+  return 0;
+}
+
+/**
+ * Build and open a RADOS handle for `obj` into *pobj.
+ * Returns 0 on success, -EINVAL for an empty oid, or the error from open().
+ * The `zone_svc` argument is not used by this implementation.
+ */
+int RGWSI_SysObj_Core::get_rados_obj(const DoutPrefixProvider *dpp,
+                                     RGWSI_Zone *zone_svc,
+                                     const rgw_raw_obj& obj,
+                                     RGWSI_RADOS::Obj *pobj)
+{
+  if (obj.oid.empty()) {
+    ldpp_dout(dpp, 0) << "ERROR: obj.oid is empty" << dendl;
+    return -EINVAL;
+  }
+
+  *pobj = rados_svc->obj(obj);
+  const int open_ret = pobj->open(dpp);
+  return (open_ret < 0) ? open_ret : 0;
+}
+
+/**
+ * Fetch all xattrs of `obj` and, when psize or pmtime is supplied, its size
+ * and mtime, in a single read operation. Returns 0 or a negative error.
+ */
+int RGWSI_SysObj_Core::raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+                                uint64_t *psize, real_time *pmtime,
+                                map<string, bufferlist> *attrs,
+                                RGWObjVersionTracker *objv_tracker,
+                                optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    return r;
+  }
+
+  uint64_t size = 0;
+  struct timespec mtime_ts;
+  const bool want_stat = (psize != nullptr) || (pmtime != nullptr);
+
+  librados::ObjectReadOperation op;
+  if (objv_tracker != nullptr) {
+    objv_tracker->prepare_op_for_read(&op);
+  }
+  op.getxattrs(attrs, nullptr);
+  if (want_stat) {
+    op.stat2(&size, &mtime_ts, nullptr);
+  }
+
+  bufferlist outbl;
+  r = rados_obj.operate(dpp, &op, &outbl, y);
+  if (r < 0) {
+    return r;
+  }
+
+  if (psize != nullptr) {
+    *psize = size;
+  }
+  if (pmtime != nullptr) {
+    *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+  }
+  return 0;
+}
+
+/**
+ * Stat `obj` through raw_stat() and hand back size, mtime and attributes.
+ * With raw_attrs the xattr map is returned untouched; otherwise it goes
+ * through rgw_filter_attrset() with RGW_ATTR_PREFIX. `_state` is unused here.
+ */
+int RGWSI_SysObj_Core::stat(RGWSI_SysObj_Obj_GetObjState& _state,
+                            const rgw_raw_obj& obj,
+                            map<string, bufferlist> *attrs,
+                            bool raw_attrs,
+                            real_time *lastmod,
+                            uint64_t *obj_size,
+                            RGWObjVersionTracker *objv_tracker,
+                            optional_yield y,
+                            const DoutPrefixProvider *dpp)
+{
+  uint64_t size = 0;
+  ceph::real_time mtime;
+  std::map<std::string, bufferlist> attrset;
+
+  const int r = raw_stat(dpp, obj, &size, &mtime, &attrset, objv_tracker, y);
+  if (r < 0) {
+    return r;
+  }
+
+  if (attrs != nullptr) {
+    if (!raw_attrs) {
+      rgw_filter_attrset(attrset, RGW_ATTR_PREFIX, attrs);
+    } else {
+      *attrs = std::move(attrset);
+    }
+    // Only walk the map when level-20 rgw logging is actually gathered.
+    if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+      for (const auto& xattr : *attrs) {
+        ldpp_dout(dpp, 20) << "Read xattr: " << xattr.first << dendl;
+      }
+    }
+  }
+
+  if (obj_size != nullptr) {
+    *obj_size = size;
+  }
+  if (lastmod != nullptr) {
+    *lastmod = mtime;
+  }
+  return 0;
+}
+
+/**
+ * Read [ofs, end] of a system object, optionally with size/mtime and xattrs,
+ * in one compound read operation.
+ *
+ * Returns bl->length() on success or a negative error. -ECANCELED is returned
+ * when the state recorded a version on an earlier read and this read saw a
+ * different one. `cache_info` and the unnamed obj_version argument are not
+ * used by this implementation.
+ */
+int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp,
+ RGWSI_SysObj_Obj_GetObjState& _read_state,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ bufferlist *bl, off_t ofs, off_t end,
+ ceph::real_time* pmtime, uint64_t* psize,
+ map<string, bufferlist> *attrs,
+ bool raw_attrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version>,
+ optional_yield y)
+{
+ auto& read_state = static_cast<GetObjState&>(_read_state);
+
+ uint64_t len;
+ struct timespec mtime_ts;
+ librados::ObjectReadOperation op;
+
+ // A negative `end` requests length 0.
+ // NOTE(review): presumably librados treats len=0 as "read to the end" -- confirm.
+ if (end < 0)
+ len = 0;
+ else
+ len = end - ofs + 1;
+
+ if (objv_tracker) {
+ objv_tracker->prepare_op_for_read(&op);
+ }
+ if (psize || pmtime) {
+ op.stat2(psize, &mtime_ts, nullptr);
+ }
+
+ ldpp_dout(dpp, 20) << "rados->read ofs=" << ofs << " len=" << len << dendl;
+ op.read(ofs, len, bl, nullptr);
+
+ // Filled only when the caller wants filtered attrs; filtered after the op runs.
+ map<string, bufferlist> unfiltered_attrset;
+
+ if (attrs) {
+ if (raw_attrs) {
+ op.getxattrs(attrs, nullptr);
+ } else {
+ op.getxattrs(&unfiltered_attrset, nullptr);
+ }
+ }
+
+ RGWSI_RADOS::Obj rados_obj;
+ int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+ if (r < 0) {
+ ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+ return r;
+ }
+ r = rados_obj.operate(dpp, &op, nullptr, y);
+ if (r < 0) {
+ ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
+ return r;
+ }
+ ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
+
+ uint64_t op_ver = rados_obj.get_last_version();
+
+ // A previously recorded, different version means the object changed
+ // between reads that share this state.
+ if (read_state.last_ver > 0 &&
+ read_state.last_ver != op_ver) {
+ ldpp_dout(dpp, 5) << "raced with an object write, abort" << dendl;
+ return -ECANCELED;
+ }
+
+ if (pmtime) {
+ *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+ }
+ if (attrs && !raw_attrs) {
+ rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs);
+ }
+
+ read_state.last_ver = op_ver;
+
+ return bl->length();
+}
+
+/**
+ * Get an attribute for a system object.
+ * obj: the object to get attr
+ * name: name of the attr to retrieve
+ * dest: bufferlist to store the result in
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWSI_SysObj_Core::get_attr(const DoutPrefixProvider *dpp,
+                                const rgw_raw_obj& obj,
+                                const char *name,
+                                bufferlist *dest,
+                                optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  // Single-op read; the per-op result in rval is not inspected, only the
+  // overall status of operate().
+  librados::ObjectReadOperation op;
+  int rval;
+  op.getxattr(name, dest, &rval);
+
+  const int op_ret = rados_obj.operate(dpp, &op, nullptr, y);
+  return (op_ret < 0) ? op_ret : 0;
+}
+
+/**
+ * Set and/or remove xattrs on a system object in one write operation.
+ *
+ * attrs:        xattrs to set; entries with an empty value are skipped
+ * rmattrs:      optional map whose keys name xattrs to remove (values unused)
+ * objv_tracker: optional version tracker; prepared before the op and applied
+ *               after a successful write
+ * exclusive:    when true the op starts with an exclusive create
+ * Returns: 0 on success (including when no op was queued), -ERR# otherwise.
+ */
+int RGWSI_SysObj_Core::set_attrs(const DoutPrefixProvider *dpp,
+                                 const rgw_raw_obj& obj,
+                                 map<string, bufferlist>& attrs,
+                                 map<string, bufferlist> *rmattrs,
+                                 RGWObjVersionTracker *objv_tracker,
+                                 bool exclusive, optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+
+  if (exclusive) {
+    op.create(true); // exclusive create
+  }
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+
+  // Removals are queued before the sets, as in the original ordering.
+  if (rmattrs) {
+    for (const auto& rm : *rmattrs) {
+      op.rmxattr(rm.first.c_str());
+    }
+  }
+
+  for (auto& attr : attrs) {
+    bufferlist& value = attr.second;
+    if (!value.length()) {
+      continue;
+    }
+    op.setxattr(attr.first.c_str(), value);
+  }
+
+  // Nothing queued at all: skip the round trip.
+  if (!op.size()) {
+    return 0;
+  }
+
+  r = rados_obj.operate(dpp, &op, y);
+  if (r < 0) {
+    return r;
+  }
+
+  if (objv_tracker) {
+    objv_tracker->apply_write();
+  }
+  return 0;
+}
+
+/**
+ * Read up to `count` omap entries that sort after `marker` into *m, issuing
+ * as many reads as needed. *pmore (if given) receives the `more` flag of the
+ * last read. Returns 0 or a negative error.
+ */
+int RGWSI_SysObj_Core::omap_get_vals(const DoutPrefixProvider *dpp,
+                                     const rgw_raw_obj& obj,
+                                     const string& marker,
+                                     uint64_t count,
+                                     std::map<string, bufferlist> *m,
+                                     bool *pmore,
+                                     optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  string cursor = marker;
+  bool more;
+
+  do {
+    librados::ObjectReadOperation op;
+    std::map<string, bufferlist> batch;
+    int rval;
+    op.omap_get_vals2(cursor, count, &batch, &more, &rval);
+
+    r = rados_obj.operate(dpp, &op, nullptr, y);
+    if (r < 0) {
+      return r;
+    }
+    if (batch.empty()) {
+      break;
+    }
+
+    // Shrink the remaining budget and resume after the last key seen.
+    count -= batch.size();
+    cursor = batch.rbegin()->first;
+    m->insert(batch.begin(), batch.end());
+  } while (more && count > 0);
+
+  if (pmore != nullptr) {
+    *pmore = more;
+  }
+  return 0;
+}
+
+/**
+ * Read every omap entry of `obj` into *m, paging through the object in
+ * fixed-size batches until the reported `more` flag clears or a batch comes
+ * back empty.
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWSI_SysObj_Core::omap_get_all(const DoutPrefixProvider *dpp,
+                                    const rgw_raw_obj& obj,
+                                    std::map<string, bufferlist> *m,
+                                    optional_yield y)
+{
+  // Page size per read; a scoped constant instead of a function-body macro
+  // that would stay defined for the rest of the translation unit.
+  constexpr int max_omap_get_entries = 1024;
+
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  string start_after;
+  bool more;
+
+  do {
+    librados::ObjectReadOperation op;
+
+    std::map<string, bufferlist> t;
+    int rval;
+    op.omap_get_vals2(start_after, max_omap_get_entries, &t, &more, &rval);
+
+    r = rados_obj.operate(dpp, &op, nullptr, y);
+    if (r < 0) {
+      return r;
+    }
+    if (t.empty()) {
+      break;
+    }
+    // Resume the next page after the last key of this one.
+    start_after = t.rbegin()->first;
+    m->insert(t.begin(), t.end());
+  } while (more);
+  return 0;
+}
+
+/**
+ * Set a single omap key on `obj`; with must_exist the write first asserts
+ * that the object exists. Returns the status of the write operation.
+ */
+int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key,
+                                bufferlist& bl, bool must_exist,
+                                optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  ldpp_dout(dpp, 15) << "omap_set obj=" << obj << " key=" << key << dendl;
+
+  map<string, bufferlist> single_entry;
+  single_entry[key] = bl;
+
+  librados::ObjectWriteOperation op;
+  if (must_exist) {
+    op.assert_exists();
+  }
+  op.omap_set(single_entry);
+  return rados_obj.operate(dpp, &op, y);
+}
+
+/**
+ * Set a batch of omap keys on `obj`; with must_exist the write first asserts
+ * that the object exists. Returns the status of the write operation.
+ */
+int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+                                const std::map<std::string, bufferlist>& m,
+                                bool must_exist, optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (must_exist) {
+    op.assert_exists();
+  }
+  op.omap_set(m);
+  return rados_obj.operate(dpp, &op, y);
+}
+
+/**
+ * Remove one omap key from `obj`. Returns the status of the write operation.
+ */
+int RGWSI_SysObj_Core::omap_del(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key,
+                                optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  const set<string> doomed_keys{key};
+
+  librados::ObjectWriteOperation op;
+  op.omap_rm_keys(doomed_keys);
+
+  return rados_obj.operate(dpp, &op, y);
+}
+
+/**
+ * Forward a notify on `obj` to the RADOS handle with payload `bl` and the
+ * given timeout; `pbl` is passed through for the reply. Returns its status.
+ */
+int RGWSI_SysObj_Core::notify(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, bufferlist& bl,
+                              uint64_t timeout_ms, bufferlist *pbl,
+                              optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  return rados_obj.notify(dpp, bl, timeout_ms, pbl, y);
+}
+
+/**
+ * Remove a system object, guarded by `objv_tracker` when one is supplied.
+ * Returns 0 on success or a negative error.
+ */
+int RGWSI_SysObj_Core::remove(const DoutPrefixProvider *dpp,
+                              RGWObjVersionTracker *objv_tracker,
+                              const rgw_raw_obj& obj,
+                              optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (objv_tracker != nullptr) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+  op.remove();
+
+  const int op_ret = rados_obj.operate(dpp, &op, y);
+  return (op_ret < 0) ? op_ret : 0;
+}
+
+/**
+ * Write a whole system object (data plus xattrs) in one write operation.
+ *
+ * pmtime:       if non-null, receives the mtime that was stamped on the object
+ * attrs:        xattrs to set; entries with an empty value are skipped
+ * exclusive:    true  -> exclusive create;
+ *               false -> remove (flagged LIBRADOS_OP_FLAG_FAILOK) followed by
+ *                        a non-exclusive create
+ * objv_tracker: optional version tracker; prepared before the op and applied
+ *               after a successful write
+ * set_mtime:    mtime to stamp; a zero value means "now"
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWSI_SysObj_Core::write(const DoutPrefixProvider *dpp,
+                             const rgw_raw_obj& obj,
+                             real_time *pmtime,
+                             map<std::string, bufferlist>& attrs,
+                             bool exclusive,
+                             const bufferlist& data,
+                             RGWObjVersionTracker *objv_tracker,
+                             real_time set_mtime,
+                             optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (r < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+
+  if (exclusive) {
+    op.create(true); // exclusive create
+  } else {
+    op.remove();
+    op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK);
+    op.create(false);
+  }
+
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+
+  if (real_clock::is_zero(set_mtime)) {
+    set_mtime = real_clock::now();
+  }
+
+  struct timespec mtime_ts = real_clock::to_timespec(set_mtime);
+  op.mtime2(&mtime_ts);
+  op.write_full(data);
+
+  for (auto& attr : attrs) {
+    bufferlist& value = attr.second;
+    if (!value.length()) {
+      continue;
+    }
+    op.setxattr(attr.first.c_str(), value);
+  }
+
+  r = rados_obj.operate(dpp, &op, y);
+  if (r < 0) {
+    return r;
+  }
+
+  if (objv_tracker) {
+    objv_tracker->apply_write();
+  }
+
+  if (pmtime) {
+    *pmtime = set_mtime;
+  }
+
+  return 0;
+}
+
+
+/**
+ * Overwrite the data of a system object with `bl`, optionally requiring an
+ * exclusive create and/or a version check. Returns 0 or a negative error.
+ */
+int RGWSI_SysObj_Core::write_data(const DoutPrefixProvider *dpp,
+                                  const rgw_raw_obj& obj,
+                                  const bufferlist& bl,
+                                  bool exclusive,
+                                  RGWObjVersionTracker *objv_tracker,
+                                  optional_yield y)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  const int open_ret = get_rados_obj(dpp, zone_svc, obj, &rados_obj);
+  if (open_ret < 0) {
+    ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  const bool tracked = (objv_tracker != nullptr);
+
+  librados::ObjectWriteOperation op;
+  if (exclusive) {
+    op.create(true);
+  }
+  if (tracked) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+  op.write_full(bl);
+
+  const int op_ret = rados_obj.operate(dpp, &op, y);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+
+  // Only a successful write advances the tracked version.
+  if (tracked) {
+    objv_tracker->apply_write();
+  }
+  return 0;
+}
+
+/**
+ * List the objects in `pool` through a prefix filter and invoke `cb` once
+ * per object with the prefix stripped from its name. Names that are not
+ * longer than the prefix are skipped.
+ * Returns: 0 on success, -ERR# from the listing otherwise.
+ */
+int RGWSI_SysObj_Core::pool_list_prefixed_objs(const DoutPrefixProvider *dpp,
+                                               const rgw_pool& pool, const string& prefix,
+                                               std::function<void(const string&)> cb)
+{
+  // Names requested per get_next() call; a scoped constant replaces the
+  // macro that used to be defined inside the loop body.
+  constexpr int max_objs_per_call = 1000;
+
+  auto rados_pool = rados_svc->pool(pool);
+
+  auto op = rados_pool.op();
+
+  RGWAccessListFilterPrefix filter(prefix);
+
+  int r = op.init(dpp, string(), &filter);
+  if (r < 0) {
+    return r;
+  }
+
+  bool is_truncated = false;
+  do {
+    vector<string> oids;
+    // Reuse the outer status variable rather than shadowing it.
+    r = op.get_next(dpp, max_objs_per_call, &oids, &is_truncated);
+    if (r < 0) {
+      return r;
+    }
+    for (const auto& val : oids) {
+      if (val.size() > prefix.size()) {
+        cb(val.substr(prefix.size()));
+      }
+    }
+  } while (is_truncated);
+
+  return 0;
+}
+
+/**
+ * Prepare `_ctx` for listing `pool`: install a PoolListImplInfo carrying a
+ * prefix filter and initialise its list op at `marker`.
+ * Returns 0 or the error from the list op's init().
+ */
+int RGWSI_SysObj_Core::pool_list_objects_init(const DoutPrefixProvider *dpp,
+                                              const rgw_pool& pool,
+                                              const string& marker,
+                                              const string& prefix,
+                                              RGWSI_SysObj::Pool::ListCtx *_ctx)
+{
+  _ctx->impl.emplace<PoolListImplInfo>(prefix);
+
+  auto& list_info = static_cast<PoolListImplInfo&>(*_ctx->impl);
+
+  list_info.pool = rados_svc->pool(pool);
+  list_info.op = list_info.pool.op();
+
+  const int init_ret = list_info.op.init(dpp, marker, &list_info.filter);
+  if (init_ret >= 0) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 10) << "failed to list objects pool_iterate_begin() returned r=" << init_ret << dendl;
+  return init_ret;
+}
+
+/**
+ * Fetch the next batch (at most `max`) of object names into *oids.
+ * Returns the number of names in *oids, -EINVAL when the context was never
+ * initialised, or the listing error (-ENOENT is returned without logging).
+ */
+int RGWSI_SysObj_Core::pool_list_objects_next(const DoutPrefixProvider *dpp,
+                                              RGWSI_SysObj::Pool::ListCtx& _ctx,
+                                              int max,
+                                              vector<string> *oids,
+                                              bool *is_truncated)
+{
+  if (!_ctx.impl) {
+    return -EINVAL;
+  }
+
+  auto& list_info = static_cast<PoolListImplInfo&>(*_ctx.impl);
+  const int next_ret = list_info.op.get_next(dpp, max, oids, is_truncated);
+  if (next_ret >= 0) {
+    return oids->size();
+  }
+
+  if (next_ret != -ENOENT) {
+    ldpp_dout(dpp, 10) << "failed to list objects pool_iterate returned r=" << next_ret << dendl;
+  }
+  return next_ret;
+}
+
+/**
+ * Copy the listing's current marker into *marker.
+ * Returns -EINVAL when the context was never initialised, otherwise the
+ * result of the list op's get_marker().
+ */
+int RGWSI_SysObj_Core::pool_list_objects_get_marker(RGWSI_SysObj::Pool::ListCtx& _ctx,
+                                                    string *marker)
+{
+  if (_ctx.impl) {
+    auto& list_info = static_cast<PoolListImplInfo&>(*_ctx.impl);
+    return list_info.op.get_marker(marker);
+  }
+  return -EINVAL;
+}
diff --git a/src/rgw/services/svc_sys_obj_core.h b/src/rgw/services/svc_sys_obj_core.h
new file mode 100644
index 000000000..d02a37eee
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_core.h
@@ -0,0 +1,145 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+#include "svc_sys_obj.h"
+#include "svc_sys_obj_core_types.h"
+
+
+class RGWSI_Zone;
+
+struct rgw_cache_entry_info;
+
+/**
+ * System-object service core: implements stat/read/write/xattr/omap/notify
+ * and pool-listing primitives on top of RGWSI_RADOS. All operations are
+ * protected and virtual; RGWServices_Def and RGWSI_SysObj are friends.
+ */
+class RGWSI_SysObj_Core : public RGWServiceInstance
+{
+ friend class RGWServices_Def;
+ friend class RGWSI_SysObj;
+
+protected:
+ // Dependencies, both supplied through core_init().
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_Zone *zone_svc{nullptr};
+
+ using GetObjState = RGWSI_SysObj_Core_GetObjState;
+ using PoolListImplInfo = RGWSI_SysObj_Core_PoolListImplInfo;
+
+ // Stores the service pointers; performs no other work.
+ void core_init(RGWSI_RADOS *_rados_svc,
+ RGWSI_Zone *_zone_svc) {
+ rados_svc = _rados_svc;
+ zone_svc = _zone_svc;
+ }
+ // Builds and opens a RADOS handle for `obj` into *pobj; -EINVAL on empty oid.
+ int get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj);
+
+ // Reads all xattrs and, if psize or pmtime is given, size and mtime.
+ virtual int raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+ uint64_t *psize, real_time *pmtime,
+ std::map<std::string, bufferlist> *attrs,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+
+ // Reads [ofs, end]; returns the number of bytes placed in *bl or a negative
+ // error (-ECANCELED when the object version changed between reads).
+ virtual int read(const DoutPrefixProvider *dpp,
+ RGWSI_SysObj_Obj_GetObjState& read_state,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ bufferlist *bl, off_t ofs, off_t end,
+ ceph::real_time* pmtime, uint64_t* psize,
+ std::map<std::string, bufferlist> *attrs,
+ bool raw_attrs,
+ rgw_cache_entry_info *cache_info,
+ boost::optional<obj_version>,
+ optional_yield y);
+
+ // Removes the object, version-guarded when objv_tracker is non-null.
+ virtual int remove(const DoutPrefixProvider *dpp,
+ RGWObjVersionTracker *objv_tracker,
+ const rgw_raw_obj& obj,
+ optional_yield y);
+
+ // Writes data and xattrs in one op; a zero set_mtime means "now".
+ virtual int write(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj,
+ real_time *pmtime,
+ std::map<std::string, bufferlist>& attrs,
+ bool exclusive,
+ const bufferlist& data,
+ RGWObjVersionTracker *objv_tracker,
+ real_time set_mtime,
+ optional_yield y);
+
+ // Replaces only the object's data (write_full), leaving xattrs untouched.
+ virtual int write_data(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj,
+ const bufferlist& bl,
+ bool exclusive,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y);
+
+ // Reads the single xattr `name` into *dest.
+ virtual int get_attr(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+ const char *name, bufferlist *dest,
+ optional_yield y);
+
+ // Sets `attrs` (empty values skipped) and removes the keys of `rmattrs`.
+ virtual int set_attrs(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+ std::map<std::string, bufferlist>& attrs,
+ std::map<std::string, bufferlist> *rmattrs,
+ RGWObjVersionTracker *objv_tracker,
+ bool exclusive, optional_yield y);
+
+ // omap accessors: full dump, bounded paged read, set (one key or a batch),
+ // and single-key delete.
+ virtual int omap_get_all(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, std::map<std::string, bufferlist> *m,
+ optional_yield y);
+ virtual int omap_get_vals(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj,
+ const std::string& marker,
+ uint64_t count,
+ std::map<std::string, bufferlist> *m,
+ bool *pmore,
+ optional_yield y);
+ virtual int omap_set(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj, const std::string& key,
+ bufferlist& bl, bool must_exist,
+ optional_yield y);
+ virtual int omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj,
+ const std::map<std::string, bufferlist>& m, bool must_exist,
+ optional_yield y);
+ virtual int omap_del(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key,
+ optional_yield y);
+
+ // Forwards a notify with payload `bl` to the object's RADOS handle.
+ virtual int notify(const DoutPrefixProvider *dpp,
+ const rgw_raw_obj& obj, bufferlist& bl,
+ uint64_t timeout_ms, bufferlist *pbl,
+ optional_yield y);
+
+ // Calls `cb` with each listed object name, prefix stripped.
+ virtual int pool_list_prefixed_objs(const DoutPrefixProvider *dpp,
+ const rgw_pool& pool,
+ const std::string& prefix,
+ std::function<void(const std::string&)> cb);
+
+ // Incremental listing: init a context, pull batches, query the marker.
+ virtual int pool_list_objects_init(const DoutPrefixProvider *dpp,
+ const rgw_pool& pool,
+ const std::string& marker,
+ const std::string& prefix,
+ RGWSI_SysObj::Pool::ListCtx *ctx);
+ virtual int pool_list_objects_next(const DoutPrefixProvider *dpp,
+ RGWSI_SysObj::Pool::ListCtx& ctx,
+ int max,
+ std::vector<std::string> *oids,
+ bool *is_truncated);
+
+ virtual int pool_list_objects_get_marker(RGWSI_SysObj::Pool::ListCtx& _ctx,
+ std::string *marker);
+
+ // Non-virtual wrapper over raw_stat() that optionally filters the xattrs.
+ int stat(RGWSI_SysObj_Obj_GetObjState& state,
+ const rgw_raw_obj& obj,
+ std::map<std::string, bufferlist> *attrs,
+ bool raw_attrs,
+ real_time *lastmod,
+ uint64_t *obj_size,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp);
+
+public:
+ RGWSI_SysObj_Core(CephContext *cct): RGWServiceInstance(cct) {}
+
+ // Accessor for the zone service passed to core_init() (nullptr before that).
+ RGWSI_Zone *get_zone_svc() {
+ return zone_svc;
+ }
+};
diff --git a/src/rgw/services/svc_sys_obj_core_types.h b/src/rgw/services/svc_sys_obj_core_types.h
new file mode 100644
index 000000000..74f489d91
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_core_types.h
@@ -0,0 +1,34 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+#include "svc_sys_obj_types.h"
+
+
+
+// Per-object read state for the core implementation: a lazily opened RADOS
+// handle plus the object version seen by the last read.
+struct RGWSI_SysObj_Core_GetObjState : public RGWSI_SysObj_Obj_GetObjState {
+ RGWSI_RADOS::Obj rados_obj;
+ // True once rados_obj has been opened successfully by get_rados_obj().
+ bool has_rados_obj{false};
+ // 0 means no version has been recorded yet.
+ uint64_t last_ver{0};
+
+ RGWSI_SysObj_Core_GetObjState() {}
+
+ // Returns the cached handle through *pobj, opening it on first use.
+ int get_rados_obj(const DoutPrefixProvider *dpp,
+ RGWSI_RADOS *rados_svc,
+ RGWSI_Zone *zone_svc,
+ const rgw_raw_obj& obj,
+ RGWSI_RADOS::Obj **pobj);
+};
+
+// State for one incremental pool listing: the pool handle, its list op and
+// the prefix filter handed to that op.
+struct RGWSI_SysObj_Core_PoolListImplInfo : public RGWSI_SysObj_Pool_ListInfo {
+ RGWSI_RADOS::Pool pool;
+ RGWSI_RADOS::Pool::List op;
+ RGWAccessListFilterPrefix filter;
+
+ // `op` is initialised from `pool`, so `pool` must stay declared before it.
+ RGWSI_SysObj_Core_PoolListImplInfo(const std::string& prefix) : op(pool.op()), filter(prefix) {}
+};
diff --git a/src/rgw/services/svc_sys_obj_types.h b/src/rgw/services/svc_sys_obj_types.h
new file mode 100644
index 000000000..b5bc2d40d
--- /dev/null
+++ b/src/rgw/services/svc_sys_obj_types.h
@@ -0,0 +1,15 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#pragma once
+
+
+#include "rgw_service.h"
+
+
+// Empty base type for per-object read state; implementations derive from it
+// and cast back to their own type.
+struct RGWSI_SysObj_Obj_GetObjState {
+};
+
+// Empty base type for pool-listing state; implementations derive from it.
+struct RGWSI_SysObj_Pool_ListInfo {
+};
diff --git a/src/rgw/services/svc_tier_rados.cc b/src/rgw/services/svc_tier_rados.cc
new file mode 100644
index 000000000..ca87e8ace
--- /dev/null
+++ b/src/rgw/services/svc_tier_rados.cc
@@ -0,0 +1,36 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_tier_rados.h"
+
+using namespace std;
+
+// Suffix appended to "<key>.<upload_id>" to name a multipart meta object.
+const std::string MP_META_SUFFIX = ".meta";
+
+// Defined out of line; there is nothing to release.
+MultipartMetaFilter::~MultipartMetaFilter() = default;
+
+// Accepts names of the form "<key>.<something><MP_META_SUFFIX>" and stores
+// everything before the last '.' preceding the suffix into `key`.
+bool MultipartMetaFilter::filter(const string& name, string& key) {
+  // the length of the suffix so we can skip past it
+  static const size_t MP_META_SUFFIX_LEN = MP_META_SUFFIX.length();
+
+  const size_t name_len = name.size();
+
+  // the name must hold the suffix plus at least one more character
+  if (name_len <= MP_META_SUFFIX_LEN) {
+    return false;
+  }
+
+  // the suffix has to sit exactly at the end of the name
+  const size_t suffix_pos = name_len - MP_META_SUFFIX_LEN;
+  if (name.compare(suffix_pos, MP_META_SUFFIX_LEN, MP_META_SUFFIX) != 0) {
+    return false;
+  }
+
+  const size_t dot_pos = name.rfind('.', suffix_pos - 1);
+  if (dot_pos == string::npos) {
+    return false;
+  }
+
+  key = name.substr(0, dot_pos);
+  return true;
+}
+
+
diff --git a/src/rgw/services/svc_tier_rados.h b/src/rgw/services/svc_tier_rados.h
new file mode 100644
index 000000000..a2036b933
--- /dev/null
+++ b/src/rgw/services/svc_tier_rados.h
@@ -0,0 +1,154 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include <iomanip>
+
+#include "rgw_service.h"
+
+#include "svc_rados.h"
+
+extern const std::string MP_META_SUFFIX;
+
+/**
+ * Name builder for the objects belonging to one multipart upload.
+ *
+ * Given an object key and an upload id it derives:
+ *   meta   = "<key>.<upload_id>" + MP_META_SUFFIX
+ *   prefix = "<key>.<part_unique_str>"   (part names are "<prefix>.<part>")
+ * An empty key leaves every field empty.
+ */
+class RGWMPObj {
+  std::string oid;
+  std::string prefix;
+  std::string meta;
+  std::string upload_id;
+public:
+  RGWMPObj() {}
+  RGWMPObj(const std::string& _oid, const std::string& _upload_id) {
+    init(_oid, _upload_id, _upload_id);
+  }
+  /// With an upload id behaves like the two-string constructor; without one
+  /// `_oid` is parsed as a meta object name (the parse result is ignored).
+  RGWMPObj(const std::string& _oid, std::optional<std::string> _upload_id) {
+    if (_upload_id) {
+      init(_oid, *_upload_id, *_upload_id);
+    } else {
+      from_meta(_oid);
+    }
+  }
+  void init(const std::string& _oid, const std::string& _upload_id) {
+    init(_oid, _upload_id, _upload_id);
+  }
+  /// (Re)initialise all names; an empty `_oid` clears the object instead.
+  void init(const std::string& _oid, const std::string& _upload_id, const std::string& part_unique_str) {
+    if (_oid.empty()) {
+      clear();
+      return;
+    }
+    oid = _oid;
+    upload_id = _upload_id;
+    prefix = oid + ".";
+    meta = prefix + upload_id + MP_META_SUFFIX;
+    prefix.append(part_unique_str);
+  }
+  const std::string& get_meta() const { return meta; }
+  /// Name of part number `num`: "<prefix>.<num>".
+  std::string get_part(int num) const {
+    std::string s = prefix;
+    s.append(".");
+    s.append(std::to_string(num));
+    return s;
+  }
+  /// Name of the part identified by the string `part`: "<prefix>.<part>".
+  std::string get_part(const std::string& part) const {
+    std::string s = prefix;
+    s.append(".");
+    s.append(part);
+    return s;
+  }
+  const std::string& get_upload_id() const {
+    return upload_id;
+  }
+  const std::string& get_key() const {
+    return oid;
+  }
+  /**
+   * Parse "<key>.<upload_id>.<suffix>" and initialise from its pieces.
+   *
+   * Returns false, leaving the object untouched, when the name does not
+   * contain two '.' separators. Positions are kept as size_type and compared
+   * against npos: the previous int arithmetic wrapped when the only '.' was
+   * at index 0, found the same dot twice, and reported success.
+   */
+  bool from_meta(const std::string& meta) {
+    const std::string::size_type end_pos = meta.rfind('.'); // search for ".meta"
+    if (end_pos == std::string::npos || end_pos == 0) {
+      return false;
+    }
+    const std::string::size_type mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
+    if (mid_pos == std::string::npos) {
+      return false;
+    }
+    // Copy the pieces out first so init() never reads from strings it is
+    // in the middle of overwriting.
+    const std::string key = meta.substr(0, mid_pos);
+    const std::string id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
+    init(key, id, id);
+    return true;
+  }
+  void clear() {
+    oid = "";
+    prefix = "";
+    meta = "";
+    upload_id = "";
+  }
+  friend std::ostream& operator<<(std::ostream& out, const RGWMPObj& obj) {
+    return out << "RGWMPObj:{ prefix=" << std::quoted(obj.prefix) <<
+      ", meta=" << std::quoted(obj.meta) << " }";
+  }
+}; // class RGWMPObj
+
+/**
+ * A filter to a) test whether an object name is a multipart meta
+ * object, and b) filter out just the key used to determine the bucket
+ * index shard.
+ *
+ * Objects for multipart meta have names adorned with an upload id and
+ * other elements -- specifically a ".", MULTIPART_UPLOAD_ID_PREFIX,
+ * unique id, and MP_META_SUFFIX. This filter will return true when
+ * the name provided is such. It will also extract the key used for
+ * bucket index shard calculation from the adorned name.
+ */
+class MultipartMetaFilter : public RGWAccessListFilter {
+public:
+ MultipartMetaFilter() {}
+
+ // Declared here, defined out of line in svc_tier_rados.cc.
+ virtual ~MultipartMetaFilter() override;
+
+ /**
+ * @param name [in] The object name as it appears in the bucket index.
+ * @param key [out] An output parameter that will contain the bucket
+ * index key if this entry is in the form of a multipart meta object.
+ * @return true if the name provided is in the form of a multipart meta
+ * object, false otherwise
+ */
+ bool filter(const std::string& name, std::string& key) override;
+};
+
+// Tiering service; holds the zone service and offers a helper that maps a
+// raw RADOS object name back to a bucket object.
+class RGWSI_Tier_RADOS : public RGWServiceInstance
+{
+  RGWSI_Zone *zone_svc{nullptr};
+
+public:
+  RGWSI_Tier_RADOS(CephContext *cct): RGWServiceInstance(cct) {}
+
+  void init(RGWSI_Zone *_zone_svc) {
+    zone_svc = _zone_svc;
+  }
+
+  // Finds the first '_' at or after the bucket marker's length in the raw
+  // oid, parses what follows into obj->key and sets obj->bucket. Returns
+  // false when there is no such '_' or the remainder does not parse.
+  static inline bool raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj) {
+    const auto sep_pos = raw_obj.oid.find('_', bucket.marker.length());
+    if (sep_pos == std::string::npos) {
+      return false;
+    }
+
+    const bool parsed = rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(sep_pos + 1), &obj->key);
+    if (!parsed) {
+      return false;
+    }
+
+    obj->bucket = bucket;
+    return true;
+  }
+};
+
diff --git a/src/rgw/services/svc_user.cc b/src/rgw/services/svc_user.cc
new file mode 100644
index 000000000..9a07c207b
--- /dev/null
+++ b/src/rgw/services/svc_user.cc
@@ -0,0 +1,11 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+
+#include "svc_user.h"
+
+// Only forwards the context to RGWServiceInstance.
+RGWSI_User::RGWSI_User(CephContext *cct): RGWServiceInstance(cct) {
+}
+
+// Out-of-line virtual destructor; nothing to release.
+RGWSI_User::~RGWSI_User() = default;
diff --git a/src/rgw/services/svc_user.h b/src/rgw/services/svc_user.h
new file mode 100644
index 000000000..1cb459d31
--- /dev/null
+++ b/src/rgw/services/svc_user.h
@@ -0,0 +1,127 @@
+
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "svc_meta_be.h"
+
+#include "rgw_service.h"
+
+class RGWUserBuckets;
+class RGWGetUserStats_CB;
+
+/**
+ * Abstract user service interface. Everything below the two static key
+ * helpers is pure virtual; RGWSI_User_RADOS (svc_user_rados.cc) derives
+ * from this class.
+ */
+class RGWSI_User : public RGWServiceInstance
+{
+public:
+ RGWSI_User(CephContext *cct);
+ virtual ~RGWSI_User();
+
+ // Metadata key of a user is its string form (rgw_user::to_str()).
+ static std::string get_meta_key(const rgw_user& user) {
+ return user.to_str();
+ }
+
+ // Inverse of get_meta_key(): builds an rgw_user from the key string.
+ static rgw_user user_from_meta_key(const std::string& key) {
+ return rgw_user(key);
+ }
+
+ virtual RGWSI_MetaBackend_Handler *get_be_handler() = 0;
+
+ /* base svc_user interfaces */
+
+ virtual int read_user_info(RGWSI_MetaBackend::Context *ctx,
+ const rgw_user& user,
+ RGWUserInfo *info,
+ RGWObjVersionTracker * const objv_tracker,
+ real_time * const pmtime,
+ rgw_cache_entry_info * const cache_info,
+ std::map<std::string, bufferlist> * const pattrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int store_user_info(RGWSI_MetaBackend::Context *ctx,
+ const RGWUserInfo& info,
+ RGWUserInfo *old_info,
+ RGWObjVersionTracker *objv_tracker,
+ const real_time& mtime,
+ bool exclusive,
+ std::map<std::string, bufferlist> *attrs,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ virtual int remove_user_info(RGWSI_MetaBackend::Context *ctx,
+ const RGWUserInfo& info,
+ RGWObjVersionTracker *objv_tracker,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ /* lookups by secondary identifier (email, swift name, access key) */
+ virtual int get_user_info_by_email(RGWSI_MetaBackend::Context *ctx,
+ const std::string& email, RGWUserInfo *info,
+ RGWObjVersionTracker *objv_tracker,
+ real_time *pmtime,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+ virtual int get_user_info_by_swift(RGWSI_MetaBackend::Context *ctx,
+ const std::string& swift_name,
+ RGWUserInfo *info, /* out */
+ RGWObjVersionTracker * const objv_tracker,
+ real_time * const pmtime,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+ virtual int get_user_info_by_access_key(RGWSI_MetaBackend::Context *ctx,
+ const std::string& access_key,
+ RGWUserInfo *info,
+ RGWObjVersionTracker* objv_tracker,
+ real_time *pmtime,
+ optional_yield y,
+ const DoutPrefixProvider *dpp) = 0;
+
+ /* per-user bucket list maintenance */
+ virtual int add_bucket(const DoutPrefixProvider *dpp,
+ const rgw_user& user,
+ const rgw_bucket& bucket,
+ ceph::real_time creation_time,
+ optional_yield y) = 0;
+ virtual int remove_bucket(const DoutPrefixProvider *dpp,
+ const rgw_user& user,
+ const rgw_bucket& _bucket, optional_yield) = 0;
+ virtual int list_buckets(const DoutPrefixProvider *dpp,
+ const rgw_user& user,
+ const std::string& marker,
+ const std::string& end_marker,
+ uint64_t max,
+ RGWUserBuckets *buckets,
+ bool *is_truncated,
+ optional_yield y) = 0;
+
+ /* per-user usage statistics */
+ virtual int flush_bucket_stats(const DoutPrefixProvider *dpp,
+ const rgw_user& user,
+ const RGWBucketEnt& ent, optional_yield y) = 0;
+ virtual int complete_flush_stats(const DoutPrefixProvider *dpp,
+ const rgw_user& user, optional_yield y) = 0;
+ virtual int reset_bucket_stats(const DoutPrefixProvider *dpp,
+ const rgw_user& user,
+ optional_yield y) = 0;
+ virtual int read_stats(const DoutPrefixProvider *dpp,
+ RGWSI_MetaBackend::Context *ctx,
+ const rgw_user& user, RGWStorageStats *stats,
+ ceph::real_time *last_stats_sync, /* last time a full stats sync completed */
+ ceph::real_time *last_stats_update,
+ optional_yield y) = 0; /* last time a stats update was done */
+
+ // NOTE(review): presumably delivers the result through `cb` -- confirm
+ // against the implementation.
+ virtual int read_stats_async(const DoutPrefixProvider *dpp,
+ const rgw_user& user, RGWGetUserStats_CB *cb) = 0;
+};
+
diff --git a/src/rgw/services/svc_user_rados.cc b/src/rgw/services/svc_user_rados.cc
new file mode 100644
index 000000000..c99af9354
--- /dev/null
+++ b/src/rgw/services/svc_user_rados.cc
@@ -0,0 +1,968 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include <boost/algorithm/string.hpp>
+
+#include "svc_user.h"
+#include "svc_user_rados.h"
+#include "svc_zone.h"
+#include "svc_sys_obj.h"
+#include "svc_sys_obj_cache.h"
+#include "svc_meta.h"
+#include "svc_meta_be_sobj.h"
+#include "svc_sync_modules.h"
+
+#include "rgw_user.h"
+#include "rgw_bucket.h"
+#include "rgw_tools.h"
+#include "rgw_zone.h"
+#include "rgw_rados.h"
+
+#include "cls/user/cls_user_client.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+#define RGW_BUCKETS_OBJ_SUFFIX ".buckets"
+
+using namespace std;
+
+class RGWSI_User_Module : public RGWSI_MBSObj_Handler_Module {
+  RGWSI_User_RADOS::Svc& svc;
+  // Never assigned by the constructor, so the oid prefix is the empty string.
+  const string prefix;
+public:
+  RGWSI_User_Module(RGWSI_User_RADOS::Svc& _svc)
+    : RGWSI_MBSObj_Handler_Module("user"), svc(_svc) {}
+  // Fill whichever outputs are non-null: pool is the zone's user_uid_pool, oid is the key itself.
+  void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override {
+    if (oid != nullptr) {
+      *oid = key;
+    }
+    if (pool != nullptr) {
+      *pool = svc.zone->get_zone_params().user_uid_pool;
+    }
+  }
+
+  const string& get_oid_prefix() override {
+    return prefix;
+  }
+  // Every oid is accepted except those ending in RGW_BUCKETS_OBJ_SUFFIX (the user.buckets objects).
+  bool is_valid_oid(const string& oid) override {
+    const bool is_buckets_obj = boost::algorithm::ends_with(oid, RGW_BUCKETS_OBJ_SUFFIX);
+    return !is_buckets_obj;
+  }
+  // Keys and oids are identical for user metadata, in both directions.
+  string key_to_oid(const string& key) override {
+    return key;
+  }
+
+  string oid_to_key(const string& oid) override {
+    return oid;
+  }
+};
+
+// Only forwards cct to the RGWSI_User base; dependencies are supplied later through init().
+RGWSI_User_RADOS::RGWSI_User_RADOS(CephContext *cct): RGWSI_User(cct) {}
+
+// Nothing to release by hand: be_module and uinfo_cache are unique_ptr members.
+RGWSI_User_RADOS::~RGWSI_User_RADOS() = default;
+
+// Record the services this one depends on in `svc`; nothing else happens here.
+void RGWSI_User_RADOS::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc,
+                            RGWSI_SysObj *_sysobj_svc, RGWSI_SysObj_Cache *_cache_svc,
+                            RGWSI_Meta *_meta_svc, RGWSI_MetaBackend *_meta_be_svc,
+                            RGWSI_SyncModules *_sync_modules_svc)
+{
+  svc.sync_modules = _sync_modules_svc;
+  svc.meta_be = _meta_be_svc;
+  svc.meta = _meta_svc;
+  svc.cache = _cache_svc;
+  svc.sysobj = _sysobj_svc;
+  svc.zone = _zone_svc;
+  svc.rados = _rados_svc;
+  svc.user = this;
+}
+
+int RGWSI_User_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+  // Cache used by the index lookups, initialised against the sysobj cache service.
+  uinfo_cache.reset(new RGWChainedCacheImpl<user_info_cache_entry>);
+  uinfo_cache->init(svc.cache);
+
+  const int rc = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ, &be_handler);
+  if (rc < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to create be handler: r=" << rc << dendl;
+    return rc;
+  }
+
+  // be_module owns the module object; the SObj handler is handed the same raw pointer.
+  auto *user_module = new RGWSI_User_Module(svc);
+  be_module.reset(user_module);
+  static_cast<RGWSI_MetaBackend_Handler_SObj *>(be_handler)->set_module(user_module);
+  return 0;
+}
+
+rgw_raw_obj RGWSI_User_RADOS::get_buckets_obj(const rgw_user& user) const
+{
+  // The buckets object is named "<user>" + RGW_BUCKETS_OBJ_SUFFIX and lives in user_uid_pool.
+  return rgw_raw_obj(svc.zone->get_zone_params().user_uid_pool, user.to_str() + RGW_BUCKETS_OBJ_SUFFIX);
+}
+
+int RGWSI_User_RADOS::read_user_info(RGWSI_MetaBackend::Context *ctx,
+                                     const rgw_user& user,
+                                     RGWUserInfo *info,
+                                     RGWObjVersionTracker * const objv_tracker,
+                                     real_time * const pmtime,
+                                     rgw_cache_entry_info * const cache_info,
+                                     map<string, bufferlist> * const pattrs,
+                                     optional_yield y,
+                                     const DoutPrefixProvider *dpp)
+{
+  // The anonymous user is reported as not found without touching the backend.
+  if (user.id == RGW_USER_ANON_ID) {
+    ldpp_dout(dpp, 20) << "RGWSI_User_RADOS::read_user_info(): anonymous user" << dendl;
+    return -ENOENT;
+  }
+  bufferlist raw;
+  RGWSI_MBSObj_GetParams get_params(&raw, pattrs, pmtime);
+  get_params.set_cache_info(cache_info);
+
+  const int rc = svc.meta_be->get_entry(ctx, get_meta_key(user), get_params, objv_tracker, y, dpp);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // Payload layout: an RGWUID first, then (if any bytes remain) the RGWUserInfo.
+  RGWUID stored_uid;
+  auto pos = raw.cbegin();
+  try {
+    decode(stored_uid, pos);
+    if (stored_uid.user_id != user) {
+      ldpp_dout(dpp, -1) << "ERROR: rgw_get_user_info_by_uid(): user id mismatch: " << stored_uid.user_id << " != " << user << dendl;
+      return -EIO;
+    }
+    if (!pos.end()) {
+      decode(*info, pos);
+    }
+  } catch (buffer::error& err) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to decode user info, caught buffer::error" << dendl;
+    return -EIO;
+  }
+  return 0;
+}
+
+// Three-phase writer behind store_user_info(): prepare() -> put() -> complete().
+class PutOperation {
+  RGWSI_User_RADOS::Svc& svc;
+  RGWSI_MetaBackend_SObj::Context_SObj *ctx;
+  RGWUID ui;                          // uid header; encoded ahead of the record and into every index object
+  const RGWUserInfo& info;            // record being written
+  RGWUserInfo *old_info;              // previous record, may be null
+  RGWObjVersionTracker *objv_tracker;
+  const real_time& mtime;
+  bool exclusive;
+  map<string, bufferlist> *pattrs;
+  RGWObjVersionTracker ot;            // working copy of *objv_tracker used for the write
+  string err_msg;
+  optional_yield y;
+  // Record a failure description; the first one wins and is never overwritten.
+  void set_err_msg(string msg) {
+    if (err_msg.empty()) {
+      err_msg = std::move(msg);
+    }
+  }
+
+public:
+  PutOperation(RGWSI_User_RADOS::Svc& svc,
+               RGWSI_MetaBackend::Context *_ctx,
+               const RGWUserInfo& info,
+               RGWUserInfo *old_info,
+               RGWObjVersionTracker *objv_tracker,
+               const real_time& mtime,
+               bool exclusive,
+               map<string, bufferlist> *pattrs,
+               optional_yield y) :
+    svc(svc), info(info), old_info(old_info),
+    objv_tracker(objv_tracker), mtime(mtime),
+    exclusive(exclusive), pattrs(pattrs), y(y) {
+    ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx);
+    ui.user_id = info.user_id;
+  }
+  // Choose the write version, then fail with -EEXIST if a new swift id or access key belongs to another user.
+  int prepare(const DoutPrefixProvider *dpp) {
+    if (objv_tracker) {
+      ot = *objv_tracker;
+    }
+
+    if (ot.write_version.tag.empty()) {
+      if (ot.read_version.tag.empty()) {
+        ot.generate_new_write_ver(svc.meta_be->ctx());
+      } else {
+        ot.write_version = ot.read_version;
+        ot.write_version.ver++;
+      }
+    }
+
+    for (auto iter = info.swift_keys.begin(); iter != info.swift_keys.end(); ++iter) {
+      if (old_info && old_info->swift_keys.count(iter->first) != 0)
+        continue;
+      auto& k = iter->second;
+      /* check if swift mapping exists */
+      RGWUserInfo inf;
+      int r = svc.user->get_user_info_by_swift(ctx, k.id, &inf, nullptr, nullptr, y, dpp);
+      if (r >= 0 && inf.user_id != info.user_id &&
+          (!old_info || inf.user_id != old_info->user_id)) {
+        ldpp_dout(dpp, 0) << "WARNING: can't store user info, swift id (" << k.id
+                          << ") already mapped to another user (" << inf.user_id << ")" << dendl;
+        return -EEXIST;
+      }
+    }
+
+    /* check if access keys already exist */
+    for (auto iter = info.access_keys.begin(); iter != info.access_keys.end(); ++iter) {
+      if (old_info && old_info->access_keys.count(iter->first) != 0)
+        continue;
+      auto& k = iter->second;
+      RGWUserInfo inf;
+      int r = svc.user->get_user_info_by_access_key(ctx, k.id, &inf, nullptr, nullptr, y, dpp);
+      if (r >= 0 && inf.user_id != info.user_id &&
+          (!old_info || inf.user_id != old_info->user_id)) {
+        ldpp_dout(dpp, 0) << "WARNING: can't store user info, access key already mapped to another user" << dendl;
+        return -EEXIST;
+      }
+    }
+
+    return 0;
+  }
+  // Write the encoded (RGWUID, RGWUserInfo) pair under the user's meta key, using the version chosen in prepare().
+  int put(const DoutPrefixProvider *dpp) {
+    bufferlist data_bl;
+    encode(ui, data_bl);
+    encode(info, data_bl);
+
+    RGWSI_MBSObj_PutParams params(data_bl, pattrs, mtime, exclusive);
+
+    int ret = svc.meta_be->put(ctx, RGWSI_User::get_meta_key(info.user_id), params, &ot, y, dpp);
+    if (ret < 0)
+      return ret;
+
+    return 0;
+  }
+  // Write index objects (each holding the encoded uid) for new or renamed entries, then drop stale ones.
+  int complete(const DoutPrefixProvider *dpp) {
+    int ret;
+
+    bufferlist link_bl;
+    encode(ui, link_bl);
+
+    if (!info.user_email.empty()) {
+      if (!old_info ||
+          old_info->user_email.compare(info.user_email) != 0) { /* only if new index changed */
+        ret = rgw_put_system_obj(dpp, svc.sysobj, svc.zone->get_zone_params().user_email_pool, info.user_email,
+                                 link_bl, exclusive, NULL, real_time(), y);
+        if (ret < 0)
+          return ret;
+      }
+    }
+
+    const bool renamed = old_info && old_info->user_id != info.user_id;
+    for (auto iter = info.access_keys.begin(); iter != info.access_keys.end(); ++iter) {
+      auto& k = iter->second;
+      if (old_info && old_info->access_keys.count(iter->first) != 0 && !renamed)
+        continue;
+
+      ret = rgw_put_system_obj(dpp, svc.sysobj, svc.zone->get_zone_params().user_keys_pool, k.id,
+                               link_bl, exclusive, NULL, real_time(), y);
+      if (ret < 0)
+        return ret;
+    }
+
+    for (auto siter = info.swift_keys.begin(); siter != info.swift_keys.end(); ++siter) {
+      auto& k = siter->second;
+      if (old_info && old_info->swift_keys.count(siter->first) != 0 && !renamed)
+        continue;
+
+      ret = rgw_put_system_obj(dpp, svc.sysobj, svc.zone->get_zone_params().user_swift_pool, k.id,
+                               link_bl, exclusive, NULL, real_time(), y);
+      if (ret < 0)
+        return ret;
+    }
+
+    if (old_info) {
+      ret = remove_old_indexes(*old_info, info, y, dpp);
+      if (ret < 0) {
+        return ret;
+      }
+    }
+
+    return 0;
+  }
+  // Remove index entries present in old_info but absent from new_info; -ENOENT from a removal is ignored.
+  int remove_old_indexes(const RGWUserInfo& old_info, const RGWUserInfo& new_info, optional_yield y, const DoutPrefixProvider *dpp) {
+    int ret;
+
+    if (!old_info.user_id.empty() &&
+        old_info.user_id != new_info.user_id) {
+      if (old_info.user_id.tenant != new_info.user_id.tenant) {
+        ldpp_dout(dpp, 0) << "ERROR: tenant mismatch: " << old_info.user_id.tenant << " != " << new_info.user_id.tenant << dendl;
+        return -EINVAL;
+      }
+      ret = svc.user->remove_uid_index(ctx, old_info, nullptr, y, dpp);
+      if (ret < 0 && ret != -ENOENT) {
+        set_err_msg("ERROR: could not remove index for uid " + old_info.user_id.to_str());
+        return ret;
+      }
+    }
+
+    if (!old_info.user_email.empty() &&
+        old_info.user_email != new_info.user_email) {
+      ret = svc.user->remove_email_index(dpp, old_info.user_email, y);
+      if (ret < 0 && ret != -ENOENT) {
+        set_err_msg("ERROR: could not remove index for email " + old_info.user_email);
+        return ret;
+      }
+    }
+
+    for ([[maybe_unused]] const auto& [name, access_key] : old_info.access_keys) {
+      if (!new_info.access_keys.count(access_key.id)) {
+        ret = svc.user->remove_key_index(dpp, access_key, y);
+        if (ret < 0 && ret != -ENOENT) {
+          set_err_msg("ERROR: could not remove index for key " + access_key.id);
+          return ret;
+        }
+      }
+    }
+
+    for (auto old_iter = old_info.swift_keys.begin(); old_iter != old_info.swift_keys.end(); ++old_iter) {
+      const auto& swift_key = old_iter->second;
+      auto new_iter = new_info.swift_keys.find(swift_key.id);
+      if (new_iter == new_info.swift_keys.end()) {
+        ret = svc.user->remove_swift_name_index(dpp, swift_key.id, y);
+        if (ret < 0 && ret != -ENOENT) {
+          set_err_msg("ERROR: could not remove index for swift_name " + swift_key.id);
+          return ret;
+        }
+      }
+    }
+
+    return 0;
+  }
+  // Description of the first failure recorded by remove_old_indexes(), or empty if none.
+  const string& get_err_msg() {
+    return err_msg;
+  }
+};
+
+// Store (create or update) a user record together with its lookup index objects.
+// @param old_info  previous version of the record, or nullptr when none is known;
+//                  used to skip unchanged index entries and to remove stale ones
+// @param exclusive forwarded to the underlying object writes
+// @return 0 on success, otherwise the negative error of the first phase that fails
+int RGWSI_User_RADOS::store_user_info(RGWSI_MetaBackend::Context *ctx,
+                                      const RGWUserInfo& info,
+                                      RGWUserInfo *old_info,
+                                      RGWObjVersionTracker *objv_tracker,
+                                      const real_time& mtime,
+                                      bool exclusive,
+                                      map<string, bufferlist> *attrs,
+                                      optional_yield y,
+                                      const DoutPrefixProvider *dpp)
+{
+  PutOperation writer(svc, ctx, info, old_info, objv_tracker,
+                      mtime, exclusive, attrs, y);
+
+  // Phase 1: pick the write version and check new keys for collisions.
+  int rc = writer.prepare(dpp);
+  if (rc >= 0) {
+    // Phase 2: persist the encoded user record.
+    rc = writer.put(dpp);
+  }
+  if (rc >= 0) {
+    // Phase 3: write new index objects and remove stale ones.
+    rc = writer.complete(dpp);
+  }
+
+  // A later phase runs only if the previous one did not fail;
+  // non-negative results are all reported as 0.
+  return (rc < 0) ? rc : 0;
+
+}
+
+// Remove the index object named after access_key.id from the zone's user_keys_pool.
+int RGWSI_User_RADOS::remove_key_index(const DoutPrefixProvider *dpp,
+                                       const RGWAccessKey& access_key, optional_yield y)
+{
+  rgw_raw_obj index_obj(svc.zone->get_zone_params().user_keys_pool, access_key.id);
+  auto handle = svc.sysobj->get_obj(index_obj);
+  return handle.wop().remove(dpp, y);
+}
+
+// Remove the index object named after `email` from the zone's user_email_pool.
+int RGWSI_User_RADOS::remove_email_index(const DoutPrefixProvider *dpp,
+                                         const string& email, optional_yield y)
+{
+  if (email.empty()) {
+    return 0;  // an empty email has no index object to remove
+  }
+  rgw_raw_obj index_obj(svc.zone->get_zone_params().user_email_pool, email);
+  auto handle = svc.sysobj->get_obj(index_obj);
+  return handle.wop().remove(dpp, y);
+}
+
+// Remove the index object named after `swift_name` from the zone's user_swift_pool.
+int RGWSI_User_RADOS::remove_swift_name_index(const DoutPrefixProvider *dpp,
+                                              const string& swift_name, optional_yield y)
+{
+  rgw_raw_obj index_obj(svc.zone->get_zone_params().user_swift_pool, swift_name);
+  auto handle = svc.sysobj->get_obj(index_obj);
+  return handle.wop().remove(dpp, y);
+}
+
+/**
+ * Remove every object that records a user's presence: the access-key, swift-name
+ * and email index objects, the user's buckets object and finally the user
+ * metadata entry itself. The pools themselves are left alone.
+ * -ENOENT from any step is ignored; any other error aborts and is returned.
+ */
+int RGWSI_User_RADOS::remove_user_info(RGWSI_MetaBackend::Context *ctx,
+                                       const RGWUserInfo& info,
+                                       RGWObjVersionTracker *objv_tracker,
+                                       optional_yield y,
+                                       const DoutPrefixProvider *dpp)
+{
+  int ret;
+
+  // One index object per access key.
+  for (const auto& key_entry : info.access_keys) {
+    ldpp_dout(dpp, 10) << "removing key index: " << key_entry.first << dendl;
+    ret = remove_key_index(dpp, key_entry.second, y);
+    if (ret < 0 && ret != -ENOENT) {
+      ldpp_dout(dpp, 0) << "ERROR: could not remove " << key_entry.first << " (access key object), should be fixed (err=" << ret << ")" << dendl;
+      return ret;
+    }
+  }
+
+  // One index object per swift key, named after the key's id.
+  for (const auto& swift_entry : info.swift_keys) {
+    const auto& swift_id = swift_entry.second.id;
+    ldpp_dout(dpp, 10) << "removing swift subuser index: " << swift_id << dendl;
+    ret = remove_swift_name_index(dpp, swift_id, y);
+    if (ret < 0 && ret != -ENOENT) {
+      ldpp_dout(dpp, 0) << "ERROR: could not remove " << swift_id << " (swift name object), should be fixed (err=" << ret << ")" << dendl;
+      return ret;
+    }
+  }
+
+  ldpp_dout(dpp, 10) << "removing email index: " << info.user_email << dendl;
+  ret = remove_email_index(dpp, info.user_email, y);
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, 0) << "ERROR: could not remove email index object for "
+                      << info.user_email << ", should be fixed (err=" << ret << ")" << dendl;
+    return ret;
+  }
+
+  rgw_raw_obj buckets_obj = get_buckets_obj(info.user_id);
+  ldpp_dout(dpp, 10) << "removing user buckets index" << dendl;
+  auto buckets_handle = svc.sysobj->get_obj(buckets_obj);
+  ret = buckets_handle.wop().remove(dpp, y);
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, 0) << "ERROR: could not remove " << info.user_id << ":" << buckets_obj << ", should be fixed (err=" << ret << ")" << dendl;
+    return ret;
+  }
+
+  // The uid entry is removed last, after all objects that point at it.
+  ret = remove_uid_index(ctx, info, objv_tracker, y, dpp);
+  if (ret < 0 && ret != -ENOENT) {
+    return ret;
+  }
+
+  return 0;
+}
+
+int RGWSI_User_RADOS::remove_uid_index(RGWSI_MetaBackend::Context *ctx, const RGWUserInfo& user_info, RGWObjVersionTracker *objv_tracker,
+                                       optional_yield y, const DoutPrefixProvider *dpp)
+{
+  ldpp_dout(dpp, 10) << "removing user index: " << user_info.user_id << dendl;
+
+  RGWSI_MBSObj_RemoveParams rm_params;
+  const int rc = svc.meta_be->remove(ctx, get_meta_key(user_info.user_id), rm_params, objv_tracker, y, dpp);
+  // -ENOENT and -ECANCELED are deliberately not reported as failures.
+  if (rc >= 0 || rc == -ENOENT || rc == -ECANCELED) {
+    return 0;
+  }
+  // Build the raw object purely so the log line can name what could not be removed.
+  string uid_str;
+  user_info.user_id.to_str(uid_str);
+  rgw_raw_obj uid_obj(svc.zone->get_zone_params().user_uid_pool, uid_str);
+  ldpp_dout(dpp, 0) << "ERROR: could not remove " << user_info.user_id << ":" << uid_obj << ", should be fixed (err=" << rc << ")" << dendl; return rc;
+}
+
+int RGWSI_User_RADOS::get_user_info_from_index(RGWSI_MetaBackend::Context* ctx,
+ const string& key,
+ const rgw_pool& pool,
+ RGWUserInfo *info,
+ RGWObjVersionTracker* objv_tracker,
+ real_time* pmtime, optional_yield y,
+ const DoutPrefixProvider* dpp)
+{
+ string cache_key = pool.to_str() + "/" + key; // pool-qualified, so equal keys from different index pools stay distinct
+ // Fast path: this index key was resolved before and is still in the cache.
+ if (auto e = uinfo_cache->find(cache_key)) {
+ *info = e->info;
+ if (objv_tracker)
+ *objv_tracker = e->objv_tracker;
+ if (pmtime)
+ *pmtime = e->mtime;
+ return 0;
+ }
+ // Slow path: read the index object (an encoded RGWUID), then the user record it names.
+ user_info_cache_entry e;
+ bufferlist bl;
+ RGWUID uid;
+
+ int ret = rgw_get_system_obj(svc.sysobj, pool, key, bl, nullptr, &e.mtime, y, dpp); // e.mtime is requested from the index object read
+ if (ret < 0)
+ return ret;
+
+ rgw_cache_entry_info cache_info;
+
+ auto iter = bl.cbegin();
+ try {
+ decode(uid, iter);
+ // This inner `ret` shadows the outer one; no mtime is requested from the record read (nullptr).
+ int ret = read_user_info(ctx, uid.user_id,
+ &e.info, &e.objv_tracker, nullptr, &cache_info, nullptr,
+ y, dpp);
+ if (ret < 0) {
+ return ret;
+ }
+ } catch (buffer::error& err) {
+ ldpp_dout(dpp, 0) << "ERROR: failed to decode user info, caught buffer::error" << dendl;
+ return -EIO;
+ }
+ // Cache the resolved entry, passing along the cache info filled in by read_user_info().
+ uinfo_cache->put(dpp, svc.cache, cache_key, &e, { &cache_info });
+
+ *info = e.info;
+ if (objv_tracker)
+ *objv_tracker = e.objv_tracker;
+ if (pmtime)
+ *pmtime = e.mtime;
+
+ return 0;
+}
+
+/**
+ * Given an email, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+int RGWSI_User_RADOS::get_user_info_by_email(RGWSI_MetaBackend::Context *ctx,
+                                             const string& email, RGWUserInfo *info,
+                                             RGWObjVersionTracker *objv_tracker,
+                                             real_time *pmtime, optional_yield y,
+                                             const DoutPrefixProvider *dpp)
+{
+  const auto& email_index_pool = svc.zone->get_zone_params().user_email_pool;
+  return get_user_info_from_index(ctx, email, email_index_pool, info, objv_tracker, pmtime, y, dpp);
+}
+
+/**
+ * Given a Swift username, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+int RGWSI_User_RADOS::get_user_info_by_swift(RGWSI_MetaBackend::Context *ctx,
+                                             const string& swift_name,
+                                             RGWUserInfo *info, /* out */
+                                             RGWObjVersionTracker * const objv_tracker,
+                                             real_time * const pmtime, optional_yield y,
+                                             const DoutPrefixProvider *dpp)
+{
+  // Swift names are indexed in the zone's user_swift_pool.
+  const auto& swift_index_pool = svc.zone->get_zone_params().user_swift_pool;
+  return get_user_info_from_index(ctx, swift_name, swift_index_pool,
+                                  info, objv_tracker, pmtime, y, dpp);
+}
+
+/**
+ * Given an access key, finds the user info associated with it.
+ * returns: 0 on success, -ERR# on failure (including nonexistence)
+ */
+int RGWSI_User_RADOS::get_user_info_by_access_key(RGWSI_MetaBackend::Context *ctx,
+                                                  const std::string& access_key,
+                                                  RGWUserInfo *info,
+                                                  RGWObjVersionTracker* objv_tracker,
+                                                  real_time *pmtime, optional_yield y,
+                                                  const DoutPrefixProvider *dpp)
+{
+  // Access keys are indexed in the zone's user_keys_pool.
+  const auto& key_index_pool = svc.zone->get_zone_params().user_keys_pool;
+  return get_user_info_from_index(ctx, access_key, key_index_pool,
+                                  info, objv_tracker, pmtime, y, dpp);
+}
+
+// Apply cls_user_set_buckets() for `entries` to the buckets object `obj`.
+// `add` is forwarded unchanged to the cls call.
+int RGWSI_User_RADOS::cls_user_update_buckets(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add, optional_yield y)
+{
+  auto target = svc.rados->obj(obj);
+  int rc = target.open(dpp);
+  if (rc < 0) {
+    return rc;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  cls_user_set_buckets(write_op, entries, add);
+  rc = target.operate(dpp, &write_op, y);
+
+  // Any non-negative result is reported as 0.
+  return (rc < 0) ? rc : 0;
+}
+
+// Submit a single bucket entry through cls_user_update_buckets() with add=true.
+int RGWSI_User_RADOS::cls_user_add_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket_entry& entry, optional_yield y)
+{
+  list<cls_user_bucket_entry> single(1, entry);
+
+  return cls_user_update_buckets(dpp, obj, single, true, y);
+}
+
+// Run the cls_user bucket-removal write op for `bucket` against the buckets object `obj`.
+int RGWSI_User_RADOS::cls_user_remove_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket& bucket, optional_yield y)
+{
+  auto target = svc.rados->obj(obj);
+  int rc = target.open(dpp);
+  if (rc < 0) {
+    return rc;
+  }
+
+  librados::ObjectWriteOperation write_op;
+  // The leading :: selects the free cls client function, not this member function.
+  ::cls_user_remove_bucket(write_op, bucket);
+  rc = target.operate(dpp, &write_op, y);
+  // Any non-negative result is reported as 0.
+  return (rc < 0) ? rc : 0;
+}
+
+// Add `bucket` to the user's buckets object with a size of 0.
+// @param creation_time  stored as given; a zero time is replaced by the current time
+// @return 0 on success, otherwise the negative error from the cls call
+//         (which is also logged)
+int RGWSI_User_RADOS::add_bucket(const DoutPrefixProvider *dpp,
+                                 const rgw_user& user,
+                                 const rgw_bucket& bucket,
+                                 ceph::real_time creation_time,
+                                 optional_yield y)
+{
+  cls_user_bucket_entry entry;
+  bucket.convert(&entry.bucket);
+  entry.size = 0;
+  // A zero creation time means none was supplied: stamp the entry with "now".
+  entry.creation_time =
+      real_clock::is_zero(creation_time) ? real_clock::now() : creation_time;
+
+  rgw_raw_obj buckets_obj = get_buckets_obj(user);
+  const int rc = cls_user_add_bucket(dpp, buckets_obj, entry, y);
+  if (rc < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: error adding bucket to user: ret=" << rc << dendl;
+    return rc;
+  }
+
+  return 0;
+}
+
+
+int RGWSI_User_RADOS::remove_bucket(const DoutPrefixProvider *dpp,
+                                    const rgw_user& user,
+                                    const rgw_bucket& _bucket, optional_yield y)
+{
+  // Only the bucket name is copied into the cls-level bucket descriptor.
+  cls_user_bucket target;
+  target.name = _bucket.name;
+  rgw_raw_obj buckets_obj = get_buckets_obj(user);
+  const int rc = cls_user_remove_bucket(dpp, buckets_obj, target, y);
+  if (rc < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: error removing bucket from user: ret=" << rc << dendl;
+  }
+  // NOTE(review): a failure is only logged and 0 is returned regardless — confirm this best-effort behaviour is intended.
+  return 0;
+}
+
+// Convert `ent` to a cls_user_bucket_entry and submit it through
+// cls_user_update_buckets() with add=false.
+int RGWSI_User_RADOS::cls_user_flush_bucket_stats(const DoutPrefixProvider *dpp,
+                                                  rgw_raw_obj& user_obj, const RGWBucketEnt& ent, optional_yield y)
+{
+  cls_user_bucket_entry converted;
+  ent.convert(&converted);
+  // The update call takes a list, so wrap the single entry.
+  list<cls_user_bucket_entry> batch(1, converted);
+
+  const int rc = cls_user_update_buckets(dpp, user_obj, batch, false, y);
+  if (rc < 0) {
+    ldpp_dout(dpp, 20) << "cls_user_update_buckets() returned " << rc << dendl;
+    return rc;
+  }
+
+  return 0;
+}
+
+int RGWSI_User_RADOS::cls_user_list_buckets(const DoutPrefixProvider *dpp,
+                                            rgw_raw_obj& obj,
+                                            const string& in_marker,
+                                            const string& end_marker,
+                                            const int max_entries,
+                                            list<cls_user_bucket_entry>& entries,
+                                            string * const out_marker,
+                                            bool * const truncated,
+                                            optional_yield y)
+{
+  auto target = svc.rados->obj(obj);
+  int rc = target.open(dpp);
+  if (rc < 0) {
+    return rc;
+  }
+
+  librados::ObjectReadOperation read_op;
+  // cls_rc is handed to the listing step so it can report its own result.
+  int cls_rc;
+  cls_user_bucket_list(read_op, in_marker, end_marker, max_entries, entries, out_marker, truncated, &cls_rc);
+
+  bufferlist unused_out;
+  rc = target.operate(dpp, &read_op, &unused_out, y);
+  if (rc < 0) {
+    return rc;
+  }
+  // The operation itself succeeded; surface a failure of the listing step, if any.
+  return (cls_rc < 0) ? cls_rc : 0;
+}
+
+// List up to `max` of the user's buckets into `buckets` (cleared first), starting at `marker`.
+// `is_truncated` is optional; when non-null it reports whether more entries remain.
+int RGWSI_User_RADOS::list_buckets(const DoutPrefixProvider *dpp,
+                                   const rgw_user& user,
+                                   const string& marker,
+                                   const string& end_marker,
+                                   uint64_t max,
+                                   RGWUserBuckets *buckets,
+                                   bool *is_truncated, optional_yield y)
+{
+  buckets->clear();
+  if (user.id == RGW_USER_ANON_ID) {
+    ldpp_dout(dpp, 20) << "RGWSI_User_RADOS::list_buckets(): anonymous user" << dendl;
+    // is_truncated is null-checked at the end of this function, so guard it here too.
+    if (is_truncated) {
+      *is_truncated = false;
+    }
+    return 0;
+  }
+  rgw_raw_obj obj = get_buckets_obj(user);
+
+  bool truncated = false;
+  string m = marker;
+  uint64_t total = 0;
+
+  do {
+    std::list<cls_user_bucket_entry> entries;
+    int ret = cls_user_list_buckets(dpp, obj, m, end_marker, max - total, entries, &m, &truncated, y);
+    if (ret == -ENOENT) {
+      // No buckets object: nothing (more) to list; clear the flag so a stale `true` cannot keep the loop going.
+      truncated = false;
+      ret = 0;
+    }
+    if (ret < 0) {
+      return ret;
+    }
+    for (auto& entry : entries) {
+      buckets->add(RGWBucketEnt(user, std::move(entry)));
+      total++;
+    }
+  } while (truncated && total < max);
+
+  if (is_truncated) {
+    *is_truncated = truncated;
+  }
+  return 0;
+}
+
+// Resolve the user's buckets object and flush the stats of `ent` into it.
+int RGWSI_User_RADOS::flush_bucket_stats(const DoutPrefixProvider *dpp,
+                                         const rgw_user& user,
+                                         const RGWBucketEnt& ent, optional_yield y)
+{
+  rgw_raw_obj buckets_obj = get_buckets_obj(user);
+
+  return cls_user_flush_bucket_stats(dpp, buckets_obj, ent, y);
+}
+
+// Public entry point; the work is done by cls_user_reset_stats().
+int RGWSI_User_RADOS::reset_bucket_stats(const DoutPrefixProvider *dpp,
+                                         const rgw_user& user, optional_yield y)
+{
+  return cls_user_reset_stats(dpp, user, y);
+}
+
+int RGWSI_User_RADOS::cls_user_reset_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y)
+{
+ rgw_raw_obj obj = get_buckets_obj(user);
+ auto rados_obj = svc.rados->obj(obj);
+ int rval, r = rados_obj.open(dpp); // rval starts uninitialized; its address is handed to op.exec() below
+ if (r < 0) {
+ return r;
+ }
+ // `call` and `ret` live across iterations so each reply can shape the next request.
+ cls_user_reset_stats2_op call;
+ cls_user_reset_stats2_ret ret;
+ // Repeat the "reset_user_stats2" class call until a reply is no longer marked truncated.
+ do {
+ buffer::list in, out;
+ librados::ObjectWriteOperation op;
+
+ call.time = real_clock::now();
+ ret.update_call(call); // lets the previous reply update the next request (presumably a continuation point — confirm in cls_user)
+
+ encode(call, in);
+ op.exec("user", "reset_user_stats2", in, &out, &rval);
+ r = rados_obj.operate(dpp, &op, y, librados::OPERATION_RETURNVEC);
+ if (r < 0) {
+ return r;
+ }
+ try {
+ auto bliter = out.cbegin();
+ decode(ret, bliter);
+ } catch (ceph::buffer::error& err) {
+ return -EINVAL; // the reply could not be decoded
+ }
+ } while (ret.truncated);
+ // Returns the rval supplied to the last exec step.
+ return rval;
+}
+
+// Run cls_user_complete_stats_sync() against the user's buckets object.
+int RGWSI_User_RADOS::complete_flush_stats(const DoutPrefixProvider *dpp,
+                                           const rgw_user& user, optional_yield y)
+{
+  rgw_raw_obj buckets_obj = get_buckets_obj(user);
+  auto target = svc.rados->obj(buckets_obj);
+  if (const int rc = target.open(dpp); rc < 0) {
+    return rc;
+  }
+  librados::ObjectWriteOperation write_op;
+  ::cls_user_complete_stats_sync(write_op);
+  return target.operate(dpp, &write_op, y);
+}
+
+// Read the cls_user header of the user's buckets object into *header.
+// Returns the operate() result, or the header step's own error if only that step failed.
+int RGWSI_User_RADOS::cls_user_get_header(const DoutPrefixProvider *dpp,
+                                          const rgw_user& user, cls_user_header *header, optional_yield y)
+{
+  rgw_raw_obj obj = get_buckets_obj(user);
+  auto rados_obj = svc.rados->obj(obj);
+  int r = rados_obj.open(dpp);
+  if (r < 0) return r;
+  int rc = 0;  // result reported by the header step; initialised so it is never read unset
+  bufferlist ibl;
+  librados::ObjectReadOperation op;
+  ::cls_user_get_header(op, header, &rc);
+  r = rados_obj.operate(dpp, &op, &ibl, y);
+  return (r >= 0 && rc < 0) ? rc : r;  // same rc check as cls_user_list_buckets()
+}
+
+// Start an asynchronous header read on the buckets object of `user_str`,
+// passing `cb` to the cls client call.
+int RGWSI_User_RADOS::cls_user_get_header_async(const DoutPrefixProvider *dpp, const string& user_str, RGWGetUserHeader_CB *cb)
+{
+  rgw_raw_obj buckets_obj = get_buckets_obj(rgw_user(user_str));
+  auto target = svc.rados->obj(buckets_obj);
+  int rc = target.open(dpp);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // The cls client call needs the raw ioctx and oid behind the opened object.
+  auto& ref = target.get_ref();
+  rc = ::cls_user_get_header_async(ref.pool.ioctx(), ref.obj.oid, cb);
+
+  // Any non-negative result is reported as 0.
+  return (rc < 0) ? rc : 0;
+}
+
+// Fill *stats from the header of the user's buckets object.
+// last_stats_sync and last_stats_update are optional outputs.
+int RGWSI_User_RADOS::read_stats(const DoutPrefixProvider *dpp,
+                                 RGWSI_MetaBackend::Context *ctx,
+                                 const rgw_user& user, RGWStorageStats *stats,
+                                 ceph::real_time *last_stats_sync,
+                                 ceph::real_time *last_stats_update,
+                                 optional_yield y)
+{
+  // The user record is read only so that a failure to read it is returned; its contents are unused.
+  RGWUserInfo unused_info;
+  real_time unused_mtime;
+  const int info_rc = read_user_info(ctx, user, &unused_info, nullptr, &unused_mtime, nullptr, nullptr, y, dpp);
+  if (info_rc < 0) {
+    return info_rc;
+  }
+
+  // -ENOENT from the header read is tolerated; the header is then used as it stands.
+  cls_user_header header;
+  const string user_str = user.to_str();
+  const int hdr_rc = cls_user_get_header(dpp, rgw_user(user_str), &header, y);
+  if (hdr_rc < 0 && hdr_rc != -ENOENT) {
+    return hdr_rc;
+  }
+
+  stats->size = header.stats.total_bytes;
+  stats->size_rounded = header.stats.total_bytes_rounded;
+  stats->num_objects = header.stats.total_entries;
+
+  if (last_stats_sync != nullptr) {
+    *last_stats_sync = header.last_stats_sync;
+  }
+  if (last_stats_update != nullptr) {
+    *last_stats_update = header.last_stats_update;
+  }
+
+  return 0;
+}
+
+// Adapts a user-header completion (RGWGetUserHeader_CB) to a RGWGetUserStats_CB.
+class RGWGetUserStatsContext : public RGWGetUserHeader_CB {
+  RGWGetUserStats_CB *cb;
+
+public:
+  explicit RGWGetUserStatsContext(RGWGetUserStats_CB * const cb) : cb(cb) {}
+
+  // On success pass the header's totals to the wrapped callback; then, in every
+  // case, notify it with r and call put() on it.
+  void handle_response(int r, cls_user_header& header) override {
+    if (r >= 0) {
+      const cls_user_stats& totals = header.stats;
+      RGWStorageStats stats;
+      stats.size = totals.total_bytes;
+      stats.size_rounded = totals.total_bytes_rounded;
+      stats.num_objects = totals.total_entries;
+      cb->set_response(stats);
+    }
+
+    cb->handle_response(r);
+
+    cb->put();
+  }
+};
+
+// Asynchronously fetch the user's stats; `_cb` is wrapped in a RGWGetUserStatsContext adapter.
+int RGWSI_User_RADOS::read_stats_async(const DoutPrefixProvider *dpp,
+                                       const rgw_user& user, RGWGetUserStats_CB *_cb)
+{
+  const string user_str = user.to_str();
+  auto *adapter = new RGWGetUserStatsContext(_cb);
+  const int rc = cls_user_get_header_async(dpp, user_str, adapter);
+  if (rc < 0) {
+    // NOTE(review): the adapter is freed here but put() is not called on _cb — confirm the caller drops that reference.
+    delete adapter;
+    return rc;
+  }
+  return 0;
+}
+
diff --git a/src/rgw/services/svc_user_rados.h b/src/rgw/services/svc_user_rados.h
new file mode 100644
index 000000000..177f720d6
--- /dev/null
+++ b/src/rgw/services/svc_user_rados.h
@@ -0,0 +1,211 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#pragma once
+
+#include "rgw_service.h"
+
+#include "svc_meta_be.h"
+#include "svc_user.h"
+#include "rgw_bucket.h"
+
+class RGWSI_RADOS;
+class RGWSI_Zone;
+class RGWSI_SysObj;
+class RGWSI_SysObj_Cache;
+class RGWSI_Meta;
+class RGWSI_SyncModules;
+class RGWSI_MetaBackend_Handler;
+
+struct rgw_cache_entry_info;
+
+class RGWGetUserHeader_CB;
+class RGWGetUserStats_CB;
+
+template <class T>
+class RGWChainedCacheImpl;
+
+// RADOS-backed implementation of the RGWSI_User service interface.
+class RGWSI_User_RADOS : public RGWSI_User {
+  friend class PutOperation;
+
+  std::unique_ptr<RGWSI_MetaBackend::Module> be_module;
+  RGWSI_MetaBackend_Handler *be_handler{nullptr};  // set by do_start(); null until then
+  // One cached index-lookup result.
+  struct user_info_cache_entry {
+    RGWUserInfo info;
+    RGWObjVersionTracker objv_tracker;
+    real_time mtime;
+  };
+
+  using RGWChainedCacheImpl_user_info_cache_entry = RGWChainedCacheImpl<user_info_cache_entry>;
+  std::unique_ptr<RGWChainedCacheImpl_user_info_cache_entry> uinfo_cache;
+  // Raw object holding the user's bucket list and stats header.
+  rgw_raw_obj get_buckets_obj(const rgw_user& user_id) const;
+  // Resolve an index object (`key` in `pool`) to the user record it points at, via uinfo_cache.
+  int get_user_info_from_index(RGWSI_MetaBackend::Context *ctx,
+                               const std::string& key,
+                               const rgw_pool& pool,
+                               RGWUserInfo *info,
+                               RGWObjVersionTracker * const objv_tracker,
+                               real_time * const pmtime,
+                               optional_yield y,
+                               const DoutPrefixProvider *dpp);
+  // Remove the user's metadata entry.
+  int remove_uid_index(RGWSI_MetaBackend::Context *ctx, const RGWUserInfo& user_info, RGWObjVersionTracker *objv_tracker,
+                       optional_yield y, const DoutPrefixProvider *dpp);
+  // Remove a single lookup index object.
+  int remove_key_index(const DoutPrefixProvider *dpp, const RGWAccessKey& access_key, optional_yield y);
+  int remove_email_index(const DoutPrefixProvider *dpp, const std::string& email, optional_yield y);
+  int remove_swift_name_index(const DoutPrefixProvider *dpp, const std::string& swift_name, optional_yield y);
+
+  /* cls_user helpers operating on a user's buckets object */
+  int cls_user_update_buckets(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, std::list<cls_user_bucket_entry>& entries, bool add, optional_yield y);
+  int cls_user_add_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket_entry& entry, optional_yield y);
+  int cls_user_remove_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket& bucket, optional_yield y);
+
+  /* quota stats */
+  int cls_user_flush_bucket_stats(const DoutPrefixProvider *dpp, rgw_raw_obj& user_obj,
+                                  const RGWBucketEnt& ent, optional_yield y);
+  int cls_user_list_buckets(const DoutPrefixProvider *dpp,
+                            rgw_raw_obj& obj,
+                            const std::string& in_marker,
+                            const std::string& end_marker,
+                            const int max_entries,
+                            std::list<cls_user_bucket_entry>& entries,
+                            std::string * const out_marker,
+                            bool * const truncated,
+                            optional_yield y);
+
+  int cls_user_reset_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y);
+  int cls_user_get_header(const DoutPrefixProvider *dpp, const rgw_user& user, cls_user_header *header, optional_yield y);
+  int cls_user_get_header_async(const DoutPrefixProvider *dpp, const std::string& user, RGWGetUserHeader_CB *cb);
+  // Creates uinfo_cache, the backend handler and its module.
+  int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+public:
+  struct Svc {
+    RGWSI_User_RADOS *user{nullptr};
+    RGWSI_RADOS *rados{nullptr};
+    RGWSI_Zone *zone{nullptr};
+    RGWSI_SysObj *sysobj{nullptr};
+    RGWSI_SysObj_Cache *cache{nullptr};
+    RGWSI_Meta *meta{nullptr};
+    RGWSI_MetaBackend *meta_be{nullptr};
+    RGWSI_SyncModules *sync_modules{nullptr};
+  } svc;
+
+  RGWSI_User_RADOS(CephContext *cct);
+  ~RGWSI_User_RADOS();
+
+  void init(RGWSI_RADOS *_rados_svc,
+            RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc,
+            RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc,
+            RGWSI_MetaBackend *_meta_be_svc,
+            RGWSI_SyncModules *_sync_modules);
+  // Null until do_start() has created the handler.
+  RGWSI_MetaBackend_Handler *get_be_handler() override {
+    return be_handler;
+  }
+  // Read and decode the record stored under the user's meta key.
+  int read_user_info(RGWSI_MetaBackend::Context *ctx,
+                     const rgw_user& user,
+                     RGWUserInfo *info,
+                     RGWObjVersionTracker * const objv_tracker,
+                     real_time * const pmtime,
+                     rgw_cache_entry_info * const cache_info,
+                     std::map<std::string, bufferlist> * const pattrs,
+                     optional_yield y,
+                     const DoutPrefixProvider *dpp) override;
+  // Write the record and bring the email / access-key / swift index objects in line with it.
+  int store_user_info(RGWSI_MetaBackend::Context *ctx,
+                      const RGWUserInfo& info,
+                      RGWUserInfo *old_info,
+                      RGWObjVersionTracker *objv_tracker,
+                      const real_time& mtime,
+                      bool exclusive,
+                      std::map<std::string, bufferlist> *attrs,
+                      optional_yield y,
+                      const DoutPrefixProvider *dpp) override;
+  // Remove the index objects, the buckets object and the metadata entry of a user.
+  int remove_user_info(RGWSI_MetaBackend::Context *ctx,
+                       const RGWUserInfo& info,
+                       RGWObjVersionTracker *objv_tracker,
+                       optional_yield y,
+                       const DoutPrefixProvider *dpp) override;
+  // Lookups through the email, swift-name and access-key index pools.
+  int get_user_info_by_email(RGWSI_MetaBackend::Context *ctx,
+                             const std::string& email, RGWUserInfo *info,
+                             RGWObjVersionTracker *objv_tracker,
+                             real_time *pmtime,
+                             optional_yield y,
+                             const DoutPrefixProvider *dpp) override;
+  int get_user_info_by_swift(RGWSI_MetaBackend::Context *ctx,
+                             const std::string& swift_name,
+                             RGWUserInfo *info, /* out */
+                             RGWObjVersionTracker * const objv_tracker,
+                             real_time * const pmtime,
+                             optional_yield y,
+                             const DoutPrefixProvider *dpp) override;
+  int get_user_info_by_access_key(RGWSI_MetaBackend::Context *ctx,
+                                  const std::string& access_key,
+                                  RGWUserInfo *info,
+                                  RGWObjVersionTracker* objv_tracker,
+                                  real_time *pmtime,
+                                  optional_yield y,
+                                  const DoutPrefixProvider *dpp) override;
+
+  /* user buckets directory */
+
+  int add_bucket(const DoutPrefixProvider *dpp,
+                 const rgw_user& user,
+                 const rgw_bucket& bucket,
+                 ceph::real_time creation_time,
+                 optional_yield y) override;
+  int remove_bucket(const DoutPrefixProvider *dpp,
+                    const rgw_user& user,
+                    const rgw_bucket& _bucket,
+                    optional_yield y) override;
+  int list_buckets(const DoutPrefixProvider *dpp,
+                   const rgw_user& user,
+                   const std::string& marker,
+                   const std::string& end_marker,
+                   uint64_t max,
+                   RGWUserBuckets *buckets,
+                   bool *is_truncated,
+                   optional_yield y) override;
+
+  /* quota related */
+  int flush_bucket_stats(const DoutPrefixProvider *dpp,
+                         const rgw_user& user,
+                         const RGWBucketEnt& ent, optional_yield y) override;
+
+  int complete_flush_stats(const DoutPrefixProvider *dpp,
+                           const rgw_user& user, optional_yield y) override;
+
+  int reset_bucket_stats(const DoutPrefixProvider *dpp,
+                         const rgw_user& user,
+                         optional_yield y) override;
+  int read_stats(const DoutPrefixProvider *dpp,
+                 RGWSI_MetaBackend::Context *ctx,
+                 const rgw_user& user, RGWStorageStats *stats,
+                 ceph::real_time *last_stats_sync, /* last time a full stats sync completed */
+                 ceph::real_time *last_stats_update, /* last time a stats update was done */
+                 optional_yield y) override;
+
+  int read_stats_async(const DoutPrefixProvider *dpp, const rgw_user& user,
+                       RGWGetUserStats_CB *cb) override;
+};
+
diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc
new file mode 100644
index 000000000..180d93712
--- /dev/null
+++ b/src/rgw/services/svc_zone.cc
@@ -0,0 +1,1100 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_zone.h"
+#include "svc_rados.h"
+#include "svc_sys_obj.h"
+#include "svc_sync_modules.h"
+
+#include "rgw_zone.h"
+#include "rgw_rest_conn.h"
+#include "rgw_bucket_sync.h"
+
+#include "common/errno.h"
+#include "include/random.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+using namespace std;
+using namespace rgw_zone_defaults;
+
+// Construct the service instance. Only forwards the Ceph context to the
+// RGWServiceInstance base; dependencies and owned objects are set up in init().
+RGWSI_Zone::RGWSI_Zone(CephContext *cct) : RGWServiceInstance(cct)
+{
+}
+
+// Wire up the services this one depends on and allocate the realm, zonegroup,
+// zone and period objects it owns (released again in the destructor).
+void RGWSI_Zone::init(RGWSI_SysObj *_sysobj_svc,
+ RGWSI_RADOS * _rados_svc,
+ RGWSI_SyncModules * _sync_modules_svc,
+ RGWSI_Bucket_Sync *_bucket_sync_svc)
+{
+ sysobj_svc = _sysobj_svc;
+ rados_svc = _rados_svc;
+ sync_modules_svc = _sync_modules_svc;
+ bucket_sync_svc = _bucket_sync_svc;
+
+ // NOTE(review): a second init() call would overwrite (and leak) these
+ // pointers; presumably init() is only ever called once - confirm with caller.
+ realm = new RGWRealm();
+ zonegroup = new RGWZoneGroup();
+ zone_public_config = new RGWZone();
+ zone_params = new RGWZoneParams();
+ current_period = new RGWPeriod();
+}
+
+// Release the objects allocated in init(). The pointers default to nullptr,
+// so destroying an instance that was never initialized is safe.
+RGWSI_Zone::~RGWSI_Zone()
+{
+ delete realm;
+ delete zonegroup;
+ delete zone_public_config;
+ delete zone_params;
+ delete current_period;
+}
+
+// Return the sync policy handler for `zone`. With no zone given, or when the
+// zone is the local one, the local handler is returned; a zone without a
+// registered handler yields an empty shared_ptr.
+std::shared_ptr<RGWBucketSyncPolicyHandler> RGWSI_Zone::get_sync_policy_handler(std::optional<rgw_zone_id> zone) const {
+  const bool want_local = !zone || *zone == zone_id();
+  if (want_local) {
+    return sync_policy_handler;
+  }
+
+  const auto found = sync_policy_handlers.find(*zone);
+  if (found != sync_policy_handlers.end()) {
+    return found->second;
+  }
+  return std::shared_ptr<RGWBucketSyncPolicyHandler>();
+}
+
+// True when target_zone is configured to sync from source_zone and the source
+// zone's tier type supports exporting data.
+bool RGWSI_Zone::zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const
+{
+  if (!target_zone.syncs_from(source_zone.name)) {
+    return false;
+  }
+  return sync_modules_svc->get_manager()->supports_data_export(source_zone.tier_type);
+}
+
+// True when the local zone syncs data from source_zone: the zone must be one
+// of the data sync sources collected in do_start(), the local zone must be
+// configured to sync from it, and its tier type must support data export.
+bool RGWSI_Zone::zone_syncs_from(const RGWZone& source_zone) const
+{
+  // get_zone() returns a const reference; bind to it instead of copying the zone
+  const RGWZone& target_zone = get_zone();
+
+  bool found = false;
+  for (const RGWZone *s : data_sync_source_zones) {
+    if (s->id == source_zone.id) {
+      found = true;
+      break;
+    }
+  }
+
+  // the remaining checks are exactly what the two-zone overload does
+  return found && zone_syncs_from(target_zone, source_zone);
+}
+
+/**
+ * Walk all realms and look for one whose configuration contains zone `zid`.
+ *
+ * @param zid         zone to look for
+ * @param prealm      [out] set to the matching realm when one is found
+ * @param pperiod     [out] passed through to RGWRealm::find_zone()
+ * @param pzonegroup  [out] passed through to RGWRealm::find_zone()
+ * @param pfound      [out] must not be null; true iff a realm was found
+ * @return 0 on success (whether or not a realm was found), or a negative
+ *         error from listing the realms or from find_zone(). Realms that
+ *         fail to open are skipped with a warning.
+ */
+int RGWSI_Zone::search_realm_with_zone(const DoutPrefixProvider *dpp,
+                                       const rgw_zone_id& zid,
+                                       RGWRealm *prealm,
+                                       RGWPeriod *pperiod,
+                                       RGWZoneGroup *pzonegroup,
+                                       bool *pfound,
+                                       optional_yield y)
+{
+  auto& found = *pfound;
+
+  found = false;
+
+  list<string> realms;
+  int r = list_realms(dpp, realms);
+  if (r < 0) {
+    ldpp_dout(dpp, 0) << "ERROR: failed to list realms: r=" << r << dendl;
+    return r;
+  }
+
+  for (auto& realm_name : realms) {
+    // only the name is known up front; the id is left empty for init()
+    string realm_id;
+    RGWRealm candidate(realm_id, realm_name);
+    r = candidate.init(dpp, cct, sysobj_svc, y);
+    if (r < 0) {
+      ldpp_dout(dpp, 0) << "WARNING: can't open realm " << realm_name << ": " << cpp_strerror(-r) << " ... skipping" << dendl;
+      continue;
+    }
+
+    r = candidate.find_zone(dpp, zid, pperiod,
+                            pzonegroup, &found, y);
+    if (r < 0) {
+      ldpp_dout(dpp, 20) << __func__ << "(): ERROR: realm.find_zone() returned r=" << r<< dendl;
+      return r;
+    }
+
+    if (found) {
+      *prealm = candidate;
+      // log the id of the realm that was opened, not the empty local string
+      ldpp_dout(dpp, 20) << __func__ << "(): found realm_id=" << candidate.get_id() << " realm_name=" << realm_name << dendl;
+      return 0;
+    }
+  }
+
+  return 0;
+}
+
+/**
+ * Start the zone service.
+ *
+ * Starts the sysobj and rados services, loads realm, current period and zone
+ * params, and resolves the zonegroup (from the period when it contains the
+ * zone, otherwise from local configuration, falling back to a default
+ * zonegroup/zone when none is configured explicitly). It then builds the
+ * per-zone sync policy handlers, the zone indexes and the REST connections
+ * to the other zones of the zonegroup.
+ *
+ * @return 0 on success, negative error code otherwise.
+ */
+int RGWSI_Zone::do_start(optional_yield y, const DoutPrefixProvider *dpp)
+{
+  int ret = sysobj_svc->start(y, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  assert(sysobj_svc->is_started()); /* if not then there's ordering issue */
+
+  ret = rados_svc->start(y, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  ret = realm->init(dpp, cct, sysobj_svc, y);
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, 0) << "failed reading realm info: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ldpp_dout(dpp, 20) << "realm " << realm->get_name() << " " << realm->get_id() << dendl;
+  ret = current_period->init(dpp, cct, sysobj_svc, realm->get_id(), y,
+                             realm->get_name());
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, 0) << "failed reading current period info: " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  ret = zone_params->init(dpp, cct, sysobj_svc, y);
+  bool found_zone = (ret == 0);
+  if (ret < 0 && ret != -ENOENT) {
+    ldpp_dout(dpp, -1) << "failed reading zone info: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
+    return ret;
+  }
+
+  cur_zone_id = rgw_zone_id(zone_params->get_id());
+
+  bool found_period_conf = false;
+
+  /* try to find zone in period config (if we have one) */
+  if (found_zone &&
+      !current_period->get_id().empty()) {
+    // find_zone()'s result is stored in found_period_conf; ret is still 0 here
+    // (found_zone), so there is no error code to check afterwards
+    found_period_conf = current_period->find_zone(dpp,
+                                                  cur_zone_id,
+                                                  zonegroup,
+                                                  y);
+    if (!found_period_conf) {
+      ldpp_dout(dpp, 0) << "period (" << current_period->get_id() << ") does not have zone " << cur_zone_id << " configured" << dendl;
+    }
+  }
+
+  if (found_zone &&
+      !found_period_conf) {
+    ldpp_dout(dpp, 20) << "searching for the correct realm" << dendl;
+    ret = search_realm_with_zone(dpp,
+                                 cur_zone_id,
+                                 realm,
+                                 current_period,
+                                 zonegroup,
+                                 &found_period_conf,
+                                 y);
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << "ERROR: search_realm_with_zone() failed: ret="<< ret << dendl;
+      return ret;
+    }
+  }
+  bool zg_initialized = found_period_conf;
+
+  if (!zg_initialized) {
+    /* couldn't find a proper period config, use local zonegroup */
+    ret = zonegroup->init(dpp, cct, sysobj_svc, y);
+    zg_initialized = (ret == 0);
+    if (ret < 0 && ret != -ENOENT) {
+      ldpp_dout(dpp, 0) << "failed reading zonegroup info: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+  }
+
+  auto& zonegroup_param = cct->_conf->rgw_zonegroup;
+  bool init_from_period = found_period_conf;
+  bool explicit_zg = !zonegroup_param.empty();
+
+  if (!zg_initialized &&
+      (!explicit_zg || zonegroup_param == default_zonegroup_name)) {
+    /* we couldn't initialize any zonegroup,
+       falling back to a non-multisite config with default zonegroup */
+    ret = create_default_zg(dpp, y);
+    if (ret < 0) {
+      return ret;
+    }
+    zg_initialized = true;
+  }
+
+  if (!zg_initialized) {
+    ldpp_dout(dpp, 0) << "ERROR: could not find zonegroup (" << zonegroup_param << ")" << dendl;
+    return -ENOENT;
+  }
+
+  /* we have zonegroup now */
+
+  if (explicit_zg &&
+      zonegroup->get_name() != zonegroup_param) {
+    ldpp_dout(dpp, 0) << "ERROR: incorrect zonegroup: " << zonegroup_param << " (got: " << zonegroup_param << ", expected: " << zonegroup->get_name() << ")" << dendl;
+    return -EINVAL;
+  }
+
+  auto& zone_param = cct->_conf->rgw_zone;
+  bool explicit_zone = !zone_param.empty();
+
+  if (!found_zone) {
+    if ((!explicit_zone || zone_param == default_zone_name) &&
+        zonegroup->get_name() == default_zonegroup_name) {
+      ret = init_default_zone(dpp, y);
+      if (ret < 0 && ret != -ENOENT) {
+        return ret;
+      }
+      cur_zone_id = zone_params->get_id();
+    } else {
+      ldpp_dout(dpp, 0) << "ERROR: could not find zone (" << zone_param << ")" << dendl;
+      return -ENOENT;
+    }
+  }
+
+  /* we have zone now */
+
+  auto zone_iter = zonegroup->zones.find(zone_params->get_id());
+  if (zone_iter == zonegroup->zones.end()) {
+    /* shouldn't happen if relying on period config */
+    if (!init_from_period) {
+      ldpp_dout(dpp, -1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << ")" << dendl;
+      return -EINVAL;
+    }
+    ldpp_dout(dpp, 1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << "), switching to local zonegroup configuration" << dendl;
+    init_from_period = false;
+    // NOTE(review): the zonegroup is not reloaded before this second lookup,
+    // so it repeats the lookup above and the check below then fails.
+    zone_iter = zonegroup->zones.find(zone_params->get_id());
+  }
+  if (zone_iter == zonegroup->zones.end()) {
+    ldpp_dout(dpp, -1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << ")" << dendl;
+    return -EINVAL;
+  }
+  *zone_public_config = zone_iter->second;
+  ldpp_dout(dpp, 20) << "zone " << zone_params->get_name() << " found" << dendl;
+
+  ldpp_dout(dpp, 4) << "Realm: " << std::left << setw(20) << realm->get_name() << " (" << realm->get_id() << ")" << dendl;
+  ldpp_dout(dpp, 4) << "ZoneGroup: " << std::left << setw(20) << zonegroup->get_name() << " (" << zonegroup->get_id() << ")" << dendl;
+  ldpp_dout(dpp, 4) << "Zone: " << std::left << setw(20) << zone_params->get_name() << " (" << zone_params->get_id() << ")" << dendl;
+
+  if (init_from_period) {
+    ldpp_dout(dpp, 4) << "using period configuration: " << current_period->get_id() << ":" << current_period->get_epoch() << dendl;
+    ret = init_zg_from_period(dpp, y);
+    if (ret < 0) {
+      return ret;
+    }
+  } else {
+    ldpp_dout(dpp, 10) << "cannot find current period zonegroup using local zonegroup configuration" << dendl;
+    ret = init_zg_from_local(dpp, y);
+    if (ret < 0) {
+      return ret;
+    }
+    // read period_config into current_period
+    auto& period_config = current_period->get_config();
+    ret = period_config.read(dpp, sysobj_svc, zonegroup->realm_id, y);
+    if (ret < 0 && ret != -ENOENT) {
+      ldpp_dout(dpp, 0) << "ERROR: failed to read period config: "
+                        << cpp_strerror(ret) << dendl;
+      return ret;
+    }
+  }
+
+  zone_short_id = current_period->get_map().get_zone_short_id(zone_params->get_id());
+
+  /* one sync policy handler per zone of the zonegroup */
+  for (const auto& ziter : zonegroup->zones) {
+    auto zone_handler = std::make_shared<RGWBucketSyncPolicyHandler>(this, sync_modules_svc, bucket_sync_svc, ziter.second.id);
+    ret = zone_handler->init(dpp, y);
+    if (ret < 0) {
+      ldpp_dout(dpp, -1) << "ERROR: could not initialize zone policy handler for zone=" << ziter.second.name << dendl;
+      return ret;
+    }
+    sync_policy_handlers[ziter.second.id] = zone_handler;
+  }
+
+  sync_policy_handler = sync_policy_handlers[zone_id()]; /* we made sure earlier that zonegroup->zones has our zone */
+
+  set<rgw_zone_id> source_zones;
+  set<rgw_zone_id> target_zones;
+
+  sync_policy_handler->reflect(dpp, nullptr, nullptr,
+                               nullptr, nullptr,
+                               &source_zones,
+                               &target_zones,
+                               false); /* relaxed: also get all zones that we allow to sync to/from */
+
+  ret = sync_modules_svc->start(y, dpp);
+  if (ret < 0) {
+    return ret;
+  }
+
+  auto sync_modules = sync_modules_svc->get_manager();
+  RGWSyncModuleRef sm;
+  if (!sync_modules->get_module(zone_public_config->tier_type, &sm)) {
+    ldpp_dout(dpp, -1) << "ERROR: tier type not found: " << zone_public_config->tier_type << dendl;
+    return -EINVAL;
+  }
+
+  writeable_zone = sm->supports_writes();
+  exports_data = sm->supports_data_export();
+
+  /* first build all zones index */
+  for (const auto& ziter : zonegroup->zones) {
+    const rgw_zone_id& id = ziter.first;
+    const RGWZone& z = ziter.second;
+    zone_id_by_name[z.name] = id;
+    zone_by_id[id] = z;
+  }
+
+  if (zone_by_id.find(zone_id()) == zone_by_id.end()) {
+    ldpp_dout(dpp, 0) << "WARNING: could not find zone config in zonegroup for local zone (" << zone_id() << "), will use defaults" << dendl;
+  }
+
+  /* connections to every other zone of the zonegroup that has endpoints */
+  for (const auto& ziter : zonegroup->zones) {
+    const rgw_zone_id& id = ziter.first;
+    const RGWZone& z = ziter.second;
+    if (id == zone_id()) {
+      continue;
+    }
+    if (z.endpoints.empty()) {
+      ldpp_dout(dpp, 0) << "WARNING: can't generate connection for zone " << z.name << " id " << z.id << ": no endpoints defined" << dendl;
+      continue;
+    }
+    ldpp_dout(dpp, 20) << "generating connection object for zone " << z.name << " id " << z.id << dendl;
+    /* owned by zone_conn_map, released in shutdown() */
+    RGWRESTConn *conn = new RGWRESTConn(cct, z.id, z.endpoints, zone_params->system_key, zonegroup->get_id(), zonegroup->api_name);
+    zone_conn_map[id] = conn;
+
+    bool zone_is_source = source_zones.find(z.id) != source_zones.end();
+    bool zone_is_target = target_zones.find(z.id) != target_zones.end();
+
+    if (zone_is_source || zone_is_target) {
+      if (zone_is_source && sync_modules->supports_data_export(z.tier_type)) {
+        /* z refers to the entry inside zonegroup->zones, so the pointer stays valid */
+        data_sync_source_zones.push_back(&z);
+      }
+      if (zone_is_target) {
+        zone_data_notify_to_map[id] = conn;
+      }
+    } else {
+      ldpp_dout(dpp, 20) << "NOTICE: not syncing to/from zone " << z.name << " id " << z.id << dendl;
+    }
+  }
+
+  ldpp_dout(dpp, 20) << "started zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() <<
+        ") with tier type = " << zone_public_config->tier_type << dendl;
+
+  return 0;
+}
+
+// Release the REST connections owned by this service: the master connection
+// and every entry of the per-zone and per-zonegroup connection maps.
+void RGWSI_Zone::shutdown()
+{
+  delete rest_master_conn;
+
+  for (auto it = zone_conn_map.begin(); it != zone_conn_map.end(); ++it) {
+    delete it->second;
+  }
+
+  for (auto it = zonegroup_conn_map.begin(); it != zonegroup_conn_map.end(); ++it) {
+    delete it->second;
+  }
+}
+
+// List objects carrying the region-info prefix in the zonegroup pool into `regions`.
+// Returns the result of the prefixed listing.
+int RGWSI_Zone::list_regions(const DoutPrefixProvider *dpp, list<string>& regions)
+{
+  RGWZoneGroup zg; // only used to resolve the pool
+  RGWSI_SysObj::Pool pool = sysobj_svc->get_pool(zg.get_pool(cct));
+  return pool.list_prefixed_objs(dpp, region_info_oid_prefix, &regions);
+}
+
+// List objects carrying the zonegroup-names prefix in the zonegroup pool into
+// `zonegroups`. Returns the result of the prefixed listing.
+int RGWSI_Zone::list_zonegroups(const DoutPrefixProvider *dpp, list<string>& zonegroups)
+{
+  RGWZoneGroup zg; // only used to resolve the pool
+  RGWSI_SysObj::Pool pool = sysobj_svc->get_pool(zg.get_pool(cct));
+  return pool.list_prefixed_objs(dpp, zonegroup_names_oid_prefix, &zonegroups);
+}
+
+// List objects carrying the zone-names prefix in the zone params pool into
+// `zones`. Returns the result of the prefixed listing.
+int RGWSI_Zone::list_zones(const DoutPrefixProvider *dpp, list<string>& zones)
+{
+  RGWZoneParams params; // only used to resolve the pool
+  RGWSI_SysObj::Pool pool = sysobj_svc->get_pool(params.get_pool(cct));
+  return pool.list_prefixed_objs(dpp, zone_names_oid_prefix, &zones);
+}
+
+// List objects carrying the realm-names prefix in the realm pool into
+// `realms`. Returns the result of the prefixed listing.
+int RGWSI_Zone::list_realms(const DoutPrefixProvider *dpp, list<string>& realms)
+{
+  RGWRealm probe(cct, sysobj_svc); // only used to resolve the pool
+  RGWSI_SysObj::Pool pool = sysobj_svc->get_pool(probe.get_pool(cct));
+  return pool.list_prefixed_objs(dpp, realm_names_oid_prefix, &realms);
+}
+
+// Append the ids of all stored periods to `periods`, then sort the list and
+// drop duplicates. An id is the listed name up to its first '.'; names
+// without a '.' are used whole. On a listing error `periods` is left
+// untouched and the error is returned.
+int RGWSI_Zone::list_periods(const DoutPrefixProvider *dpp, list<string>& periods)
+{
+  RGWPeriod probe; // only used to resolve pool and oid prefix
+  RGWSI_SysObj::Pool pool = sysobj_svc->get_pool(probe.get_pool(cct));
+
+  list<string> listed;
+  const int ret = pool.list_prefixed_objs(dpp, probe.get_info_oid_prefix(), &listed);
+  if (ret < 0) {
+    return ret;
+  }
+
+  for (const auto& oid : listed) {
+    const size_t dot = oid.find(".");
+    periods.push_back(dot == std::string::npos ? oid : oid.substr(0, dot));
+  }
+
+  // unique() only removes adjacent duplicates, hence the sort first
+  periods.sort();
+  periods.unique();
+  return 0;
+}
+
+
+// Starting at `current_period`, follow the predecessor chain and append each
+// period id to `periods` until a period reports an empty predecessor. Stops
+// and returns the error as soon as a period fails to load; ids appended
+// before that stay in `periods`.
+int RGWSI_Zone::list_periods(const DoutPrefixProvider *dpp, const string& current_period, list<string>& periods, optional_yield y)
+{
+  int ret = 0;
+  for (string next_id = current_period; !next_id.empty(); ) {
+    RGWPeriod period(next_id);
+    ret = period.init(dpp, cct, sysobj_svc, y);
+    if (ret < 0) {
+      return ret;
+    }
+    periods.push_back(period.get_id());
+    next_id = period.get_predecessor();
+  }
+
+  return ret;
+}
+
+/**
+ * Register a connection for a zonegroup, replacing any previous one.
+ * @param zonegroup_conn_map map the connection is stored in, keyed by zonegroup id
+ * @param zonegroup zonegroup the connection talks to
+ * @param new_connection connection instance; the map entry now points to it
+ */
+static void add_new_connection_to_map(map<string, RGWRESTConn *> &zonegroup_conn_map,
+                                      const RGWZoneGroup &zonegroup, RGWRESTConn *new_connection)
+{
+  // operator[] yields a null pointer for a new key, and deleting null is a
+  // no-op, so one code path covers both "replace" and "insert"
+  RGWRESTConn *& slot = zonegroup_conn_map[zonegroup.get_id()];
+  delete slot;
+  slot = new_connection;
+}
+
+/**
+ * Initialize the zonegroup from the current period's map and create a REST
+ * connection to the master zone of every zonegroup in the period.
+ *
+ * A zonegroup with exactly one zone and no master_zone set is repaired by
+ * making that zone the master and persisting the change. Zonegroups without
+ * any zone are skipped. The connection to the period's master zonegroup is
+ * additionally stored in rest_master_conn.
+ *
+ * @return 0 on success; -EINVAL if a zonegroup's master_zone names a zone it
+ *         does not contain; otherwise the error from init()/update().
+ */
+int RGWSI_Zone::init_zg_from_period(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  ldpp_dout(dpp, 20) << "period zonegroup name " << zonegroup->get_name() << dendl;
+
+  const auto& period_zonegroups = current_period->get_map().zonegroups;
+
+  auto iter = period_zonegroups.find(zonegroup->get_id());
+  if (iter != period_zonegroups.end()) {
+    ldpp_dout(dpp, 20) << "using current period zonegroup " << zonegroup->get_name() << dendl;
+    *zonegroup = iter->second;
+    int ret = zonegroup->init(dpp, cct, sysobj_svc, y, false);
+    if (ret < 0) {
+      ldpp_dout(dpp, 0) << "failed init zonegroup: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+  }
+
+  for (const auto& entry : period_zonegroups) {
+    const RGWZoneGroup& zg = entry.second;
+    // use endpoints from the zonegroup's master zone
+    auto master = zg.zones.find(zg.master_zone);
+    if (master == zg.zones.end()) {
+      // Check for empty zonegroup which can happen if zone was deleted before removal
+      if (zg.zones.size() == 0)
+        continue;
+      // fix missing master zone for a single zone zonegroup
+      if (zg.master_zone.empty() && zg.zones.size() == 1) {
+        master = zg.zones.begin();
+        ldpp_dout(dpp, 0) << "zonegroup " << zg.get_name() << " missing master_zone, setting zone " <<
+          master->second.name << " id:" << master->second.id << " as master" << dendl;
+        if (zonegroup->get_id() == zg.get_id()) {
+          // our own zonegroup: fix the in-memory copy and persist it
+          zonegroup->master_zone = master->second.id;
+          int ret = zonegroup->update(dpp, y);
+          if (ret < 0) {
+            ldpp_dout(dpp, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+        } else {
+          // another zonegroup: load it, fix it and write it back
+          RGWZoneGroup fixed_zg(zg.get_id(),zg.get_name());
+          int ret = fixed_zg.init(dpp, cct, sysobj_svc, y);
+          if (ret < 0) {
+            ldpp_dout(dpp, 0) << "error initializing zonegroup : " << cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+          fixed_zg.master_zone = master->second.id;
+          ret = fixed_zg.update(dpp, y);
+          if (ret < 0) {
+            ldpp_dout(dpp, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl;
+            return ret;
+          }
+        }
+      } else {
+        ldpp_dout(dpp, 0) << "zonegroup " << zg.get_name() << " missing zone for master_zone=" <<
+          zg.master_zone << dendl;
+        return -EINVAL;
+      }
+    }
+    const auto& endpoints = master->second.endpoints;
+    add_new_connection_to_map(zonegroup_conn_map, zg, new RGWRESTConn(cct, zg.get_id(), endpoints, zone_params->system_key, zonegroup->get_id(), zg.api_name));
+    if (!current_period->get_master_zonegroup().empty() &&
+        zg.get_id() == current_period->get_master_zonegroup()) {
+      rest_master_conn = new RGWRESTConn(cct, zg.get_id(), endpoints, zone_params->system_key, zonegroup->get_id(), zg.api_name);
+    }
+  }
+
+  return 0;
+}
+
+// Create the default zonegroup and then initialize `zonegroup` from it.
+// Returns 0 on success or the error of whichever of the two steps failed.
+int RGWSI_Zone::create_default_zg(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  ldpp_dout(dpp, 10) << "Creating default zonegroup " << dendl;
+  int ret = zonegroup->create_default(dpp, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "failure in zonegroup create_default: ret "<< ret << " " << cpp_strerror(-ret)
+      << dendl;
+    return ret;
+  }
+  ret = zonegroup->init(dpp, cct, sysobj_svc, y);
+  if (ret < 0) {
+    // name the step that actually failed, so the two failures can be told apart
+    ldpp_dout(dpp, 0) << "failure in zonegroup init: ret "<< ret << " " << cpp_strerror(-ret)
+      << dendl;
+    return ret;
+  }
+
+  return 0;
+}
+
+// Name the zone params after the default zone and try to load them.
+// A missing zone (-ENOENT) is not treated as an error: the function then
+// returns 0. Any other load error is logged and returned.
+int RGWSI_Zone::init_default_zone(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  ldpp_dout(dpp, 10) << " Using default name "<< default_zone_name << dendl;
+  zone_params->set_name(default_zone_name);
+
+  const int ret = zone_params->init(dpp, cct, sysobj_svc, y);
+  const bool tolerated = (ret >= 0 || ret == -ENOENT);
+  if (tolerated) {
+    return 0;
+  }
+
+  ldpp_dout(dpp, 0) << "failed reading zone params info: " << " " << cpp_strerror(-ret) << dendl;
+  return ret;
+}
+
+/**
+ * Set up rest_master_conn from the locally configured zonegroup.
+ *
+ * Only acts when the zonegroup is the master zonegroup. If master_zone is
+ * empty and the zonegroup has exactly one zone, that zone is made master and
+ * the zonegroup is persisted.
+ *
+ * @return 0 on success; -EINVAL if the master zone cannot be resolved;
+ *         otherwise the error from update().
+ */
+int RGWSI_Zone::init_zg_from_local(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  ldpp_dout(dpp, 20) << "zonegroup " << zonegroup->get_name() << dendl;
+  if (!zonegroup->is_master_zonegroup()) {
+    return 0;
+  }
+
+  // use endpoints from the zonegroup's master zone
+  auto master = zonegroup->zones.find(zonegroup->master_zone);
+  if (master == zonegroup->zones.end()) {
+    // fix missing master zone for a single zone zonegroup
+    if (zonegroup->master_zone.empty() && zonegroup->zones.size() == 1) {
+      master = zonegroup->zones.begin();
+      ldpp_dout(dpp, 0) << "zonegroup " << zonegroup->get_name() << " missing master_zone, setting zone " <<
+        master->second.name << " id:" << master->second.id << " as master" << dendl;
+      zonegroup->master_zone = master->second.id;
+      int ret = zonegroup->update(dpp, y);
+      if (ret < 0) {
+        // this is the update() step failing, not initialization
+        ldpp_dout(dpp, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl;
+        return ret;
+      }
+    } else {
+      ldpp_dout(dpp, 0) << "zonegroup " << zonegroup->get_name() << " missing zone for "
+        "master_zone=" << zonegroup->master_zone << dendl;
+      return -EINVAL;
+    }
+  }
+  const auto& endpoints = master->second.endpoints;
+  rest_master_conn = new RGWRESTConn(cct, zonegroup->get_id(), endpoints, zone_params->system_key, zonegroup->get_id(), zonegroup->api_name);
+
+  return 0;
+}
+
+// Zone params object owned by this service (allocated in init()).
+const RGWZoneParams& RGWSI_Zone::get_zone_params() const
+{
+ return *zone_params;
+}
+
+// Public zone config of the local zone; do_start() copies it from the
+// zonegroup's entry for this zone.
+const RGWZone& RGWSI_Zone::get_zone() const
+{
+ return *zone_public_config;
+}
+
+// Zonegroup object owned by this service (the local zone's zonegroup).
+const RGWZoneGroup& RGWSI_Zone::get_zonegroup() const
+{
+ return *zonegroup;
+}
+
+// Copy the zonegroup with the given id into `zg`. The local zonegroup is
+// served directly; any other id is looked up in the current period.
+// Note: when no period is loaded and `id` is not the local zonegroup, this
+// returns 0 and leaves `zg` untouched.
+int RGWSI_Zone::get_zonegroup(const string& id, RGWZoneGroup& zg) const
+{
+  if (id == zonegroup->get_id()) {
+    zg = *zonegroup;
+    return 0;
+  }
+  if (current_period->get_id().empty()) {
+    return 0;
+  }
+  return current_period->get_zonegroup(zg, id);
+}
+
+// Realm object owned by this service.
+const RGWRealm& RGWSI_Zone::get_realm() const
+{
+ return *realm;
+}
+
+// Current period object owned by this service.
+const RGWPeriod& RGWSI_Zone::get_current_period() const
+{
+ return *current_period;
+}
+
+// Id of the current period. Other code in this file treats an empty id as
+// "no period loaded".
+const string& RGWSI_Zone::get_current_period_id() const
+{
+ return current_period->get_id();
+}
+
+// True when `api` names a known zonegroup API: with a period loaded, the
+// period map's zonegroups_by_api index decides; without one, only the local
+// zonegroup's api_name is compared.
+bool RGWSI_Zone::has_zonegroup_api(const std::string& api) const
+{
+  if (current_period->get_id().empty()) {
+    return zonegroup->api_name == api;
+  }
+
+  const auto& by_api = current_period->get_map().zonegroups_by_api;
+  return by_api.find(api) != by_api.end();
+}
+
+// The zone accepts writes when its sync module supports writes (recorded in
+// do_start()) and the zone config is not marked read-only.
+bool RGWSI_Zone::zone_is_writeable()
+{
+ return writeable_zone && !get_zone().is_read_only();
+}
+
+// Short id of the local zone, taken from the current period map in do_start().
+uint32_t RGWSI_Zone::get_zone_short_id() const
+{
+ return zone_short_id;
+}
+
+// Name of the local zone, as stored in the zone params.
+const string& RGWSI_Zone::zone_name() const
+{
+ return get_zone_params().get_name();
+}
+
+// Look up a zone by id in the index built by do_start(). Returns a pointer
+// into that index, or nullptr for an unknown id.
+RGWZone* RGWSI_Zone::find_zone(const rgw_zone_id& id)
+{
+  const auto entry = zone_by_id.find(id);
+  return entry != zone_by_id.end() ? &entry->second : nullptr;
+}
+
+// REST connection to the given zone, or a null pointer if none was created
+// for it. The connection stays owned by this service.
+RGWRESTConn *RGWSI_Zone::get_zone_conn(const rgw_zone_id& zone_id) {
+  const auto entry = zone_conn_map.find(zone_id.id);
+  return entry != zone_conn_map.end() ? entry->second : nullptr;
+}
+
+// REST connection to the zone with the given name: resolves the name to a
+// zone id and delegates to get_zone_conn(). Null pointer for unknown names.
+RGWRESTConn *RGWSI_Zone::get_zone_conn_by_name(const string& name) {
+  const auto entry = zone_id_by_name.find(name);
+  if (entry != zone_id_by_name.end()) {
+    return get_zone_conn(entry->second);
+  }
+  return nullptr;
+}
+
+// Resolve a zone name to its id. On success stores the id in *id and returns
+// true; on an unknown name returns false and leaves *id untouched.
+bool RGWSI_Zone::find_zone_id_by_name(const string& name, rgw_zone_id *id) {
+  const auto entry = zone_id_by_name.find(name);
+  const bool known = (entry != zone_id_by_name.end());
+  if (known) {
+    *id = entry->second;
+  }
+  return known;
+}
+
+// Syncing is needed only when the zonegroup has a master zone, a connection
+// to the master exists, and a period is loaded.
+bool RGWSI_Zone::need_to_sync() const
+{
+  return !zonegroup->master_zone.empty() &&
+         rest_master_conn &&
+         !current_period->get_id().empty();
+}
+
+// Data changes are logged when the zone config enables log_data and
+// sync_module_exports_data() (declared outside this chunk) reports true.
+bool RGWSI_Zone::need_to_log_data() const
+{
+ return (zone_public_config->log_data && sync_module_exports_data());
+}
+
+// The local zone is the metadata master when its zonegroup is the master
+// zonegroup and the zone is that zonegroup's master zone.
+bool RGWSI_Zone::is_meta_master() const
+{
+  return zonegroup->is_master_zonegroup() &&
+         (zonegroup->master_zone == zone_public_config->id);
+}
+
+// Metadata is logged only on the metadata master, and only when there is more
+// than one zone in the zonegroup or the period reports multiple zonegroups
+// with zones.
+bool RGWSI_Zone::need_to_log_metadata() const
+{
+  if (!is_meta_master()) {
+    return false;
+  }
+  if (zonegroup->zones.size() > 1) {
+    return true;
+  }
+  return current_period->is_multi_zonegroups_with_zones();
+}
+
+// Resharding is allowed without a period (no realm), in a single-zone /
+// single-zonegroup setup, or when the zonegroup has the 'resharding' feature.
+bool RGWSI_Zone::can_reshard() const
+{
+  const bool no_realm = current_period->get_id().empty();
+  if (no_realm) {
+    return true;
+  }
+
+  const bool single_zone = zonegroup->zones.size() == 1 &&
+                           current_period->is_single_zonegroup();
+  if (single_zone) {
+    return true;
+  }
+
+  return zonegroup->supports(rgw::zone_features::resharding);
+}
+
+/**
+ * Check whether bucket metadata is synced from this zone.
+ * bucket: the bucket to check (not consulted by the current implementation)
+ * Returns false if the bucket is not synced: no current period, the
+ * zonegroup is not the master zonegroup, a single zonegroup with a single
+ * zone, or this zone is not the zonegroup's master zone.
+ */
+bool RGWSI_Zone::is_syncing_bucket_meta(const rgw_bucket& bucket)
+{
+  /* no current period, or zonegroup is not master zonegroup */
+  if (current_period->get_id().empty() || !zonegroup->is_master_zonegroup()) {
+    return false;
+  }
+
+  /* single zonegroup and a single zone */
+  const bool single_zone_setup =
+    current_period->is_single_zonegroup() && zonegroup->zones.size() == 1;
+  if (single_zone_setup) {
+    return false;
+  }
+
+  /* zone is not master */
+  if (zonegroup->master_zone != zone_public_config->id) {
+    return false;
+  }
+  return true;
+}
+
+
+/**
+ * Choose the placement rule for a new bucket and resolve it on the local zone.
+ *
+ * The rule is taken from, in order of precedence: the request, the user's
+ * default placement, the zonegroup's default placement. The storage class
+ * comes from the request if given, otherwise from the chosen rule.
+ *
+ * @param pselected_rule_name [out, optional] receives the selected rule
+ * @param rule_info           [out, optional] passed to select_bucket_location_by_rule()
+ * @return result of select_bucket_location_by_rule() on success;
+ *         the get_zonegroup() error if the zonegroup lookup fails;
+ *         -ERR_INVALID_LOCATION_CONSTRAINT if the rule is not a placement
+ *         target of the zonegroup;
+ *         -ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION if no rule is
+ *         available at all; -EPERM if the user may not use the rule.
+ */
+int RGWSI_Zone::select_new_bucket_location(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const string& zonegroup_id,
+                                           const rgw_placement_rule& request_rule,
+                                           rgw_placement_rule *pselected_rule_name, RGWZonePlacementInfo *rule_info,
+                                           optional_yield y)
+{
+  /* first check that zonegroup exists within current period. */
+  RGWZoneGroup zg;
+  const int ret = get_zonegroup(zonegroup_id, zg);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "could not find zonegroup " << zonegroup_id << " in current period" << dendl;
+    return ret;
+  }
+
+  /* pick the rule. Hierarchy: request rule > user default rule > zonegroup default rule */
+  const rgw_placement_rule *used_rule = nullptr;
+  const char *not_found_msg = nullptr;
+
+  if (!request_rule.name.empty()) {
+    used_rule = &request_rule;
+    not_found_msg = "could not find requested placement id ";
+  } else if (!user_info.default_placement.name.empty()) {
+    used_rule = &user_info.default_placement;
+    not_found_msg = "could not find user default placement id ";
+  } else if (!zg.default_placement.name.empty()) {
+    used_rule = &zg.default_placement;
+    not_found_msg = "could not find zonegroup default placement id ";
+  } else {
+    // zonegroup default rule is the last fallback, it should not be empty.
+    ldpp_dout(dpp, 0) << "misconfiguration, zonegroup default placement id should not be empty." << dendl;
+    return -ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION;
+  }
+
+  /* the chosen rule must be one of the zonegroup's placement targets */
+  const auto titer = zg.placement_targets.find(used_rule->name);
+  if (titer == zg.placement_targets.end()) {
+    ldpp_dout(dpp, 0) << not_found_msg << *used_rule
+                      << " within zonegroup " << dendl;
+    return -ERR_INVALID_LOCATION_CONSTRAINT;
+  }
+
+  /* now check tag for the rule, whether user is permitted to use rule */
+  const auto& target_rule = titer->second;
+  if (!target_rule.user_permitted(user_info.placement_tags)) {
+    ldpp_dout(dpp, 0) << "user not permitted to use placement rule " << titer->first << dendl;
+    return -EPERM;
+  }
+
+  /* an explicit storage class in the request wins over the rule's own */
+  const string& storage_class = request_rule.storage_class.empty()
+                                  ? used_rule->storage_class
+                                  : request_rule.storage_class;
+
+  rgw_placement_rule rule(titer->first, storage_class);
+
+  if (pselected_rule_name) {
+    *pselected_rule_name = rule;
+  }
+
+  return select_bucket_location_by_rule(dpp, rule, rule_info, y);
+}
+
+/**
+ * Resolve a placement rule to the local zone's placement info.
+ *
+ * An empty rule name falls back to legacy placement (only when rule_info is
+ * given). Otherwise the rule must exist in the zone's placement pools and
+ * provide the rule's storage class.
+ *
+ * @param rule_info [out, optional] receives the placement info
+ * @return 0 on success, -EINVAL if the rule or its storage class is not
+ *         configured on this zone, or the legacy placement result.
+ */
+int RGWSI_Zone::select_bucket_location_by_rule(const DoutPrefixProvider *dpp, const rgw_placement_rule& location_rule, RGWZonePlacementInfo *rule_info, optional_yield y)
+{
+  if (location_rule.name.empty()) {
+    /* we can only reach here if we're trying to set a bucket location from a bucket
+     * created on a different zone, using a legacy / default pool configuration
+     */
+    return rule_info ? select_legacy_bucket_placement(dpp, rule_info, y) : 0;
+  }
+
+  /*
+   * The rule has to be configured on the local zone, because that's where
+   * this bucket object is going to reside.
+   */
+  auto piter = zone_params->placement_pools.find(location_rule.name);
+  if (piter == zone_params->placement_pools.end()) {
+    /* couldn't find, means we cannot really place data for this bucket in this zone */
+    ldpp_dout(dpp, 0) << "ERROR: This zone does not contain placement rule "
+                      << location_rule << " present in the zonegroup!" << dendl;
+    return -EINVAL;
+  }
+
+  RGWZonePlacementInfo& placement_info = piter->second;
+
+  const auto& storage_class = location_rule.get_storage_class();
+  if (!placement_info.storage_class_exists(storage_class)) {
+    ldpp_dout(dpp, 5) << "requested storage class does not exist: " << storage_class << dendl;
+    return -EINVAL;
+  }
+
+  if (rule_info) {
+    *rule_info = placement_info;
+  }
+
+  return 0;
+}
+
+// Select placement for a bucket. Zones with placement pools configured go
+// through select_new_bucket_location(); zones without any use legacy
+// placement, in which case *pselected_rule (if given) is cleared and
+// rule_info (if given) is filled by select_legacy_bucket_placement().
+int RGWSI_Zone::select_bucket_placement(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const string& zonegroup_id,
+                                        const rgw_placement_rule& placement_rule,
+                                        rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info,
+                                        optional_yield y)
+{
+  const bool legacy = zone_params->placement_pools.empty();
+  if (!legacy) {
+    return select_new_bucket_location(dpp, user_info, zonegroup_id, placement_rule,
+                                      pselected_rule, rule_info, y);
+  }
+
+  if (pselected_rule) {
+    pselected_rule->clear();
+  }
+
+  return rule_info ? select_legacy_bucket_placement(dpp, rule_info, y) : 0;
+}
+
+// Legacy placement: pick one pool from the avail_pools object under the
+// zone's domain_root and fill *rule_info so that the STANDARD storage class,
+// the data-extra pool and the index pool all use that pool.
+//
+// The pool set is read from the object's data (an encoded map); if that
+// yields nothing, from its omap. If neither yields an entry, a
+// "default.<suffix>" pool is created and recorded in the omap. With more than
+// one candidate a pool is chosen at random.
+//
+// rule_info is dereferenced unconditionally; the callers in this file only
+// pass a non-null pointer. Returns 0 on success, or the error from creating
+// or recording the default pool.
+int RGWSI_Zone::select_legacy_bucket_placement(const DoutPrefixProvider *dpp, RGWZonePlacementInfo *rule_info,
+ optional_yield y)
+{
+ bufferlist map_bl;
+ map<string, bufferlist> m;
+ string pool_name;
+ bool write_map = false;
+
+ rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+
+ auto sysobj = sysobj_svc->get_obj(obj);
+ int ret = sysobj.rop().read(dpp, &map_bl, y);
+ if (ret < 0) {
+ // no readable object data: go straight to the omap
+ goto read_omap;
+ }
+
+ try {
+ auto iter = map_bl.cbegin();
+ decode(m, iter);
+ } catch (buffer::error& err) {
+ // a decode failure is only logged; the m.empty() check below then decides
+ // whether the omap is consulted
+ ldpp_dout(dpp, 0) << "ERROR: couldn't decode avail_pools" << dendl;
+ }
+
+read_omap:
+ if (m.empty()) {
+ // ret now reflects the omap read, replacing the earlier read() result
+ ret = sysobj.omap().get_all(dpp, &m, y);
+
+ // the object data was missing or empty, so rewrite it from m further down
+ write_map = true;
+ }
+
+ if (ret < 0 || m.empty()) {
+ // nothing usable found: create the default pool and record it in the omap
+ vector<rgw_pool> pools;
+ string s = string("default.") + default_storage_pool_suffix;
+ pools.push_back(rgw_pool(s));
+ vector<int> retcodes;
+ bufferlist bl;
+ ret = rados_svc->pool().create(dpp, pools, &retcodes);
+ if (ret < 0)
+ return ret;
+ ret = sysobj.omap().set(dpp, s, bl, y);
+ if (ret < 0)
+ return ret;
+ m[s] = bl;
+ }
+
+ if (write_map) {
+ bufferlist new_bl;
+ encode(m, new_bl);
+ ret = sysobj.wop().write(dpp, new_bl, y);
+ if (ret < 0) {
+ // best effort: failing to save the map does not fail the selection
+ ldpp_dout(dpp, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl;
+ }
+ }
+
+ // m has at least one entry here: either it was read, or the default pool was added
+ auto miter = m.begin();
+ if (m.size() > 1) {
+ // choose a pool at random
+ auto r = ceph::util::generate_random_number<size_t>(0, m.size() - 1);
+ std::advance(miter, r);
+ }
+ pool_name = miter->first;
+
+ rgw_pool pool = pool_name;
+
+ rule_info->storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr);
+ rule_info->data_extra_pool = pool_name;
+ rule_info->index_pool = pool_name;
+ rule_info->index_type = rgw::BucketIndexType::Normal;
+
+ return 0;
+}
+
+// Rewrite the data of the avail_pools object from its omap: read all omap
+// entries, encode them as a map and write that as the object's content.
+// Returns the omap read error, or the result of the write (a write failure
+// is also logged).
+int RGWSI_Zone::update_placement_map(const DoutPrefixProvider *dpp, optional_yield y)
+{
+  map<string, bufferlist> m;
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+
+  auto sysobj = sysobj_svc->get_obj(obj);
+  int ret = sysobj.omap().get_all(dpp, &m, y);
+  if (ret < 0)
+    return ret;
+
+  bufferlist new_bl;
+  encode(m, new_bl);
+  ret = sysobj.wop().write(dpp, new_bl, y);
+  if (ret < 0) {
+    ldpp_dout(dpp, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl;
+  }
+
+  return ret;
+}
+
+// Register an existing pool as available for legacy bucket placement: the
+// pool must pass lookup(), then it is added to the avail_pools omap and the
+// object data is refreshed. Returns the lookup error, or the result of the
+// omap update; the refresh is best effort.
+int RGWSI_Zone::add_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& new_pool, optional_yield y)
+{
+  const int lookup_ret = rados_svc->pool(new_pool).lookup();
+  if (lookup_ret < 0) { // DNE, or something
+    return lookup_ret;
+  }
+
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+  auto sysobj = sysobj_svc->get_obj(obj);
+
+  bufferlist empty_bl;
+  const int set_ret = sysobj.omap().set(dpp, new_pool.to_str(), empty_bl, y);
+
+  // don't care about return value
+  update_placement_map(dpp, y);
+
+  return set_ret;
+}
+
+// Remove a pool from the avail_pools omap and refresh the object data.
+// Returns the result of the omap removal; the refresh is best effort.
+int RGWSI_Zone::remove_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& old_pool, optional_yield y)
+{
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+  auto sysobj = sysobj_svc->get_obj(obj);
+
+  const int del_ret = sysobj.omap().del(dpp, old_pool.to_str(), y);
+
+  // don't care about return value
+  update_placement_map(dpp, y);
+
+  return del_ret;
+}
+
+// Replace `names` with the pools recorded in the avail_pools omap.
+// Returns the number of pools on success, or the negative omap read error
+// (in which case `names` is left untouched).
+int RGWSI_Zone::list_placement_set(const DoutPrefixProvider *dpp, set<rgw_pool>& names, optional_yield y)
+{
+  map<string, bufferlist> m;
+
+  rgw_raw_obj obj(zone_params->domain_root, avail_pools);
+  auto sysobj = sysobj_svc->get_obj(obj);
+  int ret = sysobj.omap().get_all(dpp, &m, y);
+  if (ret < 0)
+    return ret;
+
+  names.clear();
+  for (const auto& entry : m) {
+    names.insert(rgw_pool(entry.first));
+  }
+
+  return names.size();
+}
+
+// Look up a URL for the zone configured as this zone's redirect_zone.
+// On success stores it in *endpoint and returns true. Returns false when no
+// redirect zone is configured, when there is no connection for it, or when
+// the connection cannot provide a URL.
+bool RGWSI_Zone::get_redirect_zone_endpoint(string *endpoint)
+{
+  if (zone_public_config->redirect_zone.empty()) {
+    return false;
+  }
+
+  auto iter = zone_conn_map.find(zone_public_config->redirect_zone);
+  if (iter == zone_conn_map.end()) {
+    ldout(cct, 0) << "ERROR: cannot find entry for redirect zone: " << zone_public_config->redirect_zone << dendl;
+    return false;
+  }
+
+  RGWRESTConn *conn = iter->second;
+
+  int ret = conn->get_url(*endpoint);
+  if (ret < 0) {
+    // name the call that actually failed
+    ldout(cct, 0) << "ERROR: redirect zone, conn->get_url() returned ret=" << ret << dendl;
+    return false;
+  }
+
+  return true;
+}
+
diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h
new file mode 100644
index 000000000..7b0a277c4
--- /dev/null
+++ b/src/rgw/services/svc_zone.h
@@ -0,0 +1,165 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+
+class RGWSI_RADOS;
+class RGWSI_SysObj;
+class RGWSI_SyncModules;
+class RGWSI_Bucket_Sync;
+
+class RGWRealm;
+class RGWZoneGroup;
+class RGWZone;
+class RGWZoneParams;
+class RGWPeriod;
+class RGWZonePlacementInfo;
+
+class RGWBucketSyncPolicyHandler;
+
+class RGWRESTConn;
+
+struct rgw_sync_policy_info;
+
+/* Service instance that keeps pointers to the realm, zonegroup, zone, zone
+ * params and current period, the REST connection maps towards other zones
+ * and zonegroups, and the sync policy handlers. It exposes accessors for
+ * these plus bucket placement helpers and listing calls. */
+class RGWSI_Zone : public RGWServiceInstance
+{
+ // RGWServices_Def gets access to the private members below, init() included.
+ friend struct RGWServices_Def;
+
+ // Sibling services; assigned through init().
+ RGWSI_SysObj *sysobj_svc{nullptr};
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_SyncModules *sync_modules_svc{nullptr};
+ RGWSI_Bucket_Sync *bucket_sync_svc{nullptr};
+
+ // NOTE(review): raw pointers; where they are allocated and released is not
+ // visible in this header -- confirm against svc_zone.cc.
+ RGWRealm *realm{nullptr};
+ RGWZoneGroup *zonegroup{nullptr};
+ RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */
+ RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */
+ RGWPeriod *current_period{nullptr};
+ rgw_zone_id cur_zone_id; // returned by zone_id()
+ uint32_t zone_short_id{0};
+ bool writeable_zone{false}; // returned by sync_module_supports_writes()
+ bool exports_data{false}; // returned by sync_module_exports_data()
+
+ std::shared_ptr<RGWBucketSyncPolicyHandler> sync_policy_handler;
+ std::map<rgw_zone_id, std::shared_ptr<RGWBucketSyncPolicyHandler> > sync_policy_handlers;
+
+ RGWRESTConn *rest_master_conn{nullptr}; // returned by get_master_conn()
+ // Connections keyed by zone id; get_redirect_zone_endpoint() looks up
+ // zone_public_config->redirect_zone in this map.
+ std::map<rgw_zone_id, RGWRESTConn *> zone_conn_map;
+ std::vector<const RGWZone*> data_sync_source_zones;
+ std::map<rgw_zone_id, RGWRESTConn *> zone_data_notify_to_map;
+ std::map<std::string, RGWRESTConn *> zonegroup_conn_map;
+
+ std::map<std::string, rgw_zone_id> zone_id_by_name;
+ std::map<rgw_zone_id, RGWZone> zone_by_id;
+
+ std::unique_ptr<rgw_sync_policy_info> sync_policy;
+
+ // Receives the sibling service pointers. Private: reachable only from this
+ // class and the friend RGWServices_Def.
+ void init(RGWSI_SysObj *_sysobj_svc,
+ RGWSI_RADOS *_rados_svc,
+ RGWSI_SyncModules *_sync_modules_svc,
+ RGWSI_Bucket_Sync *_bucket_sync_svc);
+ // Overrides of the RGWServiceInstance virtuals.
+ int do_start(optional_yield y, const DoutPrefixProvider *dpp) override;
+ void shutdown() override;
+
+ int init_zg_from_period(const DoutPrefixProvider *dpp, optional_yield y);
+ int init_zg_from_local(const DoutPrefixProvider *dpp, optional_yield y);
+
+ // Called by add_bucket_placement() and remove_bucket_placement() after they
+ // touch the omap; both callers ignore its return value.
+ int update_placement_map(const DoutPrefixProvider *dpp, optional_yield y);
+
+ int create_default_zg(const DoutPrefixProvider *dpp, optional_yield y);
+ int init_default_zone(const DoutPrefixProvider *dpp, optional_yield y);
+
+ int search_realm_with_zone(const DoutPrefixProvider *dpp,
+ const rgw_zone_id& zid,
+ RGWRealm *prealm,
+ RGWPeriod *pperiod,
+ RGWZoneGroup *pzonegroup,
+ bool *pfound,
+ optional_yield y);
+public:
+ RGWSI_Zone(CephContext *cct);
+ ~RGWSI_Zone();
+
+ const RGWZoneParams& get_zone_params() const;
+ const RGWPeriod& get_current_period() const;
+ const RGWRealm& get_realm() const;
+ const RGWZoneGroup& get_zonegroup() const;
+ int get_zonegroup(const std::string& id, RGWZoneGroup& zonegroup) const;
+ const RGWZone& get_zone() const;
+
+ std::shared_ptr<RGWBucketSyncPolicyHandler> get_sync_policy_handler(std::optional<rgw_zone_id> zone = std::nullopt) const;
+
+ const std::string& zone_name() const;
+ const rgw_zone_id& zone_id() const {
+ return cur_zone_id;
+ }
+ uint32_t get_zone_short_id() const;
+
+ const std::string& get_current_period_id() const;
+ bool has_zonegroup_api(const std::string& api) const;
+
+ bool zone_is_writeable();
+ bool zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const;
+ bool zone_syncs_from(const RGWZone& source_zone) const;
+ // Stores the redirect zone's URL (from its connection's get_url()) in
+ // *endpoint. Returns false when no redirect zone is configured, when it has
+ // no entry in zone_conn_map, or when get_url() fails; true otherwise.
+ bool get_redirect_zone_endpoint(std::string *endpoint);
+ bool sync_module_supports_writes() const { return writeable_zone; }
+ bool sync_module_exports_data() const { return exports_data; }
+
+ RGWRESTConn *get_master_conn() {
+ return rest_master_conn;
+ }
+
+ // The four accessors below return non-const references to the internal
+ // containers, so callers can modify them in place.
+ std::map<std::string, RGWRESTConn *>& get_zonegroup_conn_map() {
+ return zonegroup_conn_map;
+ }
+
+ std::map<rgw_zone_id, RGWRESTConn *>& get_zone_conn_map() {
+ return zone_conn_map;
+ }
+
+ std::vector<const RGWZone*>& get_data_sync_source_zones() {
+ return data_sync_source_zones;
+ }
+
+ std::map<rgw_zone_id, RGWRESTConn *>& get_zone_data_notify_to_map() {
+ return zone_data_notify_to_map;
+ }
+
+ RGWZone* find_zone(const rgw_zone_id& id);
+
+ RGWRESTConn *get_zone_conn(const rgw_zone_id& zone_id);
+ RGWRESTConn *get_zone_conn_by_name(const std::string& name);
+ bool find_zone_id_by_name(const std::string& name, rgw_zone_id *id);
+
+ int select_bucket_placement(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const std::string& zonegroup_id,
+ const rgw_placement_rule& rule,
+ rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info, optional_yield y);
+ int select_legacy_bucket_placement(const DoutPrefixProvider *dpp, RGWZonePlacementInfo *rule_info, optional_yield y);
+ int select_new_bucket_location(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const std::string& zonegroup_id,
+ const rgw_placement_rule& rule,
+ rgw_placement_rule *pselected_rule_name, RGWZonePlacementInfo *rule_info,
+ optional_yield y);
+ int select_bucket_location_by_rule(const DoutPrefixProvider *dpp, const rgw_placement_rule& location_rule, RGWZonePlacementInfo *rule_info, optional_yield y);
+
+ // The pool set is kept as omap keys on the avail_pools object in
+ // zone_params->domain_root:
+ // - add: sets the key new_pool.to_str() with an empty value
+ // - remove: deletes the key old_pool.to_str() and returns the delete result
+ // - list: fills `names` from the keys; returns their count or a negative error
+ int add_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& new_pool, optional_yield y);
+ int remove_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& old_pool, optional_yield y);
+ int list_placement_set(const DoutPrefixProvider *dpp, std::set<rgw_pool>& names, optional_yield y);
+
+ bool is_meta_master() const;
+
+ bool need_to_sync() const;
+ bool need_to_log_data() const;
+ bool need_to_log_metadata() const;
+ bool can_reshard() const;
+ bool is_syncing_bucket_meta(const rgw_bucket& bucket);
+
+ int list_zonegroups(const DoutPrefixProvider *dpp, std::list<std::string>& zonegroups);
+ int list_regions(const DoutPrefixProvider *dpp, std::list<std::string>& regions);
+ int list_zones(const DoutPrefixProvider *dpp, std::list<std::string>& zones);
+ int list_realms(const DoutPrefixProvider *dpp, std::list<std::string>& realms);
+ int list_periods(const DoutPrefixProvider *dpp, std::list<std::string>& periods);
+ int list_periods(const DoutPrefixProvider *dpp, const std::string& current_period, std::list<std::string>& periods, optional_yield y);
+};
diff --git a/src/rgw/services/svc_zone_utils.cc b/src/rgw/services/svc_zone_utils.cc
new file mode 100644
index 000000000..712bb97c9
--- /dev/null
+++ b/src/rgw/services/svc_zone_utils.cc
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include "svc_zone_utils.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw_zone.h"
+
+using namespace std;
+
+/* Start hook: computes the cached transaction-id suffix and reports
+ * success. Neither the yield context nor dpp is used. */
+int RGWSI_ZoneUtils::do_start(optional_yield, const DoutPrefixProvider *dpp)
+{
+  // must run before unique_trans_id() is used, since that appends the suffix
+  init_unique_trans_id_deps();
+  return 0;
+}
+
+/* Returns "<instance id in hex>-<zone name>-<zonegroup name>", where the
+ * instance id comes from rados_svc->instance_id().
+ *
+ * Only the fixed-width hex part goes through snprintf; the names are
+ * appended as strings. This avoids the runtime-sized stack array
+ * (a non-standard VLA) that was previously sized from the name lengths. */
+string RGWSI_ZoneUtils::gen_host_id() {
+  const string& zone_name = zone_svc->get_zone().name;
+  const string& zonegroup_name = zone_svc->get_zonegroup().get_name();
+
+  /* uint64_t needs at most 16 hex digits, plus a trailing null */
+  char instance_hex[16 + 1];
+  snprintf(instance_hex, sizeof(instance_hex), "%llx",
+           static_cast<unsigned long long>(rados_svc->instance_id()));
+
+  string host_id(instance_hex);
+  /* two '-' separators plus both names */
+  host_id.reserve(host_id.size() + zone_name.size() + zonegroup_name.size() + 2);
+  host_id += '-';
+  host_id += zone_name;
+  host_id += '-';
+  host_id += zonegroup_name;
+  return host_id;
+}
+
+/* Returns "<zone params id>.<instance id>.<unique_num>", with both numbers
+ * printed in decimal. The instance id comes from rados_svc->instance_id(). */
+string RGWSI_ZoneUtils::unique_id(uint64_t unique_num)
+{
+  /* Each part is '.' plus up to 20 decimal digits of a 64-bit value, then a
+   * trailing null: 2 * 21 + 1 = 43 bytes. A 32-byte buffer would let
+   * snprintf silently truncate large values and drop part of the id. */
+  char buf[2 * (1 + 20) + 1];
+  snprintf(buf, sizeof(buf), ".%llu.%llu",
+           static_cast<unsigned long long>(rados_svc->instance_id()),
+           static_cast<unsigned long long>(unique_num));
+  string s = zone_svc->get_zone_params().get_id() + buf;
+  return s;
+}
+
+/* Computes trans_id_suffix: the url-encoded form of
+ * "-<instance id in hex>-<zone name>". */
+void RGWSI_ZoneUtils::init_unique_trans_id_deps() {
+  /* '-' + up to 16 hex digits of a uint64_t + '-' + trailing null */
+  char id_part[1 + 16 + 1 + 1];
+  snprintf(id_part, sizeof(id_part), "-%llx-", (unsigned long long)rados_svc->instance_id());
+
+  string raw_suffix(id_part);
+  raw_suffix += zone_svc->get_zone().name;
+  url_encode(raw_suffix, trans_id_suffix);
+}
+
+/* In order to preserve compatibility with the Swift API, a transaction ID
+ * should contain at least 32 characters satisfying the following spec:
+ * - the first 21 chars must be in the range [0-9a-f]. Swift uses this
+ * space for storing a fragment of the UUID obtained through a call to
+ * the uuid4() function of Python's uuid module;
+ * - char no. 22 must be a hyphen;
+ * - at least the next 10 characters constitute a hex-formatted timestamp,
+ * padded with zeroes if necessary. All bytes must be in the [0-9a-f]
+ * range;
+ * - the last, optional part of the transaction ID is any url-encoded string
+ * without restriction on length. */
+string RGWSI_ZoneUtils::unique_trans_id(const uint64_t unique_num) {
+  /* "tx" (2) + unique_num as 21 zero-padded hex digits + '-' (1) +
+   * timestamp as up to 16 hex digits + trailing null (1) = 41 */
+  char prefix[2 + 21 + 1 + 16 + 1];
+  const time_t now = time(nullptr);
+
+  snprintf(prefix, sizeof(prefix), "tx%021llx-%010llx",
+           static_cast<unsigned long long>(unique_num),
+           static_cast<unsigned long long>(now));
+
+  /* the suffix is the cached string built by init_unique_trans_id_deps() */
+  string trans_id(prefix);
+  trans_id += trans_id_suffix;
+  return trans_id;
+}
+
diff --git a/src/rgw/services/svc_zone_utils.h b/src/rgw/services/svc_zone_utils.h
new file mode 100644
index 000000000..43e3fee8d
--- /dev/null
+++ b/src/rgw/services/svc_zone_utils.h
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#pragma once
+
+#include "rgw_service.h"
+
+
+class RGWSI_RADOS;
+class RGWSI_Zone;
+
+/* Service instance that builds identifier strings (host id, unique id,
+ * transaction id) from rados_svc->instance_id() and the zone information
+ * provided by zone_svc. */
+class RGWSI_ZoneUtils : public RGWServiceInstance
+{
+ // RGWServices_Def gets access to the private members below, init() included.
+ friend struct RGWServices_Def;
+
+ // Sibling services; assigned through init().
+ RGWSI_RADOS *rados_svc{nullptr};
+ RGWSI_Zone *zone_svc{nullptr};
+
+ // url-encoded "-<instance id in hex>-<zone name>"; computed by
+ // init_unique_trans_id_deps() and appended by unique_trans_id().
+ std::string trans_id_suffix;
+
+ // Stores the two service pointers; does nothing else.
+ void init(RGWSI_RADOS *_rados_svc,
+ RGWSI_Zone *_zone_svc) {
+ rados_svc = _rados_svc;
+ zone_svc = _zone_svc;
+ }
+
+ // Calls init_unique_trans_id_deps() and returns 0.
+ int do_start(optional_yield, const DoutPrefixProvider *dpp) override;
+
+ void init_unique_trans_id_deps();
+
+public:
+ RGWSI_ZoneUtils(CephContext *cct): RGWServiceInstance(cct) {}
+
+ // "<instance id in hex>-<zone name>-<zonegroup name>"
+ std::string gen_host_id();
+ // "<zone params id>.<instance id>.<unique_num>", numbers in decimal
+ std::string unique_id(uint64_t unique_num);
+
+ // "tx" + unique_num as 21 zero-padded hex digits + "-" + time() as hex
+ // zero-padded to at least 10 digits + trans_id_suffix
+ std::string unique_trans_id(const uint64_t unique_num);
+};