diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/rgw/services | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rgw/services')
61 files changed, 12699 insertions, 0 deletions
diff --git a/src/rgw/services/svc_bi.h b/src/rgw/services/svc_bi.h new file mode 100644 index 000000000..abb68e394 --- /dev/null +++ b/src/rgw/services/svc_bi.h @@ -0,0 +1,44 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +class RGWBucketInfo; +struct RGWBucketEnt; + + +class RGWSI_BucketIndex : public RGWServiceInstance +{ +public: + RGWSI_BucketIndex(CephContext *cct) : RGWServiceInstance(cct) {} + virtual ~RGWSI_BucketIndex() {} + + virtual int init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info) = 0; + virtual int clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info) = 0; + + virtual int read_stats(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWBucketEnt *stats, + optional_yield y) = 0; + + virtual int handle_overwrite(const DoutPrefixProvider *dpp, + const RGWBucketInfo& info, + const RGWBucketInfo& orig_info) = 0; +}; + diff --git a/src/rgw/services/svc_bi_rados.cc b/src/rgw/services/svc_bi_rados.cc new file mode 100644 index 000000000..dd9bcc6ad --- /dev/null +++ b/src/rgw/services/svc_bi_rados.cc @@ -0,0 +1,463 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_bi_rados.h" +#include "svc_bilog_rados.h" +#include "svc_zone.h" + +#include "rgw/rgw_bucket.h" +#include "rgw/rgw_zone.h" +#include "rgw/rgw_datalog.h" + +#include "cls/rgw/cls_rgw_client.h" + +#define dout_subsys ceph_subsys_rgw + +static string dir_oid_prefix = ".dir."; + +RGWSI_BucketIndex_RADOS::RGWSI_BucketIndex_RADOS(CephContext *cct) : RGWSI_BucketIndex(cct) +{ +} + +void RGWSI_BucketIndex_RADOS::init(RGWSI_Zone *zone_svc, + RGWSI_RADOS *rados_svc, + RGWSI_BILog_RADOS *bilog_svc, + RGWDataChangesLog *datalog_rados_svc) +{ + svc.zone = zone_svc; + svc.rados = rados_svc; + svc.bilog = bilog_svc; + svc.datalog_rados = datalog_rados_svc; +} + +int RGWSI_BucketIndex_RADOS::open_pool(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + RGWSI_RADOS::Pool *index_pool, + bool mostly_omap) +{ + *index_pool = svc.rados->pool(pool); + return index_pool->open(dpp, RGWSI_RADOS::OpenParams() + .set_mostly_omap(mostly_omap)); +} + +int RGWSI_BucketIndex_RADOS::open_bucket_index_pool(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWSI_RADOS::Pool *index_pool) +{ + const rgw_pool& explicit_pool = bucket_info.bucket.explicit_placement.index_pool; + + if (!explicit_pool.empty()) { + return open_pool(dpp, explicit_pool, index_pool, false); + } + + auto& zonegroup = svc.zone->get_zonegroup(); + auto& zone_params = svc.zone->get_zone_params(); + + const rgw_placement_rule *rule = &bucket_info.placement_rule; + if (rule->empty()) { + rule = &zonegroup.default_placement; + } + auto iter = zone_params.placement_pools.find(rule->name); + if (iter == zone_params.placement_pools.end()) { + ldpp_dout(dpp, 0) << "could not find placement rule " << *rule << " within zonegroup " << dendl; + return -EINVAL; + } + + int r = open_pool(dpp, iter->second.index_pool, index_pool, true); + if (r < 0) + return r; + + return 0; +} + +int RGWSI_BucketIndex_RADOS::open_bucket_index_base(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWSI_RADOS::Pool *index_pool, + string *bucket_oid_base) +{ + const rgw_bucket& bucket = bucket_info.bucket; + int r = open_bucket_index_pool(dpp, bucket_info, index_pool); + if (r < 0) + return r; + + if (bucket.bucket_id.empty()) { + ldpp_dout(dpp, 0) << "ERROR: empty bucket_id for bucket operation" << dendl; + return -EIO; + } + + *bucket_oid_base = dir_oid_prefix; + bucket_oid_base->append(bucket.bucket_id); + + return 0; + +} + +int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWSI_RADOS::Pool *index_pool, + string *bucket_oid) +{ + const rgw_bucket& bucket = bucket_info.bucket; + int r = open_bucket_index_pool(dpp, bucket_info, index_pool); + if (r < 0) { + ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned " + << r << dendl; + return r; + } + + if (bucket.bucket_id.empty()) { + ldpp_dout(dpp, 0) << "ERROR: empty bucket id for bucket operation" << dendl; + return -EIO; + } + + *bucket_oid = dir_oid_prefix; + bucket_oid->append(bucket.bucket_id); + + return 0; +} + +static void get_bucket_index_objects(const string& bucket_oid_base, + uint32_t num_shards, + map<int, string> *_bucket_objects, + int shard_id = -1) +{ + auto& bucket_objects = *_bucket_objects; + if (!num_shards) { + bucket_objects[0] = bucket_oid_base; + } else { + char buf[bucket_oid_base.size() + 32]; + if (shard_id < 0) { + for (uint32_t i = 0; i < num_shards; ++i) { + snprintf(buf, sizeof(buf), "%s.%d", bucket_oid_base.c_str(), i); + bucket_objects[i] = buf; + } + } else { + if ((uint32_t)shard_id > num_shards) { + return; + } + snprintf(buf, sizeof(buf), "%s.%d", bucket_oid_base.c_str(), shard_id); + bucket_objects[shard_id] = buf; + } + } +} + +static void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, + int shard_id, + map<int, string> *result) +{ + const rgw_bucket& bucket = bucket_info.bucket; + string plain_id = bucket.name + ":" + bucket.bucket_id; + + if (!bucket_info.layout.current_index.layout.normal.num_shards) { + (*result)[0] = plain_id; + } else { + char buf[16]; + if (shard_id < 0) { + for (uint32_t i = 0; i < bucket_info.layout.current_index.layout.normal.num_shards; ++i) { + snprintf(buf, sizeof(buf), ":%d", i); + (*result)[i] = plain_id + buf; + } + } else { + if ((uint32_t)shard_id > bucket_info.layout.current_index.layout.normal.num_shards) { + return; + } + snprintf(buf, sizeof(buf), ":%d", shard_id); + (*result)[shard_id] = plain_id + buf; + } + } +} + +int RGWSI_BucketIndex_RADOS::open_bucket_index(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + std::optional<int> _shard_id, + RGWSI_RADOS::Pool *index_pool, + map<int, string> *bucket_objs, + map<int, string> *bucket_instance_ids) +{ + int shard_id = _shard_id.value_or(-1); + string bucket_oid_base; + int ret = open_bucket_index_base(dpp, bucket_info, index_pool, &bucket_oid_base); + if (ret < 0) { + ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned " + << ret << dendl; + return ret; + } + + get_bucket_index_objects(bucket_oid_base, bucket_info.layout.current_index.layout.normal.num_shards, bucket_objs, shard_id); + if (bucket_instance_ids) { + get_bucket_instance_ids(bucket_info, shard_id, bucket_instance_ids); + } + return 0; +} + +void RGWSI_BucketIndex_RADOS::get_bucket_index_object(const string& bucket_oid_base, + uint32_t num_shards, + int shard_id, + uint64_t gen_id, + string *bucket_obj) +{ + if (!num_shards) { + // By default with no sharding, we use the bucket oid as itself + (*bucket_obj) = bucket_oid_base; + } else { + char buf[bucket_oid_base.size() + 64]; + if (gen_id != 0) { + snprintf(buf, sizeof(buf), "%s.%" PRIu64 ".%d", bucket_oid_base.c_str(), gen_id, shard_id); + (*bucket_obj) = buf; + } else { + // for backward compatibility, gen_id(0) will not be added in the object name + snprintf(buf, sizeof(buf), "%s.%d", bucket_oid_base.c_str(), shard_id); + (*bucket_obj) = buf; + } + } +} + +int RGWSI_BucketIndex_RADOS::get_bucket_index_object(const string& bucket_oid_base, const string& obj_key, + uint32_t num_shards, rgw::BucketHashType hash_type, + string *bucket_obj, int *shard_id) +{ + int r = 0; + switch (hash_type) { + case rgw::BucketHashType::Mod: + if (!num_shards) { + // By default with no sharding, we use the bucket oid as itself + (*bucket_obj) = bucket_oid_base; + if (shard_id) { + *shard_id = -1; + } + } else { + uint32_t sid = bucket_shard_index(obj_key, num_shards); + char buf[bucket_oid_base.size() + 32]; + snprintf(buf, sizeof(buf), "%s.%d", bucket_oid_base.c_str(), sid); + (*bucket_obj) = buf; + if (shard_id) { + *shard_id = (int)sid; + } + } + break; + default: + r = -ENOTSUP; + } + return r; +} + +int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + const string& obj_key, + RGWSI_RADOS::Obj *bucket_obj, + int *shard_id) +{ + string bucket_oid_base; + + RGWSI_RADOS::Pool pool; + + int ret = open_bucket_index_base(dpp, bucket_info, &pool, &bucket_oid_base); + if (ret < 0) { + ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned " + << ret << dendl; + return ret; + } + + string oid; + + ret = get_bucket_index_object(bucket_oid_base, obj_key, bucket_info.layout.current_index.layout.normal.num_shards, + bucket_info.layout.current_index.layout.normal.hash_type, &oid, shard_id); + if (ret < 0) { + ldpp_dout(dpp, 10) << "get_bucket_index_object() returned ret=" << ret << dendl; + return ret; + } + + *bucket_obj = svc.rados->obj(pool, oid); + + return 0; +} + +int RGWSI_BucketIndex_RADOS::open_bucket_index_shard(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + const rgw::bucket_index_layout_generation& idx_layout, + RGWSI_RADOS::Obj *bucket_obj) +{ + RGWSI_RADOS::Pool index_pool; + string bucket_oid_base; + int ret = open_bucket_index_base(dpp, bucket_info, &index_pool, &bucket_oid_base); + if (ret < 0) { + ldpp_dout(dpp, 20) << __func__ << ": open_bucket_index_pool() returned " + << ret << dendl; + return ret; + } + + string oid; + + get_bucket_index_object(bucket_oid_base, idx_layout.layout.normal.num_shards, + shard_id, idx_layout.gen, &oid); + + *bucket_obj = svc.rados->obj(index_pool, oid); + + return 0; +} + +int RGWSI_BucketIndex_RADOS::cls_bucket_head(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + vector<rgw_bucket_dir_header> *headers, + map<int, string> *bucket_instance_ids, + optional_yield y) +{ + RGWSI_RADOS::Pool index_pool; + map<int, string> oids; + int r = open_bucket_index(dpp, bucket_info, shard_id, &index_pool, &oids, bucket_instance_ids); + if (r < 0) + return r; + + map<int, struct rgw_cls_list_ret> list_results; + for (auto& iter : oids) { + list_results.emplace(iter.first, rgw_cls_list_ret()); + } + + r = CLSRGWIssueGetDirHeader(index_pool.ioctx(), oids, list_results, cct->_conf->rgw_bucket_index_max_aio)(); + if (r < 0) + return r; + + map<int, struct rgw_cls_list_ret>::iterator iter = list_results.begin(); + for(; iter != list_results.end(); ++iter) { + headers->push_back(std::move(iter->second.dir.header)); + } + return 0; +} + + +int RGWSI_BucketIndex_RADOS::init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info) +{ + RGWSI_RADOS::Pool index_pool; + + string dir_oid = dir_oid_prefix; + int r = open_bucket_index_pool(dpp, bucket_info, &index_pool); + if (r < 0) { + return r; + } + + dir_oid.append(bucket_info.bucket.bucket_id); + + map<int, string> bucket_objs; + get_bucket_index_objects(dir_oid, bucket_info.layout.current_index.layout.normal.num_shards, &bucket_objs); + + return CLSRGWIssueBucketIndexInit(index_pool.ioctx(), + bucket_objs, + cct->_conf->rgw_bucket_index_max_aio)(); +} + +int RGWSI_BucketIndex_RADOS::clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info) +{ + RGWSI_RADOS::Pool index_pool; + + std::string dir_oid = dir_oid_prefix; + int r = open_bucket_index_pool(dpp, bucket_info, &index_pool); + if (r < 0) { + return r; + } + + dir_oid.append(bucket_info.bucket.bucket_id); + + std::map<int, std::string> bucket_objs; + get_bucket_index_objects(dir_oid, bucket_info.layout.current_index.layout.normal.num_shards, &bucket_objs); + + return CLSRGWIssueBucketIndexClean(index_pool.ioctx(), + bucket_objs, + cct->_conf->rgw_bucket_index_max_aio)(); +} + +int RGWSI_BucketIndex_RADOS::read_stats(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWBucketEnt *result, + optional_yield y) +{ + vector<rgw_bucket_dir_header> headers; + + result->bucket = bucket_info.bucket; + int r = cls_bucket_head(dpp, bucket_info, RGW_NO_SHARD, &headers, nullptr, y); + if (r < 0) { + return r; + } + + auto hiter = headers.begin(); + for (; hiter != headers.end(); ++hiter) { + RGWObjCategory category = RGWObjCategory::Main; + auto iter = (hiter->stats).find(category); + if (iter != hiter->stats.end()) { + struct rgw_bucket_category_stats& stats = iter->second; + result->count += stats.num_entries; + result->size += stats.total_size; + result->size_rounded += stats.total_size_rounded; + } + } + + result->placement_rule = std::move(bucket_info.placement_rule); + + return 0; +} + +int RGWSI_BucketIndex_RADOS::get_reshard_status(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, list<cls_rgw_bucket_instance_entry> *status) +{ + map<int, string> bucket_objs; + + RGWSI_RADOS::Pool index_pool; + + int r = open_bucket_index(dpp, bucket_info, + std::nullopt, + &index_pool, + &bucket_objs, + nullptr); + if (r < 0) { + return r; + } + + for (auto i : bucket_objs) { + cls_rgw_bucket_instance_entry entry; + + int ret = cls_rgw_get_bucket_resharding(index_pool.ioctx(), i.second, &entry); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, -1) << "ERROR: " << __func__ << ": cls_rgw_get_bucket_resharding() returned ret=" << ret << dendl; + return ret; + } + + status->push_back(entry); + } + + return 0; +} + +int RGWSI_BucketIndex_RADOS::handle_overwrite(const DoutPrefixProvider *dpp, + const RGWBucketInfo& info, + const RGWBucketInfo& orig_info) +{ + bool new_sync_enabled = info.datasync_flag_enabled(); + bool old_sync_enabled = orig_info.datasync_flag_enabled(); + + if (old_sync_enabled != new_sync_enabled) { + int shards_num = info.layout.current_index.layout.normal.num_shards? info.layout.current_index.layout.normal.num_shards : 1; + int shard_id = info.layout.current_index.layout.normal.num_shards? 0 : -1; + + int ret; + if (!new_sync_enabled) { + ret = svc.bilog->log_stop(dpp, info, -1); + } else { + ret = svc.bilog->log_start(dpp, info, -1); + } + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed writing bilog (bucket=" << info.bucket << "); ret=" << ret << dendl; + return ret; + } + + for (int i = 0; i < shards_num; ++i, ++shard_id) { + ret = svc.datalog_rados->add_entry(dpp, info, shard_id); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed writing data log (info.bucket=" << info.bucket << ", shard_id=" << shard_id << ")" << dendl; + return ret; + } + } + } + + return 0; +} + diff --git a/src/rgw/services/svc_bi_rados.h b/src/rgw/services/svc_bi_rados.h new file mode 100644 index 000000000..9037f43c8 --- /dev/null +++ b/src/rgw/services/svc_bi_rados.h @@ -0,0 +1,141 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_datalog.h" +#include "rgw/rgw_service.h" +#include "rgw/rgw_tools.h" + +#include "svc_bi.h" +#include "svc_rados.h" + +struct rgw_bucket_dir_header; + +class RGWSI_BILog_RADOS; + +#define RGW_NO_SHARD -1 + +#define RGW_SHARDS_PRIME_0 7877 +#define RGW_SHARDS_PRIME_1 65521 + +class RGWSI_BucketIndex_RADOS : public RGWSI_BucketIndex +{ + friend class RGWSI_BILog_RADOS; + + int open_pool(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + RGWSI_RADOS::Pool *index_pool, + bool mostly_omap); + + int open_bucket_index_pool(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWSI_RADOS::Pool *index_pool); + int open_bucket_index_base(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWSI_RADOS::Pool *index_pool, + string *bucket_oid_base); + + void get_bucket_index_object(const string& bucket_oid_base, + uint32_t num_shards, + int shard_id, + uint64_t gen_id, + string *bucket_obj); + int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key, + uint32_t num_shards, rgw::BucketHashType hash_type, + string *bucket_obj, int *shard_id); + + int cls_bucket_head(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + vector<rgw_bucket_dir_header> *headers, + map<int, string> *bucket_instance_ids, + optional_yield y); + +public: + + struct Svc { + RGWSI_Zone *zone{nullptr}; + RGWSI_RADOS *rados{nullptr}; + RGWSI_BILog_RADOS *bilog{nullptr}; + RGWDataChangesLog *datalog_rados{nullptr}; + } svc; + + RGWSI_BucketIndex_RADOS(CephContext *cct); + + void init(RGWSI_Zone *zone_svc, + RGWSI_RADOS *rados_svc, + RGWSI_BILog_RADOS *bilog_svc, + RGWDataChangesLog *datalog_rados_svc); + + static int shards_max() { + return RGW_SHARDS_PRIME_1; + } + + static int shard_id(const string& key, int max_shards) { + return rgw_shard_id(key, max_shards); + } + + static uint32_t bucket_shard_index(const std::string& key, + int num_shards) { + uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size()); + uint32_t sid2 = sid ^ ((sid & 0xFF) << 24); + return rgw_shards_mod(sid2, num_shards); + } + + int init_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info); + int clean_index(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info); + + + /* RADOS specific */ + + int read_stats(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWBucketEnt *stats, + optional_yield y) override; + + int get_reshard_status(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, + std::list<cls_rgw_bucket_instance_entry> *status); + + int handle_overwrite(const DoutPrefixProvider *dpp, const RGWBucketInfo& info, + const RGWBucketInfo& orig_info) override; + + int open_bucket_index_shard(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + const string& obj_key, + RGWSI_RADOS::Obj *bucket_obj, + int *shard_id); + + int open_bucket_index_shard(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + const rgw::bucket_index_layout_generation& idx_layout, + RGWSI_RADOS::Obj *bucket_obj); + + int open_bucket_index(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + RGWSI_RADOS::Pool *index_pool, + string *bucket_oid); + + int open_bucket_index(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + std::optional<int> shard_id, + RGWSI_RADOS::Pool *index_pool, + map<int, string> *bucket_objs, + map<int, string> *bucket_instance_ids); +}; + + diff --git a/src/rgw/services/svc_bilog_rados.cc b/src/rgw/services/svc_bilog_rados.cc new file mode 100644 index 000000000..06cf5ce7a --- /dev/null +++ b/src/rgw/services/svc_bilog_rados.cc @@ -0,0 +1,205 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_bilog_rados.h" +#include "svc_bi_rados.h" + +#include "cls/rgw/cls_rgw_client.h" + + +#define dout_subsys ceph_subsys_rgw + +RGWSI_BILog_RADOS::RGWSI_BILog_RADOS(CephContext *cct) : RGWServiceInstance(cct) +{ +} + +void RGWSI_BILog_RADOS::init(RGWSI_BucketIndex_RADOS *bi_rados_svc) +{ + svc.bi = bi_rados_svc; +} + +int RGWSI_BILog_RADOS::log_trim(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, int shard_id, string& start_marker, string& end_marker) +{ + RGWSI_RADOS::Pool index_pool; + map<int, string> bucket_objs; + + BucketIndexShardsManager start_marker_mgr; + BucketIndexShardsManager end_marker_mgr; + + int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, &index_pool, &bucket_objs, nullptr); + if (r < 0) { + return r; + } + + r = start_marker_mgr.from_string(start_marker, shard_id); + if (r < 0) { + return r; + } + + r = end_marker_mgr.from_string(end_marker, shard_id); + if (r < 0) { + return r; + } + + return CLSRGWIssueBILogTrim(index_pool.ioctx(), start_marker_mgr, end_marker_mgr, bucket_objs, + cct->_conf->rgw_bucket_index_max_aio)(); +} + +int RGWSI_BILog_RADOS::log_start(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, int shard_id) +{ + RGWSI_RADOS::Pool index_pool; + map<int, string> bucket_objs; + int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, &index_pool, &bucket_objs, nullptr); + if (r < 0) + return r; + + return CLSRGWIssueResyncBucketBILog(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); +} + +int RGWSI_BILog_RADOS::log_stop(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, int shard_id) +{ + RGWSI_RADOS::Pool index_pool; + map<int, string> bucket_objs; + int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, &index_pool, &bucket_objs, nullptr); + if (r < 0) + return r; + + return CLSRGWIssueBucketBILogStop(index_pool.ioctx(), bucket_objs, cct->_conf->rgw_bucket_index_max_aio)(); +} + +static void build_bucket_index_marker(const string& shard_id_str, + const string& shard_marker, + string *marker) { + if (marker) { + *marker = shard_id_str; + marker->append(BucketIndexShardsManager::KEY_VALUE_SEPARATOR); + marker->append(shard_marker); + } +} + +int RGWSI_BILog_RADOS::log_list(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, + std::list<rgw_bi_log_entry>& result, bool *truncated) +{ + ldpp_dout(dpp, 20) << __func__ << ": " << bucket_info.bucket << " marker " << marker << " shard_id=" << shard_id << " max " << max << dendl; + result.clear(); + + RGWSI_RADOS::Pool index_pool; + map<int, string> oids; + map<int, cls_rgw_bi_log_list_ret> bi_log_lists; + int r = svc.bi->open_bucket_index(dpp, bucket_info, shard_id, &index_pool, &oids, nullptr); + if (r < 0) + return r; + + BucketIndexShardsManager marker_mgr; + bool has_shards = (oids.size() > 1 || shard_id >= 0); + // If there are multiple shards for the bucket index object, the marker + // should have the pattern '{shard_id_1}#{shard_marker_1},{shard_id_2}# + // {shard_marker_2}...', if there is no sharding, the bi_log_list should + // only contain one record, and the key is the bucket instance id. + r = marker_mgr.from_string(marker, shard_id); + if (r < 0) + return r; + + r = CLSRGWIssueBILogList(index_pool.ioctx(), marker_mgr, max, oids, bi_log_lists, cct->_conf->rgw_bucket_index_max_aio)(); + if (r < 0) + return r; + + map<int, list<rgw_bi_log_entry>::iterator> vcurrents; + map<int, list<rgw_bi_log_entry>::iterator> vends; + if (truncated) { + *truncated = false; + } + map<int, cls_rgw_bi_log_list_ret>::iterator miter = bi_log_lists.begin(); + for (; miter != bi_log_lists.end(); ++miter) { + int shard_id = miter->first; + vcurrents[shard_id] = miter->second.entries.begin(); + vends[shard_id] = miter->second.entries.end(); + if (truncated) { + *truncated = (*truncated || miter->second.truncated); + } + } + + size_t total = 0; + bool has_more = true; + map<int, list<rgw_bi_log_entry>::iterator>::iterator viter; + map<int, list<rgw_bi_log_entry>::iterator>::iterator eiter; + while (total < max && has_more) { + has_more = false; + + viter = vcurrents.begin(); + eiter = vends.begin(); + + for (; total < max && viter != vcurrents.end(); ++viter, ++eiter) { + assert (eiter != vends.end()); + + int shard_id = viter->first; + list<rgw_bi_log_entry>::iterator& liter = viter->second; + + if (liter == eiter->second){ + continue; + } + rgw_bi_log_entry& entry = *(liter); + if (has_shards) { + char buf[16]; + snprintf(buf, sizeof(buf), "%d", shard_id); + string tmp_id; + build_bucket_index_marker(buf, entry.id, &tmp_id); + entry.id.swap(tmp_id); + } + marker_mgr.add(shard_id, entry.id); + result.push_back(entry); + total++; + has_more = true; + ++liter; + } + } + + if (truncated) { + for (viter = vcurrents.begin(), eiter = vends.begin(); viter != vcurrents.end(); ++viter, ++eiter) { + assert (eiter != vends.end()); + *truncated = (*truncated || (viter->second != eiter->second)); + } + } + + // Refresh marker, if there are multiple shards, the output will look like + // '{shard_oid_1}#{shard_marker_1},{shard_oid_2}#{shard_marker_2}...', + // if there is no sharding, the simply marker (without oid) is returned + if (has_shards) { + marker_mgr.to_string(&marker); + } else { + if (!result.empty()) { + marker = result.rbegin()->id; + } + } + + return 0; +} + +int RGWSI_BILog_RADOS::get_log_status(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + map<int, string> *markers, + optional_yield y) +{ + vector<rgw_bucket_dir_header> headers; + map<int, string> bucket_instance_ids; + int r = svc.bi->cls_bucket_head(dpp, bucket_info, shard_id, &headers, &bucket_instance_ids, y); + if (r < 0) + return r; + + ceph_assert(headers.size() == bucket_instance_ids.size()); + + auto iter = headers.begin(); + map<int, string>::iterator viter = bucket_instance_ids.begin(); + + for(; iter != headers.end(); ++iter, ++viter) { + if (shard_id >= 0) { + (*markers)[shard_id] = iter->max_marker; + } else { + (*markers)[viter->first] = iter->max_marker; + } + } + + return 0; +} + diff --git a/src/rgw/services/svc_bilog_rados.h b/src/rgw/services/svc_bilog_rados.h new file mode 100644 index 000000000..84f5679af --- /dev/null +++ b/src/rgw/services/svc_bilog_rados.h @@ -0,0 +1,60 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" + + + + +class RGWSI_BILog_RADOS : public RGWServiceInstance +{ +public: + struct Svc { + RGWSI_BucketIndex_RADOS *bi{nullptr}; + } svc; + + RGWSI_BILog_RADOS(CephContext *cct); + + void init(RGWSI_BucketIndex_RADOS *bi_rados_svc); + + int log_start(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, int shard_id); + int log_stop(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, int shard_id); + + int log_trim(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + std::string& start_marker, + std::string& end_marker); + int log_list(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + std::string& marker, + uint32_t max, + std::list<rgw_bi_log_entry>& result, + bool *truncated); + + int get_log_status(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + int shard_id, + map<int, string> *markers, + optional_yield y); +}; + diff --git a/src/rgw/services/svc_bucket.cc b/src/rgw/services/svc_bucket.cc new file mode 100644 index 000000000..dff3677cf --- /dev/null +++ b/src/rgw/services/svc_bucket.cc @@ -0,0 +1,25 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#include "svc_bucket.h" + +#define dout_subsys ceph_subsys_rgw + +string RGWSI_Bucket::get_entrypoint_meta_key(const rgw_bucket& bucket) +{ + if (bucket.bucket_id.empty()) { + return bucket.get_key(); + } + + rgw_bucket b(bucket); + b.bucket_id.clear(); + + return b.get_key(); +} + +string RGWSI_Bucket::get_bi_meta_key(const rgw_bucket& bucket) +{ + return bucket.get_key(); +} + diff --git a/src/rgw/services/svc_bucket.h b/src/rgw/services/svc_bucket.h new file mode 100644 index 000000000..fcdabca8b --- /dev/null +++ b/src/rgw/services/svc_bucket.h @@ -0,0 +1,111 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_bucket_types.h" + +class RGWSI_Bucket : public RGWServiceInstance +{ +public: + RGWSI_Bucket(CephContext *cct) : RGWServiceInstance(cct) {} + virtual ~RGWSI_Bucket() {} + + static string get_entrypoint_meta_key(const rgw_bucket& bucket); + static string get_bi_meta_key(const rgw_bucket& bucket); + + virtual RGWSI_Bucket_BE_Handler& get_ep_be_handler() = 0; + virtual RGWSI_BucketInstance_BE_Handler& get_bi_be_handler() = 0; + + virtual int read_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWBucketEntryPoint *entry_point, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp, + rgw_cache_entry_info *cache_info = nullptr, + boost::optional<obj_version> refresh_version = boost::none) = 0; + + virtual int store_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWBucketEntryPoint& info, + bool exclusive, + real_time mtime, + map<string, bufferlist> *pattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int remove_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo *info, + real_time *pmtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp, + rgw_cache_entry_info *cache_info = nullptr, + boost::optional<obj_version> refresh_version = boost::none) = 0; + + virtual int read_bucket_info(RGWSI_Bucket_X_Ctx& ep_ctx, + const rgw_bucket& bucket, + RGWBucketInfo *info, + real_time *pmtime, + map<string, bufferlist> *pattrs, + boost::optional<obj_version> refresh_version, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo& info, + std::optional<RGWBucketInfo *> orig_info, /* nullopt: orig_info was not fetched, + nullptr: orig_info was not found (new bucket instance */ + bool exclusive, + real_time mtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int remove_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + const RGWBucketInfo& bucket_info, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int read_bucket_stats(RGWSI_Bucket_X_Ctx& ctx, + const rgw_bucket& bucket, + RGWBucketEnt *ent, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int read_buckets_stats(RGWSI_Bucket_X_Ctx& ctx, + map<string, RGWBucketEnt>& m, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; +}; + diff --git a/src/rgw/services/svc_bucket_sobj.cc b/src/rgw/services/svc_bucket_sobj.cc new file mode 100644 index 000000000..5272a17d0 --- /dev/null +++ b/src/rgw/services/svc_bucket_sobj.cc @@ -0,0 +1,642 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#include "svc_bucket_sobj.h" +#include "svc_zone.h" +#include "svc_sys_obj.h" +#include "svc_sys_obj_cache.h" +#include "svc_bi.h" +#include "svc_meta.h" +#include "svc_meta_be_sobj.h" +#include "svc_sync_modules.h" + +#include "rgw/rgw_bucket.h" +#include "rgw/rgw_tools.h" +#include "rgw/rgw_zone.h" + +#define dout_subsys ceph_subsys_rgw + +#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta." + +class RGWSI_Bucket_SObj_Module : public RGWSI_MBSObj_Handler_Module { + RGWSI_Bucket_SObj::Svc& svc; + + const string prefix; +public: + RGWSI_Bucket_SObj_Module(RGWSI_Bucket_SObj::Svc& _svc) : RGWSI_MBSObj_Handler_Module("bucket"), + svc(_svc) {} + + void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override { + if (pool) { + *pool = svc.zone->get_zone_params().domain_root; + } + if (oid) { + *oid = key; + } + } + + const string& get_oid_prefix() override { + return prefix; + } + + bool is_valid_oid(const string& oid) override { + return (!oid.empty() && oid[0] != '.'); + } + + string key_to_oid(const string& key) override { + return key; + } + + string oid_to_key(const string& oid) override { + /* should have been called after is_valid_oid(), + * so no need to check for validity */ + return oid; + } +}; + +class RGWSI_BucketInstance_SObj_Module : public RGWSI_MBSObj_Handler_Module { + RGWSI_Bucket_SObj::Svc& svc; + + const string prefix; +public: + RGWSI_BucketInstance_SObj_Module(RGWSI_Bucket_SObj::Svc& _svc) : RGWSI_MBSObj_Handler_Module("bucket.instance"), + svc(_svc), prefix(RGW_BUCKET_INSTANCE_MD_PREFIX) {} + + void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override { + if (pool) { + *pool = svc.zone->get_zone_params().domain_root; + } + if (oid) { + *oid = key_to_oid(key); + } + } + + const string& get_oid_prefix() override { + return prefix; + } + + bool is_valid_oid(const string& oid) override { + return (oid.compare(0, prefix.size(), RGW_BUCKET_INSTANCE_MD_PREFIX) == 0); + } + +// 'tenant/' is used in bucket instance keys for sync to avoid parsing ambiguity +// with the existing instance[:shard] format. once we parse the shard, the / is +// replaced with a : to match the [tenant:]instance format + string key_to_oid(const string& key) override { + string oid = prefix + key; + + // replace tenant/ with tenant: + auto c = oid.find('/', prefix.size()); + if (c != string::npos) { + oid[c] = ':'; + } + + return oid; + } + + // convert bucket instance oids back to the tenant/ format for metadata keys. + // it's safe to parse 'tenant:' only for oids, because they won't contain the + // optional :shard at the end + string oid_to_key(const string& oid) override { + /* this should have been called after oid was checked for validity */ + + if (oid.size() < prefix.size()) { /* just sanity check */ + return string(); + } + + string key = oid.substr(prefix.size()); + + // find first : (could be tenant:bucket or bucket:instance) + auto c = key.find(':'); + if (c != string::npos) { + // if we find another :, the first one was for tenant + if (key.find(':', c + 1) != string::npos) { + key[c] = '/'; + } + } + + return key; + } + + /* + * hash entry for mdlog placement. Use the same hash key we'd have for the bucket entry + * point, so that the log entries end up at the same log shard, so that we process them + * in order + */ + string get_hash_key(const string& key) override { + string k = "bucket:"; + int pos = key.find(':'); + if (pos < 0) + k.append(key); + else + k.append(key.substr(0, pos)); + + return k; + } +}; + +RGWSI_Bucket_SObj::RGWSI_Bucket_SObj(CephContext *cct): RGWSI_Bucket(cct) { +} + +RGWSI_Bucket_SObj::~RGWSI_Bucket_SObj() { +} + +void RGWSI_Bucket_SObj::init(RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, + RGWSI_SysObj_Cache *_cache_svc, RGWSI_BucketIndex *_bi, + RGWSI_Meta *_meta_svc, RGWSI_MetaBackend *_meta_be_svc, + RGWSI_SyncModules *_sync_modules_svc, + RGWSI_Bucket_Sync *_bucket_sync_svc) +{ + svc.bucket = this; + svc.zone = _zone_svc; + svc.sysobj = _sysobj_svc; + svc.cache = _cache_svc; + svc.bi = _bi; + svc.meta = _meta_svc; + svc.meta_be = _meta_be_svc; + svc.sync_modules = _sync_modules_svc; + svc.bucket_sync = _bucket_sync_svc; +} + +int RGWSI_Bucket_SObj::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + binfo_cache.reset(new RGWChainedCacheImpl<bucket_info_cache_entry>); + binfo_cache->init(svc.cache); + + /* create first backend handler for bucket entrypoints */ + + RGWSI_MetaBackend_Handler *ep_handler; + + int r = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ, &ep_handler); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to create be handler: r=" << r << dendl; + return r; + } + + ep_be_handler = ep_handler; + + RGWSI_MetaBackend_Handler_SObj *ep_bh = static_cast<RGWSI_MetaBackend_Handler_SObj *>(ep_handler); + + auto ep_module = new RGWSI_Bucket_SObj_Module(svc); + ep_be_module.reset(ep_module); + ep_bh->set_module(ep_module); + + /* create a second backend handler for bucket instance */ + + RGWSI_MetaBackend_Handler *bi_handler; + + r = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ, &bi_handler); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to create be handler: r=" << r << dendl; + return r; + } + + bi_be_handler = bi_handler; + + RGWSI_MetaBackend_Handler_SObj *bi_bh = static_cast<RGWSI_MetaBackend_Handler_SObj *>(bi_handler); + + auto bi_module = new RGWSI_BucketInstance_SObj_Module(svc); + bi_be_module.reset(bi_module); + bi_bh->set_module(bi_module); + + return 0; +} + +int RGWSI_Bucket_SObj::read_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWBucketEntryPoint *entry_point, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version> refresh_version) +{ + bufferlist bl; + + auto params = RGWSI_MBSObj_GetParams(&bl, pattrs, pmtime).set_cache_info(cache_info) + .set_refresh_version(refresh_version); + + int ret = svc.meta_be->get_entry(ctx.get(), key, params, objv_tracker, y, dpp); + if (ret < 0) { + return ret; + } + + auto iter = bl.cbegin(); + try { + decode(*entry_point, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl; + return -EIO; + } + return 0; +} + +int RGWSI_Bucket_SObj::store_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWBucketEntryPoint& info, + bool exclusive, + real_time mtime, + map<string, bufferlist> *pattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + bufferlist bl; + encode(info, bl); + + RGWSI_MBSObj_PutParams params(bl, pattrs, mtime, exclusive); + + int ret = svc.meta_be->put(ctx.get(), key, params, objv_tracker, y, dpp); + if (ret < 0) { + return ret; + } + + return ret; +} + +int RGWSI_Bucket_SObj::remove_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWSI_MBSObj_RemoveParams params; + return svc.meta_be->remove(ctx.get(), key, params, objv_tracker, y, dpp); +} + +int RGWSI_Bucket_SObj::read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo *info, + real_time *pmtime, map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version> refresh_version) +{ + string cache_key("bi/"); + cache_key.append(key); + + if (auto e = binfo_cache->find(cache_key)) { + if (refresh_version && + e->info.objv_tracker.read_version.compare(&(*refresh_version))) { + ldpp_dout(dpp, -1) << "WARNING: The bucket info cache is inconsistent. This is " + << "a failure that should be debugged. I am a nice machine, " + << "so I will try to recover." << dendl; + binfo_cache->invalidate(key); + } else { + *info = e->info; + if (pattrs) + *pattrs = e->attrs; + if (pmtime) + *pmtime = e->mtime; + return 0; + } + } + + bucket_info_cache_entry e; + rgw_cache_entry_info ci; + + int ret = do_read_bucket_instance_info(ctx, key, + &e.info, &e.mtime, &e.attrs, + &ci, refresh_version, y, dpp); + *info = e.info; + + if (ret < 0) { + if (ret != -ENOENT) { + ldpp_dout(dpp, -1) << "ERROR: do_read_bucket_instance_info failed: " << ret << dendl; + } else { + ldpp_dout(dpp, 20) << "do_read_bucket_instance_info, bucket instance not found (key=" << key << ")" << dendl; + } + return ret; + } + + if (pmtime) { + *pmtime = e.mtime; + } + if (pattrs) { + *pattrs = e.attrs; + } + if (cache_info) { + *cache_info = ci; + } + + /* chain to only bucket instance and *not* bucket entrypoint */ + if (!binfo_cache->put(dpp, svc.cache, cache_key, &e, {&ci})) { + ldpp_dout(dpp, 20) << "couldn't put binfo cache entry, might have raced with data changes" << dendl; + } + + if (refresh_version && + refresh_version->compare(&info->objv_tracker.read_version)) { + ldpp_dout(dpp, -1) << "WARNING: The OSD has the same version I have. Something may " + << "have gone squirrelly. An administrator may have forced a " + << "change; otherwise there is a problem somewhere." << dendl; + } + + return 0; +} + +int RGWSI_Bucket_SObj::do_read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo *info, + real_time *pmtime, map<string, bufferlist> *pattrs, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version> refresh_version, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + bufferlist bl; + RGWObjVersionTracker ot; + + auto params = RGWSI_MBSObj_GetParams(&bl, pattrs, pmtime).set_cache_info(cache_info) + .set_refresh_version(refresh_version); + + int ret = svc.meta_be->get_entry(ctx.get(), key, params, &ot, y, dpp); + if (ret < 0) { + return ret; + } + + auto iter = bl.cbegin(); + try { + decode(*info, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: could not decode buffer info, caught buffer::error" << dendl; + return -EIO; + } + info->objv_tracker = ot; + return 0; +} + +int RGWSI_Bucket_SObj::read_bucket_info(RGWSI_Bucket_X_Ctx& ctx, + const rgw_bucket& bucket, + RGWBucketInfo *info, + real_time *pmtime, + map<string, bufferlist> *pattrs, + boost::optional<obj_version> refresh_version, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + rgw_cache_entry_info cache_info; + + if (!bucket.bucket_id.empty()) { + return read_bucket_instance_info(ctx.bi, get_bi_meta_key(bucket), + info, + pmtime, pattrs, + y, + dpp, + &cache_info, refresh_version); + } + + string bucket_entry = get_entrypoint_meta_key(bucket); + string cache_key("b/"); + cache_key.append(bucket_entry); + + if (auto e = binfo_cache->find(cache_key)) { + bool found_version = (bucket.bucket_id.empty() || + bucket.bucket_id == e->info.bucket.bucket_id); + + if (!found_version || + (refresh_version && + e->info.objv_tracker.read_version.compare(&(*refresh_version)))) { + lderr(cct) << "WARNING: The bucket info cache is inconsistent. This is " + << "a failure that should be debugged. I am a nice machine, " + << "so I will try to recover." << dendl; + binfo_cache->invalidate(cache_key); + } else { + *info = e->info; + if (pattrs) + *pattrs = e->attrs; + if (pmtime) + *pmtime = e->mtime; + return 0; + } + } + + RGWBucketEntryPoint entry_point; + real_time ep_mtime; + RGWObjVersionTracker ot; + rgw_cache_entry_info entry_cache_info; + int ret = read_bucket_entrypoint_info(ctx.ep, bucket_entry, + &entry_point, &ot, &ep_mtime, pattrs, + y, + dpp, + &entry_cache_info, refresh_version); + if (ret < 0) { + /* only init these fields */ + info->bucket = bucket; + return ret; + } + + if (entry_point.has_bucket_info) { + *info = entry_point.old_bucket_info; + info->bucket.tenant = bucket.tenant; + ldpp_dout(dpp, 20) << "rgw_get_bucket_info: old bucket info, bucket=" << info->bucket << " owner " << info->owner << dendl; + return 0; + } + + /* data is in the bucket instance object, we need to get attributes from there, clear everything + * that we got + */ + if (pattrs) { + pattrs->clear(); + } + + ldpp_dout(dpp, 20) << "rgw_get_bucket_info: bucket instance: " << entry_point.bucket << dendl; + + + /* read bucket instance info */ + + bucket_info_cache_entry e; + + ret = read_bucket_instance_info(ctx.bi, get_bi_meta_key(entry_point.bucket), + &e.info, &e.mtime, &e.attrs, + y, + dpp, + &cache_info, refresh_version); + *info = e.info; + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: read_bucket_instance_from_oid failed: " << ret << dendl; + info->bucket = bucket; + // XXX and why return anything in case of an error anyway? + return ret; + } + + if (pmtime) + *pmtime = e.mtime; + if (pattrs) + *pattrs = e.attrs; + + /* chain to both bucket entry point and bucket instance */ + if (!binfo_cache->put(dpp, svc.cache, cache_key, &e, {&entry_cache_info, &cache_info})) { + ldpp_dout(dpp, 20) << "couldn't put binfo cache entry, might have raced with data changes" << dendl; + } + + if (refresh_version && + refresh_version->compare(&info->objv_tracker.read_version)) { + ldpp_dout(dpp, -1) << "WARNING: The OSD has the same version I have. Something may " + << "have gone squirrelly. An administrator may have forced a " + << "change; otherwise there is a problem somewhere." << dendl; + } + + return 0; +} + + +int RGWSI_Bucket_SObj::store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo& info, + std::optional<RGWBucketInfo *> orig_info, + bool exclusive, + real_time mtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + bufferlist bl; + encode(info, bl); + + /* + * we might need some special handling if overwriting + */ + RGWBucketInfo shared_bucket_info; + if (!orig_info && !exclusive) { /* if exclusive, we're going to fail when try + to overwrite, so the whole check here is moot */ + /* + * we're here because orig_info wasn't passed in + * we don't have info about what was there before, so need to fetch first + */ + int r = read_bucket_instance_info(ctx, + key, + &shared_bucket_info, + nullptr, nullptr, + y, + dpp, + nullptr, boost::none); + if (r < 0) { + if (r != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): read_bucket_instance_info() of key=" << key << " returned r=" << r << dendl; + return r; + } + } else { + orig_info = &shared_bucket_info; + } + } + + if (orig_info && *orig_info && !exclusive) { + int r = svc.bi->handle_overwrite(dpp, info, *(orig_info.value())); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): svc.bi->handle_overwrite() of key=" << key << " returned r=" << r << dendl; + return r; + } + } + + RGWSI_MBSObj_PutParams params(bl, pattrs, mtime, exclusive); + + int ret = svc.meta_be->put(ctx.get(), key, params, &info.objv_tracker, y, dpp); + + if (ret >= 0) { + int r = svc.bucket_sync->handle_bi_update(dpp, info, + orig_info.value_or(nullptr), + y); + if (r < 0) { + return r; + } + } else if (ret == -EEXIST) { + /* well, if it's exclusive we shouldn't overwrite it, because we might race with another + * bucket operation on this specific bucket (e.g., being synced from the master), but + * since bucket instace meta object is unique for this specific bucket instace, we don't + * need to return an error. + * A scenario where we'd get -EEXIST here, is in a multi-zone config, we're not on the + * master, creating a bucket, sending bucket creation to the master, we create the bucket + * locally, while in the sync thread we sync the new bucket. + */ + ret = 0; + } + + if (ret < 0) { + return ret; + } + + return ret; +} + +int RGWSI_Bucket_SObj::remove_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + const RGWBucketInfo& info, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWSI_MBSObj_RemoveParams params; + int ret = svc.meta_be->remove_entry(dpp, ctx.get(), key, params, objv_tracker, y); + + if (ret < 0 && + ret != -ENOENT) { + return ret; + } + + int r = svc.bucket_sync->handle_bi_removal(dpp, info, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update bucket instance sync index: r=" << r << dendl; + /* returning success as index is just keeping hints, so will keep extra hints, + * but bucket removal succeeded + */ + } + + return 0; +} + +int RGWSI_Bucket_SObj::read_bucket_stats(const RGWBucketInfo& bucket_info, + RGWBucketEnt *ent, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + ent->count = 0; + ent->size = 0; + ent->size_rounded = 0; + + vector<rgw_bucket_dir_header> headers; + + int r = svc.bi->read_stats(dpp, bucket_info, ent, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): read_stats returned r=" << r << dendl; + return r; + } + + return 0; +} + +int RGWSI_Bucket_SObj::read_bucket_stats(RGWSI_Bucket_X_Ctx& ctx, + const rgw_bucket& bucket, + RGWBucketEnt *ent, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWBucketInfo bucket_info; + int ret = read_bucket_info(ctx, bucket, &bucket_info, nullptr, nullptr, boost::none, y, dpp); + if (ret < 0) { + return ret; + } + + return read_bucket_stats(bucket_info, ent, y, dpp); +} + +int RGWSI_Bucket_SObj::read_buckets_stats(RGWSI_Bucket_X_Ctx& ctx, + map<string, RGWBucketEnt>& m, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + map<string, RGWBucketEnt>::iterator iter; + for (iter = m.begin(); iter != m.end(); ++iter) { + RGWBucketEnt& ent = iter->second; + int r = read_bucket_stats(ctx, ent.bucket, &ent, y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): read_bucket_stats returned r=" << r << dendl; + return r; + } + } + + return m.size(); +}
\ No newline at end of file diff --git a/src/rgw/services/svc_bucket_sobj.h b/src/rgw/services/svc_bucket_sobj.h new file mode 100644 index 000000000..776367e8a --- /dev/null +++ b/src/rgw/services/svc_bucket_sobj.h @@ -0,0 +1,180 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_meta_be.h" +#include "svc_bucket_types.h" +#include "svc_bucket.h" + +class RGWSI_Zone; +class RGWSI_SysObj; +class RGWSI_SysObj_Cache; +class RGWSI_Meta; +class RGWSI_SyncModules; +class RGWSI_Bucket_Sync; + +struct rgw_cache_entry_info; + +template <class T> +class RGWChainedCacheImpl; + +class RGWSI_Bucket_SObj : public RGWSI_Bucket +{ + struct bucket_info_cache_entry { + RGWBucketInfo info; + real_time mtime; + map<string, bufferlist> attrs; + }; + + using RGWChainedCacheImpl_bucket_info_cache_entry = RGWChainedCacheImpl<bucket_info_cache_entry>; + unique_ptr<RGWChainedCacheImpl_bucket_info_cache_entry> binfo_cache; + + RGWSI_Bucket_BE_Handler ep_be_handler; + std::unique_ptr<RGWSI_MetaBackend::Module> ep_be_module; + RGWSI_BucketInstance_BE_Handler bi_be_handler; + std::unique_ptr<RGWSI_MetaBackend::Module> bi_be_module; + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + + int do_read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo *info, + real_time *pmtime, + map<string, bufferlist> *pattrs, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version> refresh_version, + optional_yield y, + const DoutPrefixProvider *dpp); + + int read_bucket_stats(const RGWBucketInfo& bucket_info, + RGWBucketEnt *ent, + optional_yield y, + const DoutPrefixProvider *dpp); + +public: + struct Svc { + RGWSI_Bucket_SObj *bucket{nullptr}; + RGWSI_BucketIndex *bi{nullptr}; + RGWSI_Zone *zone{nullptr}; + RGWSI_SysObj *sysobj{nullptr}; + RGWSI_SysObj_Cache *cache{nullptr}; + RGWSI_Meta *meta{nullptr}; + RGWSI_MetaBackend *meta_be{nullptr}; + RGWSI_SyncModules *sync_modules{nullptr}; + RGWSI_Bucket_Sync *bucket_sync{nullptr}; + } svc; + + RGWSI_Bucket_SObj(CephContext *cct); + ~RGWSI_Bucket_SObj(); + + RGWSI_Bucket_BE_Handler& get_ep_be_handler() override { + return ep_be_handler; + } + + RGWSI_BucketInstance_BE_Handler& get_bi_be_handler() override { + return bi_be_handler; + } + + void init(RGWSI_Zone *_zone_svc, + RGWSI_SysObj *_sysobj_svc, + RGWSI_SysObj_Cache *_cache_svc, + RGWSI_BucketIndex *_bi, + RGWSI_Meta *_meta_svc, + RGWSI_MetaBackend *_meta_be_svc, + RGWSI_SyncModules *_sync_modules_svc, + RGWSI_Bucket_Sync *_bucket_sync_svc); + + + int read_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWBucketEntryPoint *entry_point, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp, + rgw_cache_entry_info *cache_info = nullptr, + boost::optional<obj_version> refresh_version = boost::none) override; + + int store_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWBucketEntryPoint& info, + bool exclusive, + real_time mtime, + map<string, bufferlist> *pattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int remove_bucket_entrypoint_info(RGWSI_Bucket_EP_Ctx& ctx, + const string& key, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int read_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo *info, + real_time *pmtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp, + rgw_cache_entry_info *cache_info = nullptr, + boost::optional<obj_version> refresh_version = boost::none) override; + + int read_bucket_info(RGWSI_Bucket_X_Ctx& ep_ctx, + const rgw_bucket& bucket, + RGWBucketInfo *info, + real_time *pmtime, + map<string, bufferlist> *pattrs, + boost::optional<obj_version> refresh_version, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int store_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + RGWBucketInfo& info, + std::optional<RGWBucketInfo *> orig_info, /* nullopt: orig_info was not fetched, + nullptr: orig_info was not found (new bucket instance */ + bool exclusive, + real_time mtime, + map<string, bufferlist> *pattrs, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int remove_bucket_instance_info(RGWSI_Bucket_BI_Ctx& ctx, + const string& key, + const RGWBucketInfo& bucket_info, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int read_bucket_stats(RGWSI_Bucket_X_Ctx& ctx, + const rgw_bucket& bucket, + RGWBucketEnt *ent, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int read_buckets_stats(RGWSI_Bucket_X_Ctx& ctx, + map<string, RGWBucketEnt>& m, + optional_yield y, + const DoutPrefixProvider *dpp) override; +}; + diff --git a/src/rgw/services/svc_bucket_sync.h b/src/rgw/services/svc_bucket_sync.h new file mode 100644 index 000000000..d90856b7a --- /dev/null +++ b/src/rgw/services/svc_bucket_sync.h @@ -0,0 +1,55 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_bucket_types.h" + +class RGWBucketSyncPolicyHandler; +using RGWBucketSyncPolicyHandlerRef = std::shared_ptr<RGWBucketSyncPolicyHandler>; + + +class RGWSI_Bucket_Sync : public RGWServiceInstance +{ +public: + RGWSI_Bucket_Sync(CephContext *cct) : RGWServiceInstance(cct) {} + + virtual int get_policy_handler(RGWSI_Bucket_X_Ctx& ctx, + std::optional<rgw_zone_id> zone, + std::optional<rgw_bucket> bucket, + RGWBucketSyncPolicyHandlerRef *handler, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int handle_bi_update(const DoutPrefixProvider *dpp, + RGWBucketInfo& bucket_info, + RGWBucketInfo *orig_bucket_info, + optional_yield y) = 0; + virtual int handle_bi_removal(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + optional_yield y) = 0; + + virtual int get_bucket_sync_hints(const DoutPrefixProvider *dpp, + const rgw_bucket& bucket, + std::set<rgw_bucket> *sources, + std::set<rgw_bucket> *dests, + optional_yield y) = 0; +}; + + diff --git a/src/rgw/services/svc_bucket_sync_sobj.cc b/src/rgw/services/svc_bucket_sync_sobj.cc new file mode 100644 index 000000000..885033442 --- /dev/null +++ b/src/rgw/services/svc_bucket_sync_sobj.cc @@ -0,0 +1,885 @@ +#include "svc_bucket_sync_sobj.h" +#include "svc_zone.h" +#include "svc_sys_obj_cache.h" +#include "svc_bucket_sobj.h" + +#include "rgw/rgw_bucket_sync.h" +#include "rgw/rgw_zone.h" +#include "rgw/rgw_sync_policy.h" + +#define dout_subsys ceph_subsys_rgw + +static string bucket_sync_sources_oid_prefix = "bucket.sync-source-hints"; +static string bucket_sync_targets_oid_prefix = "bucket.sync-target-hints"; + +class RGWSI_Bucket_Sync_SObj_HintIndexManager { + CephContext *cct; + + struct { + RGWSI_Zone *zone; + RGWSI_SysObj *sysobj; + } svc; + +public: + RGWSI_Bucket_Sync_SObj_HintIndexManager(RGWSI_Zone *_zone_svc, + RGWSI_SysObj *_sysobj_svc) { + svc.zone = _zone_svc; + svc.sysobj = _sysobj_svc; + + cct = svc.zone->ctx(); + } + + rgw_raw_obj get_sources_obj(const rgw_bucket& bucket) const; + rgw_raw_obj get_dests_obj(const rgw_bucket& bucket) const; + + template <typename C1, typename C2> + int update_hints(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + C1& added_dests, + C2& removed_dests, + C1& added_sources, + C2& removed_sources, + optional_yield y); +}; + +RGWSI_Bucket_Sync_SObj::RGWSI_Bucket_Sync_SObj(CephContext *cct) : RGWSI_Bucket_Sync(cct) { +} +RGWSI_Bucket_Sync_SObj::~RGWSI_Bucket_Sync_SObj() { +} + +void RGWSI_Bucket_Sync_SObj::init(RGWSI_Zone *_zone_svc, + RGWSI_SysObj *_sysobj_svc, + RGWSI_SysObj_Cache *_cache_svc, + RGWSI_Bucket_SObj *bucket_sobj_svc) +{ + svc.zone = _zone_svc; + svc.sysobj = _sysobj_svc; + svc.cache = _cache_svc; + svc.bucket_sobj = bucket_sobj_svc; + + hint_index_mgr.reset(new RGWSI_Bucket_Sync_SObj_HintIndexManager(svc.zone, svc.sysobj)); +} + +int RGWSI_Bucket_Sync_SObj::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + sync_policy_cache.reset(new RGWChainedCacheImpl<bucket_sync_policy_cache_entry>); + sync_policy_cache->init(svc.cache); + + return 0; +} + +void RGWSI_Bucket_Sync_SObj::get_hint_entities(RGWSI_Bucket_X_Ctx& ctx, + const std::set<rgw_zone_id>& zones, + const std::set<rgw_bucket>& buckets, + std::set<rgw_sync_bucket_entity> *hint_entities, + optional_yield y, const DoutPrefixProvider *dpp) +{ + vector<rgw_bucket> hint_buckets; + + hint_buckets.reserve(buckets.size()); + + for (auto& b : buckets) { + RGWBucketInfo hint_bucket_info; + int ret = svc.bucket_sobj->read_bucket_info(ctx, b, &hint_bucket_info, + nullptr, nullptr, boost::none, + y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 20) << "could not init bucket info for hint bucket=" << b << " ... skipping" << dendl; + continue; + } + + hint_buckets.emplace_back(std::move(hint_bucket_info.bucket)); + } + + for (auto& zone : zones) { + for (auto& b : hint_buckets) { + hint_entities->insert(rgw_sync_bucket_entity(zone, b)); + } + } +} + +int RGWSI_Bucket_Sync_SObj::resolve_policy_hints(RGWSI_Bucket_X_Ctx& ctx, + rgw_sync_bucket_entity& self_entity, + RGWBucketSyncPolicyHandlerRef& handler, + RGWBucketSyncPolicyHandlerRef& zone_policy_handler, + std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + set<rgw_zone_id> source_zones; + set<rgw_zone_id> target_zones; + + zone_policy_handler->reflect(nullptr, nullptr, + nullptr, nullptr, + &source_zones, + &target_zones, + false); /* relaxed: also get all zones that we allow to sync to/from */ + + std::set<rgw_sync_bucket_entity> hint_entities; + + get_hint_entities(ctx, source_zones, handler->get_source_hints(), &hint_entities, y, dpp); + get_hint_entities(ctx, target_zones, handler->get_target_hints(), &hint_entities, y, dpp); + + std::set<rgw_sync_bucket_pipe> resolved_sources; + std::set<rgw_sync_bucket_pipe> resolved_dests; + + for (auto& hint_entity : hint_entities) { + if (!hint_entity.zone || + !hint_entity.bucket) { + continue; /* shouldn't really happen */ + } + + auto& zid = *hint_entity.zone; + auto& hint_bucket = *hint_entity.bucket; + + RGWBucketSyncPolicyHandlerRef hint_bucket_handler; + + auto iter = temp_map.find(optional_zone_bucket(zid, hint_bucket)); + if (iter != temp_map.end()) { + hint_bucket_handler = iter->second; + } else { + int r = do_get_policy_handler(ctx, zid, hint_bucket, temp_map, &hint_bucket_handler, y, dpp); + if (r < 0) { + ldpp_dout(dpp, 20) << "could not get bucket sync policy handler for hint bucket=" << hint_bucket << " ... skipping" << dendl; + continue; + } + } + + hint_bucket_handler->get_pipes(&resolved_dests, + &resolved_sources, + self_entity); /* flipping resolved dests and sources as these are + relative to the remote entity */ + } + + handler->set_resolved_hints(std::move(resolved_sources), std::move(resolved_dests)); + + return 0; +} + +int RGWSI_Bucket_Sync_SObj::do_get_policy_handler(RGWSI_Bucket_X_Ctx& ctx, + std::optional<rgw_zone_id> zone, + std::optional<rgw_bucket> _bucket, + std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map, + RGWBucketSyncPolicyHandlerRef *handler, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + if (!_bucket) { + *handler = svc.zone->get_sync_policy_handler(zone); + return 0; + } + + auto& bucket = *_bucket; + + string zone_key; + string bucket_key; + + if (zone && *zone != svc.zone->zone_id()) { + zone_key = zone->id; + } + + bucket_key = RGWSI_Bucket::get_bi_meta_key(bucket); + + string cache_key("bi/" + zone_key + "/" + bucket_key); + + if (auto e = sync_policy_cache->find(cache_key)) { + *handler = e->handler; + return 0; + } + + bucket_sync_policy_cache_entry e; + rgw_cache_entry_info cache_info; + + RGWBucketInfo bucket_info; + map<string, bufferlist> attrs; + + int r = svc.bucket_sobj->read_bucket_instance_info(ctx.bi, + bucket_key, + &bucket_info, + nullptr, + &attrs, + y, + dpp, + &cache_info); + if (r < 0) { + if (r != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: svc.bucket->read_bucket_instance_info(key=" << bucket_key << ") returned r=" << r << dendl; + } + return r; + } + + auto zone_policy_handler = svc.zone->get_sync_policy_handler(zone); + if (!zone_policy_handler) { + ldpp_dout(dpp, 20) << "ERROR: could not find policy handler for zone=" << zone << dendl; + return -ENOENT; + } + + e.handler.reset(zone_policy_handler->alloc_child(bucket_info, std::move(attrs))); + + r = e.handler->init(dpp, y); + if (r < 0) { + ldpp_dout(dpp, 20) << "ERROR: failed to init bucket sync policy handler: r=" << r << dendl; + return r; + } + + temp_map.emplace(optional_zone_bucket{zone, bucket}, e.handler); + + rgw_sync_bucket_entity self_entity(zone.value_or(svc.zone->zone_id()), bucket); + + r = resolve_policy_hints(ctx, self_entity, + e.handler, + zone_policy_handler, + temp_map, y, dpp); + if (r < 0) { + ldpp_dout(dpp, 20) << "ERROR: failed to resolve policy hints: bucket_key=" << bucket_key << ", r=" << r << dendl; + return r; + } + + if (!sync_policy_cache->put(dpp, svc.cache, cache_key, &e, {&cache_info})) { + ldpp_dout(dpp, 20) << "couldn't put bucket_sync_policy cache entry, might have raced with data changes" << dendl; + } + + *handler = e.handler; + + return 0; +} + +int RGWSI_Bucket_Sync_SObj::get_policy_handler(RGWSI_Bucket_X_Ctx& ctx, + std::optional<rgw_zone_id> zone, + std::optional<rgw_bucket> _bucket, + RGWBucketSyncPolicyHandlerRef *handler, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef> temp_map; + return do_get_policy_handler(ctx, zone, _bucket, temp_map, handler, y, dpp); +} + +static bool diff_sets(std::set<rgw_bucket>& orig_set, + std::set<rgw_bucket>& new_set, + vector<rgw_bucket> *added, + vector<rgw_bucket> *removed) +{ + auto oiter = orig_set.begin(); + auto niter = new_set.begin(); + + while (oiter != orig_set.end() && + niter != new_set.end()) { + if (*oiter == *niter) { + ++oiter; + ++niter; + continue; + } + while (*oiter < *niter && + oiter != orig_set.end()) { + removed->push_back(*oiter); + ++oiter; + } + while (*niter < *oiter + && niter != new_set.end()) { + added->push_back(*niter); + ++niter; + } + } + for (; oiter != orig_set.end(); ++oiter) { + removed->push_back(*oiter); + } + for (; niter != new_set.end(); ++niter) { + added->push_back(*niter); + } + + return !(removed->empty() && added->empty()); +} + + +class RGWSI_BS_SObj_HintIndexObj +{ + friend class RGWSI_Bucket_Sync_SObj; + + CephContext *cct; + struct { + RGWSI_SysObj *sysobj; + } svc; + + RGWSysObjectCtx obj_ctx; + rgw_raw_obj obj; + RGWSysObj sysobj; + + RGWObjVersionTracker ot; + + bool has_data{false}; + +public: + struct bi_entry { + rgw_bucket bucket; + map<rgw_bucket /* info_source */, obj_version> sources; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(bucket, bl); + encode(sources, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(bucket, bl); + decode(sources, bl); + DECODE_FINISH(bl); + } + + bool add(const rgw_bucket& info_source, + const obj_version& info_source_ver) { + auto& ver = sources[info_source]; + + if (ver == info_source_ver) { /* already updated */ + return false; + } + + if (info_source_ver.tag == ver.tag && + info_source_ver.ver < ver.ver) { + return false; + } + + ver = info_source_ver; + + return true; + } + + bool remove(const rgw_bucket& info_source, + const obj_version& info_source_ver) { + auto iter = sources.find(info_source); + if (iter == sources.end()) { + return false; + } + + auto& ver = iter->second; + + if (info_source_ver.tag == ver.tag && + info_source_ver.ver < ver.ver) { + return false; + } + + sources.erase(info_source); + return true; + } + + bool empty() const { + return sources.empty(); + } + }; + + struct single_instance_info { + map<rgw_bucket, bi_entry> entries; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(entries, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(entries, bl); + DECODE_FINISH(bl); + } + + bool add_entry(const rgw_bucket& info_source, + const obj_version& info_source_ver, + const rgw_bucket& bucket) { + auto& entry = entries[bucket]; + + if (!entry.add(info_source, info_source_ver)) { + return false; + } + + entry.bucket = bucket; + + return true; + } + + bool remove_entry(const rgw_bucket& info_source, + const obj_version& info_source_ver, + const rgw_bucket& bucket) { + auto iter = entries.find(bucket); + if (iter == entries.end()) { + return false; + } + + if (!iter->second.remove(info_source, info_source_ver)) { + return false; + } + + if (iter->second.empty()) { + entries.erase(iter); + } + + return true; + } + + void clear() { + entries.clear(); + } + + bool empty() const { + return entries.empty(); + } + + void get_entities(std::set<rgw_bucket> *result) const { + for (auto& iter : entries) { + result->insert(iter.first); + } + } + }; + + struct info_map { + map<rgw_bucket, single_instance_info> instances; + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(instances, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(instances, bl); + DECODE_FINISH(bl); + } + + bool empty() const { + return instances.empty(); + } + + void clear() { + instances.clear(); + } + + void get_entities(const rgw_bucket& bucket, + std::set<rgw_bucket> *result) const { + auto iter = instances.find(bucket); + if (iter == instances.end()) { + return; + } + iter->second.get_entities(result); + } + } info; + + RGWSI_BS_SObj_HintIndexObj(RGWSI_SysObj *_sysobj_svc, + const rgw_raw_obj& _obj) : cct(_sysobj_svc->ctx()), + obj_ctx(_sysobj_svc->init_obj_ctx()), + obj(_obj), + sysobj(obj_ctx.get_obj(obj)) + { + svc.sysobj = _sysobj_svc; + } + + template <typename C1, typename C2> + int update(const DoutPrefixProvider *dpp, + const rgw_bucket& entity, + const RGWBucketInfo& info_source, + C1 *add, + C2 *remove, + optional_yield y); + +private: + template <typename C1, typename C2> + void update_entries(const rgw_bucket& info_source, + const obj_version& info_source_ver, + C1 *add, + C2 *remove, + single_instance_info *instance); + + int read(const DoutPrefixProvider *dpp, optional_yield y); + int flush(const DoutPrefixProvider *dpp, optional_yield y); + + void invalidate() { + has_data = false; + info.clear(); + } + + void get_entities(const rgw_bucket& bucket, + std::set<rgw_bucket> *result) const { + info.get_entities(bucket, result); + } +}; +WRITE_CLASS_ENCODER(RGWSI_BS_SObj_HintIndexObj::bi_entry) +WRITE_CLASS_ENCODER(RGWSI_BS_SObj_HintIndexObj::single_instance_info) +WRITE_CLASS_ENCODER(RGWSI_BS_SObj_HintIndexObj::info_map) + +template <typename C1, typename C2> +int RGWSI_BS_SObj_HintIndexObj::update(const DoutPrefixProvider *dpp, + const rgw_bucket& entity, + const RGWBucketInfo& info_source, + C1 *add, + C2 *remove, + optional_yield y) +{ + int r = 0; + + auto& info_source_ver = info_source.objv_tracker.read_version; + +#define MAX_RETRIES 25 + + for (int i = 0; i < MAX_RETRIES; ++i) { + if (!has_data) { + r = read(dpp, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: cannot update hint index: failed to read: r=" << r << dendl; + return r; + } + } + + auto& instance = info.instances[entity]; + + update_entries(info_source.bucket, + info_source_ver, + add, remove, + &instance); + + if (instance.empty()) { + info.instances.erase(entity); + } + + r = flush(dpp, y); + if (r >= 0) { + return 0; + } + + if (r != -ECANCELED) { + ldpp_dout(dpp, 0) << "ERROR: failed to flush hint index: obj=" << obj << " r=" << r << dendl; + return r; + } + + invalidate(); + } + ldpp_dout(dpp, 0) << "ERROR: failed to flush hint index: too many retries (obj=" << obj << "), likely a bug" << dendl; + + return -EIO; +} + +template <typename C1, typename C2> +void RGWSI_BS_SObj_HintIndexObj::update_entries(const rgw_bucket& info_source, + const obj_version& info_source_ver, + C1 *add, + C2 *remove, + single_instance_info *instance) +{ + if (remove) { + for (auto& bucket : *remove) { + instance->remove_entry(info_source, info_source_ver, bucket); + } + } + + if (add) { + for (auto& bucket : *add) { + instance->add_entry(info_source, info_source_ver, bucket); + } + } +} + +int RGWSI_BS_SObj_HintIndexObj::read(const DoutPrefixProvider *dpp, optional_yield y) { + RGWObjVersionTracker _ot; + bufferlist bl; + int r = sysobj.rop() + .set_objv_tracker(&_ot) /* forcing read of current version */ + .read(dpp, &bl, y); + if (r < 0 && r != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: failed reading data (obj=" << obj << "), r=" << r << dendl; + return r; + } + + ot = _ot; + + if (r >= 0) { + auto iter = bl.cbegin(); + try { + decode(info, iter); + has_data = true; + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to decode entries, ignoring" << dendl; + info.clear(); + } + } else { + info.clear(); + } + + return 0; +} + +int RGWSI_BS_SObj_HintIndexObj::flush(const DoutPrefixProvider *dpp, optional_yield y) { + int r; + + if (!info.empty()) { + bufferlist bl; + encode(info, bl); + + r = sysobj.wop() + .set_objv_tracker(&ot) /* forcing read of current version */ + .write(dpp, bl, y); + + } else { /* remove */ + r = sysobj.wop() + .set_objv_tracker(&ot) + .remove(dpp, y); + } + + if (r < 0) { + return r; + } + + return 0; +} + +rgw_raw_obj RGWSI_Bucket_Sync_SObj_HintIndexManager::get_sources_obj(const rgw_bucket& bucket) const +{ + rgw_bucket b = bucket; + b.bucket_id.clear(); + return rgw_raw_obj(svc.zone->get_zone_params().log_pool, + bucket_sync_sources_oid_prefix + "." + b.get_key()); +} + +rgw_raw_obj RGWSI_Bucket_Sync_SObj_HintIndexManager::get_dests_obj(const rgw_bucket& bucket) const +{ + rgw_bucket b = bucket; + b.bucket_id.clear(); + return rgw_raw_obj(svc.zone->get_zone_params().log_pool, + bucket_sync_targets_oid_prefix + "." + b.get_key()); +} + +template <typename C1, typename C2> +int RGWSI_Bucket_Sync_SObj_HintIndexManager::update_hints(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + C1& added_dests, + C2& removed_dests, + C1& added_sources, + C2& removed_sources, + optional_yield y) +{ + C1 self_entity = { bucket_info.bucket }; + + if (!added_dests.empty() || + !removed_dests.empty()) { + /* update our dests */ + RGWSI_BS_SObj_HintIndexObj index(svc.sysobj, + get_dests_obj(bucket_info.bucket)); + int r = index.update(dpp, bucket_info.bucket, + bucket_info, + &added_dests, + &removed_dests, + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << bucket_info.bucket << " r=" << r << dendl; + return r; + } + + /* update dest buckets */ + for (auto& dest_bucket : added_dests) { + RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj, + get_sources_obj(dest_bucket)); + int r = dep_index.update(dpp, dest_bucket, + bucket_info, + &self_entity, + static_cast<C2 *>(nullptr), + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << dest_bucket << " r=" << r << dendl; + return r; + } + } + /* update removed dest buckets */ + for (auto& dest_bucket : removed_dests) { + RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj, + get_sources_obj(dest_bucket)); + int r = dep_index.update(dpp, dest_bucket, + bucket_info, + static_cast<C1 *>(nullptr), + &self_entity, + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << dest_bucket << " r=" << r << dendl; + return r; + } + } + } + + if (!added_sources.empty() || + !removed_sources.empty()) { + RGWSI_BS_SObj_HintIndexObj index(svc.sysobj, + get_sources_obj(bucket_info.bucket)); + /* update our sources */ + int r = index.update(dpp, bucket_info.bucket, + bucket_info, + &added_sources, + &removed_sources, + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << bucket_info.bucket << " r=" << r << dendl; + return r; + } + + /* update added sources buckets */ + for (auto& source_bucket : added_sources) { + RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj, + get_dests_obj(source_bucket)); + int r = dep_index.update(dpp, source_bucket, + bucket_info, + &self_entity, + static_cast<C2 *>(nullptr), + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << source_bucket << " r=" << r << dendl; + return r; + } + } + /* update removed dest buckets */ + for (auto& source_bucket : removed_sources) { + RGWSI_BS_SObj_HintIndexObj dep_index(svc.sysobj, + get_dests_obj(source_bucket)); + int r = dep_index.update(dpp, source_bucket, + bucket_info, + static_cast<C1 *>(nullptr), + &self_entity, + y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update targets index for bucket=" << source_bucket << " r=" << r << dendl; + return r; + } + } + } + + return 0; +} + +int RGWSI_Bucket_Sync_SObj::handle_bi_removal(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + optional_yield y) +{ + std::set<rgw_bucket> sources_set; + std::set<rgw_bucket> dests_set; + + if (bucket_info.sync_policy) { + bucket_info.sync_policy->get_potential_related_buckets(bucket_info.bucket, + &sources_set, + &dests_set); + } + + std::vector<rgw_bucket> removed_sources; + removed_sources.reserve(sources_set.size()); + for (auto& e : sources_set) { + removed_sources.push_back(e); + } + + std::vector<rgw_bucket> removed_dests; + removed_dests.reserve(dests_set.size()); + for (auto& e : dests_set) { + removed_dests.push_back(e); + } + + std::vector<rgw_bucket> added_sources; + std::vector<rgw_bucket> added_dests; + + return hint_index_mgr->update_hints(dpp, bucket_info, + added_dests, + removed_dests, + added_sources, + removed_sources, + y); +} + +int RGWSI_Bucket_Sync_SObj::handle_bi_update(const DoutPrefixProvider *dpp, + RGWBucketInfo& bucket_info, + RGWBucketInfo *orig_bucket_info, + optional_yield y) +{ + std::set<rgw_bucket> orig_sources; + std::set<rgw_bucket> orig_dests; + + if (orig_bucket_info && + orig_bucket_info->sync_policy) { + orig_bucket_info->sync_policy->get_potential_related_buckets(bucket_info.bucket, + &orig_sources, + &orig_dests); + } + + std::set<rgw_bucket> sources; + std::set<rgw_bucket> dests; + if (bucket_info.sync_policy) { + bucket_info.sync_policy->get_potential_related_buckets(bucket_info.bucket, + &sources, + &dests); + } + + std::vector<rgw_bucket> removed_sources; + std::vector<rgw_bucket> added_sources; + bool found = diff_sets(orig_sources, sources, &added_sources, &removed_sources); + ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": orig_sources=" << orig_sources << " new_sources=" << sources << dendl; + ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": potential sources added=" << added_sources << " removed=" << removed_sources << dendl; + + std::vector<rgw_bucket> removed_dests; + std::vector<rgw_bucket> added_dests; + found = found || diff_sets(orig_dests, dests, &added_dests, &removed_dests); + + ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": orig_dests=" << orig_dests << " new_dests=" << dests << dendl; + ldpp_dout(dpp, 20) << __func__ << "(): bucket=" << bucket_info.bucket << ": potential dests added=" << added_dests << " removed=" << removed_dests << dendl; + + if (!found) { + return 0; + } + + return hint_index_mgr->update_hints(dpp, bucket_info, + dests, /* set all dests, not just the ones that were added */ + removed_dests, + sources, /* set all sources, not just that the ones that were added */ + removed_sources, + y); +} + +int RGWSI_Bucket_Sync_SObj::get_bucket_sync_hints(const DoutPrefixProvider *dpp, + const rgw_bucket& bucket, + std::set<rgw_bucket> *sources, + std::set<rgw_bucket> *dests, + optional_yield y) +{ + if (!sources && !dests) { + return 0; + } + + if (sources) { + RGWSI_BS_SObj_HintIndexObj index(svc.sysobj, + hint_index_mgr->get_sources_obj(bucket)); + int r = index.read(dpp, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to update sources index for bucket=" << bucket << " r=" << r << dendl; + return r; + } + + index.get_entities(bucket, sources); + + if (!bucket.bucket_id.empty()) { + rgw_bucket b = bucket; + b.bucket_id.clear(); + index.get_entities(b, sources); + } + } + + if (dests) { + RGWSI_BS_SObj_HintIndexObj index(svc.sysobj, + hint_index_mgr->get_dests_obj(bucket)); + int r = index.read(dpp, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to read targets index for bucket=" << bucket << " r=" << r << dendl; + return r; + } + + index.get_entities(bucket, dests); + + if (!bucket.bucket_id.empty()) { + rgw_bucket b = bucket; + b.bucket_id.clear(); + index.get_entities(b, dests); + } + } + + return 0; +} diff --git a/src/rgw/services/svc_bucket_sync_sobj.h b/src/rgw/services/svc_bucket_sync_sobj.h new file mode 100644 index 000000000..60786665d --- /dev/null +++ b/src/rgw/services/svc_bucket_sync_sobj.h @@ -0,0 +1,123 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_meta_be.h" +#include "svc_bucket_sync.h" + +class RGWSI_Zone; +class RGWSI_SysObj_Cache; +class RGWSI_Bucket_SObj; + +template <class T> +class RGWChainedCacheImpl; + +class RGWSI_Bucket_Sync_SObj_HintIndexManager; + +struct rgw_sync_bucket_entity; + +class RGWSI_Bucket_Sync_SObj : public RGWSI_Bucket_Sync +{ + struct bucket_sync_policy_cache_entry { + std::shared_ptr<RGWBucketSyncPolicyHandler> handler; + }; + + unique_ptr<RGWChainedCacheImpl<bucket_sync_policy_cache_entry> > sync_policy_cache; + + std::unique_ptr<RGWSI_Bucket_Sync_SObj_HintIndexManager> hint_index_mgr; + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + + struct optional_zone_bucket { + optional<rgw_zone_id> zone; + optional<rgw_bucket> bucket; + + optional_zone_bucket(const optional<rgw_zone_id>& _zone, + const optional<rgw_bucket>& _bucket) : zone(_zone), bucket(_bucket) {} + + bool operator<(const optional_zone_bucket& ozb) const { + if (zone < ozb.zone) { + return true; + } + if (zone > ozb.zone) { + return false; + } + return bucket < ozb.bucket; + } + }; + + void get_hint_entities(RGWSI_Bucket_X_Ctx& ctx, + const std::set<rgw_zone_id>& zone_names, + const std::set<rgw_bucket>& buckets, + std::set<rgw_sync_bucket_entity> *hint_entities, + optional_yield y, const DoutPrefixProvider *); + int resolve_policy_hints(RGWSI_Bucket_X_Ctx& ctx, + rgw_sync_bucket_entity& self_entity, + RGWBucketSyncPolicyHandlerRef& handler, + RGWBucketSyncPolicyHandlerRef& zone_policy_handler, + std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map, + optional_yield y, + const DoutPrefixProvider *dpp); + int do_get_policy_handler(RGWSI_Bucket_X_Ctx& ctx, + std::optional<rgw_zone_id> zone, + std::optional<rgw_bucket> _bucket, + std::map<optional_zone_bucket, RGWBucketSyncPolicyHandlerRef>& temp_map, + RGWBucketSyncPolicyHandlerRef *handler, + optional_yield y, + const DoutPrefixProvider *dpp); +public: + struct Svc { + RGWSI_Zone *zone{nullptr}; + RGWSI_SysObj *sysobj{nullptr}; + RGWSI_SysObj_Cache *cache{nullptr}; + RGWSI_Bucket_SObj *bucket_sobj{nullptr}; + } svc; + + RGWSI_Bucket_Sync_SObj(CephContext *cct); + ~RGWSI_Bucket_Sync_SObj(); + + void init(RGWSI_Zone *_zone_svc, + RGWSI_SysObj *_sysobj_svc, + RGWSI_SysObj_Cache *_cache_svc, + RGWSI_Bucket_SObj *_bucket_sobj_svc); + + + int get_policy_handler(RGWSI_Bucket_X_Ctx& ctx, + std::optional<rgw_zone_id> zone, + std::optional<rgw_bucket> bucket, + RGWBucketSyncPolicyHandlerRef *handler, + optional_yield y, + const DoutPrefixProvider *dpp); + + int handle_bi_update(const DoutPrefixProvider *dpp, + RGWBucketInfo& bucket_info, + RGWBucketInfo *orig_bucket_info, + optional_yield y) override; + int handle_bi_removal(const DoutPrefixProvider *dpp, + const RGWBucketInfo& bucket_info, + optional_yield y) override; + + int get_bucket_sync_hints(const DoutPrefixProvider *dpp, + const rgw_bucket& bucket, + std::set<rgw_bucket> *sources, + std::set<rgw_bucket> *dests, + optional_yield y) override; +}; + diff --git a/src/rgw/services/svc_bucket_types.h b/src/rgw/services/svc_bucket_types.h new file mode 100644 index 000000000..30e5309d5 --- /dev/null +++ b/src/rgw/services/svc_bucket_types.h @@ -0,0 +1,38 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "common/ptr_wrapper.h" + +#include "svc_meta_be.h" +#include "svc_meta_be_types.h" + +class RGWSI_MetaBackend_Handler; + +using RGWSI_Bucket_BE_Handler = ptr_wrapper<RGWSI_MetaBackend_Handler, RGWSI_META_BE_TYPES::BUCKET>; +using RGWSI_BucketInstance_BE_Handler = ptr_wrapper<RGWSI_MetaBackend_Handler, RGWSI_META_BE_TYPES::BI>; + + +using RGWSI_Bucket_EP_Ctx = ptr_wrapper<RGWSI_MetaBackend::Context, RGWSI_META_BE_TYPES::BUCKET>; +using RGWSI_Bucket_BI_Ctx = ptr_wrapper<RGWSI_MetaBackend::Context, RGWSI_META_BE_TYPES::BI>; + +struct RGWSI_Bucket_X_Ctx { + RGWSI_Bucket_EP_Ctx ep; + RGWSI_Bucket_BI_Ctx bi; +}; + diff --git a/src/rgw/services/svc_cls.cc b/src/rgw/services/svc_cls.cc new file mode 100644 index 000000000..d2aaa6d88 --- /dev/null +++ b/src/rgw/services/svc_cls.cc @@ -0,0 +1,476 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#include "svc_cls.h" +#include "svc_rados.h" +#include "svc_zone.h" + +#include "rgw/rgw_zone.h" + +#include "cls/otp/cls_otp_client.h" +#include "cls/log/cls_log_client.h" +#include "cls/lock/cls_lock_client.h" + + +#define dout_subsys ceph_subsys_rgw + +static string log_lock_name = "rgw_log_lock"; + +int RGWSI_Cls::do_start(optional_yield y, const DoutPrefixProvider *dpp) +{ + int r = mfa.do_start(y, dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to start mfa service" << dendl; + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj) +{ + string oid = get_mfa_oid(user); + rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid); + + obj->emplace(rados_svc->obj(o)); + int r = (*obj)->open(dpp); + if (r < 0) { + ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl; + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref) +{ + std::optional<RGWSI_RADOS::Obj> obj; + int r = get_mfa_obj(dpp, user, &obj); + if (r < 0) { + return r; + } + *ref = obj->get_ref(); + return 0; +} + +int RGWSI_Cls::MFA::check_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& otp_id, const string& pin, optional_yield y) +{ + rgw_rados_ref ref; + int r = get_mfa_ref(dpp, user, &ref); + if (r < 0) { + return r; + } + + rados::cls::otp::otp_check_t result; + + r = rados::cls::otp::OTP::check(cct, ref.pool.ioctx(), ref.obj.oid, otp_id, pin, &result); + if (r < 0) + return r; + + ldpp_dout(dpp, 20) << "OTP check, otp_id=" << otp_id << " result=" << (int)result.result << dendl; + + return (result.result == rados::cls::otp::OTP_CHECK_SUCCESS ? 0 : -EACCES); +} + +void RGWSI_Cls::MFA::prepare_mfa_write(librados::ObjectWriteOperation *op, + RGWObjVersionTracker *objv_tracker, + const ceph::real_time& mtime) +{ + RGWObjVersionTracker ot; + + if (objv_tracker) { + ot = *objv_tracker; + } + + if (ot.write_version.tag.empty()) { + if (ot.read_version.tag.empty()) { + ot.generate_new_write_ver(cct); + } else { + ot.write_version = ot.read_version; + ot.write_version.ver++; + } + } + + ot.prepare_op_for_write(op); + struct timespec mtime_ts = real_clock::to_timespec(mtime); + op->mtime2(&mtime_ts); +} + +int RGWSI_Cls::MFA::create_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const rados::cls::otp::otp_info_t& config, + RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime, optional_yield y) +{ + std::optional<RGWSI_RADOS::Obj> obj; + int r = get_mfa_obj(dpp, user, &obj); + if (r < 0) { + return r; + } + + librados::ObjectWriteOperation op; + prepare_mfa_write(&op, objv_tracker, mtime); + rados::cls::otp::OTP::create(&op, config); + r = obj->operate(dpp, &op, y); + if (r < 0) { + ldpp_dout(dpp, 20) << "OTP create, otp_id=" << config.id << " result=" << (int)r << dendl; + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::remove_mfa(const DoutPrefixProvider *dpp, + const rgw_user& user, const string& id, + RGWObjVersionTracker *objv_tracker, + const ceph::real_time& mtime, + optional_yield y) +{ + std::optional<RGWSI_RADOS::Obj> obj; + int r = get_mfa_obj(dpp, user, &obj); + if (r < 0) { + return r; + } + + librados::ObjectWriteOperation op; + prepare_mfa_write(&op, objv_tracker, mtime); + rados::cls::otp::OTP::remove(&op, id); + r = obj->operate(dpp, &op, y); + if (r < 0) { + ldpp_dout(dpp, 20) << "OTP remove, otp_id=" << id << " result=" << (int)r << dendl; + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::get_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result, + optional_yield y) +{ + rgw_rados_ref ref; + + int r = get_mfa_ref(dpp, user, &ref); + if (r < 0) { + return r; + } + + r = rados::cls::otp::OTP::get(nullptr, ref.pool.ioctx(), ref.obj.oid, id, result); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, list<rados::cls::otp::otp_info_t> *result, + optional_yield y) +{ + rgw_rados_ref ref; + + int r = get_mfa_ref(dpp, user, &ref); + if (r < 0) { + return r; + } + + r = rados::cls::otp::OTP::get_all(nullptr, ref.pool.ioctx(), ref.obj.oid, result); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::otp_get_current_time(const DoutPrefixProvider *dpp, const rgw_user& user, ceph::real_time *result, + optional_yield y) +{ + rgw_rados_ref ref; + + int r = get_mfa_ref(dpp, user, &ref); + if (r < 0) { + return r; + } + + r = rados::cls::otp::OTP::get_current_time(ref.pool.ioctx(), ref.obj.oid, result); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::set_mfa(const DoutPrefixProvider *dpp, const string& oid, const list<rados::cls::otp::otp_info_t>& entries, + bool reset_obj, RGWObjVersionTracker *objv_tracker, + const real_time& mtime, + optional_yield y) +{ + rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid); + auto obj = rados_svc->obj(o); + int r = obj.open(dpp); + if (r < 0) { + ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl; + return r; + } + librados::ObjectWriteOperation op; + if (reset_obj) { + op.remove(); + op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK); + op.create(false); + } + prepare_mfa_write(&op, objv_tracker, mtime); + rados::cls::otp::OTP::set(&op, entries); + r = obj.operate(dpp, &op, y); + if (r < 0) { + ldpp_dout(dpp, 20) << "OTP set entries.size()=" << entries.size() << " result=" << (int)r << dendl; + return r; + } + + return 0; +} + +int RGWSI_Cls::MFA::list_mfa(const DoutPrefixProvider *dpp, const string& oid, list<rados::cls::otp::otp_info_t> *result, + RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime, + optional_yield y) +{ + rgw_raw_obj o(zone_svc->get_zone_params().otp_pool, oid); + auto obj = rados_svc->obj(o); + int r = obj.open(dpp); + if (r < 0) { + ldpp_dout(dpp, 4) << "failed to open rados context for " << o << dendl; + return r; + } + auto& ref = obj.get_ref(); + librados::ObjectReadOperation op; + struct timespec mtime_ts; + if (pmtime) { + op.stat2(nullptr, &mtime_ts, nullptr); + } + objv_tracker->prepare_op_for_read(&op); + r = rados::cls::otp::OTP::get_all(&op, ref.pool.ioctx(), ref.obj.oid, result); + if (r < 0) { + return r; + } + if (pmtime) { + *pmtime = ceph::real_clock::from_timespec(mtime_ts); + } + + return 0; +} + +void RGWSI_Cls::TimeLog::prepare_entry(cls_log_entry& entry, + const real_time& ut, + const string& section, + const string& key, + bufferlist& bl) +{ + cls_log_add_prepare_entry(entry, utime_t(ut), section, key, bl); +} + +int RGWSI_Cls::TimeLog::init_obj(const DoutPrefixProvider *dpp, const string& oid, RGWSI_RADOS::Obj& obj) +{ + rgw_raw_obj o(zone_svc->get_zone_params().log_pool, oid); + obj = rados_svc->obj(o); + return obj.open(dpp); + +} +int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp, + const string& oid, + const real_time& ut, + const string& section, + const string& key, + bufferlist& bl, + optional_yield y) +{ + RGWSI_RADOS::Obj obj; + + int r = init_obj(dpp, oid, obj); + if (r < 0) { + return r; + } + + librados::ObjectWriteOperation op; + utime_t t(ut); + cls_log_add(op, t, section, key, bl); + + return obj.operate(dpp, &op, y); +} + +int RGWSI_Cls::TimeLog::add(const DoutPrefixProvider *dpp, + const string& oid, + std::list<cls_log_entry>& entries, + librados::AioCompletion *completion, + bool monotonic_inc, + optional_yield y) +{ + RGWSI_RADOS::Obj obj; + + int r = init_obj(dpp, oid, obj); + if (r < 0) { + return r; + } + + librados::ObjectWriteOperation op; + cls_log_add(op, entries, monotonic_inc); + + if (!completion) { + r = obj.operate(dpp, &op, y); + } else { + r = obj.aio_operate(completion, &op); + } + return r; +} + +int RGWSI_Cls::TimeLog::list(const DoutPrefixProvider *dpp, + const string& oid, + const real_time& start_time, + const real_time& end_time, + int max_entries, std::list<cls_log_entry>& entries, + const string& marker, + string *out_marker, + bool *truncated, + optional_yield y) +{ + RGWSI_RADOS::Obj obj; + + int r = init_obj(dpp, oid, obj); + if (r < 0) { + return r; + } + + librados::ObjectReadOperation op; + + utime_t st(start_time); + utime_t et(end_time); + + cls_log_list(op, st, et, marker, max_entries, entries, + out_marker, truncated); + + bufferlist obl; + + int ret = obj.operate(dpp, &op, &obl, y); + if (ret < 0) + return ret; + + return 0; +} + +int RGWSI_Cls::TimeLog::info(const DoutPrefixProvider *dpp, + const string& oid, + cls_log_header *header, + optional_yield y) +{ + RGWSI_RADOS::Obj obj; + + int r = init_obj(dpp, oid, obj); + if (r < 0) { + return r; + } + + librados::ObjectReadOperation op; + + cls_log_info(op, header); + + bufferlist obl; + + int ret = obj.operate(dpp, &op, &obl, y); + if (ret < 0) + return ret; + + return 0; +} + +int RGWSI_Cls::TimeLog::info_async(const DoutPrefixProvider *dpp, + RGWSI_RADOS::Obj& obj, + const string& oid, + cls_log_header *header, + librados::AioCompletion *completion) +{ + int r = init_obj(dpp, oid, obj); + if (r < 0) { + return r; + } + + librados::ObjectReadOperation op; + + cls_log_info(op, header); + + int ret = obj.aio_operate(completion, &op, nullptr); + if (ret < 0) + return ret; + + return 0; +} + +int RGWSI_Cls::TimeLog::trim(const DoutPrefixProvider *dpp, + const string& oid, + const real_time& start_time, + const real_time& end_time, + const string& from_marker, + const string& to_marker, + librados::AioCompletion *completion, + optional_yield y) +{ + RGWSI_RADOS::Obj obj; + + int r = init_obj(dpp, oid, obj); + if (r < 0) { + return r; + } + + utime_t st(start_time); + utime_t et(end_time); + + librados::ObjectWriteOperation op; + cls_log_trim(op, st, et, from_marker, to_marker); + + if (!completion) { + r = obj.operate(dpp, &op, y); + } else { + r = obj.aio_operate(completion, &op); + } + return r; +} + +int RGWSI_Cls::Lock::lock_exclusive(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const string& oid, + timespan& duration, + string& zone_id, + string& owner_id, + std::optional<string> lock_name) +{ + auto p = rados_svc->pool(pool); + int r = p.open(dpp); + if (r < 0) { + return r; + } + + uint64_t msec = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(); + utime_t ut(msec / 1000, msec % 1000); + + rados::cls::lock::Lock l(lock_name.value_or(log_lock_name)); + l.set_duration(ut); + l.set_cookie(owner_id); + l.set_tag(zone_id); + l.set_may_renew(true); + + return l.lock_exclusive(&p.ioctx(), oid); +} + +int RGWSI_Cls::Lock::unlock(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const string& oid, + string& zone_id, + string& owner_id, + std::optional<string> lock_name) +{ + auto p = rados_svc->pool(pool); + int r = p.open(dpp); + if (r < 0) { + return r; + } + + rados::cls::lock::Lock l(lock_name.value_or(log_lock_name)); + l.set_tag(zone_id); + l.set_cookie(owner_id); + + return l.unlock(&p.ioctx(), oid); +} + diff --git a/src/rgw/services/svc_cls.h b/src/rgw/services/svc_cls.h new file mode 100644 index 000000000..61487b2f9 --- /dev/null +++ b/src/rgw/services/svc_cls.h @@ -0,0 +1,166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "cls/otp/cls_otp_types.h" +#include "cls/log/cls_log_types.h" + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" + + +class RGWSI_Cls : public RGWServiceInstance +{ + RGWSI_Zone *zone_svc{nullptr}; + RGWSI_RADOS *rados_svc{nullptr}; + + class ClsSubService : public RGWServiceInstance { + friend class RGWSI_Cls; + + RGWSI_Cls *cls_svc{nullptr}; + RGWSI_Zone *zone_svc{nullptr}; + RGWSI_RADOS *rados_svc{nullptr}; + + void init(RGWSI_Cls *_cls_svc, RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) { + cls_svc = _cls_svc; + zone_svc = _cls_svc->zone_svc; + rados_svc = _cls_svc->rados_svc; + } + + public: + ClsSubService(CephContext *cct) : RGWServiceInstance(cct) {} + }; + +public: + class MFA : public ClsSubService { + int get_mfa_obj(const DoutPrefixProvider *dpp, const rgw_user& user, std::optional<RGWSI_RADOS::Obj> *obj); + int get_mfa_ref(const DoutPrefixProvider *dpp, const rgw_user& user, rgw_rados_ref *ref); + + void prepare_mfa_write(librados::ObjectWriteOperation *op, + RGWObjVersionTracker *objv_tracker, + const ceph::real_time& mtime); + + public: + MFA(CephContext *cct): ClsSubService(cct) {} + + string get_mfa_oid(const rgw_user& user) { + return string("user:") + user.to_str(); + } + + int check_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& otp_id, const string& pin, optional_yield y); + int create_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const rados::cls::otp::otp_info_t& config, + RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime, optional_yield y); + int remove_mfa(const DoutPrefixProvider *dpp, + const rgw_user& user, const string& id, + RGWObjVersionTracker *objv_tracker, + const ceph::real_time& mtime, + optional_yield y); + int get_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result, optional_yield y); + int list_mfa(const DoutPrefixProvider *dpp, const rgw_user& user, list<rados::cls::otp::otp_info_t> *result, optional_yield y); + int otp_get_current_time(const DoutPrefixProvider *dpp, const rgw_user& user, ceph::real_time *result, optional_yield y); + int set_mfa(const DoutPrefixProvider *dpp, const string& oid, const list<rados::cls::otp::otp_info_t>& entries, + bool reset_obj, RGWObjVersionTracker *objv_tracker, + const real_time& mtime, optional_yield y); + int list_mfa(const DoutPrefixProvider *dpp, const string& oid, list<rados::cls::otp::otp_info_t> *result, + RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime, optional_yield y); + } mfa; + + class TimeLog : public ClsSubService { + int init_obj(const DoutPrefixProvider *dpp, const string& oid, RGWSI_RADOS::Obj& obj); + public: + TimeLog(CephContext *cct): ClsSubService(cct) {} + + void prepare_entry(cls_log_entry& entry, + const real_time& ut, + const string& section, + const string& key, + bufferlist& bl); + int add(const DoutPrefixProvider *dpp, + const string& oid, + const real_time& ut, + const string& section, + const string& key, + bufferlist& bl, + optional_yield y); + int add(const DoutPrefixProvider *dpp, + const string& oid, + std::list<cls_log_entry>& entries, + librados::AioCompletion *completion, + bool monotonic_inc, + optional_yield y); + int list(const DoutPrefixProvider *dpp, + const string& oid, + const real_time& start_time, + const real_time& end_time, + int max_entries, list<cls_log_entry>& entries, + const string& marker, + string *out_marker, + bool *truncated, + optional_yield y); + int info(const DoutPrefixProvider *dpp, + const string& oid, + cls_log_header *header, + optional_yield y); + int info_async(const DoutPrefixProvider *dpp, + RGWSI_RADOS::Obj& obj, + const string& oid, + cls_log_header *header, + librados::AioCompletion *completion); + int trim(const DoutPrefixProvider *dpp, + const string& oid, + const real_time& start_time, + const real_time& end_time, + const string& from_marker, + const string& to_marker, + librados::AioCompletion *completion, + optional_yield y); + } timelog; + + class Lock : public ClsSubService { + int init_obj(const string& oid, RGWSI_RADOS::Obj& obj); + public: + Lock(CephContext *cct): ClsSubService(cct) {} + int lock_exclusive(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const string& oid, + timespan& duration, + string& zone_id, + string& owner_id, + std::optional<string> lock_name = std::nullopt); + int unlock(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const string& oid, + string& zone_id, + string& owner_id, + std::optional<string> lock_name = std::nullopt); + } lock; + + RGWSI_Cls(CephContext *cct): RGWServiceInstance(cct), mfa(cct), timelog(cct), lock(cct) {} + + void init(RGWSI_Zone *_zone_svc, RGWSI_RADOS *_rados_svc) { + rados_svc = _rados_svc; + zone_svc = _zone_svc; + + mfa.init(this, zone_svc, rados_svc); + timelog.init(this, zone_svc, rados_svc); + lock.init(this, zone_svc, rados_svc); + } + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; +}; + diff --git a/src/rgw/services/svc_config_key.h b/src/rgw/services/svc_config_key.h new file mode 100644 index 000000000..e008aea5f --- /dev/null +++ b/src/rgw/services/svc_config_key.h @@ -0,0 +1,31 @@ + + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +class RGWSI_ConfigKey : public RGWServiceInstance +{ +public: + RGWSI_ConfigKey(CephContext *cct) : RGWServiceInstance(cct) {} + virtual ~RGWSI_ConfigKey() {} + + virtual int get(const string& key, bool secure, bufferlist *result) = 0; +}; + diff --git a/src/rgw/services/svc_config_key_rados.cc b/src/rgw/services/svc_config_key_rados.cc new file mode 100644 index 000000000..9bb0344af --- /dev/null +++ b/src/rgw/services/svc_config_key_rados.cc @@ -0,0 +1,46 @@ + +#include "svc_rados.h" +#include "svc_config_key_rados.h" + +int RGWSI_ConfigKey_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + maybe_insecure_mon_conn = !svc.rados->check_secure_mon_conn(); + + return 0; +} + +void RGWSI_ConfigKey_RADOS::warn_if_insecure() +{ + if (!maybe_insecure_mon_conn || + warned_insecure.test_and_set()) { + return; + } + + string s = "rgw is configured to optionally allow insecure connections to the monitors (auth_supported, ms_mon_client_mode), ssl certificates stored at the monitor configuration could leak"; + + svc.rados->clog_warn(s); + + lderr(ctx()) << __func__ << "(): WARNING: " << s << dendl; +} + +int RGWSI_ConfigKey_RADOS::get(const string& key, bool secure, bufferlist *result) +{ + string cmd = + "{" + "\"prefix\": \"config-key get\", " + "\"key\": \"" + key + "\"" + "}"; + + bufferlist inbl; + auto handle = svc.rados->handle(); + int ret = handle.mon_command(cmd, inbl, result, nullptr); + if (ret < 0) { + return ret; + } + + if (secure) { + warn_if_insecure(); + } + + return 0; +} diff --git a/src/rgw/services/svc_config_key_rados.h b/src/rgw/services/svc_config_key_rados.h new file mode 100644 index 000000000..e0de60cac --- /dev/null +++ b/src/rgw/services/svc_config_key_rados.h @@ -0,0 +1,52 @@ + + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include <atomic> + +#include "rgw/rgw_service.h" + +#include "svc_config_key.h" + +class RGWSI_RADOS; + +class RGWSI_ConfigKey_RADOS : public RGWSI_ConfigKey +{ + bool maybe_insecure_mon_conn{false}; + std::atomic_flag warned_insecure = ATOMIC_FLAG_INIT; + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + + void warn_if_insecure(); + +public: + struct Svc { + RGWSI_RADOS *rados{nullptr}; + } svc; + + void init(RGWSI_RADOS *rados_svc) { + svc.rados = rados_svc; + } + + RGWSI_ConfigKey_RADOS(CephContext *cct) : RGWSI_ConfigKey(cct) {} + + int get(const string& key, bool secure, bufferlist *result) override; +}; + + diff --git a/src/rgw/services/svc_finisher.cc b/src/rgw/services/svc_finisher.cc new file mode 100644 index 000000000..83e1b1514 --- /dev/null +++ b/src/rgw/services/svc_finisher.cc @@ -0,0 +1,56 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/Finisher.h" + +#include "svc_finisher.h" + +int RGWSI_Finisher::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + finisher = new Finisher(cct); + finisher->start(); + + return 0; +} + +void RGWSI_Finisher::shutdown() +{ + if (finalized) { + return; + } + + if (finisher) { + finisher->stop(); + + map<int, ShutdownCB *> cbs; + cbs.swap(shutdown_cbs); /* move cbs out, in case caller unregisetrs */ + for (auto& iter : cbs) { + iter.second->call(); + } + delete finisher; + } + + finalized = true; +} + +RGWSI_Finisher::~RGWSI_Finisher() +{ + shutdown(); +} + +void RGWSI_Finisher::register_caller(ShutdownCB *cb, int *phandle) +{ + *phandle = ++handles_counter; + shutdown_cbs[*phandle] = cb; +} + +void RGWSI_Finisher::unregister_caller(int handle) +{ + shutdown_cbs.erase(handle); +} + +void RGWSI_Finisher::schedule_context(Context *c) +{ + finisher->queue(c); +} + diff --git a/src/rgw/services/svc_finisher.h b/src/rgw/services/svc_finisher.h new file mode 100644 index 000000000..2f3ae5223 --- /dev/null +++ b/src/rgw/services/svc_finisher.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + +class Context; +class Finisher; + +class RGWSI_Finisher : public RGWServiceInstance +{ + friend struct RGWServices_Def; +public: + class ShutdownCB; + +private: + Finisher *finisher{nullptr}; + bool finalized{false}; + + void shutdown() override; + + std::map<int, ShutdownCB *> shutdown_cbs; + std::atomic<int> handles_counter{0}; + +protected: + void init() {} + int do_start(optional_yield y, const DoutPrefixProvider *dpp) override; + +public: + RGWSI_Finisher(CephContext *cct): RGWServiceInstance(cct) {} + ~RGWSI_Finisher(); + + class ShutdownCB { + public: + virtual ~ShutdownCB() {} + virtual void call() = 0; + }; + + void register_caller(ShutdownCB *cb, int *phandle); + void unregister_caller(int handle); + + void schedule_context(Context *c); +}; diff --git a/src/rgw/services/svc_mdlog.cc b/src/rgw/services/svc_mdlog.cc new file mode 100644 index 000000000..f93c44d68 --- /dev/null +++ b/src/rgw/services/svc_mdlog.cc @@ -0,0 +1,411 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_mdlog.h" +#include "svc_rados.h" +#include "svc_zone.h" +#include "svc_sys_obj.h" + +#include "rgw/rgw_tools.h" +#include "rgw/rgw_mdlog.h" +#include "rgw/rgw_coroutine.h" +#include "rgw/rgw_cr_rados.h" +#include "rgw/rgw_zone.h" + +#include "common/errno.h" + +#include <boost/asio/yield.hpp> + +#define dout_subsys ceph_subsys_rgw + +using Svc = RGWSI_MDLog::Svc; +using Cursor = RGWPeriodHistory::Cursor; + +RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance(cct), run_sync(_run_sync) { +} + +RGWSI_MDLog::~RGWSI_MDLog() { +} + +int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc) +{ + svc.zone = _zone_svc; + svc.sysobj = _sysobj_svc; + svc.mdlog = this; + svc.rados = _rados_svc; + svc.cls = _cls_svc; + + return 0; +} + +int RGWSI_MDLog::do_start(optional_yield y, const DoutPrefixProvider *dpp) +{ + auto& current_period = svc.zone->get_current_period(); + + current_log = get_log(current_period.get_id()); + + period_puller.reset(new RGWPeriodPuller(svc.zone, svc.sysobj)); + period_history.reset(new RGWPeriodHistory(cct, period_puller.get(), + current_period)); + + if (run_sync && + svc.zone->need_to_sync()) { + // initialize the log period history + svc.mdlog->init_oldest_log_period(y, dpp); + } + return 0; +} + +int RGWSI_MDLog::read_history(RGWMetadataLogHistory *state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) const +{ + auto obj_ctx = svc.sysobj->init_obj_ctx(); + auto& pool = svc.zone->get_zone_params().log_pool; + const auto& oid = RGWMetadataLogHistory::oid; + bufferlist bl; + int ret = rgw_get_system_obj(obj_ctx, pool, oid, bl, objv_tracker, nullptr, y, dpp); + if (ret < 0) { + return ret; + } + if (bl.length() == 0) { + /* bad history object, remove it */ + rgw_raw_obj obj(pool, oid); + auto sysobj = obj_ctx.get_obj(obj); + ret = sysobj.wop().remove(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl; + return ret; + } + return -ENOENT; + } + try { + auto p = bl.cbegin(); + state->decode(p); + } catch (buffer::error& e) { + ldpp_dout(dpp, 1) << "failed to decode the mdlog history: " + << e.what() << dendl; + return -EIO; + } + return 0; +} + +int RGWSI_MDLog::write_history(const DoutPrefixProvider *dpp, + const RGWMetadataLogHistory& state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, bool exclusive) +{ + bufferlist bl; + state.encode(bl); + + auto& pool = svc.zone->get_zone_params().log_pool; + const auto& oid = RGWMetadataLogHistory::oid; + auto obj_ctx = svc.sysobj->init_obj_ctx(); + return rgw_put_system_obj(dpp, obj_ctx, pool, oid, bl, + exclusive, objv_tracker, real_time{}, y); +} + +namespace mdlog { + +using Cursor = RGWPeriodHistory::Cursor; + +/// read the mdlog history and use it to initialize the given cursor +class ReadHistoryCR : public RGWCoroutine { + const DoutPrefixProvider *dpp; + Svc svc; + Cursor *cursor; + RGWObjVersionTracker *objv_tracker; + RGWMetadataLogHistory state; + RGWAsyncRadosProcessor *async_processor; + + public: + ReadHistoryCR(const DoutPrefixProvider *dpp, + const Svc& svc, + Cursor *cursor, + RGWObjVersionTracker *objv_tracker) + : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), + cursor(cursor), + objv_tracker(objv_tracker), + async_processor(svc.rados->get_async_processor()) + {} + + int operate(const DoutPrefixProvider *dpp) { + reenter(this) { + yield { + rgw_raw_obj obj{svc.zone->get_zone_params().log_pool, + RGWMetadataLogHistory::oid}; + constexpr bool empty_on_enoent = false; + + using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>; + call(new ReadCR(dpp, async_processor, svc.sysobj, obj, + &state, empty_on_enoent, objv_tracker)); + } + if (retcode < 0) { + ldpp_dout(dpp, 1) << "failed to read mdlog history: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + *cursor = svc.mdlog->period_history->lookup(state.oldest_realm_epoch); + if (!*cursor) { + return set_cr_error(cursor->get_error()); + } + + ldpp_dout(dpp, 10) << "read mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + return set_cr_done(); + } + return 0; + } +}; + +/// write the given cursor to the mdlog history +class WriteHistoryCR : public RGWCoroutine { + const DoutPrefixProvider *dpp; + Svc svc; + Cursor cursor; + RGWObjVersionTracker *objv; + RGWMetadataLogHistory state; + RGWAsyncRadosProcessor *async_processor; + + public: + WriteHistoryCR(const DoutPrefixProvider *dpp, + Svc& svc, + const Cursor& cursor, + RGWObjVersionTracker *objv) + : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), + cursor(cursor), objv(objv), + async_processor(svc.rados->get_async_processor()) + {} + + int operate(const DoutPrefixProvider *dpp) { + reenter(this) { + state.oldest_period_id = cursor.get_period().get_id(); + state.oldest_realm_epoch = cursor.get_epoch(); + + yield { + rgw_raw_obj obj{svc.zone->get_zone_params().log_pool, + RGWMetadataLogHistory::oid}; + + using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>; + call(new WriteCR(dpp, async_processor, svc.sysobj, obj, state, objv)); + } + if (retcode < 0) { + ldpp_dout(dpp, 1) << "failed to write mdlog history: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + ldpp_dout(dpp, 10) << "wrote mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + return set_cr_done(); + } + return 0; + } +}; + +/// update the mdlog history to reflect trimmed logs +class TrimHistoryCR : public RGWCoroutine { + const DoutPrefixProvider *dpp; + Svc svc; + const Cursor cursor; //< cursor to trimmed period + RGWObjVersionTracker *objv; //< to prevent racing updates + Cursor next; //< target cursor for oldest log period + Cursor existing; //< existing cursor read from disk + + public: + TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv) + : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), + cursor(cursor), objv(objv), next(cursor) { + next.next(); // advance past cursor + } + + int operate(const DoutPrefixProvider *dpp) { + reenter(this) { + // read an existing history, and write the new history if it's newer + yield call(new ReadHistoryCR(dpp, svc, &existing, objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + // reject older trims with ECANCELED + if (cursor.get_epoch() < existing.get_epoch()) { + ldpp_dout(dpp, 4) << "found oldest log epoch=" << existing.get_epoch() + << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl; + return set_cr_error(-ECANCELED); + } + // overwrite with updated history + yield call(new WriteHistoryCR(dpp, svc, next, objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + +} // mdlog namespace + +// traverse all the way back to the beginning of the period history, and +// return a cursor to the first period in a fully attached history +Cursor RGWSI_MDLog::find_oldest_period(const DoutPrefixProvider *dpp, optional_yield y) +{ + auto cursor = period_history->get_current(); + + while (cursor) { + // advance to the period's predecessor + if (!cursor.has_prev()) { + auto& predecessor = cursor.get_period().get_predecessor(); + if (predecessor.empty()) { + // this is the first period, so our logs must start here + ldpp_dout(dpp, 10) << "find_oldest_period returning first " + "period " << cursor.get_period().get_id() << dendl; + return cursor; + } + // pull the predecessor and add it to our history + RGWPeriod period; + int r = period_puller->pull(dpp, predecessor, period, y); + if (r < 0) { + return cursor; + } + auto prev = period_history->insert(std::move(period)); + if (!prev) { + return prev; + } + ldpp_dout(dpp, 20) << "find_oldest_period advancing to " + "predecessor period " << predecessor << dendl; + ceph_assert(cursor.has_prev()); + } + cursor.prev(); + } + ldpp_dout(dpp, 10) << "find_oldest_period returning empty cursor" << dendl; + return cursor; +} + +Cursor RGWSI_MDLog::init_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) +{ + // read the mdlog history + RGWMetadataLogHistory state; + RGWObjVersionTracker objv; + int ret = read_history(&state, &objv, y, dpp); + + if (ret == -ENOENT) { + // initialize the mdlog history and write it + ldpp_dout(dpp, 10) << "initializing mdlog history" << dendl; + auto cursor = find_oldest_period(dpp, y); + if (!cursor) { + return cursor; + } + // write the initial history + state.oldest_realm_epoch = cursor.get_epoch(); + state.oldest_period_id = cursor.get_period().get_id(); + + constexpr bool exclusive = true; // don't overwrite + int ret = write_history(dpp, state, &objv, y, exclusive); + if (ret < 0 && ret != -EEXIST) { + ldpp_dout(dpp, 1) << "failed to write mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + return cursor; + } else if (ret < 0) { + ldpp_dout(dpp, 1) << "failed to read mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + + // if it's already in the history, return it + auto cursor = period_history->lookup(state.oldest_realm_epoch); + if (cursor) { + return cursor; + } else { + cursor = find_oldest_period(dpp, y); + state.oldest_realm_epoch = cursor.get_epoch(); + state.oldest_period_id = cursor.get_period().get_id(); + ldpp_dout(dpp, 10) << "rewriting mdlog history" << dendl; + ret = write_history(dpp, state, &objv, y); + if (ret < 0 && ret != -ECANCELED) { + ldpp_dout(dpp, 1) << "failed to write mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + return cursor; + } + + // pull the oldest period by id + RGWPeriod period; + ret = period_puller->pull(dpp, state.oldest_period_id, period, y); + if (ret < 0) { + ldpp_dout(dpp, 1) << "failed to read period id=" << state.oldest_period_id + << " for mdlog history: " << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + // verify its realm_epoch + if (period.get_realm_epoch() != state.oldest_realm_epoch) { + ldpp_dout(dpp, 1) << "inconsistent mdlog history: read period id=" + << period.get_id() << " with realm_epoch=" << period.get_realm_epoch() + << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl; + return Cursor{-EINVAL}; + } + // attach the period to our history + return period_history->attach(dpp, std::move(period), y); +} + +Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) const +{ + RGWMetadataLogHistory state; + int ret = read_history(&state, nullptr, y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 1) << "failed to read mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + + ldpp_dout(dpp, 10) << "read mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + + return period_history->lookup(state.oldest_realm_epoch); +} + +RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(const DoutPrefixProvider *dpp, + Cursor *period, RGWObjVersionTracker *objv) const +{ + return new mdlog::ReadHistoryCR(dpp, svc, period, objv); +} + +RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(const DoutPrefixProvider *dpp, + Cursor period, RGWObjVersionTracker *objv) const +{ + return new mdlog::TrimHistoryCR(dpp, svc, period, objv); +} + +RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period) +{ + // construct the period's log in place if it doesn't exist + auto insert = md_logs.emplace(std::piecewise_construct, + std::forward_as_tuple(period), + std::forward_as_tuple(cct, svc.zone, svc.cls, period)); + return &insert.first->second; +} + +int RGWSI_MDLog::add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl) +{ + ceph_assert(current_log); // must have called init() + return current_log->add_entry(dpp, hash_key, section, key, bl); +} + +int RGWSI_MDLog::get_shard_id(const string& hash_key, int *shard_id) +{ + ceph_assert(current_log); // must have called init() + return current_log->get_shard_id(hash_key, shard_id); +} + +int RGWSI_MDLog::pull_period(const DoutPrefixProvider *dpp, const std::string& period_id, RGWPeriod& period, + optional_yield y) +{ + return period_puller->pull(dpp, period_id, period, y); +} + diff --git a/src/rgw/services/svc_mdlog.h b/src/rgw/services/svc_mdlog.h new file mode 100644 index 000000000..57103efb4 --- /dev/null +++ b/src/rgw/services/svc_mdlog.h @@ -0,0 +1,118 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" +#include "rgw/rgw_period_history.h" +#include "rgw/rgw_period_puller.h" + +#include "svc_meta_be.h" + + +class RGWMetadataLog; +class RGWMetadataLogHistory; +class RGWCoroutine; + +class RGWSI_Zone; +class RGWSI_SysObj; +class RGWSI_RADOS; + +namespace mdlog { + class ReadHistoryCR; + class WriteHistoryCR; +} + +class RGWSI_MDLog : public RGWServiceInstance +{ + friend class mdlog::ReadHistoryCR; + friend class mdlog::WriteHistoryCR; + + // maintain a separate metadata log for each period + std::map<std::string, RGWMetadataLog> md_logs; + + // use the current period's log for mutating operations + RGWMetadataLog* current_log{nullptr}; + + bool run_sync; + + // pulls missing periods for period_history + std::unique_ptr<RGWPeriodPuller> period_puller; + // maintains a connected history of periods + std::unique_ptr<RGWPeriodHistory> period_history; + +public: + RGWSI_MDLog(CephContext *cct, bool run_sync); + virtual ~RGWSI_MDLog(); + + struct Svc { + RGWSI_RADOS *rados{nullptr}; + RGWSI_Zone *zone{nullptr}; + RGWSI_SysObj *sysobj{nullptr}; + RGWSI_MDLog *mdlog{nullptr}; + RGWSI_Cls *cls{nullptr}; + } svc; + + int init(RGWSI_RADOS *_rados_svc, + RGWSI_Zone *_zone_svc, + RGWSI_SysObj *_sysobj_svc, + RGWSI_Cls *_cls_svc); + + int do_start(optional_yield y, const DoutPrefixProvider *dpp) override; + + // traverse all the way back to the beginning of the period history, and + // return a cursor to the first period in a fully attached history + RGWPeriodHistory::Cursor find_oldest_period(const DoutPrefixProvider *dpp, optional_yield y); + + /// initialize the oldest log period if it doesn't exist, and attach it to + /// our current history + RGWPeriodHistory::Cursor init_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp); + + /// read the oldest log period, and return a cursor to it in our existing + /// period history + RGWPeriodHistory::Cursor read_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) const; + + /// read the oldest log period asynchronously and write its result to the + /// given cursor pointer + RGWCoroutine* read_oldest_log_period_cr(const DoutPrefixProvider *dpp, + RGWPeriodHistory::Cursor *period, + RGWObjVersionTracker *objv) const; + + /// try to advance the oldest log period when the given period is trimmed, + /// using a rados lock to provide atomicity + RGWCoroutine* trim_log_period_cr(const DoutPrefixProvider *dpp, + RGWPeriodHistory::Cursor period, + RGWObjVersionTracker *objv) const; + int read_history(RGWMetadataLogHistory *state, RGWObjVersionTracker *objv_tracker,optional_yield y, const DoutPrefixProvider *dpp) const; + int write_history(const DoutPrefixProvider *dpp, + const RGWMetadataLogHistory& state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, bool exclusive = false); + + int add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl); + + int get_shard_id(const string& hash_key, int *shard_id); + + RGWPeriodHistory *get_period_history() { + return period_history.get(); + } + + int pull_period(const DoutPrefixProvider *dpp, const std::string& period_id, RGWPeriod& period, optional_yield y); + + /// find or create the metadata log for the given period + RGWMetadataLog* get_log(const std::string& period); +}; + diff --git a/src/rgw/services/svc_meta.cc b/src/rgw/services/svc_meta.cc new file mode 100644 index 000000000..a4ca76420 --- /dev/null +++ b/src/rgw/services/svc_meta.cc @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#include "svc_meta.h" + +#include "rgw/rgw_metadata.h" + +#define dout_subsys ceph_subsys_rgw + + +RGWSI_Meta::RGWSI_Meta(CephContext *cct) : RGWServiceInstance(cct) { +} + +RGWSI_Meta::~RGWSI_Meta() {} + +void RGWSI_Meta::init(RGWSI_SysObj *_sysobj_svc, + RGWSI_MDLog *_mdlog_svc, + vector<RGWSI_MetaBackend *>& _be_svc) +{ + sysobj_svc = _sysobj_svc; + mdlog_svc = _mdlog_svc; + + for (auto& be : _be_svc) { + be_svc[be->get_type()] = be; + } +} + +int RGWSI_Meta::create_be_handler(RGWSI_MetaBackend::Type be_type, + RGWSI_MetaBackend_Handler **phandler) +{ + auto iter = be_svc.find(be_type); + if (iter == be_svc.end()) { + ldout(cct, 0) << __func__ << "(): ERROR: backend type not found" << dendl; + return -EINVAL; + } + + auto handler = iter->second->alloc_be_handler(); + + be_handlers.emplace_back(handler); + *phandler = handler; + + return 0; +} + diff --git a/src/rgw/services/svc_meta.h b/src/rgw/services/svc_meta.h new file mode 100644 index 000000000..ef3d6ebbd --- /dev/null +++ b/src/rgw/services/svc_meta.h @@ -0,0 +1,48 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "svc_meta_be.h" + +#include "rgw/rgw_service.h" + + +class RGWMetadataLog; +class RGWCoroutine; + + +class RGWSI_Meta : public RGWServiceInstance +{ + RGWSI_SysObj *sysobj_svc{nullptr}; + RGWSI_MDLog *mdlog_svc{nullptr}; + + map<RGWSI_MetaBackend::Type, RGWSI_MetaBackend *> be_svc; + + vector<unique_ptr<RGWSI_MetaBackend_Handler> > be_handlers; + +public: + RGWSI_Meta(CephContext *cct); + ~RGWSI_Meta(); + + void init(RGWSI_SysObj *_sysobj_svc, + RGWSI_MDLog *_mdlog_svc, + vector<RGWSI_MetaBackend *>& _be_svc); + + int create_be_handler(RGWSI_MetaBackend::Type be_type, + RGWSI_MetaBackend_Handler **phandler); +}; + diff --git a/src/rgw/services/svc_meta_be.cc b/src/rgw/services/svc_meta_be.cc new file mode 100644 index 000000000..0d4daffe2 --- /dev/null +++ b/src/rgw/services/svc_meta_be.cc @@ -0,0 +1,191 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#include "svc_meta_be.h" + +#include "rgw/rgw_mdlog.h" + +#define dout_subsys ceph_subsys_rgw + + +RGWSI_MetaBackend::Context::~Context() {} // needed, even though destructor is pure virtual +RGWSI_MetaBackend::Module::~Module() {} // ditto +RGWSI_MetaBackend::PutParams::~PutParams() {} // ... +RGWSI_MetaBackend::GetParams::~GetParams() {} // ... +RGWSI_MetaBackend::RemoveParams::~RemoveParams() {} // ... + +int RGWSI_MetaBackend::pre_modify(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type, + optional_yield y) +{ + /* if write version has not been set, and there's a read version, set it so that we can + * log it + */ + if (objv_tracker && + objv_tracker->read_version.ver && !objv_tracker->write_version.ver) { + objv_tracker->write_version = objv_tracker->read_version; + objv_tracker->write_version.ver++; + } + + return 0; +} + +int RGWSI_MetaBackend::post_modify(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, int ret, + optional_yield y) +{ + return ret; +} + +int RGWSI_MetaBackend::prepare_mutate(RGWSI_MetaBackend::Context *ctx, + const string& key, + const real_time& mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + real_time orig_mtime; + + int ret = call_with_get_params(&orig_mtime, [&](GetParams& params) { + return get_entry(ctx, key, params, objv_tracker, y, dpp); + }); + if (ret < 0 && ret != -ENOENT) { + return ret; + } + + if (objv_tracker->write_version.tag.empty()) { + if (objv_tracker->read_version.tag.empty()) { + objv_tracker->generate_new_write_ver(cct); + } else { + objv_tracker->write_version = objv_tracker->read_version; + objv_tracker->write_version.ver++; + } + } + return 0; +} + +int RGWSI_MetaBackend::do_mutate(RGWSI_MetaBackend::Context *ctx, + const string& key, + const ceph::real_time& mtime, + RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type, + optional_yield y, + std::function<int()> f, + bool generic_prepare, + const DoutPrefixProvider *dpp) +{ + int ret; + + if (generic_prepare) { + ret = prepare_mutate(ctx, key, mtime, objv_tracker, y, dpp); + if (ret < 0 || + ret == STATUS_NO_APPLY) { + return ret; + } + } + + RGWMetadataLogData log_data; + ret = pre_modify(dpp, ctx, key, log_data, objv_tracker, op_type, y); + if (ret < 0) { + return ret; + } + + ret = f(); + + /* cascading ret into post_modify() */ + + ret = post_modify(dpp, ctx, key, log_data, objv_tracker, ret, y); + if (ret < 0) + return ret; + + return 0; +} + +int RGWSI_MetaBackend::get(Context *ctx, + const string& key, + GetParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + return get_entry(ctx, key, params, objv_tracker, y, dpp); +} + +int RGWSI_MetaBackend::put(Context *ctx, + const string& key, + PutParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + std::function<int()> f = [&]() { + return put_entry(dpp, ctx, key, params, objv_tracker, y); + }; + + return do_mutate(ctx, key, params.mtime, objv_tracker, + MDLOG_STATUS_WRITE, + y, + f, + false, + dpp); +} + +int RGWSI_MetaBackend::remove(Context *ctx, + const string& key, + RemoveParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + std::function<int()> f = [&]() { + return remove_entry(dpp, ctx, key, params, objv_tracker, y); + }; + + return do_mutate(ctx, key, params.mtime, objv_tracker, + MDLOG_STATUS_REMOVE, + y, + f, + false, + dpp); +} + +int RGWSI_MetaBackend::mutate(Context *ctx, + const std::string& key, + MutateParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + std::function<int()> f, + const DoutPrefixProvider *dpp) +{ + return do_mutate(ctx, key, params.mtime, objv_tracker, + params.op_type, y, + f, + false, + dpp); +} + +int RGWSI_MetaBackend_Handler::call(std::optional<RGWSI_MetaBackend_CtxParams> bectx_params, + std::function<int(Op *)> f) +{ + return be->call(bectx_params, [&](RGWSI_MetaBackend::Context *ctx) { + ctx->init(this); + Op op(be, ctx); + return f(&op); + }); +} + +RGWSI_MetaBackend_Handler::Op_ManagedCtx::Op_ManagedCtx(RGWSI_MetaBackend_Handler *handler) : Op(handler->be, handler->be->alloc_ctx()) +{ + auto c = ctx(); + c->init(handler); + pctx.reset(c); +} + diff --git a/src/rgw/services/svc_meta_be.h b/src/rgw/services/svc_meta_be.h new file mode 100644 index 000000000..af749d497 --- /dev/null +++ b/src/rgw/services/svc_meta_be.h @@ -0,0 +1,291 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "svc_meta_be_params.h" + +#include "rgw/rgw_service.h" +#include "rgw/rgw_mdlog_types.h" + +class RGWMetadataLogData; + +class RGWSI_MDLog; +class RGWSI_Meta; +class RGWObjVersionTracker; +class RGWSI_MetaBackend_Handler; + +class RGWSI_MetaBackend : public RGWServiceInstance +{ + friend class RGWSI_Meta; +public: + class Module; + class Context; +protected: + RGWSI_MDLog *mdlog_svc{nullptr}; + + void base_init(RGWSI_MDLog *_mdlog_svc) { + mdlog_svc = _mdlog_svc; + } + + int prepare_mutate(RGWSI_MetaBackend::Context *ctx, + const std::string& key, + const ceph::real_time& mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + + virtual int do_mutate(Context *ctx, + const std::string& key, + const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type, + optional_yield y, + std::function<int()> f, + bool generic_prepare, + const DoutPrefixProvider *dpp); + + virtual int pre_modify(const DoutPrefixProvider *dpp, + Context *ctx, + const std::string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type, + optional_yield y); + virtual int post_modify(const DoutPrefixProvider *dpp, + Context *ctx, + const std::string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, int ret, + optional_yield y); +public: + class Module { + /* + * Backend specialization module + */ + public: + virtual ~Module() = 0; + }; + + using ModuleRef = std::shared_ptr<Module>; + + struct Context { /* + * A single metadata operation context. Will be holding info about + * backend and operation itself; operation might span multiple backend + * calls. + */ + virtual ~Context() = 0; + + virtual void init(RGWSI_MetaBackend_Handler *h) = 0; + }; + + virtual Context *alloc_ctx() = 0; + + struct PutParams { + ceph::real_time mtime; + + PutParams() {} + PutParams(const ceph::real_time& _mtime) : mtime(_mtime) {} + virtual ~PutParams() = 0; + }; + + struct GetParams { + GetParams() {} + GetParams(ceph::real_time *_pmtime) : pmtime(_pmtime) {} + virtual ~GetParams(); + + ceph::real_time *pmtime{nullptr}; + }; + + struct RemoveParams { + virtual ~RemoveParams() = 0; + + ceph::real_time mtime; + }; + + struct MutateParams { + ceph::real_time mtime; + RGWMDLogStatus op_type; + + MutateParams() {} + MutateParams(const ceph::real_time& _mtime, + RGWMDLogStatus _op_type) : mtime(_mtime), op_type(_op_type) {} + virtual ~MutateParams() {} + }; + + enum Type { + MDBE_SOBJ = 0, + MDBE_OTP = 1, + }; + + RGWSI_MetaBackend(CephContext *cct) : RGWServiceInstance(cct) {} + virtual ~RGWSI_MetaBackend() {} + + virtual Type get_type() = 0; + + virtual RGWSI_MetaBackend_Handler *alloc_be_handler() = 0; + virtual int call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)>) = 0; + + /* these should be implemented by backends */ + virtual int get_entry(RGWSI_MetaBackend::Context *ctx, + const std::string& key, + RGWSI_MetaBackend::GetParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + virtual int put_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const std::string& key, + RGWSI_MetaBackend::PutParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) = 0; + virtual int remove_entry(const DoutPrefixProvider *dpp, + Context *ctx, + const std::string& key, + RGWSI_MetaBackend::RemoveParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) = 0; + + virtual int list_init(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, const string& marker) = 0; + virtual int list_next(RGWSI_MetaBackend::Context *ctx, + int max, list<string> *keys, + bool *truncated) = 0; + virtual int list_get_marker(RGWSI_MetaBackend::Context *ctx, + string *marker) = 0; + + int call(std::function<int(RGWSI_MetaBackend::Context *)> f) { + return call(nullopt, f); + } + + virtual int call(std::optional<RGWSI_MetaBackend_CtxParams> opt, + std::function<int(RGWSI_MetaBackend::Context *)> f) = 0; + + virtual int get_shard_id(RGWSI_MetaBackend::Context *ctx, + const std::string& key, + int *shard_id) = 0; + + /* higher level */ + virtual int get(Context *ctx, + const std::string& key, + GetParams ¶ms, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + + virtual int put(Context *ctx, + const std::string& key, + PutParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + + virtual int remove(Context *ctx, + const std::string& key, + RemoveParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + + virtual int mutate(Context *ctx, + const std::string& key, + MutateParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + std::function<int()> f, + const DoutPrefixProvider *dpp); +}; + +class RGWSI_MetaBackend_Handler { + RGWSI_MetaBackend *be{nullptr}; + +public: + class Op { + friend class RGWSI_MetaBackend_Handler; + + RGWSI_MetaBackend *be; + RGWSI_MetaBackend::Context *be_ctx; + + Op(RGWSI_MetaBackend *_be, + RGWSI_MetaBackend::Context *_ctx) : be(_be), be_ctx(_ctx) {} + + public: + RGWSI_MetaBackend::Context *ctx() { + return be_ctx; + } + + int get(const std::string& key, + RGWSI_MetaBackend::GetParams ¶ms, + RGWObjVersionTracker *objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) { + return be->get(be_ctx, key, params, objv_tracker, y, dpp); + } + + int put(const std::string& key, + RGWSI_MetaBackend::PutParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) { + return be->put(be_ctx, key, params, objv_tracker, y, dpp); + } + + int remove(const std::string& key, + RGWSI_MetaBackend::RemoveParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) { + return be->remove(be_ctx, key, params, objv_tracker, y, dpp); + } + + int mutate(const std::string& key, + RGWSI_MetaBackend::MutateParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + std::function<int()> f, + const DoutPrefixProvider *dpp) { + return be->mutate(be_ctx, key, params, objv_tracker, y, f, dpp); + } + + int list_init(const DoutPrefixProvider *dpp, const string& marker) { + return be->list_init(dpp, be_ctx, marker); + } + int list_next(int max, list<string> *keys, + bool *truncated) { + return be->list_next(be_ctx, max, keys, truncated); + } + int list_get_marker(string *marker) { + return be->list_get_marker(be_ctx, marker); + } + + int get_shard_id(const std::string& key, int *shard_id) { + return be->get_shard_id(be_ctx, key, shard_id); + } + }; + + class Op_ManagedCtx : public Op { + std::unique_ptr<RGWSI_MetaBackend::Context> pctx; + public: + Op_ManagedCtx(RGWSI_MetaBackend_Handler *handler); + }; + + RGWSI_MetaBackend_Handler(RGWSI_MetaBackend *_be) : be(_be) {} + virtual ~RGWSI_MetaBackend_Handler() {} + + int call(std::function<int(Op *)> f) { + return call(nullopt, f); + } + + virtual int call(std::optional<RGWSI_MetaBackend_CtxParams> bectx_params, + std::function<int(Op *)> f); +}; + diff --git a/src/rgw/services/svc_meta_be_otp.cc b/src/rgw/services/svc_meta_be_otp.cc new file mode 100644 index 000000000..1800f8a3c --- /dev/null +++ b/src/rgw/services/svc_meta_be_otp.cc @@ -0,0 +1,71 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_meta_be_otp.h" + +#include "rgw/rgw_tools.h" +#include "rgw/rgw_metadata.h" +#include "rgw/rgw_mdlog.h" + +#define dout_subsys ceph_subsys_rgw + + +RGWSI_MetaBackend_OTP::RGWSI_MetaBackend_OTP(CephContext *cct) : RGWSI_MetaBackend_SObj(cct) { +} + +RGWSI_MetaBackend_OTP::~RGWSI_MetaBackend_OTP() { +} + +string RGWSI_MetaBackend_OTP::get_meta_key(const rgw_user& user) +{ + return string("otp:user:") + user.to_str(); +} + +RGWSI_MetaBackend_Handler *RGWSI_MetaBackend_OTP::alloc_be_handler() +{ + return new RGWSI_MetaBackend_Handler_OTP(this); +} + +RGWSI_MetaBackend::Context *RGWSI_MetaBackend_OTP::alloc_ctx() +{ + return new Context_OTP(sysobj_svc); +} + +int RGWSI_MetaBackend_OTP::call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb) +{ + otp_devices_list_t devices; + RGWSI_MBOTP_GetParams params; + params.pdevices = &devices; + params.pmtime = pmtime; + return cb(params); +} + +int RGWSI_MetaBackend_OTP::get_entry(RGWSI_MetaBackend::Context *_ctx, + const string& key, + RGWSI_MetaBackend::GetParams& _params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWSI_MBOTP_GetParams& params = static_cast<RGWSI_MBOTP_GetParams&>(_params); + + int r = cls_svc->mfa.list_mfa(dpp, key, params.pdevices, objv_tracker, params.pmtime, y); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_MetaBackend_OTP::put_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const string& key, + RGWSI_MetaBackend::PutParams& _params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_MBOTP_PutParams& params = static_cast<RGWSI_MBOTP_PutParams&>(_params); + + return cls_svc->mfa.set_mfa(dpp, key, params.devices, true, objv_tracker, params.mtime, y); +} + diff --git a/src/rgw/services/svc_meta_be_otp.h b/src/rgw/services/svc_meta_be_otp.h new file mode 100644 index 000000000..9da97b024 --- /dev/null +++ b/src/rgw/services/svc_meta_be_otp.h @@ -0,0 +1,90 @@ + + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_cls.h" +#include "svc_meta_be.h" +#include "svc_meta_be_sobj.h" +#include "svc_sys_obj.h" + + +using RGWSI_MBOTP_Handler_Module = RGWSI_MBSObj_Handler_Module; +using RGWSI_MetaBackend_Handler_OTP = RGWSI_MetaBackend_Handler_SObj; + +using otp_devices_list_t = list<rados::cls::otp::otp_info_t>; + +struct RGWSI_MBOTP_GetParams : public RGWSI_MetaBackend::GetParams { + otp_devices_list_t *pdevices{nullptr}; +}; + +struct RGWSI_MBOTP_PutParams : public RGWSI_MetaBackend::PutParams { + otp_devices_list_t devices; +}; + +using RGWSI_MBOTP_RemoveParams = RGWSI_MBSObj_RemoveParams; + +class RGWSI_MetaBackend_OTP : public RGWSI_MetaBackend_SObj +{ + RGWSI_Cls *cls_svc{nullptr}; + +public: + struct Context_OTP : public RGWSI_MetaBackend_SObj::Context_SObj { + otp_devices_list_t devices; + + Context_OTP(RGWSI_SysObj*_sysobj_svc) : RGWSI_MetaBackend_SObj::Context_SObj(_sysobj_svc, nullptr) {} + }; + + RGWSI_MetaBackend_OTP(CephContext *cct); + virtual ~RGWSI_MetaBackend_OTP(); + + RGWSI_MetaBackend::Type get_type() { + return MDBE_OTP; + } + + static string get_meta_key(const rgw_user& user); + + void init(RGWSI_SysObj *_sysobj_svc, + RGWSI_MDLog *_mdlog_svc, + RGWSI_Cls *_cls_svc) { + RGWSI_MetaBackend_SObj::init(_sysobj_svc, _mdlog_svc); + cls_svc = _cls_svc; + } + + RGWSI_MetaBackend_Handler *alloc_be_handler() override; + RGWSI_MetaBackend::Context *alloc_ctx() override; + + int call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb) override; + + int get_entry(RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWSI_MetaBackend::GetParams& _params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + int put_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWSI_MetaBackend::PutParams& _params, + RGWObjVersionTracker *objv_tracker, + optional_yield y); +}; + + diff --git a/src/rgw/services/svc_meta_be_params.h b/src/rgw/services/svc_meta_be_params.h new file mode 100644 index 000000000..4b0201e43 --- /dev/null +++ b/src/rgw/services/svc_meta_be_params.h @@ -0,0 +1,32 @@ + + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include <variant> + +struct RGWSysObjectCtx; + +struct RGWSI_MetaBackend_CtxParams_SObj { + RGWSysObjectCtx *sysobj_ctx{nullptr}; + + RGWSI_MetaBackend_CtxParams_SObj() {} + RGWSI_MetaBackend_CtxParams_SObj(RGWSysObjectCtx * _sysobj_ctx) : sysobj_ctx(_sysobj_ctx) {} +}; + +using RGWSI_MetaBackend_CtxParams = std::variant<RGWSI_MetaBackend_CtxParams_SObj>; diff --git a/src/rgw/services/svc_meta_be_sobj.cc b/src/rgw/services/svc_meta_be_sobj.cc new file mode 100644 index 000000000..253e509ca --- /dev/null +++ b/src/rgw/services/svc_meta_be_sobj.cc @@ -0,0 +1,257 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_meta_be_sobj.h" +#include "svc_meta_be_params.h" +#include "svc_mdlog.h" + +#include "rgw/rgw_tools.h" +#include "rgw/rgw_metadata.h" +#include "rgw/rgw_mdlog.h" + +#define dout_subsys ceph_subsys_rgw + + +RGWSI_MetaBackend_SObj::RGWSI_MetaBackend_SObj(CephContext *cct) : RGWSI_MetaBackend(cct) { +} + +RGWSI_MetaBackend_SObj::~RGWSI_MetaBackend_SObj() { +} + +RGWSI_MetaBackend_Handler *RGWSI_MetaBackend_SObj::alloc_be_handler() +{ + return new RGWSI_MetaBackend_Handler_SObj(this); +} + +RGWSI_MetaBackend::Context *RGWSI_MetaBackend_SObj::alloc_ctx() +{ + return new Context_SObj(sysobj_svc); +} + +int RGWSI_MetaBackend_SObj::pre_modify(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *_ctx, + const string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type, + optional_yield y) +{ + auto ctx = static_cast<Context_SObj *>(_ctx); + int ret = RGWSI_MetaBackend::pre_modify(dpp, ctx, key, log_data, + objv_tracker, op_type, + y); + if (ret < 0) { + return ret; + } + + /* if write version has not been set, and there's a read version, set it so that we can + * log it + */ + if (objv_tracker) { + log_data.read_version = objv_tracker->read_version; + log_data.write_version = objv_tracker->write_version; + } + + log_data.status = op_type; + + bufferlist logbl; + encode(log_data, logbl); + + ret = mdlog_svc->add_entry(dpp, ctx->module->get_hash_key(key), ctx->module->get_section(), key, logbl); + if (ret < 0) + return ret; + + return 0; +} + +int RGWSI_MetaBackend_SObj::post_modify(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, int ret, + optional_yield y) +{ + auto ctx = static_cast<Context_SObj *>(_ctx); + if (ret >= 0) + log_data.status = MDLOG_STATUS_COMPLETE; + else + log_data.status = MDLOG_STATUS_ABORT; + + bufferlist logbl; + encode(log_data, logbl); + + int r = mdlog_svc->add_entry(dpp, ctx->module->get_hash_key(key), ctx->module->get_section(), key, logbl); + if (ret < 0) + return ret; + + if (r < 0) + return r; + + return RGWSI_MetaBackend::post_modify(dpp, ctx, key, log_data, objv_tracker, ret, y); +} + +int RGWSI_MetaBackend_SObj::get_shard_id(RGWSI_MetaBackend::Context *_ctx, + const std::string& key, + int *shard_id) +{ + auto ctx = static_cast<Context_SObj *>(_ctx); + *shard_id = mdlog_svc->get_shard_id(ctx->module->get_hash_key(key), shard_id); + return 0; +} + +int RGWSI_MetaBackend_SObj::call(std::optional<RGWSI_MetaBackend_CtxParams> opt, + std::function<int(RGWSI_MetaBackend::Context *)> f) +{ + if (!opt) { + RGWSI_MetaBackend_SObj::Context_SObj ctx(sysobj_svc); + return f(&ctx); + } + + try { + auto& opt_sobj = std::get<RGWSI_MetaBackend_CtxParams_SObj>(*opt); // w contains int, not float: will throw + + RGWSI_MetaBackend_SObj::Context_SObj ctx(sysobj_svc, opt_sobj.sysobj_ctx); + return f(&ctx); + } catch (const std::bad_variant_access&) { + ldout(cct, 0) << "ERROR: possible bug: " << __FILE__ << ":" << __LINE__ << ":" << __func__ << "(): bad variant access" << dendl; + } + + return -EINVAL; +} + +void RGWSI_MetaBackend_SObj::Context_SObj::init(RGWSI_MetaBackend_Handler *h) +{ + RGWSI_MetaBackend_Handler_SObj *handler = static_cast<RGWSI_MetaBackend_Handler_SObj *>(h); + module = handler->module; + if (!obj_ctx) { + _obj_ctx.emplace(sysobj_svc->init_obj_ctx()); + obj_ctx = &(*_obj_ctx); + } +} + +int RGWSI_MetaBackend_SObj::call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb) +{ + bufferlist bl; + RGWSI_MBSObj_GetParams params; + params.pmtime = pmtime; + params.pbl = &bl; + return cb(params); +} + +int RGWSI_MetaBackend_SObj::get_entry(RGWSI_MetaBackend::Context *_ctx, + const string& key, + GetParams& _params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + RGWSI_MBSObj_GetParams& params = static_cast<RGWSI_MBSObj_GetParams&>(_params); + + rgw_pool pool; + string oid; + ctx->module->get_pool_and_oid(key, &pool, &oid); + + return rgw_get_system_obj(*ctx->obj_ctx, pool, oid, *params.pbl, + objv_tracker, params.pmtime, + y, dpp, + params.pattrs, params.cache_info, + params.refresh_version); +} + +int RGWSI_MetaBackend_SObj::put_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const string& key, + PutParams& _params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + RGWSI_MBSObj_PutParams& params = static_cast<RGWSI_MBSObj_PutParams&>(_params); + + rgw_pool pool; + string oid; + ctx->module->get_pool_and_oid(key, &pool, &oid); + + return rgw_put_system_obj(dpp, *ctx->obj_ctx, pool, oid, params.bl, params.exclusive, + objv_tracker, params.mtime, y, params.pattrs); +} + +int RGWSI_MetaBackend_SObj::remove_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const string& key, + RemoveParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + + rgw_pool pool; + string oid; + ctx->module->get_pool_and_oid(key, &pool, &oid); + rgw_raw_obj k(pool, oid); + + auto sysobj = ctx->obj_ctx->get_obj(k); + return sysobj.wop() + .set_objv_tracker(objv_tracker) + .remove(dpp, y); +} + +int RGWSI_MetaBackend_SObj::list_init(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const string& marker) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + + rgw_pool pool; + + string no_key; + ctx->module->get_pool_and_oid(no_key, &pool, nullptr); + + ctx->list.pool = sysobj_svc->get_pool(pool); + ctx->list.op.emplace(ctx->list.pool->op()); + + string prefix = ctx->module->get_oid_prefix(); + ctx->list.op->init(dpp, marker, prefix); + + return 0; +} + +int RGWSI_MetaBackend_SObj::list_next(RGWSI_MetaBackend::Context *_ctx, + int max, list<string> *keys, + bool *truncated) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + + vector<string> oids; + + keys->clear(); + + int ret = ctx->list.op->get_next(max, &oids, truncated); + if (ret < 0 && ret != -ENOENT) + return ret; + if (ret == -ENOENT) { + if (truncated) + *truncated = false; + return 0; + } + + auto module = ctx->module; + + for (auto& o : oids) { + if (!module->is_valid_oid(o)) { + continue; + } + keys->emplace_back(module->oid_to_key(o)); + } + + return 0; +} + +int RGWSI_MetaBackend_SObj::list_get_marker(RGWSI_MetaBackend::Context *_ctx, + string *marker) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + + return ctx->list.op->get_marker(marker); +} + diff --git a/src/rgw/services/svc_meta_be_sobj.h b/src/rgw/services/svc_meta_be_sobj.h new file mode 100644 index 000000000..8c5660a6d --- /dev/null +++ b/src/rgw/services/svc_meta_be_sobj.h @@ -0,0 +1,200 @@ + + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_meta_be.h" +#include "svc_sys_obj.h" + + +class RGWSI_MBSObj_Handler_Module : public RGWSI_MetaBackend::Module { +protected: + string section; +public: + RGWSI_MBSObj_Handler_Module(const string& _section) : section(_section) {} + virtual void get_pool_and_oid(const std::string& key, rgw_pool *pool, std::string *oid) = 0; + virtual const std::string& get_oid_prefix() = 0; + virtual std::string key_to_oid(const std::string& key) = 0; + virtual bool is_valid_oid(const std::string& oid) = 0; + virtual std::string oid_to_key(const std::string& oid) = 0; + + const std::string& get_section() { + return section; + } + + /* key to use for hashing entries for log shard placement */ + virtual std::string get_hash_key(const std::string& key) { + return section + ":" + key; + } +}; + +struct RGWSI_MBSObj_GetParams : public RGWSI_MetaBackend::GetParams { + bufferlist *pbl{nullptr}; + map<string, bufferlist> *pattrs{nullptr}; + rgw_cache_entry_info *cache_info{nullptr}; + boost::optional<obj_version> refresh_version; + + RGWSI_MBSObj_GetParams() {} + RGWSI_MBSObj_GetParams(bufferlist *_pbl, + std::map<string, bufferlist> *_pattrs, + ceph::real_time *_pmtime) : RGWSI_MetaBackend::GetParams(_pmtime), + pbl(_pbl), + pattrs(_pattrs) {} + + RGWSI_MBSObj_GetParams& set_cache_info(rgw_cache_entry_info *_cache_info) { + cache_info = _cache_info; + return *this; + } + RGWSI_MBSObj_GetParams& set_refresh_version(boost::optional<obj_version>& _refresh_version) { + refresh_version = _refresh_version; + return *this; + } +}; + +struct RGWSI_MBSObj_PutParams : public RGWSI_MetaBackend::PutParams { + bufferlist bl; + map<string, bufferlist> *pattrs{nullptr}; + bool exclusive{false}; + + RGWSI_MBSObj_PutParams() {} + RGWSI_MBSObj_PutParams(std::map<string, bufferlist> *_pattrs, + const ceph::real_time& _mtime) : RGWSI_MetaBackend::PutParams(_mtime), + pattrs(_pattrs) {} + RGWSI_MBSObj_PutParams(bufferlist& _bl, + std::map<string, bufferlist> *_pattrs, + const ceph::real_time& _mtime, + bool _exclusive) : RGWSI_MetaBackend::PutParams(_mtime), + bl(_bl), + pattrs(_pattrs), + exclusive(_exclusive) {} +}; + +struct RGWSI_MBSObj_RemoveParams : public RGWSI_MetaBackend::RemoveParams { +}; + +class RGWSI_MetaBackend_SObj : public RGWSI_MetaBackend +{ +protected: + RGWSI_SysObj *sysobj_svc{nullptr}; + +public: + struct Context_SObj : public RGWSI_MetaBackend::Context { + RGWSI_SysObj *sysobj_svc{nullptr}; + + RGWSI_MBSObj_Handler_Module *module{nullptr}; + std::optional<RGWSysObjectCtx> _obj_ctx; + RGWSysObjectCtx *obj_ctx{nullptr}; + struct _list { + std::optional<RGWSI_SysObj::Pool> pool; + std::optional<RGWSI_SysObj::Pool::Op> op; + } list; + + Context_SObj(RGWSI_SysObj *_sysobj_svc, + RGWSysObjectCtx *_oc = nullptr) : sysobj_svc(_sysobj_svc), + obj_ctx(_oc) {} + + void init(RGWSI_MetaBackend_Handler *h) override; + }; + + RGWSI_MetaBackend_SObj(CephContext *cct); + virtual ~RGWSI_MetaBackend_SObj(); + + RGWSI_MetaBackend::Type get_type() { + return MDBE_SOBJ; + } + + void init(RGWSI_SysObj *_sysobj_svc, + RGWSI_MDLog *_mdlog_svc) { + base_init(_mdlog_svc); + sysobj_svc = _sysobj_svc; + } + + RGWSI_MetaBackend_Handler *alloc_be_handler() override; + RGWSI_MetaBackend::Context *alloc_ctx() override; + + + int call_with_get_params(ceph::real_time *pmtime, std::function<int(RGWSI_MetaBackend::GetParams&)> cb) override; + + int pre_modify(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type, + optional_yield y); + int post_modify(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, int ret, + optional_yield y); + + int get_entry(RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWSI_MetaBackend::GetParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) override; + int put_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWSI_MetaBackend::PutParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) override; + int remove_entry(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const string& key, + RGWSI_MetaBackend::RemoveParams& params, + RGWObjVersionTracker *objv_tracker, + optional_yield y) override; + + int list_init(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *_ctx, const string& marker) override; + int list_next(RGWSI_MetaBackend::Context *_ctx, + int max, list<string> *keys, + bool *truncated) override; + int list_get_marker(RGWSI_MetaBackend::Context *ctx, + string *marker) override; + + int get_shard_id(RGWSI_MetaBackend::Context *ctx, + const std::string& key, + int *shard_id) override; + + int call(std::optional<RGWSI_MetaBackend_CtxParams> opt, + std::function<int(RGWSI_MetaBackend::Context *)> f) override; +}; + + +class RGWSI_MetaBackend_Handler_SObj : public RGWSI_MetaBackend_Handler { + friend class RGWSI_MetaBackend_SObj::Context_SObj; + + RGWSI_MBSObj_Handler_Module *module{nullptr}; + +public: + RGWSI_MetaBackend_Handler_SObj(RGWSI_MetaBackend *be) : + RGWSI_MetaBackend_Handler(be) {} + + void set_module(RGWSI_MBSObj_Handler_Module *_module) { + module = _module; + } + + RGWSI_MBSObj_Handler_Module *get_module() { + return module; + } +}; diff --git a/src/rgw/services/svc_meta_be_types.h b/src/rgw/services/svc_meta_be_types.h new file mode 100644 index 000000000..4a88a8e0b --- /dev/null +++ b/src/rgw/services/svc_meta_be_types.h @@ -0,0 +1,26 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#pragma once + +enum RGWSI_META_BE_TYPES { + SOBJ = 1, + OTP = 2, + BUCKET = 3, + BI = 4, + USER = 5, +}; + diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc new file mode 100644 index 000000000..c634934e4 --- /dev/null +++ b/src/rgw/services/svc_notify.cc @@ -0,0 +1,442 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "include/random.h" +#include "include/Context.h" +#include "common/errno.h" + +#include "rgw/rgw_cache.h" +#include "svc_notify.h" +#include "svc_finisher.h" +#include "svc_zone.h" +#include "svc_rados.h" + +#include "rgw/rgw_zone.h" + +#define dout_subsys ceph_subsys_rgw + +static string notify_oid_prefix = "notify"; + +class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 { + CephContext *cct; + RGWSI_Notify *svc; + int index; + RGWSI_RADOS::Obj obj; + uint64_t watch_handle; + int register_ret{0}; + librados::AioCompletion *register_completion{nullptr}; + + class C_ReinitWatch : public Context { + RGWWatcher *watcher; + public: + explicit C_ReinitWatch(RGWWatcher *_watcher) : watcher(_watcher) {} + void finish(int r) override { + watcher->reinit(); + } + }; + + CephContext *get_cct() const override { return cct; } + unsigned get_subsys() const override { return dout_subsys; } + std::ostream& gen_prefix(std::ostream& out) const override { + return out << "rgw watcher librados: "; + } + +public: + RGWWatcher(CephContext *_cct, RGWSI_Notify *s, int i, RGWSI_RADOS::Obj& o) : cct(_cct), svc(s), index(i), obj(o), watch_handle(0) {} + void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) override { + ldpp_dout(this, 10) << "RGWWatcher::handle_notify() " + << " notify_id " << notify_id + << " cookie " << cookie + << " notifier " << notifier_id + << " bl.length()=" << bl.length() << dendl; + + if (unlikely(svc->inject_notify_timeout_probability == 1) || + (svc->inject_notify_timeout_probability > 0 && + (svc->inject_notify_timeout_probability > + ceph::util::generate_random_number(0.0, 1.0)))) { + ldpp_dout(this, 0) + << "RGWWatcher::handle_notify() dropping notification! " + << "If this isn't what you want, set " + << "rgw_inject_notify_timeout_probability to zero!" << dendl; + return; + } + + svc->watch_cb(this, notify_id, cookie, notifier_id, bl); + + bufferlist reply_bl; // empty reply payload + obj.notify_ack(notify_id, cookie, reply_bl); + } + void handle_error(uint64_t cookie, int err) override { + lderr(cct) << "RGWWatcher::handle_error cookie " << cookie + << " err " << cpp_strerror(err) << dendl; + svc->remove_watcher(index); + svc->schedule_context(new C_ReinitWatch(this)); + } + + void reinit() { + int ret = unregister_watch(); + if (ret < 0) { + ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl; + return; + } + ret = register_watch(); + if (ret < 0) { + ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl; + return; + } + } + + int unregister_watch() { + int r = svc->unwatch(obj, watch_handle); + if (r < 0) { + return r; + } + svc->remove_watcher(index); + return 0; + } + + int register_watch_async() { + if (register_completion) { + register_completion->release(); + register_completion = nullptr; + } + register_completion = librados::Rados::aio_create_completion(nullptr, nullptr); + register_ret = obj.aio_watch(register_completion, &watch_handle, this); + if (register_ret < 0) { + register_completion->release(); + return register_ret; + } + return 0; + } + + int register_watch_finish() { + if (register_ret < 0) { + return register_ret; + } + if (!register_completion) { + return -EINVAL; + } + register_completion->wait_for_complete(); + int r = register_completion->get_return_value(); + register_completion->release(); + register_completion = nullptr; + if (r < 0) { + return r; + } + svc->add_watcher(index); + return 0; + } + + int register_watch() { + int r = obj.watch(&watch_handle, this); + if (r < 0) { + return r; + } + svc->add_watcher(index); + return 0; + } +}; + + +class RGWSI_Notify_ShutdownCB : public RGWSI_Finisher::ShutdownCB +{ + RGWSI_Notify *svc; +public: + RGWSI_Notify_ShutdownCB(RGWSI_Notify *_svc) : svc(_svc) {} + void call() override { + svc->shutdown(); + } +}; + +string RGWSI_Notify::get_control_oid(int i) +{ + char buf[notify_oid_prefix.size() + 16]; + snprintf(buf, sizeof(buf), "%s.%d", notify_oid_prefix.c_str(), i); + + return string(buf); +} + +// do not call pick_obj_control before init_watch +RGWSI_RADOS::Obj RGWSI_Notify::pick_control_obj(const string& key) +{ + uint32_t r = ceph_str_hash_linux(key.c_str(), key.size()); + + int i = r % num_watchers; + return notify_objs[i]; +} + +int RGWSI_Notify::init_watch(const DoutPrefixProvider *dpp, optional_yield y) +{ + num_watchers = cct->_conf->rgw_num_control_oids; + + bool compat_oid = (num_watchers == 0); + + if (num_watchers <= 0) + num_watchers = 1; + + watchers = new RGWWatcher *[num_watchers]; + + int error = 0; + + notify_objs.resize(num_watchers); + + for (int i=0; i < num_watchers; i++) { + string notify_oid; + + if (!compat_oid) { + notify_oid = get_control_oid(i); + } else { + notify_oid = notify_oid_prefix; + } + + notify_objs[i] = rados_svc->handle().obj({control_pool, notify_oid}); + auto& notify_obj = notify_objs[i]; + + int r = notify_obj.open(dpp); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: notify_obj.open() returned r=" << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + op.create(false); + r = notify_obj.operate(dpp, &op, y); + if (r < 0 && r != -EEXIST) { + ldpp_dout(dpp, 0) << "ERROR: notify_obj.operate() returned r=" << r << dendl; + return r; + } + + RGWWatcher *watcher = new RGWWatcher(cct, this, i, notify_obj); + watchers[i] = watcher; + + r = watcher->register_watch_async(); + if (r < 0) { + ldpp_dout(dpp, 0) << "WARNING: register_watch_aio() returned " << r << dendl; + error = r; + continue; + } + } + + for (int i = 0; i < num_watchers; ++i) { + int r = watchers[i]->register_watch_finish(); + if (r < 0) { + ldpp_dout(dpp, 0) << "WARNING: async watch returned " << r << dendl; + error = r; + } + } + + if (error < 0) { + return error; + } + + return 0; +} + +void RGWSI_Notify::finalize_watch() +{ + for (int i = 0; i < num_watchers; i++) { + RGWWatcher *watcher = watchers[i]; + watcher->unregister_watch(); + delete watcher; + } + + delete[] watchers; +} + +int RGWSI_Notify::do_start(optional_yield y, const DoutPrefixProvider *dpp) +{ + int r = zone_svc->start(y, dpp); + if (r < 0) { + return r; + } + + assert(zone_svc->is_started()); /* otherwise there's an ordering problem */ + + r = rados_svc->start(y, dpp); + if (r < 0) { + return r; + } + r = finisher_svc->start(y, dpp); + if (r < 0) { + return r; + } + + control_pool = zone_svc->get_zone_params().control_pool; + + int ret = init_watch(dpp, y); + if (ret < 0) { + lderr(cct) << "ERROR: failed to initialize watch: " << cpp_strerror(-ret) << dendl; + return ret; + } + + shutdown_cb = new RGWSI_Notify_ShutdownCB(this); + int handle; + finisher_svc->register_caller(shutdown_cb, &handle); + finisher_handle = handle; + + return 0; +} + +void RGWSI_Notify::shutdown() +{ + if (finalized) { + return; + } + + if (finisher_handle) { + finisher_svc->unregister_caller(*finisher_handle); + } + finalize_watch(); + + delete shutdown_cb; + + finalized = true; +} + +RGWSI_Notify::~RGWSI_Notify() +{ + shutdown(); +} + +int RGWSI_Notify::unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle) +{ + int r = obj.unwatch(watch_handle); + if (r < 0) { + ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << r << dendl; + return r; + } + r = rados_svc->handle().watch_flush(); + if (r < 0) { + ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << r << dendl; + return r; + } + return 0; +} + +void RGWSI_Notify::add_watcher(int i) +{ + ldout(cct, 20) << "add_watcher() i=" << i << dendl; + std::unique_lock l{watchers_lock}; + watchers_set.insert(i); + if (watchers_set.size() == (size_t)num_watchers) { + ldout(cct, 2) << "all " << num_watchers << " watchers are set, enabling cache" << dendl; + _set_enabled(true); + } +} + +void RGWSI_Notify::remove_watcher(int i) +{ + ldout(cct, 20) << "remove_watcher() i=" << i << dendl; + std::unique_lock l{watchers_lock}; + size_t orig_size = watchers_set.size(); + watchers_set.erase(i); + if (orig_size == (size_t)num_watchers && + watchers_set.size() < orig_size) { /* actually removed */ + ldout(cct, 2) << "removed watcher, disabling cache" << dendl; + _set_enabled(false); + } +} + +int RGWSI_Notify::watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) +{ + std::shared_lock l{watchers_lock}; + if (cb) { + return cb->watch_cb(dpp, notify_id, cookie, notifier_id, bl); + } + return 0; +} + +void RGWSI_Notify::set_enabled(bool status) +{ + std::unique_lock l{watchers_lock}; + _set_enabled(status); +} + +void RGWSI_Notify::_set_enabled(bool status) +{ + enabled = status; + if (cb) { + cb->set_enabled(status); + } +} + +int RGWSI_Notify::distribute(const DoutPrefixProvider *dpp, const string& key, + const RGWCacheNotifyInfo& cni, + optional_yield y) +{ + /* The RGW uses the control pool to store the watch notify objects. + The precedence in RGWSI_Notify::do_start is to call to zone_svc->start and later to init_watch(). + The first time, RGW starts in the cluster, the RGW will try to create zone and zonegroup system object. + In that case RGW will try to distribute the cache before it ran init_watch, + which will lead to division by 0 in pick_obj_control (num_watchers is 0). + */ + if (num_watchers > 0) { + RGWSI_RADOS::Obj notify_obj = pick_control_obj(key); + + ldpp_dout(dpp, 10) << "distributing notification oid=" << notify_obj.get_ref().obj + << " cni=" << cni << dendl; + return robust_notify(dpp, notify_obj, cni, y); + } + return 0; +} + +int RGWSI_Notify::robust_notify(const DoutPrefixProvider *dpp, + RGWSI_RADOS::Obj& notify_obj, + const RGWCacheNotifyInfo& cni, + optional_yield y) +{ + bufferlist bl; + encode(cni, bl); + + // First, try to send, without being fancy about it. + auto r = notify_obj.notify(dpp, bl, 0, nullptr, y); + + if (r < 0) { + ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " Notify failed on object " << cni.obj << ": " + << cpp_strerror(-r) << dendl; + } + + // If we timed out, get serious. + if (r == -ETIMEDOUT) { + RGWCacheNotifyInfo info; + info.op = REMOVE_OBJ; + info.obj = cni.obj; + bufferlist retrybl; + encode(info, retrybl); + + for (auto tries = 0u; + r == -ETIMEDOUT && tries < max_notify_retries; + ++tries) { + ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " Invalidating obj=" << info.obj << " tries=" + << tries << dendl; + r = notify_obj.notify(dpp, bl, 0, nullptr, y); + if (r < 0) { + ldpp_dout(dpp, 1) << __PRETTY_FUNCTION__ << ":" << __LINE__ + << " invalidation attempt " << tries << " failed: " + << cpp_strerror(-r) << dendl; + } + } + } + return r; +} + +void RGWSI_Notify::register_watch_cb(CB *_cb) +{ + std::unique_lock l{watchers_lock}; + cb = _cb; + _set_enabled(enabled); +} + +void RGWSI_Notify::schedule_context(Context *c) +{ + finisher_svc->schedule_context(c); +} diff --git a/src/rgw/services/svc_notify.h b/src/rgw/services/svc_notify.h new file mode 100644 index 000000000..d4e3e2f51 --- /dev/null +++ b/src/rgw/services/svc_notify.h @@ -0,0 +1,105 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" + + +class Context; + +class RGWSI_Zone; +class RGWSI_Finisher; + +class RGWWatcher; +class RGWSI_Notify_ShutdownCB; +struct RGWCacheNotifyInfo; + +class RGWSI_Notify : public RGWServiceInstance +{ + friend class RGWWatcher; + friend class RGWSI_Notify_ShutdownCB; + friend class RGWServices_Def; + +public: + class CB; + +private: + RGWSI_Zone *zone_svc{nullptr}; + RGWSI_RADOS *rados_svc{nullptr}; + RGWSI_Finisher *finisher_svc{nullptr}; + + ceph::shared_mutex watchers_lock = ceph::make_shared_mutex("watchers_lock"); + rgw_pool control_pool; + + int num_watchers{0}; + RGWWatcher **watchers{nullptr}; + std::set<int> watchers_set; + vector<RGWSI_RADOS::Obj> notify_objs; + + bool enabled{false}; + + double inject_notify_timeout_probability{0}; + static constexpr unsigned max_notify_retries = 10; + + string get_control_oid(int i); + RGWSI_RADOS::Obj pick_control_obj(const string& key); + + CB *cb{nullptr}; + + std::optional<int> finisher_handle; + RGWSI_Notify_ShutdownCB *shutdown_cb{nullptr}; + + bool finalized{false}; + + int init_watch(const DoutPrefixProvider *dpp, optional_yield y); + void finalize_watch(); + + void init(RGWSI_Zone *_zone_svc, + RGWSI_RADOS *_rados_svc, + RGWSI_Finisher *_finisher_svc) { + zone_svc = _zone_svc; + rados_svc = _rados_svc; + finisher_svc = _finisher_svc; + } + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + void shutdown() override; + + int unwatch(RGWSI_RADOS::Obj& obj, uint64_t watch_handle); + void add_watcher(int i); + void remove_watcher(int i); + + int watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl); + void _set_enabled(bool status); + void set_enabled(bool status); + + int robust_notify(const DoutPrefixProvider *dpp, RGWSI_RADOS::Obj& notify_obj, + const RGWCacheNotifyInfo& bl, optional_yield y); + + void schedule_context(Context *c); +public: + RGWSI_Notify(CephContext *cct): RGWServiceInstance(cct) {} + ~RGWSI_Notify(); + + class CB { + public: + virtual ~CB() {} + virtual int watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) = 0; + virtual void set_enabled(bool status) = 0; + }; + + int distribute(const DoutPrefixProvider *dpp, const string& key, const RGWCacheNotifyInfo& bl, + optional_yield y); + + void register_watch_cb(CB *cb); +}; diff --git a/src/rgw/services/svc_otp.cc b/src/rgw/services/svc_otp.cc new file mode 100644 index 000000000..fc386ae72 --- /dev/null +++ b/src/rgw/services/svc_otp.cc @@ -0,0 +1,184 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_otp.h" +#include "svc_zone.h" +#include "svc_meta.h" +#include "svc_meta_be_sobj.h" + +#include "rgw/rgw_zone.h" + +#define dout_subsys ceph_subsys_rgw + +class RGW_MB_Handler_Module_OTP : public RGWSI_MBSObj_Handler_Module { + RGWSI_Zone *zone_svc; + string prefix; +public: + RGW_MB_Handler_Module_OTP(RGWSI_Zone *_zone_svc) : RGWSI_MBSObj_Handler_Module("otp"), + zone_svc(_zone_svc) {} + + void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override { + if (pool) { + *pool = zone_svc->get_zone_params().otp_pool; + } + + if (oid) { + *oid = key; + } + } + + const string& get_oid_prefix() override { + return prefix; + } + + bool is_valid_oid(const string& oid) override { + return true; + } + + string key_to_oid(const string& key) override { + return key; + } + + string oid_to_key(const string& oid) override { + return oid; + } +}; + +RGWSI_OTP::RGWSI_OTP(CephContext *cct): RGWServiceInstance(cct) { +} + +RGWSI_OTP::~RGWSI_OTP() { +} + +void RGWSI_OTP::init(RGWSI_Zone *_zone_svc, + RGWSI_Meta *_meta_svc, + RGWSI_MetaBackend *_meta_be_svc) +{ + svc.otp = this; + svc.zone = _zone_svc; + svc.meta = _meta_svc; + svc.meta_be = _meta_be_svc; +} + +int RGWSI_OTP::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + /* create first backend handler for bucket entrypoints */ + + RGWSI_MetaBackend_Handler *_otp_be_handler; + + int r = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_OTP, &_otp_be_handler); + if (r < 0) { + ldout(ctx(), 0) << "ERROR: failed to create be handler: r=" << r << dendl; + return r; + } + + be_handler = _otp_be_handler; + + RGWSI_MetaBackend_Handler_OTP *otp_be_handler = static_cast<RGWSI_MetaBackend_Handler_OTP *>(_otp_be_handler); + + auto otp_be_module = new RGW_MB_Handler_Module_OTP(svc.zone); + be_module.reset(otp_be_module); + otp_be_handler->set_module(otp_be_module); + + return 0; +} + +int RGWSI_OTP::read_all(RGWSI_OTP_BE_Ctx& ctx, + const string& key, + otp_devices_list_t *devices, + real_time *pmtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) +{ + RGWSI_MBOTP_GetParams params; + params.pdevices = devices; + params.pmtime = pmtime; + + int ret = svc.meta_be->get_entry(ctx.get(), key, params, objv_tracker, y, dpp); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWSI_OTP::read_all(RGWSI_OTP_BE_Ctx& ctx, + const rgw_user& uid, + otp_devices_list_t *devices, + real_time *pmtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + return read_all(ctx, + uid.to_str(), + devices, + pmtime, + objv_tracker, + y, + dpp); +} + +int RGWSI_OTP::store_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const string& key, + const otp_devices_list_t& devices, + real_time mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_MBOTP_PutParams params; + params.mtime = mtime; + params.devices = devices; + + int ret = svc.meta_be->put_entry(dpp, ctx.get(), key, params, objv_tracker, y); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWSI_OTP::store_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const rgw_user& uid, + const otp_devices_list_t& devices, + real_time mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + return store_all(dpp, ctx, + uid.to_str(), + devices, + mtime, + objv_tracker, + y); +} + +int RGWSI_OTP::remove_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const string& key, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_MBOTP_RemoveParams params; + + int ret = svc.meta_be->remove_entry(dpp, ctx.get(), key, params, objv_tracker, y); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWSI_OTP::remove_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const rgw_user& uid, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + return remove_all(dpp,ctx, + uid.to_str(), + objv_tracker, + y); +} diff --git a/src/rgw/services/svc_otp.h b/src/rgw/services/svc_otp.h new file mode 100644 index 000000000..f4b2e4ed2 --- /dev/null +++ b/src/rgw/services/svc_otp.h @@ -0,0 +1,95 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "cls/otp/cls_otp_types.h" + +#include "rgw/rgw_service.h" + +#include "svc_otp_types.h" +#include "svc_meta_be_otp.h" + +class RGWSI_Zone; + +class RGWSI_OTP : public RGWServiceInstance +{ + RGWSI_OTP_BE_Handler be_handler; + std::unique_ptr<RGWSI_MetaBackend::Module> be_module; + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + +public: + struct Svc { + RGWSI_OTP *otp{nullptr}; + RGWSI_Zone *zone{nullptr}; + RGWSI_Meta *meta{nullptr}; + RGWSI_MetaBackend *meta_be{nullptr}; + } svc; + + RGWSI_OTP(CephContext *cct); + ~RGWSI_OTP(); + + RGWSI_OTP_BE_Handler& get_be_handler() { + return be_handler; + } + + void init(RGWSI_Zone *_zone_svc, + RGWSI_Meta *_meta_svc, + RGWSI_MetaBackend *_meta_be_svc); + + int read_all(RGWSI_OTP_BE_Ctx& ctx, + const string& key, + otp_devices_list_t *devices, + real_time *pmtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + int read_all(RGWSI_OTP_BE_Ctx& ctx, + const rgw_user& uid, + otp_devices_list_t *devices, + real_time *pmtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + int store_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const string& key, + const otp_devices_list_t& devices, + real_time mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + int store_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const rgw_user& uid, + const otp_devices_list_t& devices, + real_time mtime, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + int remove_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const string& key, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + int remove_all(const DoutPrefixProvider *dpp, + RGWSI_OTP_BE_Ctx& ctx, + const rgw_user& uid, + RGWObjVersionTracker *objv_tracker, + optional_yield y); +}; + + diff --git a/src/rgw/services/svc_otp_types.h b/src/rgw/services/svc_otp_types.h new file mode 100644 index 000000000..60e2a79d6 --- /dev/null +++ b/src/rgw/services/svc_otp_types.h @@ -0,0 +1,29 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "common/ptr_wrapper.h" + +#include "svc_meta_be.h" +#include "svc_meta_be_types.h" + +class RGWSI_MetaBackend_Handler; + +using RGWSI_OTP_BE_Handler = ptr_wrapper<RGWSI_MetaBackend_Handler, RGWSI_META_BE_TYPES::OTP>; +using RGWSI_OTP_BE_Ctx = ptr_wrapper<RGWSI_MetaBackend::Context, RGWSI_META_BE_TYPES::OTP>; + diff --git a/src/rgw/services/svc_quota.cc b/src/rgw/services/svc_quota.cc new file mode 100644 index 000000000..c7164964c --- /dev/null +++ b/src/rgw/services/svc_quota.cc @@ -0,0 +1,18 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_quota.h" +#include "svc_zone.h" + +#include "rgw/rgw_zone.h" + +const RGWQuotaInfo& RGWSI_Quota::get_bucket_quota() const +{ + return zone_svc->get_current_period().get_config().bucket_quota; +} + +const RGWQuotaInfo& RGWSI_Quota::get_user_quota() const +{ + return zone_svc->get_current_period().get_config().user_quota; +} + diff --git a/src/rgw/services/svc_quota.h b/src/rgw/services/svc_quota.h new file mode 100644 index 000000000..34653ff3f --- /dev/null +++ b/src/rgw/services/svc_quota.h @@ -0,0 +1,22 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + + +class RGWSI_Quota : public RGWServiceInstance +{ + RGWSI_Zone *zone_svc{nullptr}; + +public: + RGWSI_Quota(CephContext *cct): RGWServiceInstance(cct) {} + + void init(RGWSI_Zone *_zone_svc) { + zone_svc = _zone_svc; + } + + const RGWQuotaInfo& get_bucket_quota() const; + const RGWQuotaInfo& get_user_quota() const; +}; diff --git a/src/rgw/services/svc_rados.cc b/src/rgw/services/svc_rados.cc new file mode 100644 index 000000000..32a6b3a3e --- /dev/null +++ b/src/rgw/services/svc_rados.cc @@ -0,0 +1,416 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_rados.h" + +#include "include/rados/librados.hpp" +#include "common/errno.h" +#include "osd/osd_types.h" +#include "rgw/rgw_tools.h" +#include "rgw/rgw_cr_rados.h" + +#include "auth/AuthRegistry.h" + +#define dout_subsys ceph_subsys_rgw + +RGWSI_RADOS::RGWSI_RADOS(CephContext *cct) : RGWServiceInstance(cct) +{ +} + +RGWSI_RADOS::~RGWSI_RADOS() +{ +} + +int RGWSI_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + int ret = rados.init_with_context(cct); + if (ret < 0) { + return ret; + } + ret = rados.connect(); + if (ret < 0) { + return ret; + } + + async_processor.reset(new RGWAsyncRadosProcessor(cct, cct->_conf->rgw_num_async_rados_threads)); + async_processor->start(); + + return 0; +} + +void RGWSI_RADOS::shutdown() +{ + if (async_processor) { + async_processor->stop(); + } +} + +librados::Rados* RGWSI_RADOS::get_rados_handle() +{ + return &rados; +} + +uint64_t RGWSI_RADOS::instance_id() +{ + return get_rados_handle()->get_instance_id(); +} + +int RGWSI_RADOS::open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx, + const OpenParams& params) +{ + return rgw_init_ioctx(dpp, get_rados_handle(), pool, io_ctx, + params.create, + params.mostly_omap); +} + +int RGWSI_RADOS::pool_iterate(librados::IoCtx& io_ctx, + librados::NObjectIterator& iter, + uint32_t num, vector<rgw_bucket_dir_entry>& objs, + RGWAccessListFilter *filter, + bool *is_truncated) +{ + if (iter == io_ctx.nobjects_end()) + return -ENOENT; + + uint32_t i; + + for (i = 0; i < num && iter != io_ctx.nobjects_end(); ++i, ++iter) { + rgw_bucket_dir_entry e; + + string oid = iter->get_oid(); + ldout(cct, 20) << "RGWRados::pool_iterate: got " << oid << dendl; + + // fill it in with initial values; we may correct later + if (filter && !filter->filter(oid, oid)) + continue; + + e.key = oid; + objs.push_back(e); + } + + if (is_truncated) + *is_truncated = (iter != io_ctx.nobjects_end()); + + return objs.size(); +} + +RGWSI_RADOS::Obj::Obj(Pool& pool, const string& oid) : rados_svc(pool.rados_svc) +{ + ref.pool = pool; + ref.obj = rgw_raw_obj(pool.get_pool(), oid); +} + +void RGWSI_RADOS::Obj::init(const rgw_raw_obj& obj) +{ + ref.pool = RGWSI_RADOS::Pool(rados_svc, obj.pool); + ref.obj = obj; +} + +int RGWSI_RADOS::Obj::open(const DoutPrefixProvider *dpp) +{ + int r = ref.pool.open(dpp); + if (r < 0) { + return r; + } + + ref.pool.ioctx().locator_set_key(ref.obj.loc); + + return 0; +} + +int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op, + optional_yield y, int flags) +{ + return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, y, flags); +} + +int RGWSI_RADOS::Obj::operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op, + bufferlist *pbl, optional_yield y, int flags) +{ + return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, op, pbl, y, flags); +} + +int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op) +{ + return ref.pool.ioctx().aio_operate(ref.obj.oid, c, op); +} + +int RGWSI_RADOS::Obj::aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op, + bufferlist *pbl) +{ + return ref.pool.ioctx().aio_operate(ref.obj.oid, c, op, pbl); +} + +int RGWSI_RADOS::Obj::watch(uint64_t *handle, librados::WatchCtx2 *ctx) +{ + return ref.pool.ioctx().watch2(ref.obj.oid, handle, ctx); +} + +int RGWSI_RADOS::Obj::aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx) +{ + return ref.pool.ioctx().aio_watch(ref.obj.oid, c, handle, ctx); +} + +int RGWSI_RADOS::Obj::unwatch(uint64_t handle) +{ + return ref.pool.ioctx().unwatch2(handle); +} + +int RGWSI_RADOS::Obj::notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, + bufferlist *pbl, optional_yield y) +{ + return rgw_rados_notify(dpp, ref.pool.ioctx(), ref.obj.oid, bl, timeout_ms, pbl, y); +} + +void RGWSI_RADOS::Obj::notify_ack(uint64_t notify_id, + uint64_t cookie, + bufferlist& bl) +{ + ref.pool.ioctx().notify_ack(ref.obj.oid, notify_id, cookie, bl); +} + +uint64_t RGWSI_RADOS::Obj::get_last_version() +{ + return ref.pool.ioctx().get_last_version(); +} + +int RGWSI_RADOS::Pool::create() +{ + librados::Rados *rad = rados_svc->get_rados_handle(); + int r = rad->pool_create(pool.name.c_str()); + if (r < 0) { + ldout(rados_svc->cct, 0) << "WARNING: pool_create returned " << r << dendl; + return r; + } + librados::IoCtx io_ctx; + r = rad->ioctx_create(pool.name.c_str(), io_ctx); + if (r < 0) { + ldout(rados_svc->cct, 0) << "WARNING: ioctx_create returned " << r << dendl; + return r; + } + r = io_ctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false); + if (r < 0) { + ldout(rados_svc->cct, 0) << "WARNING: application_enable returned " << r << dendl; + return r; + } + return 0; +} + +int RGWSI_RADOS::Pool::create(const vector<rgw_pool>& pools, vector<int> *retcodes) +{ + vector<librados::PoolAsyncCompletion *> completions; + vector<int> rets; + + librados::Rados *rad = rados_svc->get_rados_handle(); + for (auto iter = pools.begin(); iter != pools.end(); ++iter) { + librados::PoolAsyncCompletion *c = librados::Rados::pool_async_create_completion(); + completions.push_back(c); + auto& pool = *iter; + int ret = rad->pool_create_async(pool.name.c_str(), c); + rets.push_back(ret); + } + + vector<int>::iterator riter; + vector<librados::PoolAsyncCompletion *>::iterator citer; + + bool error = false; + ceph_assert(rets.size() == completions.size()); + for (riter = rets.begin(), citer = completions.begin(); riter != rets.end(); ++riter, ++citer) { + int r = *riter; + librados::PoolAsyncCompletion *c = *citer; + if (r == 0) { + c->wait(); + r = c->get_return_value(); + if (r < 0) { + ldout(rados_svc->cct, 0) << "WARNING: async pool_create returned " << r << dendl; + error = true; + } + } + c->release(); + retcodes->push_back(r); + } + if (error) { + return 0; + } + + std::vector<librados::IoCtx> io_ctxs; + retcodes->clear(); + for (auto pool : pools) { + io_ctxs.emplace_back(); + int ret = rad->ioctx_create(pool.name.c_str(), io_ctxs.back()); + if (ret < 0) { + ldout(rados_svc->cct, 0) << "WARNING: ioctx_create returned " << ret << dendl; + error = true; + } + retcodes->push_back(ret); + } + if (error) { + return 0; + } + + completions.clear(); + for (auto &io_ctx : io_ctxs) { + librados::PoolAsyncCompletion *c = + librados::Rados::pool_async_create_completion(); + completions.push_back(c); + int ret = io_ctx.application_enable_async(pg_pool_t::APPLICATION_NAME_RGW, + false, c); + ceph_assert(ret == 0); + } + + retcodes->clear(); + for (auto c : completions) { + c->wait(); + int ret = c->get_return_value(); + if (ret == -EOPNOTSUPP) { + ret = 0; + } else if (ret < 0) { + ldout(rados_svc->cct, 0) << "WARNING: async application_enable returned " << ret + << dendl; + error = true; + } + c->release(); + retcodes->push_back(ret); + } + return 0; +} + +int RGWSI_RADOS::Pool::lookup() +{ + librados::Rados *rad = rados_svc->get_rados_handle(); + int ret = rad->pool_lookup(pool.name.c_str()); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWSI_RADOS::Pool::open(const DoutPrefixProvider *dpp, const OpenParams& params) +{ + return rados_svc->open_pool_ctx(dpp, pool, state.ioctx, params); +} + +int RGWSI_RADOS::Pool::List::init(const DoutPrefixProvider *dpp, const string& marker, RGWAccessListFilter *filter) +{ + if (ctx.initialized) { + return -EINVAL; + } + + if (!pool) { + return -EINVAL; + } + + int r = pool->rados_svc->open_pool_ctx(dpp, pool->pool, ctx.ioctx); + if (r < 0) { + return r; + } + + librados::ObjectCursor oc; + if (!oc.from_str(marker)) { + ldpp_dout(dpp, 10) << "failed to parse cursor: " << marker << dendl; + return -EINVAL; + } + + ctx.iter = ctx.ioctx.nobjects_begin(oc); + ctx.filter = filter; + ctx.initialized = true; + + return 0; +} + +int RGWSI_RADOS::Pool::List::get_next(int max, + std::vector<string> *oids, + bool *is_truncated) +{ + if (!ctx.initialized) { + return -EINVAL; + } + vector<rgw_bucket_dir_entry> objs; + int r = pool->rados_svc->pool_iterate(ctx.ioctx, ctx.iter, max, objs, ctx.filter, is_truncated); + if (r < 0) { + if(r != -ENOENT) { + ldout(pool->rados_svc->cct, 10) << "failed to list objects pool_iterate returned r=" << r << dendl; + } + return r; + } + + for (auto& o : objs) { + oids->push_back(o.key.name); + } + + return oids->size(); +} + +RGWSI_RADOS::Obj RGWSI_RADOS::Handle::obj(const rgw_raw_obj& o) +{ + return RGWSI_RADOS::Obj(rados_svc, o); +} +int RGWSI_RADOS::Handle::watch_flush() +{ + librados::Rados *rad = rados_svc->get_rados_handle(); + return rad->watch_flush(); +} + +int RGWSI_RADOS::Handle::mon_command(std::string cmd, + const bufferlist& inbl, + bufferlist *outbl, + std::string *outs) +{ + librados::Rados *rad = rados_svc->get_rados_handle(); + return rad->mon_command(cmd, inbl, outbl, outs); +} + +int RGWSI_RADOS::Pool::List::get_marker(string *marker) +{ + if (!ctx.initialized) { + return -EINVAL; + } + + *marker = ctx.iter.get_cursor().to_str(); + return 0; +} + +int RGWSI_RADOS::clog_warn(const string& msg) +{ + string cmd = + "{" + "\"prefix\": \"log\", " + "\"level\": \"warn\", " + "\"logtext\": [\"" + msg + "\"]" + "}"; + + bufferlist inbl; + auto h = handle(); + return h.mon_command(cmd, inbl, nullptr, nullptr); +} + +bool RGWSI_RADOS::check_secure_mon_conn() const +{ + AuthRegistry reg(cct); + + reg.refresh_config(); + + std::vector<uint32_t> methods; + std::vector<uint32_t> modes; + + reg.get_supported_methods(CEPH_ENTITY_TYPE_MON, &methods, &modes); + ldout(cct, 20) << __func__ << "(): auth registy supported: methods=" << methods << " modes=" << modes << dendl; + + for (auto method : methods) { + if (!reg.is_secure_method(method)) { + ldout(cct, 20) << __func__ << "(): method " << method << " is insecure" << dendl; + return false; + } + } + + for (auto mode : modes) { + if (!reg.is_secure_mode(mode)) { + ldout(cct, 20) << __func__ << "(): mode " << mode << " is insecure" << dendl; + return false; + } + } + + return true; +} + diff --git a/src/rgw/services/svc_rados.h b/src/rgw/services/svc_rados.h new file mode 100644 index 000000000..d3fa39314 --- /dev/null +++ b/src/rgw/services/svc_rados.h @@ -0,0 +1,246 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + +#include "include/rados/librados.hpp" +#include "common/async/yield_context.h" +#include "common/RWLock.h" + +class RGWAsyncRadosProcessor; + +class RGWAccessListFilter { +public: + virtual ~RGWAccessListFilter() {} + virtual bool filter(const string& name, string& key) = 0; +}; + +struct RGWAccessListFilterPrefix : public RGWAccessListFilter { + string prefix; + + explicit RGWAccessListFilterPrefix(const string& _prefix) : prefix(_prefix) {} + bool filter(const string& name, string& key) override { + return (prefix.compare(key.substr(0, prefix.size())) == 0); + } +}; + +class RGWSI_RADOS : public RGWServiceInstance +{ + librados::Rados rados; + std::unique_ptr<RGWAsyncRadosProcessor> async_processor; + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + +public: + struct OpenParams { + bool create{true}; + bool mostly_omap{false}; + + OpenParams() {} + + OpenParams& set_create(bool _create) { + create = _create; + return *this; + } + OpenParams& set_mostly_omap(bool _mostly_omap) { + mostly_omap = _mostly_omap; + return *this; + } + }; + +private: + int open_pool_ctx(const DoutPrefixProvider *dpp, const rgw_pool& pool, librados::IoCtx& io_ctx, + const OpenParams& params = {}); + int pool_iterate(librados::IoCtx& ioctx, + librados::NObjectIterator& iter, + uint32_t num, vector<rgw_bucket_dir_entry>& objs, + RGWAccessListFilter *filter, + bool *is_truncated); + +public: + RGWSI_RADOS(CephContext *cct); + ~RGWSI_RADOS(); + librados::Rados* get_rados_handle(); + + void init() {} + void shutdown() override; + + uint64_t instance_id(); + bool check_secure_mon_conn() const; + + RGWAsyncRadosProcessor *get_async_processor() { + return async_processor.get(); + } + + int clog_warn(const string& msg); + + class Handle; + + class Pool { + friend class RGWSI_RADOS; + friend Handle; + friend class Obj; + + RGWSI_RADOS *rados_svc{nullptr}; + rgw_pool pool; + + struct State { + librados::IoCtx ioctx; + } state; + + Pool(RGWSI_RADOS *_rados_svc, + const rgw_pool& _pool) : rados_svc(_rados_svc), + pool(_pool) {} + + Pool(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {} + public: + Pool() {} + + int create(); + int create(const std::vector<rgw_pool>& pools, std::vector<int> *retcodes); + int lookup(); + int open(const DoutPrefixProvider *dpp, const OpenParams& params = {}); + + const rgw_pool& get_pool() { + return pool; + } + + librados::IoCtx& ioctx() { + return state.ioctx; + } + + struct List { + Pool *pool{nullptr}; + + struct Ctx { + bool initialized{false}; + librados::IoCtx ioctx; + librados::NObjectIterator iter; + RGWAccessListFilter *filter{nullptr}; + } ctx; + + List() {} + List(Pool *_pool) : pool(_pool) {} + + int init(const DoutPrefixProvider *dpp, const string& marker, RGWAccessListFilter *filter = nullptr); + int get_next(int max, + std::vector<string> *oids, + bool *is_truncated); + + int get_marker(string *marker); + }; + + List op() { + return List(this); + } + + friend List; + }; + + + struct rados_ref { + RGWSI_RADOS::Pool pool; + rgw_raw_obj obj; + }; + + class Obj { + friend class RGWSI_RADOS; + friend class Handle; + + RGWSI_RADOS *rados_svc{nullptr}; + rados_ref ref; + + void init(const rgw_raw_obj& obj); + + Obj(RGWSI_RADOS *_rados_svc, const rgw_raw_obj& _obj) + : rados_svc(_rados_svc) { + init(_obj); + } + + Obj(Pool& pool, const string& oid); + + public: + Obj() {} + + int open(const DoutPrefixProvider *dpp); + + int operate(const DoutPrefixProvider *dpp, librados::ObjectWriteOperation *op, optional_yield y, + int flags = 0); + int operate(const DoutPrefixProvider *dpp, librados::ObjectReadOperation *op, bufferlist *pbl, + optional_yield y, int flags = 0); + int aio_operate(librados::AioCompletion *c, librados::ObjectWriteOperation *op); + int aio_operate(librados::AioCompletion *c, librados::ObjectReadOperation *op, + bufferlist *pbl); + + int watch(uint64_t *handle, librados::WatchCtx2 *ctx); + int aio_watch(librados::AioCompletion *c, uint64_t *handle, librados::WatchCtx2 *ctx); + int unwatch(uint64_t handle); + int notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, + bufferlist *pbl, optional_yield y); + void notify_ack(uint64_t notify_id, + uint64_t cookie, + bufferlist& bl); + + uint64_t get_last_version(); + + rados_ref& get_ref() { return ref; } + const rados_ref& get_ref() const { return ref; } + + const rgw_raw_obj& get_raw_obj() const { + return ref.obj; + } + }; + + class Handle { + friend class RGWSI_RADOS; + + RGWSI_RADOS *rados_svc{nullptr}; + + Handle(RGWSI_RADOS *_rados_svc) : rados_svc(_rados_svc) {} + public: + Obj obj(const rgw_raw_obj& o); + + Pool pool(const rgw_pool& p) { + return Pool(rados_svc, p); + } + + int watch_flush(); + + int mon_command(std::string cmd, + const bufferlist& inbl, + bufferlist *outbl, + std::string *outs); + }; + + Handle handle() { + return Handle(this); + } + + Obj obj(const rgw_raw_obj& o) { + return Obj(this, o); + } + + Obj obj(Pool& pool, const string& oid) { + return Obj(pool, oid); + } + + Pool pool() { + return Pool(this); + } + + Pool pool(const rgw_pool& p) { + return Pool(this, p); + } + + friend Obj; + friend Pool; + friend Pool::List; +}; + +using rgw_rados_ref = RGWSI_RADOS::rados_ref; + +inline ostream& operator<<(ostream& out, const RGWSI_RADOS::Obj& obj) { + return out << obj.get_raw_obj(); +} diff --git a/src/rgw/services/svc_sync_modules.cc b/src/rgw/services/svc_sync_modules.cc new file mode 100644 index 000000000..b5490660a --- /dev/null +++ b/src/rgw/services/svc_sync_modules.cc @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_sync_modules.h" +#include "svc_zone.h" + +#include "rgw/rgw_sync_module.h" +#include "rgw/rgw_zone.h" + +#define dout_subsys ceph_subsys_rgw + +void RGWSI_SyncModules::init(RGWSI_Zone *zone_svc) +{ + svc.zone = zone_svc; + sync_modules_manager = new RGWSyncModulesManager(); + rgw_register_sync_modules(sync_modules_manager); +} + +int RGWSI_SyncModules::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + auto& zone_public_config = svc.zone->get_zone(); + + int ret = sync_modules_manager->create_instance(cct, zone_public_config.tier_type, svc.zone->get_zone_params().tier_config, &sync_module); + if (ret < 0) { + lderr(cct) << "ERROR: failed to start sync module instance, ret=" << ret << dendl; + if (ret == -ENOENT) { + lderr(cct) << "ERROR: " << zone_public_config.tier_type + << " sync module does not exist. valid sync modules: " + << sync_modules_manager->get_registered_module_names() + << dendl; + } + return ret; + } + + ldpp_dout(dpp, 20) << "started sync module instance, tier type = " << zone_public_config.tier_type << dendl; + + return 0; +} + +RGWSI_SyncModules::~RGWSI_SyncModules() +{ + delete sync_modules_manager; +} + diff --git a/src/rgw/services/svc_sync_modules.h b/src/rgw/services/svc_sync_modules.h new file mode 100644 index 000000000..0640ced1d --- /dev/null +++ b/src/rgw/services/svc_sync_modules.h @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" +#include "rgw/rgw_sync_module.h" + +class RGWSI_Zone; + +class RGWSyncModulesManager; + +class RGWSI_SyncModules : public RGWServiceInstance +{ + RGWSyncModulesManager *sync_modules_manager{nullptr}; + RGWSyncModuleInstanceRef sync_module; + + struct Svc { + RGWSI_Zone *zone{nullptr}; + } svc; + +public: + RGWSI_SyncModules(CephContext *cct): RGWServiceInstance(cct) {} + ~RGWSI_SyncModules(); + + RGWSyncModulesManager *get_manager() { + return sync_modules_manager; + } + + void init(RGWSI_Zone *zone_svc); + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + + RGWSyncModuleInstanceRef& get_sync_module() { return sync_module; } +}; diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc new file mode 100644 index 000000000..be30e45c5 --- /dev/null +++ b/src/rgw/services/svc_sys_obj.cc @@ -0,0 +1,194 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_sys_obj.h" +#include "svc_sys_obj_core.h" +#include "svc_rados.h" +#include "svc_zone.h" + +#include "rgw/rgw_zone.h" + +#define dout_subsys ceph_subsys_rgw + +RGWSysObjectCtx RGWSI_SysObj::init_obj_ctx() +{ + return RGWSysObjectCtx(this); +} + +RGWSI_SysObj::Obj RGWSI_SysObj::get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj) +{ + return Obj(core_svc, obj_ctx, obj); +} + +void RGWSI_SysObj::Obj::invalidate() +{ + ctx.invalidate(obj); +} + +RGWSI_SysObj::Obj::ROp::ROp(Obj& _source) : source(_source) { + state.emplace<RGWSI_SysObj_Core::GetObjState>(); +} + +int RGWSI_SysObj::Obj::ROp::stat(optional_yield y, const DoutPrefixProvider *dpp) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->stat(source.get_ctx(), *state, obj, + attrs, raw_attrs, + lastmod, obj_size, + objv_tracker, y, dpp); +} + +int RGWSI_SysObj::Obj::ROp::read(const DoutPrefixProvider *dpp, + int64_t ofs, int64_t end, bufferlist *bl, + optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->read(dpp, source.get_ctx(), *state, + objv_tracker, + obj, bl, ofs, end, + attrs, + raw_attrs, + cache_info, + refresh_version, y); +} + +int RGWSI_SysObj::Obj::ROp::get_attr(const DoutPrefixProvider *dpp, + const char *name, bufferlist *dest, + optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->get_attr(dpp, obj, name, dest, y); +} + +int RGWSI_SysObj::Obj::WOp::remove(const DoutPrefixProvider *dpp, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->remove(dpp, source.get_ctx(), + objv_tracker, + obj, y); +} + +int RGWSI_SysObj::Obj::WOp::write(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->write(dpp, obj, pmtime, attrs, exclusive, + bl, objv_tracker, mtime, y); +} + +int RGWSI_SysObj::Obj::WOp::write_data(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->write_data(dpp, obj, bl, exclusive, objv_tracker, y); +} + +int RGWSI_SysObj::Obj::WOp::write_attrs(const DoutPrefixProvider *dpp, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + return svc->set_attrs(dpp, obj, attrs, nullptr, objv_tracker, y); +} + +int RGWSI_SysObj::Obj::WOp::write_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& bl, + optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.get_obj(); + + map<string, bufferlist> m; + m[name] = bl; + + return svc->set_attrs(dpp, obj, m, nullptr, objv_tracker, y); +} + +int RGWSI_SysObj::Pool::list_prefixed_objs(const DoutPrefixProvider *dpp, const string& prefix, std::function<void(const string&)> cb) +{ + return core_svc->pool_list_prefixed_objs(dpp, pool, prefix, cb); +} + +int RGWSI_SysObj::Pool::Op::init(const DoutPrefixProvider *dpp, const string& marker, const string& prefix) +{ + return source.core_svc->pool_list_objects_init(dpp, source.pool, marker, prefix, &ctx); +} + +int RGWSI_SysObj::Pool::Op::get_next(int max, vector<string> *oids, bool *is_truncated) +{ + return source.core_svc->pool_list_objects_next(ctx, max, oids, is_truncated); +} + +int RGWSI_SysObj::Pool::Op::get_marker(string *marker) +{ + return source.core_svc->pool_list_objects_get_marker(ctx, marker); +} + +int RGWSI_SysObj::Obj::OmapOp::get_all(const DoutPrefixProvider *dpp, std::map<string, bufferlist> *m, + optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_get_all(dpp, obj, m, y); +} + +int RGWSI_SysObj::Obj::OmapOp::get_vals(const DoutPrefixProvider *dpp, + const string& marker, uint64_t count, + std::map<string, bufferlist> *m, + bool *pmore, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_get_vals(dpp, obj, marker, count, m, pmore, y); +} + +int RGWSI_SysObj::Obj::OmapOp::set(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& bl, + optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_set(dpp, obj, key, bl, must_exist, y); +} + +int RGWSI_SysObj::Obj::OmapOp::set(const DoutPrefixProvider *dpp, const map<std::string, bufferlist>& m, + optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_set(dpp, obj, m, must_exist, y); +} + +int RGWSI_SysObj::Obj::OmapOp::del(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->omap_del(dpp, obj, key, y); +} + +int RGWSI_SysObj::Obj::WNOp::notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, + bufferlist *pbl, optional_yield y) +{ + RGWSI_SysObj_Core *svc = source.core_svc; + rgw_raw_obj& obj = source.obj; + + return svc->notify(dpp, obj, bl, timeout_ms, pbl, y); +} + +RGWSI_Zone *RGWSI_SysObj::get_zone_svc() +{ + return core_svc->get_zone_svc(); +} diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h new file mode 100644 index 000000000..48ae30240 --- /dev/null +++ b/src/rgw/services/svc_sys_obj.h @@ -0,0 +1,293 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "common/static_ptr.h" + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" +#include "svc_sys_obj_types.h" +#include "svc_sys_obj_core_types.h" + + +class RGWSI_Zone; +class RGWSI_SysObj; +class RGWSysObjectCtx; + +struct rgw_cache_entry_info; + +class RGWSI_SysObj : public RGWServiceInstance +{ + friend struct RGWServices_Def; + +public: + class Obj { + friend class ROp; + + RGWSI_SysObj_Core *core_svc; + RGWSysObjectCtx& ctx; + rgw_raw_obj obj; + + public: + Obj(RGWSI_SysObj_Core *_core_svc, + RGWSysObjectCtx& _ctx, + const rgw_raw_obj& _obj) : core_svc(_core_svc), + ctx(_ctx), + obj(_obj) {} + + void invalidate(); + + RGWSysObjectCtx& get_ctx() { + return ctx; + } + + rgw_raw_obj& get_obj() { + return obj; + } + + struct ROp { + Obj& source; + + ceph::static_ptr<RGWSI_SysObj_Obj_GetObjState, sizeof(RGWSI_SysObj_Core_GetObjState)> state; + + RGWObjVersionTracker *objv_tracker{nullptr}; + map<string, bufferlist> *attrs{nullptr}; + bool raw_attrs{false}; + boost::optional<obj_version> refresh_version{boost::none}; + ceph::real_time *lastmod{nullptr}; + uint64_t *obj_size{nullptr}; + rgw_cache_entry_info *cache_info{nullptr}; + + ROp& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + + ROp& set_last_mod(ceph::real_time *_lastmod) { + lastmod = _lastmod; + return *this; + } + + ROp& set_obj_size(uint64_t *_obj_size) { + obj_size = _obj_size; + return *this; + } + + ROp& set_attrs(map<string, bufferlist> *_attrs) { + attrs = _attrs; + return *this; + } + + ROp& set_raw_attrs(bool ra) { + raw_attrs = ra; + return *this; + } + + ROp& set_refresh_version(boost::optional<obj_version>& rf) { + refresh_version = rf; + return *this; + } + + ROp& set_cache_info(rgw_cache_entry_info *ci) { + cache_info = ci; + return *this; + } + + ROp(Obj& _source); + + int stat(optional_yield y, const DoutPrefixProvider *dpp); + int read(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, bufferlist *pbl, optional_yield y); + int read(const DoutPrefixProvider *dpp, bufferlist *pbl, optional_yield y) { + return read(dpp, 0, -1, pbl, y); + } + int get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist *dest, optional_yield y); + }; + + struct WOp { + Obj& source; + + RGWObjVersionTracker *objv_tracker{nullptr}; + map<string, bufferlist> attrs; + ceph::real_time mtime; + ceph::real_time *pmtime{nullptr}; + bool exclusive{false}; + + WOp& set_objv_tracker(RGWObjVersionTracker *_objv_tracker) { + objv_tracker = _objv_tracker; + return *this; + } + + WOp& set_attrs(map<string, bufferlist>& _attrs) { + attrs = _attrs; + return *this; + } + + WOp& set_attrs(map<string, bufferlist>&& _attrs) { + attrs = _attrs; + return *this; + } + + WOp& set_mtime(const ceph::real_time& _mtime) { + mtime = _mtime; + return *this; + } + + WOp& set_pmtime(ceph::real_time *_pmtime) { + pmtime = _pmtime; + return *this; + } + + WOp& set_exclusive(bool _exclusive = true) { + exclusive = _exclusive; + return *this; + } + + WOp(Obj& _source) : source(_source) {} + + int remove(const DoutPrefixProvider *dpp, optional_yield y); + int write(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y); + + int write_data(const DoutPrefixProvider *dpp, bufferlist& bl, optional_yield y); /* write data only */ + int write_attrs(const DoutPrefixProvider *dpp, optional_yield y); /* write attrs only */ + int write_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& bl, + optional_yield y); /* write attrs only */ + }; + + struct OmapOp { + Obj& source; + + bool must_exist{false}; + + OmapOp& set_must_exist(bool _must_exist = true) { + must_exist = _must_exist; + return *this; + } + + OmapOp(Obj& _source) : source(_source) {} + + int get_all(const DoutPrefixProvider *dpp, std::map<string, bufferlist> *m, optional_yield y); + int get_vals(const DoutPrefixProvider *dpp, const string& marker, uint64_t count, + std::map<string, bufferlist> *m, + bool *pmore, optional_yield y); + int set(const DoutPrefixProvider *dpp, const std::string& key, bufferlist& bl, optional_yield y); + int set(const DoutPrefixProvider *dpp, const map<std::string, bufferlist>& m, optional_yield y); + int del(const DoutPrefixProvider *dpp, const std::string& key, optional_yield y); + }; + + struct WNOp { + Obj& source; + + WNOp(Obj& _source) : source(_source) {} + + int notify(const DoutPrefixProvider *dpp, bufferlist& bl, uint64_t timeout_ms, bufferlist *pbl, + optional_yield y); + }; + ROp rop() { + return ROp(*this); + } + + WOp wop() { + return WOp(*this); + } + + OmapOp omap() { + return OmapOp(*this); + } + + WNOp wn() { + return WNOp(*this); + } + }; + + class Pool { + friend class Op; + friend class RGWSI_SysObj_Core; + + RGWSI_SysObj_Core *core_svc; + rgw_pool pool; + + protected: + using ListImplInfo = RGWSI_SysObj_Pool_ListInfo; + + struct ListCtx { + ceph::static_ptr<ListImplInfo, sizeof(RGWSI_SysObj_Core_PoolListImplInfo)> impl; /* update this if creating new backend types */ + }; + + public: + Pool(RGWSI_SysObj_Core *_core_svc, + const rgw_pool& _pool) : core_svc(_core_svc), + pool(_pool) {} + + rgw_pool& get_pool() { + return pool; + } + + struct Op { + Pool& source; + ListCtx ctx; + + Op(Pool& _source) : source(_source) {} + + int init(const DoutPrefixProvider *dpp, const std::string& marker, const std::string& prefix); + int get_next(int max, std::vector<string> *oids, bool *is_truncated); + int get_marker(string *marker); + }; + + int list_prefixed_objs(const DoutPrefixProvider *dpp, const std::string& prefix, std::function<void(const string&)> cb); + + template <typename Container> + int list_prefixed_objs(const DoutPrefixProvider *dpp, const string& prefix, + Container *result) { + return list_prefixed_objs(dpp, prefix, [&](const string& val) { + result->push_back(val); + }); + } + + Op op() { + return Op(*this); + } + }; + + friend class Obj; + friend class Obj::ROp; + friend class Obj::WOp; + friend class Pool; + friend class Pool::Op; + +protected: + RGWSI_RADOS *rados_svc{nullptr}; + RGWSI_SysObj_Core *core_svc{nullptr}; + + void init(RGWSI_RADOS *_rados_svc, + RGWSI_SysObj_Core *_core_svc) { + rados_svc = _rados_svc; + core_svc = _core_svc; + } + +public: + RGWSI_SysObj(CephContext *cct): RGWServiceInstance(cct) {} + + RGWSysObjectCtx init_obj_ctx(); + Obj get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj); + + Pool get_pool(const rgw_pool& pool) { + return Pool(core_svc, pool); + } + + RGWSI_Zone *get_zone_svc(); +}; + +using RGWSysObj = RGWSI_SysObj::Obj; + +class RGWSysObjectCtx : public RGWSysObjectCtxBase +{ + RGWSI_SysObj *sysobj_svc; +public: + RGWSysObjectCtx(RGWSI_SysObj *_sysobj_svc) : sysobj_svc(_sysobj_svc) {} + + RGWSI_SysObj::Obj get_obj(const rgw_raw_obj& obj) { + return sysobj_svc->get_obj(*this, obj); + } +}; diff --git a/src/rgw/services/svc_sys_obj_cache.cc b/src/rgw/services/svc_sys_obj_cache.cc new file mode 100644 index 000000000..3358864a3 --- /dev/null +++ b/src/rgw/services/svc_sys_obj_cache.cc @@ -0,0 +1,643 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "common/admin_socket.h" + +#include "svc_sys_obj_cache.h" +#include "svc_zone.h" +#include "svc_notify.h" + +#include "rgw/rgw_zone.h" +#include "rgw/rgw_tools.h" + +#define dout_subsys ceph_subsys_rgw + +class RGWSI_SysObj_Cache_CB : public RGWSI_Notify::CB +{ + RGWSI_SysObj_Cache *svc; +public: + RGWSI_SysObj_Cache_CB(RGWSI_SysObj_Cache *_svc) : svc(_svc) {} + int watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) { + return svc->watch_cb(dpp, notify_id, cookie, notifier_id, bl); + } + + void set_enabled(bool status) { + svc->set_enabled(status); + } +}; + +int RGWSI_SysObj_Cache::do_start(optional_yield y, const DoutPrefixProvider *dpp) +{ + int r = asocket.start(); + if (r < 0) { + return r; + } + + r = RGWSI_SysObj_Core::do_start(y, dpp); + if (r < 0) { + return r; + } + + r = notify_svc->start(y, dpp); + if (r < 0) { + return r; + } + + assert(notify_svc->is_started()); + + cb.reset(new RGWSI_SysObj_Cache_CB(this)); + + notify_svc->register_watch_cb(cb.get()); + + return 0; +} + +void RGWSI_SysObj_Cache::shutdown() +{ + asocket.shutdown(); + RGWSI_SysObj_Core::shutdown(); +} + +static string normal_name(rgw_pool& pool, const std::string& oid) { + std::string buf; + buf.reserve(pool.name.size() + pool.ns.size() + oid.size() + 2); + buf.append(pool.name).append("+").append(pool.ns).append("+").append(oid); + return buf; +} + +void RGWSI_SysObj_Cache::normalize_pool_and_obj(const rgw_pool& src_pool, const string& src_obj, rgw_pool& dst_pool, string& dst_obj) +{ + if (src_obj.size()) { + dst_pool = src_pool; + dst_obj = src_obj; + } else { + dst_pool = zone_svc->get_zone_params().domain_root; + dst_obj = src_pool.name; + } +} + + +int RGWSI_SysObj_Cache::remove(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + optional_yield y) + +{ + rgw_pool pool; + string oid; + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + + string name = normal_name(pool, oid); + cache.remove(dpp, name); + + ObjectCacheInfo info; + int r = distribute_cache(dpp, name, obj, info, REMOVE_OBJ, y); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << "(): failed to distribute cache: r=" << r << dendl; + } + + return RGWSI_SysObj_Core::remove(dpp, obj_ctx, objv_tracker, obj, y); +} + +int RGWSI_SysObj_Cache::read(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWSI_SysObj_Obj_GetObjState& read_state, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + bufferlist *obl, off_t ofs, off_t end, + map<string, bufferlist> *attrs, + bool raw_attrs, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version> refresh_version, + optional_yield y) +{ + rgw_pool pool; + string oid; + if (ofs != 0) { + return RGWSI_SysObj_Core::read(dpp, obj_ctx, read_state, objv_tracker, + obj, obl, ofs, end, attrs, raw_attrs, + cache_info, refresh_version, y); + } + + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + string name = normal_name(pool, oid); + + ObjectCacheInfo info; + + uint32_t flags = (end != 0 ? CACHE_FLAG_DATA : 0); + if (objv_tracker) + flags |= CACHE_FLAG_OBJV; + if (attrs) + flags |= CACHE_FLAG_XATTRS; + + int r = cache.get(dpp, name, info, flags, cache_info); + if (r == 0 && + (!refresh_version || !info.version.compare(&(*refresh_version)))) { + if (info.status < 0) + return info.status; + + bufferlist& bl = info.data; + + bufferlist::iterator i = bl.begin(); + + obl->clear(); + + i.copy_all(*obl); + if (objv_tracker) + objv_tracker->read_version = info.version; + if (attrs) { + if (raw_attrs) { + *attrs = info.xattrs; + } else { + rgw_filter_attrset(info.xattrs, RGW_ATTR_PREFIX, attrs); + } + } + return obl->length(); + } + if(r == -ENODATA) + return -ENOENT; + + map<string, bufferlist> unfiltered_attrset; + r = RGWSI_SysObj_Core::read(dpp, obj_ctx, read_state, objv_tracker, + obj, obl, ofs, end, + (attrs ? &unfiltered_attrset : nullptr), + true, /* cache unfiltered attrs */ + cache_info, + refresh_version, y); + if (r < 0) { + if (r == -ENOENT) { // only update ENOENT, we'd rather retry other errors + info.status = r; + cache.put(dpp, name, info, cache_info); + } + return r; + } + + if (obl->length() == end + 1) { + /* in this case, most likely object contains more data, we can't cache it */ + flags &= ~CACHE_FLAG_DATA; + } else { + bufferptr p(r); + bufferlist& bl = info.data; + bl.clear(); + bufferlist::iterator o = obl->begin(); + o.copy_all(bl); + } + + info.status = 0; + info.flags = flags; + if (objv_tracker) { + info.version = objv_tracker->read_version; + } + if (attrs) { + info.xattrs = std::move(unfiltered_attrset); + if (raw_attrs) { + *attrs = info.xattrs; + } else { + rgw_filter_attrset(info.xattrs, RGW_ATTR_PREFIX, attrs); + } + } + cache.put(dpp, name, info, cache_info); + return r; +} + +int RGWSI_SysObj_Cache::get_attr(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const char *attr_name, + bufferlist *dest, + optional_yield y) +{ + rgw_pool pool; + string oid; + + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + string name = normal_name(pool, oid); + + ObjectCacheInfo info; + + uint32_t flags = CACHE_FLAG_XATTRS; + + int r = cache.get(dpp, name, info, flags, nullptr); + if (r == 0) { + if (info.status < 0) + return info.status; + + auto iter = info.xattrs.find(attr_name); + if (iter == info.xattrs.end()) { + return -ENODATA; + } + + *dest = iter->second; + return dest->length(); + } else if (r == -ENODATA) { + return -ENOENT; + } + /* don't try to cache this one */ + return RGWSI_SysObj_Core::get_attr(dpp, obj, attr_name, dest, y); +} + +int RGWSI_SysObj_Cache::set_attrs(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + map<string, bufferlist>& attrs, + map<string, bufferlist> *rmattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + rgw_pool pool; + string oid; + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + ObjectCacheInfo info; + info.xattrs = attrs; + if (rmattrs) { + info.rm_xattrs = *rmattrs; + } + info.status = 0; + info.flags = CACHE_FLAG_MODIFY_XATTRS; + int ret = RGWSI_SysObj_Core::set_attrs(dpp, obj, attrs, rmattrs, objv_tracker, y); + string name = normal_name(pool, oid); + if (ret >= 0) { + if (objv_tracker && objv_tracker->read_version.ver) { + info.version = objv_tracker->read_version; + info.flags |= CACHE_FLAG_OBJV; + } + cache.put(dpp, name, info, NULL); + int r = distribute_cache(dpp, name, obj, info, UPDATE_OBJ, y); + if (r < 0) + ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << obj << dendl; + } else { + cache.remove(dpp, name); + } + + return ret; +} + +int RGWSI_SysObj_Cache::write(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + real_time *pmtime, + map<std::string, bufferlist>& attrs, + bool exclusive, + const bufferlist& data, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime, + optional_yield y) +{ + rgw_pool pool; + string oid; + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + ObjectCacheInfo info; + info.xattrs = attrs; + info.status = 0; + info.data = data; + info.flags = CACHE_FLAG_XATTRS | CACHE_FLAG_DATA | CACHE_FLAG_META; + ceph::real_time result_mtime; + int ret = RGWSI_SysObj_Core::write(dpp, obj, &result_mtime, attrs, + exclusive, data, + objv_tracker, set_mtime, y); + if (pmtime) { + *pmtime = result_mtime; + } + if (objv_tracker && objv_tracker->read_version.ver) { + info.version = objv_tracker->read_version; + info.flags |= CACHE_FLAG_OBJV; + } + info.meta.mtime = result_mtime; + info.meta.size = data.length(); + string name = normal_name(pool, oid); + if (ret >= 0) { + cache.put(dpp, name, info, NULL); + int r = distribute_cache(dpp, name, obj, info, UPDATE_OBJ, y); + if (r < 0) + ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << obj << dendl; + } else { + cache.remove(dpp, name); + } + + return ret; +} + +int RGWSI_SysObj_Cache::write_data(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const bufferlist& data, + bool exclusive, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + rgw_pool pool; + string oid; + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + + ObjectCacheInfo info; + info.data = data; + info.meta.size = data.length(); + info.status = 0; + info.flags = CACHE_FLAG_DATA; + + int ret = RGWSI_SysObj_Core::write_data(dpp, obj, data, exclusive, objv_tracker, y); + string name = normal_name(pool, oid); + if (ret >= 0) { + if (objv_tracker && objv_tracker->read_version.ver) { + info.version = objv_tracker->read_version; + info.flags |= CACHE_FLAG_OBJV; + } + cache.put(dpp, name, info, NULL); + int r = distribute_cache(dpp, name, obj, info, UPDATE_OBJ, y); + if (r < 0) + ldpp_dout(dpp, 0) << "ERROR: failed to distribute cache for " << obj << dendl; + } else { + cache.remove(dpp, name); + } + + return ret; +} + +int RGWSI_SysObj_Cache::raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *pepoch, + map<string, bufferlist> *attrs, bufferlist *first_chunk, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + rgw_pool pool; + string oid; + normalize_pool_and_obj(obj.pool, obj.oid, pool, oid); + + string name = normal_name(pool, oid); + + uint64_t size; + real_time mtime; + uint64_t epoch; + + ObjectCacheInfo info; + uint32_t flags = CACHE_FLAG_META | CACHE_FLAG_XATTRS; + if (objv_tracker) + flags |= CACHE_FLAG_OBJV; + int r = cache.get(dpp, name, info, flags, NULL); + if (r == 0) { + if (info.status < 0) + return info.status; + + size = info.meta.size; + mtime = info.meta.mtime; + epoch = info.epoch; + if (objv_tracker) + objv_tracker->read_version = info.version; + goto done; + } + if (r == -ENODATA) { + return -ENOENT; + } + r = RGWSI_SysObj_Core::raw_stat(dpp, obj, &size, &mtime, &epoch, &info.xattrs, + first_chunk, objv_tracker, y); + if (r < 0) { + if (r == -ENOENT) { + info.status = r; + cache.put(dpp, name, info, NULL); + } + return r; + } + info.status = 0; + info.epoch = epoch; + info.meta.mtime = mtime; + info.meta.size = size; + info.flags = CACHE_FLAG_META | CACHE_FLAG_XATTRS; + if (objv_tracker) { + info.flags |= CACHE_FLAG_OBJV; + info.version = objv_tracker->read_version; + } + cache.put(dpp, name, info, NULL); +done: + if (psize) + *psize = size; + if (pmtime) + *pmtime = mtime; + if (pepoch) + *pepoch = epoch; + if (attrs) + *attrs = info.xattrs; + return 0; +} + +int RGWSI_SysObj_Cache::distribute_cache(const DoutPrefixProvider *dpp, + const string& normal_name, + const rgw_raw_obj& obj, + ObjectCacheInfo& obj_info, int op, + optional_yield y) +{ + RGWCacheNotifyInfo info; + info.op = op; + info.obj_info = obj_info; + info.obj = obj; + return notify_svc->distribute(dpp, normal_name, info, y); +} + +int RGWSI_SysObj_Cache::watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) +{ + RGWCacheNotifyInfo info; + + try { + auto iter = bl.cbegin(); + decode(info, iter); + } catch (buffer::end_of_buffer& err) { + ldout(cct, 0) << "ERROR: got bad notification" << dendl; + return -EIO; + } catch (buffer::error& err) { + ldout(cct, 0) << "ERROR: buffer::error" << dendl; + return -EIO; + } + + rgw_pool pool; + string oid; + normalize_pool_and_obj(info.obj.pool, info.obj.oid, pool, oid); + string name = normal_name(pool, oid); + + switch (info.op) { + case UPDATE_OBJ: + cache.put(dpp, name, info.obj_info, NULL); + break; + case REMOVE_OBJ: + cache.remove(dpp, name); + break; + default: + ldout(cct, 0) << "WARNING: got unknown notification op: " << info.op << dendl; + return -EINVAL; + } + + return 0; +} + +void RGWSI_SysObj_Cache::set_enabled(bool status) +{ + cache.set_enabled(status); +} + +bool RGWSI_SysObj_Cache::chain_cache_entry(const DoutPrefixProvider *dpp, + std::initializer_list<rgw_cache_entry_info *> cache_info_entries, + RGWChainedCache::Entry *chained_entry) +{ + return cache.chain_cache_entry(dpp, cache_info_entries, chained_entry); +} + +void RGWSI_SysObj_Cache::register_chained_cache(RGWChainedCache *cc) +{ + cache.chain_cache(cc); +} + +void RGWSI_SysObj_Cache::unregister_chained_cache(RGWChainedCache *cc) +{ + cache.unchain_cache(cc); +} + +static void cache_list_dump_helper(Formatter* f, + const std::string& name, + const ceph::real_time mtime, + const std::uint64_t size) +{ + f->dump_string("name", name); + f->dump_string("mtime", ceph::to_iso_8601(mtime)); + f->dump_unsigned("size", size); +} + +class RGWSI_SysObj_Cache_ASocketHook : public AdminSocketHook { + RGWSI_SysObj_Cache *svc; + + static constexpr std::string_view admin_commands[][2] = { + { "cache list name=filter,type=CephString,req=false", + "cache list [filter_str]: list object cache, possibly matching substrings" }, + { "cache inspect name=target,type=CephString,req=true", + "cache inspect target: print cache element" }, + { "cache erase name=target,type=CephString,req=true", + "cache erase target: erase element from cache" }, + { "cache zap", + "cache zap: erase all elements from cache" } + }; + +public: + RGWSI_SysObj_Cache_ASocketHook(RGWSI_SysObj_Cache *_svc) : svc(_svc) {} + + int start(); + void shutdown(); + + int call(std::string_view command, const cmdmap_t& cmdmap, + Formatter *f, + std::ostream& ss, + bufferlist& out) override; +}; + +int RGWSI_SysObj_Cache_ASocketHook::start() +{ + auto admin_socket = svc->ctx()->get_admin_socket(); + for (auto cmd : admin_commands) { + int r = admin_socket->register_command(cmd[0], this, cmd[1]); + if (r < 0) { + ldout(svc->ctx(), 0) << "ERROR: fail to register admin socket command (r=" << r + << ")" << dendl; + return r; + } + } + return 0; +} + +void RGWSI_SysObj_Cache_ASocketHook::shutdown() +{ + auto admin_socket = svc->ctx()->get_admin_socket(); + admin_socket->unregister_commands(this); +} + +int RGWSI_SysObj_Cache_ASocketHook::call( + std::string_view command, const cmdmap_t& cmdmap, + Formatter *f, + std::ostream& ss, + bufferlist& out) +{ + if (command == "cache list"sv) { + std::optional<std::string> filter; + if (auto i = cmdmap.find("filter"); i != cmdmap.cend()) { + filter = boost::get<std::string>(i->second); + } + f->open_array_section("cache_entries"); + svc->asocket.call_list(filter, f); + f->close_section(); + return 0; + } else if (command == "cache inspect"sv) { + const auto& target = boost::get<std::string>(cmdmap.at("target")); + if (svc->asocket.call_inspect(target, f)) { + return 0; + } else { + ss << "Unable to find entry "s + target + ".\n"; + return -ENOENT; + } + } else if (command == "cache erase"sv) { + const auto& target = boost::get<std::string>(cmdmap.at("target")); + if (svc->asocket.call_erase(target)) { + return 0; + } else { + ss << "Unable to find entry "s + target + ".\n"; + return -ENOENT; + } + } else if (command == "cache zap"sv) { + svc->asocket.call_zap(); + return 0; + } + return -ENOSYS; +} + +RGWSI_SysObj_Cache::ASocketHandler::ASocketHandler(const DoutPrefixProvider *_dpp, RGWSI_SysObj_Cache *_svc) : dpp(_dpp), svc(_svc) +{ + hook.reset(new RGWSI_SysObj_Cache_ASocketHook(_svc)); +} + +RGWSI_SysObj_Cache::ASocketHandler::~ASocketHandler() +{ +} + +int RGWSI_SysObj_Cache::ASocketHandler::start() +{ + return hook->start(); +} + +void RGWSI_SysObj_Cache::ASocketHandler::shutdown() +{ + return hook->shutdown(); +} + +void RGWSI_SysObj_Cache::ASocketHandler::call_list(const std::optional<std::string>& filter, Formatter* f) +{ + svc->cache.for_each( + [&filter, f] (const string& name, const ObjectCacheEntry& entry) { + if (!filter || name.find(*filter) != name.npos) { + cache_list_dump_helper(f, name, entry.info.meta.mtime, + entry.info.meta.size); + } + }); +} + +int RGWSI_SysObj_Cache::ASocketHandler::call_inspect(const std::string& target, Formatter* f) +{ + if (const auto entry = svc->cache.get(dpp, target)) { + f->open_object_section("cache_entry"); + f->dump_string("name", target.c_str()); + entry->dump(f); + f->close_section(); + return true; + } else { + return false; + } +} + +int RGWSI_SysObj_Cache::ASocketHandler::call_erase(const std::string& target) +{ + return svc->cache.remove(dpp, target); +} + +int RGWSI_SysObj_Cache::ASocketHandler::call_zap() +{ + svc->cache.invalidate_all(); + return 0; +} diff --git a/src/rgw/services/svc_sys_obj_cache.h b/src/rgw/services/svc_sys_obj_cache.h new file mode 100644 index 000000000..af95c06f4 --- /dev/null +++ b/src/rgw/services/svc_sys_obj_cache.h @@ -0,0 +1,221 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" +#include "rgw/rgw_cache.h" + +#include "svc_sys_obj_core.h" + +class RGWSI_Notify; + +class RGWSI_SysObj_Cache_CB; +class RGWSI_SysObj_Cache_ASocketHook; + +class RGWSI_SysObj_Cache : public RGWSI_SysObj_Core +{ + friend class RGWSI_SysObj_Cache_CB; + friend class RGWServices_Def; + friend class ASocketHandler; + + RGWSI_Notify *notify_svc{nullptr}; + ObjectCache cache; + + std::shared_ptr<RGWSI_SysObj_Cache_CB> cb; + + void normalize_pool_and_obj(const rgw_pool& src_pool, const string& src_obj, rgw_pool& dst_pool, string& dst_obj); +protected: + void init(RGWSI_RADOS *_rados_svc, + RGWSI_Zone *_zone_svc, + RGWSI_Notify *_notify_svc) { + core_init(_rados_svc, _zone_svc); + notify_svc = _notify_svc; + } + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + void shutdown() override; + + int raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch, + map<string, bufferlist> *attrs, bufferlist *first_chunk, + RGWObjVersionTracker *objv_tracker, + optional_yield y) override; + + int read(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWSI_SysObj_Obj_GetObjState& read_state, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + bufferlist *bl, off_t ofs, off_t end, + map<string, bufferlist> *attrs, + bool raw_attrs, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version>, + optional_yield y) override; + + int get_attr(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const char *name, bufferlist *dest, + optional_yield y) override; + + int set_attrs(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + map<string, bufferlist>& attrs, + map<string, bufferlist> *rmattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + + int remove(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + optional_yield y) override; + + int write(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + real_time *pmtime, + map<std::string, bufferlist>& attrs, + bool exclusive, + const bufferlist& data, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime, + optional_yield y) override; + + int write_data(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const bufferlist& bl, + bool exclusive, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + + int distribute_cache(const DoutPrefixProvider *dpp, const string& normal_name, const rgw_raw_obj& obj, + ObjectCacheInfo& obj_info, int op, + optional_yield y); + + int watch_cb(const DoutPrefixProvider *dpp, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl); + + void set_enabled(bool status); + +public: + RGWSI_SysObj_Cache(const DoutPrefixProvider *dpp, CephContext *cct) : RGWSI_SysObj_Core(cct), asocket(dpp, this) { + cache.set_ctx(cct); + } + + bool chain_cache_entry(const DoutPrefixProvider *dpp, + std::initializer_list<rgw_cache_entry_info *> cache_info_entries, + RGWChainedCache::Entry *chained_entry); + void register_chained_cache(RGWChainedCache *cc); + void unregister_chained_cache(RGWChainedCache *cc); + + class ASocketHandler { + const DoutPrefixProvider *dpp; + RGWSI_SysObj_Cache *svc; + + std::unique_ptr<RGWSI_SysObj_Cache_ASocketHook> hook; + + public: + ASocketHandler(const DoutPrefixProvider *dpp, RGWSI_SysObj_Cache *_svc); + ~ASocketHandler(); + + int start(); + void shutdown(); + + // `call_list` must iterate over all cache entries and call + // `cache_list_dump_helper` with the supplied Formatter on any that + // include `filter` as a substring. + // + void call_list(const std::optional<std::string>& filter, Formatter* f); + + // `call_inspect` must look up the requested target and, if found, + // dump it to the supplied Formatter and return true. If not found, + // it must return false. + // + int call_inspect(const std::string& target, Formatter* f); + + // `call_erase` must erase the requested target and return true. If + // the requested target does not exist, it should return false. + int call_erase(const std::string& target); + + // `call_zap` must erase the cache. + int call_zap(); + } asocket; +}; + +template <class T> +class RGWChainedCacheImpl : public RGWChainedCache { + RGWSI_SysObj_Cache *svc{nullptr}; + ceph::timespan expiry; + RWLock lock; + + std::unordered_map<std::string, std::pair<T, ceph::coarse_mono_time>> entries; + +public: + RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {} + ~RGWChainedCacheImpl() { + if (!svc) { + return; + } + svc->unregister_chained_cache(this); + } + + void unregistered() override { + svc = nullptr; + } + + void init(RGWSI_SysObj_Cache *_svc) { + if (!_svc) { + return; + } + svc = _svc; + svc->register_chained_cache(this); + expiry = std::chrono::seconds(svc->ctx()->_conf.get_val<uint64_t>( + "rgw_cache_expiry_interval")); + } + + boost::optional<T> find(const string& key) { + std::shared_lock rl{lock}; + auto iter = entries.find(key); + if (iter == entries.end()) { + return boost::none; + } + if (expiry.count() && + (ceph::coarse_mono_clock::now() - iter->second.second) > expiry) { + return boost::none; + } + + return iter->second.first; + } + + bool put(const DoutPrefixProvider *dpp, RGWSI_SysObj_Cache *svc, const string& key, T *entry, + std::initializer_list<rgw_cache_entry_info *> cache_info_entries) { + if (!svc) { + return false; + } + + Entry chain_entry(this, key, entry); + + /* we need the svc cache to call us under its lock to maintain lock ordering */ + return svc->chain_cache_entry(dpp, cache_info_entries, &chain_entry); + } + + void chain_cb(const string& key, void *data) override { + T *entry = static_cast<T *>(data); + std::unique_lock wl{lock}; + entries[key].first = *entry; + if (expiry.count() > 0) { + entries[key].second = ceph::coarse_mono_clock::now(); + } + } + + void invalidate(const string& key) override { + std::unique_lock wl{lock}; + entries.erase(key); + } + + void invalidate_all() override { + std::unique_lock wl{lock}; + entries.clear(); + } +}; /* RGWChainedCacheImpl */ diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc new file mode 100644 index 000000000..2e194dfee --- /dev/null +++ b/src/rgw/services/svc_sys_obj_core.cc @@ -0,0 +1,724 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_sys_obj_core.h" +#include "svc_rados.h" +#include "svc_zone.h" + +#include "rgw/rgw_tools.h" + +#define dout_subsys ceph_subsys_rgw + +int RGWSI_SysObj_Core_GetObjState::get_rados_obj(const DoutPrefixProvider *dpp, + RGWSI_RADOS *rados_svc, + RGWSI_Zone *zone_svc, + const rgw_raw_obj& obj, + RGWSI_RADOS::Obj **pobj) +{ + if (!has_rados_obj) { + if (obj.oid.empty()) { + ldpp_dout(dpp, 0) << "ERROR: obj.oid is empty" << dendl; + return -EINVAL; + } + + rados_obj = rados_svc->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + has_rados_obj = true; + } + *pobj = &rados_obj; + return 0; +} + +int RGWSI_SysObj_Core::get_rados_obj(const DoutPrefixProvider *dpp, + RGWSI_Zone *zone_svc, + const rgw_raw_obj& obj, + RGWSI_RADOS::Obj *pobj) +{ + if (obj.oid.empty()) { + ldpp_dout(dpp, 0) << "ERROR: obj.oid is empty" << dendl; + return -EINVAL; + } + + *pobj = rados_svc->obj(obj); + int r = pobj->open(dpp); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_SysObj_Core::get_system_obj_state_impl(RGWSysObjectCtxBase *rctx, + const rgw_raw_obj& obj, + RGWSysObjState **state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + if (obj.empty()) { + return -EINVAL; + } + + RGWSysObjState *s = rctx->get_state(obj); + ldpp_dout(dpp, 20) << "get_system_obj_state: rctx=" << (void *)rctx << " obj=" << obj << " state=" << (void *)s << " s->prefetch_data=" << s->prefetch_data << dendl; + *state = s; + if (s->has_attrs) { + return 0; + } + + s->obj = obj; + + int r = raw_stat(dpp, obj, &s->size, &s->mtime, &s->epoch, &s->attrset, + (s->prefetch_data ? &s->data : nullptr), objv_tracker, y); + if (r == -ENOENT) { + s->exists = false; + s->has_attrs = true; + s->mtime = real_time(); + return 0; + } + if (r < 0) + return r; + + s->exists = true; + s->has_attrs = true; + s->obj_tag = s->attrset[RGW_ATTR_ID_TAG]; + + if (s->obj_tag.length()) { + ldpp_dout(dpp, 20) << "get_system_obj_state: setting s->obj_tag to " << s->obj_tag.c_str() << dendl; + } else { + ldpp_dout(dpp, 20) << "get_system_obj_state: s->obj_tag was set empty" << dendl; + } + + return 0; +} + +int RGWSI_SysObj_Core::get_system_obj_state(RGWSysObjectCtxBase *rctx, + const rgw_raw_obj& obj, + RGWSysObjState **state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + int ret; + + do { + ret = get_system_obj_state_impl(rctx, obj, state, objv_tracker, y, dpp); + } while (ret == -EAGAIN); + + return ret; +} + +int RGWSI_SysObj_Core::raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch, + map<string, bufferlist> *attrs, bufferlist *first_chunk, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + return r; + } + + uint64_t size = 0; + struct timespec mtime_ts; + + librados::ObjectReadOperation op; + if (objv_tracker) { + objv_tracker->prepare_op_for_read(&op); + } + op.getxattrs(attrs, nullptr); + if (psize || pmtime) { + op.stat2(&size, &mtime_ts, nullptr); + } + if (first_chunk) { + op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, nullptr); + } + bufferlist outbl; + r = rados_obj.operate(dpp, &op, &outbl, y); + + if (epoch) { + *epoch = rados_obj.get_last_version(); + } + + if (r < 0) + return r; + + if (psize) + *psize = size; + if (pmtime) + *pmtime = ceph::real_clock::from_timespec(mtime_ts); + + return 0; +} + +int RGWSI_SysObj_Core::stat(RGWSysObjectCtxBase& obj_ctx, + RGWSI_SysObj_Obj_GetObjState& _state, + const rgw_raw_obj& obj, + map<string, bufferlist> *attrs, + bool raw_attrs, + real_time *lastmod, + uint64_t *obj_size, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + RGWSysObjState *astate = nullptr; + + int r = get_system_obj_state(&obj_ctx, obj, &astate, objv_tracker, y, dpp); + if (r < 0) + return r; + + if (!astate->exists) { + return -ENOENT; + } + + if (attrs) { + if (raw_attrs) { + *attrs = astate->attrset; + } else { + rgw_filter_attrset(astate->attrset, RGW_ATTR_PREFIX, attrs); + } + if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) { + map<string, bufferlist>::iterator iter; + for (iter = attrs->begin(); iter != attrs->end(); ++iter) { + ldpp_dout(dpp, 20) << "Read xattr: " << iter->first << dendl; + } + } + } + + if (obj_size) + *obj_size = astate->size; + if (lastmod) + *lastmod = astate->mtime; + + return 0; +} + +int RGWSI_SysObj_Core::read(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWSI_SysObj_Obj_GetObjState& _read_state, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + bufferlist *bl, off_t ofs, off_t end, + map<string, bufferlist> *attrs, + bool raw_attrs, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version>, + optional_yield y) +{ + auto& read_state = static_cast<GetObjState&>(_read_state); + + uint64_t len; + librados::ObjectReadOperation op; + + if (end < 0) + len = 0; + else + len = end - ofs + 1; + + if (objv_tracker) { + objv_tracker->prepare_op_for_read(&op); + } + + ldpp_dout(dpp, 20) << "rados->read ofs=" << ofs << " len=" << len << dendl; + op.read(ofs, len, bl, nullptr); + + map<string, bufferlist> unfiltered_attrset; + + if (attrs) { + if (raw_attrs) { + op.getxattrs(attrs, nullptr); + } else { + op.getxattrs(&unfiltered_attrset, nullptr); + } + } + + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + r = rados_obj.operate(dpp, &op, nullptr, y); + if (r < 0) { + ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl; + return r; + } + ldpp_dout(dpp, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl; + + uint64_t op_ver = rados_obj.get_last_version(); + + if (read_state.last_ver > 0 && + read_state.last_ver != op_ver) { + ldpp_dout(dpp, 5) << "raced with an object write, abort" << dendl; + return -ECANCELED; + } + + if (attrs && !raw_attrs) { + rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs); + } + + read_state.last_ver = op_ver; + + return bl->length(); +} + +/** + * Get an attribute for a system object. + * obj: the object to get attr + * name: name of the attr to retrieve + * dest: bufferlist to store the result in + * Returns: 0 on success, -ERR# otherwise. + */ +int RGWSI_SysObj_Core::get_attr(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const char *name, + bufferlist *dest, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectReadOperation op; + + int rval; + op.getxattr(name, dest, &rval); + + r = rados_obj.operate(dpp, &op, nullptr, y); + if (r < 0) + return r; + + return 0; +} + +int RGWSI_SysObj_Core::set_attrs(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + map<string, bufferlist>& attrs, + map<string, bufferlist> *rmattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + + map<string, bufferlist>::iterator iter; + if (rmattrs) { + for (iter = rmattrs->begin(); iter != rmattrs->end(); ++iter) { + const string& name = iter->first; + op.rmxattr(name.c_str()); + } + } + + for (iter = attrs.begin(); iter != attrs.end(); ++iter) { + const string& name = iter->first; + bufferlist& bl = iter->second; + + if (!bl.length()) + continue; + + op.setxattr(name.c_str(), bl); + } + + if (!op.size()) + return 0; + + bufferlist bl; + + r = rados_obj.operate(dpp, &op, y); + if (r < 0) + return r; + + if (objv_tracker) { + objv_tracker->apply_write(); + } + return 0; +} + +int RGWSI_SysObj_Core::omap_get_vals(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const string& marker, + uint64_t count, + std::map<string, bufferlist> *m, + bool *pmore, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + string start_after = marker; + bool more; + + do { + librados::ObjectReadOperation op; + + std::map<string, bufferlist> t; + int rval; + op.omap_get_vals2(start_after, count, &t, &more, &rval); + + r = rados_obj.operate(dpp, &op, nullptr, y); + if (r < 0) { + return r; + } + if (t.empty()) { + break; + } + count -= t.size(); + start_after = t.rbegin()->first; + m->insert(t.begin(), t.end()); + } while (more && count > 0); + + if (pmore) { + *pmore = more; + } + return 0; +} + +int RGWSI_SysObj_Core::omap_get_all(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + std::map<string, bufferlist> *m, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + +#define MAX_OMAP_GET_ENTRIES 1024 + const int count = MAX_OMAP_GET_ENTRIES; + string start_after; + bool more; + + do { + librados::ObjectReadOperation op; + + std::map<string, bufferlist> t; + int rval; + op.omap_get_vals2(start_after, count, &t, &more, &rval); + + r = rados_obj.operate(dpp, &op, nullptr, y); + if (r < 0) { + return r; + } + if (t.empty()) { + break; + } + start_after = t.rbegin()->first; + m->insert(t.begin(), t.end()); + } while (more); + return 0; +} + +int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key, + bufferlist& bl, bool must_exist, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + ldpp_dout(dpp, 15) << "omap_set obj=" << obj << " key=" << key << dendl; + + map<string, bufferlist> m; + m[key] = bl; + librados::ObjectWriteOperation op; + if (must_exist) + op.assert_exists(); + op.omap_set(m); + r = rados_obj.operate(dpp, &op, y); + return r; +} + +int RGWSI_SysObj_Core::omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, + const std::map<std::string, bufferlist>& m, + bool must_exist, optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + if (must_exist) + op.assert_exists(); + op.omap_set(m); + r = rados_obj.operate(dpp, &op, y); + return r; +} + +int RGWSI_SysObj_Core::omap_del(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + set<string> k; + k.insert(key); + + librados::ObjectWriteOperation op; + + op.omap_rm_keys(k); + + r = rados_obj.operate(dpp, &op, y); + return r; +} + +int RGWSI_SysObj_Core::notify(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, bufferlist& bl, + uint64_t timeout_ms, bufferlist *pbl, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + r = rados_obj.notify(dpp, bl, timeout_ms, pbl, y); + return r; +} + +int RGWSI_SysObj_Core::remove(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + + op.remove(); + r = rados_obj.operate(dpp, &op, y); + if (r < 0) + return r; + + return 0; +} + +int RGWSI_SysObj_Core::write(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + real_time *pmtime, + map<std::string, bufferlist>& attrs, + bool exclusive, + const bufferlist& data, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + + if (exclusive) { + op.create(true); // exclusive create + } else { + op.remove(); + op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK); + op.create(false); + } + + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + + if (real_clock::is_zero(set_mtime)) { + set_mtime = real_clock::now(); + } + + struct timespec mtime_ts = real_clock::to_timespec(set_mtime); + op.mtime2(&mtime_ts); + op.write_full(data); + + bufferlist acl_bl; + + for (map<string, bufferlist>::iterator iter = attrs.begin(); iter != attrs.end(); ++iter) { + const string& name = iter->first; + bufferlist& bl = iter->second; + + if (!bl.length()) + continue; + + op.setxattr(name.c_str(), bl); + } + + r = rados_obj.operate(dpp, &op, y); + if (r < 0) { + return r; + } + + if (objv_tracker) { + objv_tracker->apply_write(); + } + + if (pmtime) { + *pmtime = set_mtime; + } + + return 0; +} + + +int RGWSI_SysObj_Core::write_data(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const bufferlist& bl, + bool exclusive, + RGWObjVersionTracker *objv_tracker, + optional_yield y) +{ + RGWSI_RADOS::Obj rados_obj; + int r = get_rados_obj(dpp, zone_svc, obj, &rados_obj); + if (r < 0) { + ldpp_dout(dpp, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl; + return r; + } + + librados::ObjectWriteOperation op; + + if (exclusive) { + op.create(true); + } + + if (objv_tracker) { + objv_tracker->prepare_op_for_write(&op); + } + op.write_full(bl); + r = rados_obj.operate(dpp, &op, y); + if (r < 0) + return r; + + if (objv_tracker) { + objv_tracker->apply_write(); + } + return 0; +} + +int RGWSI_SysObj_Core::pool_list_prefixed_objs(const DoutPrefixProvider *dpp, + const rgw_pool& pool, const string& prefix, + std::function<void(const string&)> cb) +{ + bool is_truncated; + + auto rados_pool = rados_svc->pool(pool); + + auto op = rados_pool.op(); + + RGWAccessListFilterPrefix filter(prefix); + + int r = op.init(dpp, string(), &filter); + if (r < 0) { + return r; + } + + do { + vector<string> oids; +#define MAX_OBJS_DEFAULT 1000 + int r = op.get_next(MAX_OBJS_DEFAULT, &oids, &is_truncated); + if (r < 0) { + return r; + } + for (auto& val : oids) { + if (val.size() > prefix.size()) { + cb(val.substr(prefix.size())); + } + } + } while (is_truncated); + + return 0; +} + +int RGWSI_SysObj_Core::pool_list_objects_init(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const string& marker, + const string& prefix, + RGWSI_SysObj::Pool::ListCtx *_ctx) +{ + _ctx->impl.emplace<PoolListImplInfo>(prefix); + + auto& ctx = static_cast<PoolListImplInfo&>(*_ctx->impl); + + ctx.pool = rados_svc->pool(pool); + ctx.op = ctx.pool.op(); + + int r = ctx.op.init(dpp, marker, &ctx.filter); + if (r < 0) { + ldpp_dout(dpp, 10) << "failed to list objects pool_iterate_begin() returned r=" << r << dendl; + return r; + } + return 0; +} + +int RGWSI_SysObj_Core::pool_list_objects_next(RGWSI_SysObj::Pool::ListCtx& _ctx, + int max, + vector<string> *oids, + bool *is_truncated) +{ + if (!_ctx.impl) { + return -EINVAL; + } + auto& ctx = static_cast<PoolListImplInfo&>(*_ctx.impl); + int r = ctx.op.get_next(max, oids, is_truncated); + if (r < 0) { + if(r != -ENOENT) + ldout(cct, 10) << "failed to list objects pool_iterate returned r=" << r << dendl; + return r; + } + + return oids->size(); +} + +int RGWSI_SysObj_Core::pool_list_objects_get_marker(RGWSI_SysObj::Pool::ListCtx& _ctx, + string *marker) +{ + if (!_ctx.impl) { + return -EINVAL; + } + + auto& ctx = static_cast<PoolListImplInfo&>(*_ctx.impl); + return ctx.op.get_marker(marker); +} diff --git a/src/rgw/services/svc_sys_obj_core.h b/src/rgw/services/svc_sys_obj_core.h new file mode 100644 index 000000000..52c94051c --- /dev/null +++ b/src/rgw/services/svc_sys_obj_core.h @@ -0,0 +1,158 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" +#include "svc_sys_obj.h" +#include "svc_sys_obj_core_types.h" + + +class RGWSI_Zone; + +struct rgw_cache_entry_info; + +class RGWSI_SysObj_Core : public RGWServiceInstance +{ + friend class RGWServices_Def; + friend class RGWSI_SysObj; + +protected: + RGWSI_RADOS *rados_svc{nullptr}; + RGWSI_Zone *zone_svc{nullptr}; + + using GetObjState = RGWSI_SysObj_Core_GetObjState; + using PoolListImplInfo = RGWSI_SysObj_Core_PoolListImplInfo; + + void core_init(RGWSI_RADOS *_rados_svc, + RGWSI_Zone *_zone_svc) { + rados_svc = _rados_svc; + zone_svc = _zone_svc; + } + int get_rados_obj(const DoutPrefixProvider *dpp, RGWSI_Zone *zone_svc, const rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj); + + virtual int raw_stat(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, uint64_t *psize, + real_time *pmtime, uint64_t *epoch, + map<string, bufferlist> *attrs, bufferlist *first_chunk, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + + virtual int read(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWSI_SysObj_Obj_GetObjState& read_state, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + bufferlist *bl, off_t ofs, off_t end, + map<string, bufferlist> *attrs, + bool raw_attrs, + rgw_cache_entry_info *cache_info, + boost::optional<obj_version>, + optional_yield y); + + virtual int remove(const DoutPrefixProvider *dpp, + RGWSysObjectCtxBase& obj_ctx, + RGWObjVersionTracker *objv_tracker, + const rgw_raw_obj& obj, + optional_yield y); + + virtual int write(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + real_time *pmtime, + map<std::string, bufferlist>& attrs, + bool exclusive, + const bufferlist& data, + RGWObjVersionTracker *objv_tracker, + real_time set_mtime, + optional_yield y); + + virtual int write_data(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const bufferlist& bl, + bool exclusive, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + + virtual int get_attr(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, + const char *name, bufferlist *dest, + optional_yield y); + + virtual int set_attrs(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, + map<string, bufferlist>& attrs, + map<string, bufferlist> *rmattrs, + RGWObjVersionTracker *objv_tracker, + optional_yield y); + + virtual int omap_get_all(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, std::map<string, bufferlist> *m, + optional_yield y); + virtual int omap_get_vals(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, + const string& marker, + uint64_t count, + std::map<string, bufferlist> *m, + bool *pmore, + optional_yield y); + virtual int omap_set(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, const std::string& key, + bufferlist& bl, bool must_exist, + optional_yield y); + virtual int omap_set(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, + const map<std::string, bufferlist>& m, bool must_exist, + optional_yield y); + virtual int omap_del(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj, const std::string& key, + optional_yield y); + + virtual int notify(const DoutPrefixProvider *dpp, + const rgw_raw_obj& obj, bufferlist& bl, + uint64_t timeout_ms, bufferlist *pbl, + optional_yield y); + + virtual int pool_list_prefixed_objs(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const string& prefix, + std::function<void(const string&)> cb); + + virtual int pool_list_objects_init(const DoutPrefixProvider *dpp, + const rgw_pool& pool, + const std::string& marker, + const std::string& prefix, + RGWSI_SysObj::Pool::ListCtx *ctx); + virtual int pool_list_objects_next(RGWSI_SysObj::Pool::ListCtx& ctx, + int max, + vector<string> *oids, + bool *is_truncated); + + virtual int pool_list_objects_get_marker(RGWSI_SysObj::Pool::ListCtx& _ctx, + string *marker); + + /* wrappers */ + int get_system_obj_state_impl(RGWSysObjectCtxBase *rctx, + const rgw_raw_obj& obj, RGWSysObjState **state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + int get_system_obj_state(RGWSysObjectCtxBase *rctx, const rgw_raw_obj& obj, + RGWSysObjState **state, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + + int stat(RGWSysObjectCtxBase& obj_ctx, + RGWSI_SysObj_Obj_GetObjState& state, + const rgw_raw_obj& obj, + map<string, bufferlist> *attrs, + bool raw_attrs, + real_time *lastmod, + uint64_t *obj_size, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp); + +public: + RGWSI_SysObj_Core(CephContext *cct): RGWServiceInstance(cct) {} + + RGWSI_Zone *get_zone_svc() { + return zone_svc; + } +}; diff --git a/src/rgw/services/svc_sys_obj_core_types.h b/src/rgw/services/svc_sys_obj_core_types.h new file mode 100644 index 000000000..f45fe77f5 --- /dev/null +++ b/src/rgw/services/svc_sys_obj_core_types.h @@ -0,0 +1,117 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" +#include "svc_sys_obj_types.h" + + + +struct RGWSI_SysObj_Core_GetObjState : public RGWSI_SysObj_Obj_GetObjState { + RGWSI_RADOS::Obj rados_obj; + bool has_rados_obj{false}; + uint64_t last_ver{0}; + + RGWSI_SysObj_Core_GetObjState() {} + + int get_rados_obj(const DoutPrefixProvider *dpp, + RGWSI_RADOS *rados_svc, + RGWSI_Zone *zone_svc, + const rgw_raw_obj& obj, + RGWSI_RADOS::Obj **pobj); +}; + +struct RGWSI_SysObj_Core_PoolListImplInfo : public RGWSI_SysObj_Pool_ListInfo { + RGWSI_RADOS::Pool pool; + RGWSI_RADOS::Pool::List op; + RGWAccessListFilterPrefix filter; + + RGWSI_SysObj_Core_PoolListImplInfo(const string& prefix) : op(pool.op()), filter(prefix) {} +}; + +struct RGWSysObjState { + rgw_raw_obj obj; + bool has_attrs{false}; + bool exists{false}; + uint64_t size{0}; + ceph::real_time mtime; + uint64_t epoch{0}; + bufferlist obj_tag; + bool has_data{false}; + bufferlist data; + bool prefetch_data{false}; + uint64_t pg_ver{0}; + + /* important! don't forget to update copy constructor */ + + RGWObjVersionTracker objv_tracker; + + map<string, bufferlist> attrset; + RGWSysObjState() {} + RGWSysObjState(const RGWSysObjState& rhs) : obj (rhs.obj) { + has_attrs = rhs.has_attrs; + exists = rhs.exists; + size = rhs.size; + mtime = rhs.mtime; + epoch = rhs.epoch; + if (rhs.obj_tag.length()) { + obj_tag = rhs.obj_tag; + } + has_data = rhs.has_data; + if (rhs.data.length()) { + data = rhs.data; + } + prefetch_data = rhs.prefetch_data; + pg_ver = rhs.pg_ver; + objv_tracker = rhs.objv_tracker; + } +}; + + +class RGWSysObjectCtxBase { + std::map<rgw_raw_obj, RGWSysObjState> objs_state; + ceph::shared_mutex lock = ceph::make_shared_mutex("RGWSysObjectCtxBase"); + +public: + RGWSysObjectCtxBase() = default; + + RGWSysObjectCtxBase(const RGWSysObjectCtxBase& rhs) : objs_state(rhs.objs_state) {} + RGWSysObjectCtxBase(RGWSysObjectCtxBase&& rhs) : objs_state(std::move(rhs.objs_state)) {} + + RGWSysObjState *get_state(const rgw_raw_obj& obj) { + RGWSysObjState *result; + std::map<rgw_raw_obj, RGWSysObjState>::iterator iter; + lock.lock_shared(); + assert (!obj.empty()); + iter = objs_state.find(obj); + if (iter != objs_state.end()) { + result = &iter->second; + lock.unlock_shared(); + } else { + lock.unlock_shared(); + lock.lock(); + result = &objs_state[obj]; + lock.unlock(); + } + return result; + } + + void set_prefetch_data(rgw_raw_obj& obj) { + std::unique_lock wl{lock}; + assert (!obj.empty()); + objs_state[obj].prefetch_data = true; + } + void invalidate(const rgw_raw_obj& obj) { + std::unique_lock wl{lock}; + auto iter = objs_state.find(obj); + if (iter == objs_state.end()) { + return; + } + objs_state.erase(iter); + } +}; + diff --git a/src/rgw/services/svc_sys_obj_types.h b/src/rgw/services/svc_sys_obj_types.h new file mode 100644 index 000000000..5a54bef29 --- /dev/null +++ b/src/rgw/services/svc_sys_obj_types.h @@ -0,0 +1,15 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#pragma once + + +#include "rgw/rgw_service.h" + + +struct RGWSI_SysObj_Obj_GetObjState { +}; + +struct RGWSI_SysObj_Pool_ListInfo { +}; diff --git a/src/rgw/services/svc_tier_rados.cc b/src/rgw/services/svc_tier_rados.cc new file mode 100644 index 000000000..e977de7bb --- /dev/null +++ b/src/rgw/services/svc_tier_rados.cc @@ -0,0 +1,32 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_tier_rados.h" + +const std::string MP_META_SUFFIX = ".meta"; + +bool MultipartMetaFilter::filter(const string& name, string& key) { + // the length of the suffix so we can skip past it + static const size_t MP_META_SUFFIX_LEN = MP_META_SUFFIX.length(); + + size_t len = name.size(); + + // make sure there's room for suffix plus at least one more + // character + if (len <= MP_META_SUFFIX_LEN) + return false; + + size_t pos = name.find(MP_META_SUFFIX, len - MP_META_SUFFIX_LEN); + if (pos == string::npos) + return false; + + pos = name.rfind('.', pos - 1); + if (pos == string::npos) + return false; + + key = name.substr(0, pos); + + return true; +} + + diff --git a/src/rgw/services/svc_tier_rados.h b/src/rgw/services/svc_tier_rados.h new file mode 100644 index 000000000..f2424990e --- /dev/null +++ b/src/rgw/services/svc_tier_rados.h @@ -0,0 +1,145 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include <iomanip> + +#include "rgw/rgw_service.h" + +#include "svc_rados.h" + +extern const std::string MP_META_SUFFIX; + +class RGWMPObj { + string oid; + string prefix; + string meta; + string upload_id; +public: + RGWMPObj() {} + RGWMPObj(const string& _oid, const string& _upload_id) { + init(_oid, _upload_id, _upload_id); + } + void init(const string& _oid, const string& _upload_id) { + init(_oid, _upload_id, _upload_id); + } + void init(const string& _oid, const string& _upload_id, const string& part_unique_str) { + if (_oid.empty()) { + clear(); + return; + } + oid = _oid; + upload_id = _upload_id; + prefix = oid + "."; + meta = prefix + upload_id + MP_META_SUFFIX; + prefix.append(part_unique_str); + } + const string& get_meta() const { return meta; } + string get_part(int num) const { + char buf[16]; + snprintf(buf, 16, ".%d", num); + string s = prefix; + s.append(buf); + return s; + } + string get_part(const string& part) const { + string s = prefix; + s.append("."); + s.append(part); + return s; + } + const string& get_upload_id() const { + return upload_id; + } + const string& get_key() const { + return oid; + } + bool from_meta(const string& meta) { + int end_pos = meta.rfind('.'); // search for ".meta" + if (end_pos < 0) + return false; + int mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id> + if (mid_pos < 0) + return false; + oid = meta.substr(0, mid_pos); + upload_id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1); + init(oid, upload_id, upload_id); + return true; + } + void clear() { + oid = ""; + prefix = ""; + meta = ""; + upload_id = ""; + } + friend std::ostream& operator<<(std::ostream& out, const RGWMPObj& obj) { + return out << "RGWMPObj:{ prefix=" << std::quoted(obj.prefix) << + ", meta=" << std::quoted(obj.meta) << " }"; + } +}; // class RGWMPObj + +/** + * A filter to a) test whether an object name is a multipart meta + * object, and b) filter out just the key used to determine the bucket + * index shard. + * + * Objects for multipart meta have names adorned with an upload id and + * other elements -- specifically a ".", MULTIPART_UPLOAD_ID_PREFIX, + * unique id, and MP_META_SUFFIX. This filter will return true when + * the name provided is such. It will also extract the key used for + * bucket index shard calculation from the adorned name. + */ +class MultipartMetaFilter : public RGWAccessListFilter { +public: + MultipartMetaFilter() {} + + /** + * @param name [in] The object name as it appears in the bucket index. + * @param key [out] An output parameter that will contain the bucket + * index key if this entry is in the form of a multipart meta object. + * @return true if the name provided is in the form of a multipart meta + * object, false otherwise + */ + bool filter(const string& name, string& key) override; +}; + +class RGWSI_Tier_RADOS : public RGWServiceInstance +{ + RGWSI_Zone *zone_svc{nullptr}; + +public: + RGWSI_Tier_RADOS(CephContext *cct): RGWServiceInstance(cct) {} + + void init(RGWSI_Zone *_zone_svc) { + zone_svc = _zone_svc; + } + + static inline bool raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj) { + ssize_t pos = raw_obj.oid.find('_'); + if (pos < 0) { + return false; + } + + if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) { + return false; + } + obj->bucket = bucket; + + return true; + } +}; + diff --git a/src/rgw/services/svc_user.cc b/src/rgw/services/svc_user.cc new file mode 100644 index 000000000..9a07c207b --- /dev/null +++ b/src/rgw/services/svc_user.cc @@ -0,0 +1,11 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + + +#include "svc_user.h" + +RGWSI_User::RGWSI_User(CephContext *cct): RGWServiceInstance(cct) { +} + +RGWSI_User::~RGWSI_User() { +} diff --git a/src/rgw/services/svc_user.h b/src/rgw/services/svc_user.h new file mode 100644 index 000000000..37e533d6d --- /dev/null +++ b/src/rgw/services/svc_user.h @@ -0,0 +1,132 @@ + +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "svc_meta_be.h" + +#include "rgw/rgw_service.h" + +class RGWUserBuckets; +class RGWGetUserStats_CB; + +class RGWSI_User : public RGWServiceInstance +{ +public: + RGWSI_User(CephContext *cct); + virtual ~RGWSI_User(); + + static string get_meta_key(const rgw_user& user) { + return user.to_str(); + } + + static rgw_user user_from_meta_key(const string& key) { + return rgw_user(key); + } + + virtual RGWSI_MetaBackend_Handler *get_be_handler() = 0; + + /* base svc_user interfaces */ + + virtual int read_user_info(RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + RGWUserInfo *info, + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, + rgw_cache_entry_info * const cache_info, + map<string, bufferlist> * const pattrs, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int store_user_info(RGWSI_MetaBackend::Context *ctx, + const RGWUserInfo& info, + RGWUserInfo *old_info, + RGWObjVersionTracker *objv_tracker, + const real_time& mtime, + bool exclusive, + map<string, bufferlist> *attrs, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int remove_user_info(RGWSI_MetaBackend::Context *ctx, + const RGWUserInfo& info, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int get_user_info_by_email(RGWSI_MetaBackend::Context *ctx, + const string& email, RGWUserInfo *info, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + virtual int get_user_info_by_swift(RGWSI_MetaBackend::Context *ctx, + const string& swift_name, + RGWUserInfo *info, /* out */ + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + virtual int get_user_info_by_access_key(RGWSI_MetaBackend::Context *ctx, + const std::string& access_key, + RGWUserInfo *info, + RGWObjVersionTracker* objv_tracker, + real_time *pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) = 0; + + virtual int add_bucket(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y) = 0; + virtual int remove_bucket(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const rgw_bucket& _bucket, optional_yield) = 0; + virtual int list_buckets(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const string& marker, + const string& end_marker, + uint64_t max, + RGWUserBuckets *buckets, + bool *is_truncated, + optional_yield y) = 0; + + virtual int flush_bucket_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const RGWBucketEnt& ent, optional_yield y) = 0; + virtual int complete_flush_stats(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, optional_yield y) = 0; + virtual int reset_bucket_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + optional_yield y) = 0; + virtual int read_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, RGWStorageStats *stats, + ceph::real_time *last_stats_sync, /* last time a full stats sync completed */ + ceph::real_time *last_stats_update, + optional_yield y) = 0; /* last time a stats update was done */ + + virtual int read_stats_async(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, RGWGetUserStats_CB *cb) = 0; +}; + diff --git a/src/rgw/services/svc_user_rados.cc b/src/rgw/services/svc_user_rados.cc new file mode 100644 index 000000000..2d82234e4 --- /dev/null +++ b/src/rgw/services/svc_user_rados.cc @@ -0,0 +1,972 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include <boost/algorithm/string.hpp> + +#include "svc_user.h" +#include "svc_user_rados.h" +#include "svc_zone.h" +#include "svc_sys_obj.h" +#include "svc_sys_obj_cache.h" +#include "svc_meta.h" +#include "svc_meta_be_sobj.h" +#include "svc_sync_modules.h" + +#include "rgw/rgw_user.h" +#include "rgw/rgw_bucket.h" +#include "rgw/rgw_tools.h" +#include "rgw/rgw_zone.h" +#include "rgw/rgw_rados.h" + +#include "cls/user/cls_user_client.h" + +#define dout_subsys ceph_subsys_rgw + +#define RGW_BUCKETS_OBJ_SUFFIX ".buckets" + +class RGWSI_User_Module : public RGWSI_MBSObj_Handler_Module { + RGWSI_User_RADOS::Svc& svc; + + const string prefix; +public: + RGWSI_User_Module(RGWSI_User_RADOS::Svc& _svc) : RGWSI_MBSObj_Handler_Module("user"), + svc(_svc) {} + + void get_pool_and_oid(const string& key, rgw_pool *pool, string *oid) override { + if (pool) { + *pool = svc.zone->get_zone_params().user_uid_pool; + } + if (oid) { + *oid = key; + } + } + + const string& get_oid_prefix() override { + return prefix; + } + + bool is_valid_oid(const string& oid) override { + // filter out the user.buckets objects + return !boost::algorithm::ends_with(oid, RGW_BUCKETS_OBJ_SUFFIX); + } + + string key_to_oid(const string& key) override { + return key; + } + + string oid_to_key(const string& oid) override { + return oid; + } +}; + +RGWSI_User_RADOS::RGWSI_User_RADOS(CephContext *cct): RGWSI_User(cct) { +} + +RGWSI_User_RADOS::~RGWSI_User_RADOS() { +} + +void RGWSI_User_RADOS::init(RGWSI_RADOS *_rados_svc, + RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, + RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc, + RGWSI_MetaBackend *_meta_be_svc, + RGWSI_SyncModules *_sync_modules_svc) +{ + svc.user = this; + svc.rados = _rados_svc; + svc.zone = _zone_svc; + svc.sysobj = _sysobj_svc; + svc.cache = _cache_svc; + svc.meta = _meta_svc; + svc.meta_be = _meta_be_svc; + svc.sync_modules = _sync_modules_svc; +} + +int RGWSI_User_RADOS::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + uinfo_cache.reset(new RGWChainedCacheImpl<user_info_cache_entry>); + uinfo_cache->init(svc.cache); + + int r = svc.meta->create_be_handler(RGWSI_MetaBackend::Type::MDBE_SOBJ, &be_handler); + if (r < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to create be handler: r=" << r << dendl; + return r; + } + + RGWSI_MetaBackend_Handler_SObj *bh = static_cast<RGWSI_MetaBackend_Handler_SObj *>(be_handler); + + auto module = new RGWSI_User_Module(svc); + be_module.reset(module); + bh->set_module(module); + return 0; +} + +rgw_raw_obj RGWSI_User_RADOS::get_buckets_obj(const rgw_user& user) const +{ + string oid = user.to_str() + RGW_BUCKETS_OBJ_SUFFIX; + return rgw_raw_obj(svc.zone->get_zone_params().user_uid_pool, oid); +} + +int RGWSI_User_RADOS::read_user_info(RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + RGWUserInfo *info, + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, + rgw_cache_entry_info * const cache_info, + map<string, bufferlist> * const pattrs, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + if(user.id == RGW_USER_ANON_ID) { + ldpp_dout(dpp, 20) << "RGWSI_User_RADOS::read_user_info(): anonymous user" << dendl; + return -ENOENT; + } + bufferlist bl; + RGWUID user_id; + + RGWSI_MBSObj_GetParams params(&bl, pattrs, pmtime); + params.set_cache_info(cache_info); + + int ret = svc.meta_be->get_entry(ctx, get_meta_key(user), params, objv_tracker, y, dpp); + if (ret < 0) { + return ret; + } + + auto iter = bl.cbegin(); + try { + decode(user_id, iter); + if (user_id.user_id != user) { + ldpp_dout(dpp, -1) << "ERROR: rgw_get_user_info_by_uid(): user id mismatch: " << user_id.user_id << " != " << user << dendl; + return -EIO; + } + if (!iter.end()) { + decode(*info, iter); + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode user info, caught buffer::error" << dendl; + return -EIO; + } + + return 0; +} + +class PutOperation +{ + RGWSI_User_RADOS::Svc& svc; + RGWSI_MetaBackend_SObj::Context_SObj *ctx; + RGWUID ui; + const RGWUserInfo& info; + RGWUserInfo *old_info; + RGWObjVersionTracker *objv_tracker; + const real_time& mtime; + bool exclusive; + map<string, bufferlist> *pattrs; + RGWObjVersionTracker ot; + string err_msg; + optional_yield y; + + void set_err_msg(string msg) { + if (!err_msg.empty()) { + err_msg = std::move(msg); + } + } + +public: + PutOperation(RGWSI_User_RADOS::Svc& svc, + RGWSI_MetaBackend::Context *_ctx, + const RGWUserInfo& info, + RGWUserInfo *old_info, + RGWObjVersionTracker *objv_tracker, + const real_time& mtime, + bool exclusive, + map<string, bufferlist> *pattrs, + optional_yield y) : + svc(svc), info(info), old_info(old_info), + objv_tracker(objv_tracker), mtime(mtime), + exclusive(exclusive), pattrs(pattrs), y(y) { + ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + ui.user_id = info.user_id; + } + + int prepare(const DoutPrefixProvider *dpp) { + if (objv_tracker) { + ot = *objv_tracker; + } + + if (ot.write_version.tag.empty()) { + if (ot.read_version.tag.empty()) { + ot.generate_new_write_ver(svc.meta_be->ctx()); + } else { + ot.write_version = ot.read_version; + ot.write_version.ver++; + } + } + + for (auto iter = info.swift_keys.begin(); iter != info.swift_keys.end(); ++iter) { + if (old_info && old_info->swift_keys.count(iter->first) != 0) + continue; + auto& k = iter->second; + /* check if swift mapping exists */ + RGWUserInfo inf; + int r = svc.user->get_user_info_by_swift(ctx, k.id, &inf, nullptr, nullptr, y, dpp); + if (r >= 0 && inf.user_id != info.user_id && + (!old_info || inf.user_id != old_info->user_id)) { + ldpp_dout(dpp, 0) << "WARNING: can't store user info, swift id (" << k.id + << ") already mapped to another user (" << info.user_id << ")" << dendl; + return -EEXIST; + } + } + + /* check if access keys already exist */ + for (auto iter = info.access_keys.begin(); iter != info.access_keys.end(); ++iter) { + if (old_info && old_info->access_keys.count(iter->first) != 0) + continue; + auto& k = iter->second; + RGWUserInfo inf; + int r = svc.user->get_user_info_by_access_key(ctx, k.id, &inf, nullptr, nullptr, y, dpp); + if (r >= 0 && inf.user_id != info.user_id && + (!old_info || inf.user_id != old_info->user_id)) { + ldpp_dout(dpp, 0) << "WARNING: can't store user info, access key already mapped to another user" << dendl; + return -EEXIST; + } + } + + return 0; + } + + int put(const DoutPrefixProvider *dpp) { + bufferlist data_bl; + encode(ui, data_bl); + encode(info, data_bl); + + RGWSI_MBSObj_PutParams params(data_bl, pattrs, mtime, exclusive); + + int ret = svc.meta_be->put(ctx, RGWSI_User::get_meta_key(info.user_id), params, &ot, y, dpp); + if (ret < 0) + return ret; + + return 0; + } + + int complete(const DoutPrefixProvider *dpp) { + int ret; + + bufferlist link_bl; + encode(ui, link_bl); + + auto& obj_ctx = *ctx->obj_ctx; + + if (!info.user_email.empty()) { + if (!old_info || + old_info->user_email.compare(info.user_email) != 0) { /* only if new index changed */ + ret = rgw_put_system_obj(dpp, obj_ctx, svc.zone->get_zone_params().user_email_pool, info.user_email, + link_bl, exclusive, NULL, real_time(), y); + if (ret < 0) + return ret; + } + } + + const bool renamed = old_info && old_info->user_id != info.user_id; + for (auto iter = info.access_keys.begin(); iter != info.access_keys.end(); ++iter) { + auto& k = iter->second; + if (old_info && old_info->access_keys.count(iter->first) != 0 && !renamed) + continue; + + ret = rgw_put_system_obj(dpp, obj_ctx, svc.zone->get_zone_params().user_keys_pool, k.id, + link_bl, exclusive, NULL, real_time(), y); + if (ret < 0) + return ret; + } + + for (auto siter = info.swift_keys.begin(); siter != info.swift_keys.end(); ++siter) { + auto& k = siter->second; + if (old_info && old_info->swift_keys.count(siter->first) != 0 && !renamed) + continue; + + ret = rgw_put_system_obj(dpp, obj_ctx, svc.zone->get_zone_params().user_swift_pool, k.id, + link_bl, exclusive, NULL, real_time(), y); + if (ret < 0) + return ret; + } + + if (old_info) { + ret = remove_old_indexes(*old_info, info, y, dpp); + if (ret < 0) { + return ret; + } + } + + return 0; + } + + int remove_old_indexes(const RGWUserInfo& old_info, const RGWUserInfo& new_info, optional_yield y, const DoutPrefixProvider *dpp) { + int ret; + + if (!old_info.user_id.empty() && + old_info.user_id != new_info.user_id) { + if (old_info.user_id.tenant != new_info.user_id.tenant) { + ldpp_dout(dpp, 0) << "ERROR: tenant mismatch: " << old_info.user_id.tenant << " != " << new_info.user_id.tenant << dendl; + return -EINVAL; + } + ret = svc.user->remove_uid_index(ctx, old_info, nullptr, y, dpp); + if (ret < 0 && ret != -ENOENT) { + set_err_msg("ERROR: could not remove index for uid " + old_info.user_id.to_str()); + return ret; + } + } + + if (!old_info.user_email.empty() && + old_info.user_email != new_info.user_email) { + ret = svc.user->remove_email_index(dpp, ctx, old_info.user_email, y); + if (ret < 0 && ret != -ENOENT) { + set_err_msg("ERROR: could not remove index for email " + old_info.user_email); + return ret; + } + } + + for ([[maybe_unused]] const auto& [name, access_key] : old_info.access_keys) { + if (!new_info.access_keys.count(access_key.id)) { + ret = svc.user->remove_key_index(dpp, ctx, access_key, y); + if (ret < 0 && ret != -ENOENT) { + set_err_msg("ERROR: could not remove index for key " + access_key.id); + return ret; + } + } + } + + for (auto old_iter = old_info.swift_keys.begin(); old_iter != old_info.swift_keys.end(); ++old_iter) { + const auto& swift_key = old_iter->second; + auto new_iter = new_info.swift_keys.find(swift_key.id); + if (new_iter == new_info.swift_keys.end()) { + ret = svc.user->remove_swift_name_index(dpp, ctx, swift_key.id, y); + if (ret < 0 && ret != -ENOENT) { + set_err_msg("ERROR: could not remove index for swift_name " + swift_key.id); + return ret; + } + } + } + + return 0; + } + + const string& get_err_msg() { + return err_msg; + } +}; + +int RGWSI_User_RADOS::store_user_info(RGWSI_MetaBackend::Context *ctx, + const RGWUserInfo& info, + RGWUserInfo *old_info, + RGWObjVersionTracker *objv_tracker, + const real_time& mtime, + bool exclusive, + map<string, bufferlist> *attrs, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + PutOperation op(svc, ctx, + info, old_info, + objv_tracker, + mtime, exclusive, + attrs, + y); + + int r = op.prepare(dpp); + if (r < 0) { + return r; + } + + r = op.put(dpp); + if (r < 0) { + return r; + } + + r = op.complete(dpp); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_User_RADOS::remove_key_index(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const RGWAccessKey& access_key, + optional_yield y) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + rgw_raw_obj obj(svc.zone->get_zone_params().user_keys_pool, access_key.id); + auto sysobj = ctx->obj_ctx->get_obj(obj); + return sysobj.wop().remove(dpp, y); +} + +int RGWSI_User_RADOS::remove_email_index(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *_ctx, + const string& email, + optional_yield y) +{ + if (email.empty()) { + return 0; + } + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + rgw_raw_obj obj(svc.zone->get_zone_params().user_email_pool, email); + auto sysobj = ctx->obj_ctx->get_obj(obj); + return sysobj.wop().remove(dpp, y); +} + +int RGWSI_User_RADOS::remove_swift_name_index(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *_ctx, const string& swift_name, + optional_yield y) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + rgw_raw_obj obj(svc.zone->get_zone_params().user_swift_pool, swift_name); + auto sysobj = ctx->obj_ctx->get_obj(obj); + return sysobj.wop().remove(dpp, y); +} + +/** + * delete a user's presence from the RGW system. + * First remove their bucket ACLs, then delete them + * from the user and user email pools. This leaves the pools + * themselves alone, as well as any ACLs embedded in object xattrs. + */ +int RGWSI_User_RADOS::remove_user_info(RGWSI_MetaBackend::Context *_ctx, + const RGWUserInfo& info, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) +{ + int ret; + + auto kiter = info.access_keys.begin(); + for (; kiter != info.access_keys.end(); ++kiter) { + ldpp_dout(dpp, 10) << "removing key index: " << kiter->first << dendl; + ret = remove_key_index(dpp, _ctx, kiter->second, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: could not remove " << kiter->first << " (access key object), should be fixed (err=" << ret << ")" << dendl; + return ret; + } + } + + auto siter = info.swift_keys.begin(); + for (; siter != info.swift_keys.end(); ++siter) { + auto& k = siter->second; + ldpp_dout(dpp, 10) << "removing swift subuser index: " << k.id << dendl; + /* check if swift mapping exists */ + ret = remove_swift_name_index(dpp, _ctx, k.id, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: could not remove " << k.id << " (swift name object), should be fixed (err=" << ret << ")" << dendl; + return ret; + } + } + + ldpp_dout(dpp, 10) << "removing email index: " << info.user_email << dendl; + ret = remove_email_index(dpp, _ctx, info.user_email, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: could not remove email index object for " + << info.user_email << ", should be fixed (err=" << ret << ")" << dendl; + return ret; + } + + rgw_raw_obj uid_bucks = get_buckets_obj(info.user_id); + ldpp_dout(dpp, 10) << "removing user buckets index" << dendl; + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + auto sysobj = ctx->obj_ctx->get_obj(uid_bucks); + ret = sysobj.wop().remove(dpp, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: could not remove " << info.user_id << ":" << uid_bucks << ", should be fixed (err=" << ret << ")" << dendl; + return ret; + } + + ret = remove_uid_index(ctx, info, objv_tracker, y, dpp); + if (ret < 0 && ret != -ENOENT) { + return ret; + } + + return 0; +} + +int RGWSI_User_RADOS::remove_uid_index(RGWSI_MetaBackend::Context *ctx, const RGWUserInfo& user_info, RGWObjVersionTracker *objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp) +{ + ldpp_dout(dpp, 10) << "removing user index: " << user_info.user_id << dendl; + + RGWSI_MBSObj_RemoveParams params; + int ret = svc.meta_be->remove(ctx, get_meta_key(user_info.user_id), params, objv_tracker, y, dpp); + if (ret < 0 && ret != -ENOENT && ret != -ECANCELED) { + string key; + user_info.user_id.to_str(key); + rgw_raw_obj uid_obj(svc.zone->get_zone_params().user_uid_pool, key); + ldpp_dout(dpp, 0) << "ERROR: could not remove " << user_info.user_id << ":" << uid_obj << ", should be fixed (err=" << ret << ")" << dendl; + return ret; + } + + return 0; +} + +int RGWSI_User_RADOS::get_user_info_from_index(RGWSI_MetaBackend::Context *_ctx, + const string& key, + const rgw_pool& pool, + RGWUserInfo *info, + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, optional_yield y, const DoutPrefixProvider *dpp) +{ + RGWSI_MetaBackend_SObj::Context_SObj *ctx = static_cast<RGWSI_MetaBackend_SObj::Context_SObj *>(_ctx); + + string cache_key = pool.to_str() + "/" + key; + + if (auto e = uinfo_cache->find(cache_key)) { + *info = e->info; + if (objv_tracker) + *objv_tracker = e->objv_tracker; + if (pmtime) + *pmtime = e->mtime; + return 0; + } + + user_info_cache_entry e; + bufferlist bl; + RGWUID uid; + + int ret = rgw_get_system_obj(*ctx->obj_ctx, pool, key, bl, nullptr, &e.mtime, y, dpp); + if (ret < 0) + return ret; + + rgw_cache_entry_info cache_info; + + auto iter = bl.cbegin(); + try { + decode(uid, iter); + + int ret = read_user_info(ctx, uid.user_id, + &e.info, &e.objv_tracker, nullptr, &cache_info, nullptr, + y, dpp); + if (ret < 0) { + return ret; + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: failed to decode user info, caught buffer::error" << dendl; + return -EIO; + } + + uinfo_cache->put(dpp, svc.cache, cache_key, &e, { &cache_info }); + + *info = e.info; + if (objv_tracker) + *objv_tracker = e.objv_tracker; + if (pmtime) + *pmtime = e.mtime; + + return 0; +} + +/** + * Given an email, finds the user info associated with it. + * returns: 0 on success, -ERR# on failure (including nonexistence) + */ +int RGWSI_User_RADOS::get_user_info_by_email(RGWSI_MetaBackend::Context *ctx, + const string& email, RGWUserInfo *info, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, optional_yield y, + const DoutPrefixProvider *dpp) +{ + return get_user_info_from_index(ctx, email, svc.zone->get_zone_params().user_email_pool, + info, objv_tracker, pmtime, y, dpp); +} + +/** + * Given an swift username, finds the user_info associated with it. + * returns: 0 on success, -ERR# on failure (including nonexistence) + */ +int RGWSI_User_RADOS::get_user_info_by_swift(RGWSI_MetaBackend::Context *ctx, + const string& swift_name, + RGWUserInfo *info, /* out */ + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, optional_yield y, + const DoutPrefixProvider *dpp) +{ + return get_user_info_from_index(ctx, + swift_name, + svc.zone->get_zone_params().user_swift_pool, + info, objv_tracker, pmtime, y, dpp); +} + +/** + * Given an access key, finds the user info associated with it. + * returns: 0 on success, -ERR# on failure (including nonexistence) + */ +int RGWSI_User_RADOS::get_user_info_by_access_key(RGWSI_MetaBackend::Context *ctx, + const std::string& access_key, + RGWUserInfo *info, + RGWObjVersionTracker* objv_tracker, + real_time *pmtime, optional_yield y, + const DoutPrefixProvider *dpp) +{ + return get_user_info_from_index(ctx, + access_key, + svc.zone->get_zone_params().user_keys_pool, + info, objv_tracker, pmtime, y, dpp); +} + +int RGWSI_User_RADOS::cls_user_update_buckets(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add, optional_yield y) +{ + auto rados_obj = svc.rados->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + + librados::ObjectWriteOperation op; + cls_user_set_buckets(op, entries, add); + r = rados_obj.operate(dpp, &op, y); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_User_RADOS::cls_user_add_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket_entry& entry, optional_yield y) +{ + list<cls_user_bucket_entry> l; + l.push_back(entry); + + return cls_user_update_buckets(dpp, obj, l, true, y); +} + +int RGWSI_User_RADOS::cls_user_remove_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket& bucket, optional_yield y) +{ + auto rados_obj = svc.rados->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + + librados::ObjectWriteOperation op; + ::cls_user_remove_bucket(op, bucket); + r = rados_obj.operate(dpp, &op, y); + if (r < 0) + return r; + + return 0; +} + +int RGWSI_User_RADOS::add_bucket(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y) +{ + int ret; + + cls_user_bucket_entry new_bucket; + + bucket.convert(&new_bucket.bucket); + new_bucket.size = 0; + if (real_clock::is_zero(creation_time)) + new_bucket.creation_time = real_clock::now(); + else + new_bucket.creation_time = creation_time; + + rgw_raw_obj obj = get_buckets_obj(user); + ret = cls_user_add_bucket(dpp, obj, new_bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: error adding bucket to user: ret=" << ret << dendl; + return ret; + } + + return 0; +} + + +int RGWSI_User_RADOS::remove_bucket(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const rgw_bucket& _bucket, + optional_yield y) +{ + cls_user_bucket bucket; + bucket.name = _bucket.name; + rgw_raw_obj obj = get_buckets_obj(user); + int ret = cls_user_remove_bucket(dpp, obj, bucket, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: error removing bucket from user: ret=" << ret << dendl; + } + + return 0; +} + +int RGWSI_User_RADOS::cls_user_flush_bucket_stats(const DoutPrefixProvider *dpp, + rgw_raw_obj& user_obj, + const RGWBucketEnt& ent, optional_yield y) +{ + cls_user_bucket_entry entry; + ent.convert(&entry); + + list<cls_user_bucket_entry> entries; + entries.push_back(entry); + + int r = cls_user_update_buckets(dpp, user_obj, entries, false, y); + if (r < 0) { + ldpp_dout(dpp, 20) << "cls_user_update_buckets() returned " << r << dendl; + return r; + } + + return 0; +} + +int RGWSI_User_RADOS::cls_user_list_buckets(const DoutPrefixProvider *dpp, + rgw_raw_obj& obj, + const string& in_marker, + const string& end_marker, + const int max_entries, + list<cls_user_bucket_entry>& entries, + string * const out_marker, + bool * const truncated, + optional_yield y) +{ + auto rados_obj = svc.rados->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + + librados::ObjectReadOperation op; + int rc; + + cls_user_bucket_list(op, in_marker, end_marker, max_entries, entries, out_marker, truncated, &rc); + bufferlist ibl; + r = rados_obj.operate(dpp, &op, &ibl, y); + if (r < 0) + return r; + if (rc < 0) + return rc; + + return 0; +} + +int RGWSI_User_RADOS::list_buckets(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const string& marker, + const string& end_marker, + uint64_t max, + RGWUserBuckets *buckets, + bool *is_truncated, optional_yield y) +{ + int ret; + + buckets->clear(); + if (user.id == RGW_USER_ANON_ID) { + ldpp_dout(dpp, 20) << "RGWSI_User_RADOS::list_buckets(): anonymous user" << dendl; + *is_truncated = false; + return 0; + } + rgw_raw_obj obj = get_buckets_obj(user); + + bool truncated = false; + string m = marker; + + uint64_t total = 0; + + do { + std::list<cls_user_bucket_entry> entries; + ret = cls_user_list_buckets(dpp, obj, m, end_marker, max - total, entries, &m, &truncated, y); + if (ret == -ENOENT) { + ret = 0; + } + + if (ret < 0) { + return ret; + } + + for (auto& entry : entries) { + buckets->add(RGWBucketEnt(user, std::move(entry))); + total++; + } + + } while (truncated && total < max); + + if (is_truncated) { + *is_truncated = truncated; + } + + return 0; +} + +int RGWSI_User_RADOS::flush_bucket_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const RGWBucketEnt& ent, + optional_yield y) +{ + rgw_raw_obj obj = get_buckets_obj(user); + + return cls_user_flush_bucket_stats(dpp, obj, ent, y); +} + +int RGWSI_User_RADOS::reset_bucket_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + optional_yield y) +{ + return cls_user_reset_stats(dpp, user, y); +} + +int RGWSI_User_RADOS::cls_user_reset_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y) +{ + rgw_raw_obj obj = get_buckets_obj(user); + auto rados_obj = svc.rados->obj(obj); + int rval, r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + + cls_user_reset_stats2_op call; + cls_user_reset_stats2_ret ret; + + do { + buffer::list in, out; + librados::ObjectWriteOperation op; + + call.time = real_clock::now(); + ret.update_call(call); + + encode(call, in); + op.exec("user", "reset_user_stats2", in, &out, &rval); + r = rados_obj.operate(dpp, &op, y, librados::OPERATION_RETURNVEC); + if (r < 0) { + return r; + } + try { + auto bliter = out.cbegin(); + decode(ret, bliter); + } catch (ceph::buffer::error& err) { + return -EINVAL; + } + } while (ret.truncated); + + return rval; +} + +int RGWSI_User_RADOS::complete_flush_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, optional_yield y) +{ + rgw_raw_obj obj = get_buckets_obj(user); + auto rados_obj = svc.rados->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + librados::ObjectWriteOperation op; + ::cls_user_complete_stats_sync(op); + return rados_obj.operate(dpp, &op, y); +} + +int RGWSI_User_RADOS::cls_user_get_header(const DoutPrefixProvider *dpp, + const rgw_user& user, cls_user_header *header, + optional_yield y) +{ + rgw_raw_obj obj = get_buckets_obj(user); + auto rados_obj = svc.rados->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + int rc; + bufferlist ibl; + librados::ObjectReadOperation op; + ::cls_user_get_header(op, header, &rc); + return rados_obj.operate(dpp, &op, &ibl, y); +} + +int RGWSI_User_RADOS::cls_user_get_header_async(const DoutPrefixProvider *dpp, const string& user_str, RGWGetUserHeader_CB *cb) +{ + rgw_raw_obj obj = get_buckets_obj(rgw_user(user_str)); + auto rados_obj = svc.rados->obj(obj); + int r = rados_obj.open(dpp); + if (r < 0) { + return r; + } + + auto& ref = rados_obj.get_ref(); + + r = ::cls_user_get_header_async(ref.pool.ioctx(), ref.obj.oid, cb); + if (r < 0) { + return r; + } + + return 0; +} + +int RGWSI_User_RADOS::read_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, RGWStorageStats *stats, + ceph::real_time *last_stats_sync, + ceph::real_time *last_stats_update, + optional_yield y) +{ + string user_str = user.to_str(); + + cls_user_header header; + int r = cls_user_get_header(dpp, rgw_user(user_str), &header, y); + if (r < 0) + return r; + + const cls_user_stats& hs = header.stats; + + stats->size = hs.total_bytes; + stats->size_rounded = hs.total_bytes_rounded; + stats->num_objects = hs.total_entries; + + if (last_stats_sync) { + *last_stats_sync = header.last_stats_sync; + } + + if (last_stats_update) { + *last_stats_update = header.last_stats_update; + } + + return 0; +} + +class RGWGetUserStatsContext : public RGWGetUserHeader_CB { + RGWGetUserStats_CB *cb; + +public: + explicit RGWGetUserStatsContext(RGWGetUserStats_CB * const cb) + : cb(cb) {} + + void handle_response(int r, cls_user_header& header) override { + const cls_user_stats& hs = header.stats; + if (r >= 0) { + RGWStorageStats stats; + + stats.size = hs.total_bytes; + stats.size_rounded = hs.total_bytes_rounded; + stats.num_objects = hs.total_entries; + + cb->set_response(stats); + } + + cb->handle_response(r); + + cb->put(); + } +}; + +int RGWSI_User_RADOS::read_stats_async(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, RGWGetUserStats_CB *_cb) +{ + string user_str = user.to_str(); + + RGWGetUserStatsContext *cb = new RGWGetUserStatsContext(_cb); + int r = cls_user_get_header_async(dpp, user_str, cb); + if (r < 0) { + delete cb; + return r; + } + + return 0; +} + diff --git a/src/rgw/services/svc_user_rados.h b/src/rgw/services/svc_user_rados.h new file mode 100644 index 000000000..f0b025e9d --- /dev/null +++ b/src/rgw/services/svc_user_rados.h @@ -0,0 +1,218 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2019 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + + +#pragma once + +#include "rgw/rgw_service.h" + +#include "svc_meta_be.h" +#include "svc_user.h" + +class RGWSI_RADOS; +class RGWSI_Zone; +class RGWSI_SysObj; +class RGWSI_SysObj_Cache; +class RGWSI_Meta; +class RGWSI_SyncModules; +class RGWSI_MetaBackend_Handler; + +struct rgw_cache_entry_info; + +class RGWUserBuckets; + +class RGWGetUserHeader_CB; +class RGWGetUserStats_CB; + +template <class T> +class RGWChainedCacheImpl; + +class RGWSI_User_RADOS : public RGWSI_User +{ + friend class PutOperation; + + std::unique_ptr<RGWSI_MetaBackend::Module> be_module; + RGWSI_MetaBackend_Handler *be_handler; + + struct user_info_cache_entry { + RGWUserInfo info; + RGWObjVersionTracker objv_tracker; + real_time mtime; + }; + + using RGWChainedCacheImpl_user_info_cache_entry = RGWChainedCacheImpl<user_info_cache_entry>; + unique_ptr<RGWChainedCacheImpl_user_info_cache_entry> uinfo_cache; + + rgw_raw_obj get_buckets_obj(const rgw_user& user_id) const; + + int get_user_info_from_index(RGWSI_MetaBackend::Context *ctx, + const string& key, + const rgw_pool& pool, + RGWUserInfo *info, + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, + optional_yield y, + const DoutPrefixProvider *dpp); + + int remove_uid_index(RGWSI_MetaBackend::Context *ctx, const RGWUserInfo& user_info, RGWObjVersionTracker *objv_tracker, + optional_yield y, const DoutPrefixProvider *dpp); + + int remove_key_index(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, const RGWAccessKey& access_key, optional_yield y); + int remove_email_index(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, const string& email, optional_yield y); + int remove_swift_name_index(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, const string& swift_name, optional_yield y); + + /* admin management */ + int cls_user_update_buckets(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add, optional_yield y); + int cls_user_add_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket_entry& entry, optional_yield y); + int cls_user_remove_bucket(const DoutPrefixProvider *dpp, rgw_raw_obj& obj, const cls_user_bucket& bucket, optional_yield y); + + /* quota stats */ + int cls_user_flush_bucket_stats(const DoutPrefixProvider *dpp, rgw_raw_obj& user_obj, + const RGWBucketEnt& ent, optional_yield y); + int cls_user_list_buckets(const DoutPrefixProvider *dpp, + rgw_raw_obj& obj, + const string& in_marker, + const string& end_marker, + const int max_entries, + list<cls_user_bucket_entry>& entries, + string * const out_marker, + bool * const truncated, + optional_yield y); + + int cls_user_reset_stats(const DoutPrefixProvider *dpp, const rgw_user& user, optional_yield y); + int cls_user_get_header(const DoutPrefixProvider *dpp, const rgw_user& user, cls_user_header *header, optional_yield y); + int cls_user_get_header_async(const DoutPrefixProvider *dpp, const string& user, RGWGetUserHeader_CB *cb); + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; +public: + struct Svc { + RGWSI_User_RADOS *user{nullptr}; + RGWSI_RADOS *rados{nullptr}; + RGWSI_Zone *zone{nullptr}; + RGWSI_SysObj *sysobj{nullptr}; + RGWSI_SysObj_Cache *cache{nullptr}; + RGWSI_Meta *meta{nullptr}; + RGWSI_MetaBackend *meta_be{nullptr}; + RGWSI_SyncModules *sync_modules{nullptr}; + } svc; + + RGWSI_User_RADOS(CephContext *cct); + ~RGWSI_User_RADOS(); + + void init(RGWSI_RADOS *_rados_svc, + RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, + RGWSI_SysObj_Cache *_cache_svc, RGWSI_Meta *_meta_svc, + RGWSI_MetaBackend *_meta_be_svc, + RGWSI_SyncModules *_sync_modules); + + RGWSI_MetaBackend_Handler *get_be_handler() override { + return be_handler; + } + + int read_user_info(RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + RGWUserInfo *info, + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, + rgw_cache_entry_info * const cache_info, + map<string, bufferlist> * const pattrs, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int store_user_info(RGWSI_MetaBackend::Context *ctx, + const RGWUserInfo& info, + RGWUserInfo *old_info, + RGWObjVersionTracker *objv_tracker, + const real_time& mtime, + bool exclusive, + map<string, bufferlist> *attrs, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int remove_user_info(RGWSI_MetaBackend::Context *ctx, + const RGWUserInfo& info, + RGWObjVersionTracker *objv_tracker, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + int get_user_info_by_email(RGWSI_MetaBackend::Context *ctx, + const string& email, RGWUserInfo *info, + RGWObjVersionTracker *objv_tracker, + real_time *pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) override; + int get_user_info_by_swift(RGWSI_MetaBackend::Context *ctx, + const string& swift_name, + RGWUserInfo *info, /* out */ + RGWObjVersionTracker * const objv_tracker, + real_time * const pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) override; + int get_user_info_by_access_key(RGWSI_MetaBackend::Context *ctx, + const std::string& access_key, + RGWUserInfo *info, + RGWObjVersionTracker* objv_tracker, + real_time *pmtime, + optional_yield y, + const DoutPrefixProvider *dpp) override; + + /* user buckets directory */ + + int add_bucket(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const rgw_bucket& bucket, + ceph::real_time creation_time, + optional_yield y) override; + int remove_bucket(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const rgw_bucket& _bucket, + optional_yield y) override; + int list_buckets(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const string& marker, + const string& end_marker, + uint64_t max, + RGWUserBuckets *buckets, + bool *is_truncated, + optional_yield y) override; + + /* quota related */ + int flush_bucket_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + const RGWBucketEnt& ent, optional_yield y) override; + + int complete_flush_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, optional_yield y) override; + + int reset_bucket_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, + optional_yield y) override; + int read_stats(const DoutPrefixProvider *dpp, + RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, RGWStorageStats *stats, + ceph::real_time *last_stats_sync, /* last time a full stats sync completed */ + ceph::real_time *last_stats_update, + optional_yield y) override; /* last time a stats update was done */ + + int read_stats_async(const DoutPrefixProvider *dpp, RGWSI_MetaBackend::Context *ctx, + const rgw_user& user, RGWGetUserStats_CB *cb) override; +}; + diff --git a/src/rgw/services/svc_zone.cc b/src/rgw/services/svc_zone.cc new file mode 100644 index 000000000..c12db4800 --- /dev/null +++ b/src/rgw/services/svc_zone.cc @@ -0,0 +1,1311 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_zone.h" +#include "svc_rados.h" +#include "svc_sys_obj.h" +#include "svc_sync_modules.h" + +#include "rgw/rgw_zone.h" +#include "rgw/rgw_rest_conn.h" +#include "rgw/rgw_bucket_sync.h" + +#include "common/errno.h" +#include "include/random.h" + +#define dout_subsys ceph_subsys_rgw + +using namespace rgw_zone_defaults; + +RGWSI_Zone::RGWSI_Zone(CephContext *cct) : RGWServiceInstance(cct) +{ +} + +void RGWSI_Zone::init(RGWSI_SysObj *_sysobj_svc, + RGWSI_RADOS * _rados_svc, + RGWSI_SyncModules * _sync_modules_svc, + RGWSI_Bucket_Sync *_bucket_sync_svc) +{ + sysobj_svc = _sysobj_svc; + rados_svc = _rados_svc; + sync_modules_svc = _sync_modules_svc; + bucket_sync_svc = _bucket_sync_svc; + + realm = new RGWRealm(); + zonegroup = new RGWZoneGroup(); + zone_public_config = new RGWZone(); + zone_params = new RGWZoneParams(); + current_period = new RGWPeriod(); +} + +RGWSI_Zone::~RGWSI_Zone() +{ + delete realm; + delete zonegroup; + delete zone_public_config; + delete zone_params; + delete current_period; +} + +std::shared_ptr<RGWBucketSyncPolicyHandler> RGWSI_Zone::get_sync_policy_handler(std::optional<rgw_zone_id> zone) const { + if (!zone || *zone == zone_id()) { + return sync_policy_handler; + } + auto iter = sync_policy_handlers.find(*zone); + if (iter == sync_policy_handlers.end()) { + return std::shared_ptr<RGWBucketSyncPolicyHandler>(); + } + return iter->second; +} + +bool RGWSI_Zone::zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const +{ + return target_zone.syncs_from(source_zone.name) && + sync_modules_svc->get_manager()->supports_data_export(source_zone.tier_type); +} + +int RGWSI_Zone::do_start(optional_yield y, const DoutPrefixProvider *dpp) +{ + int ret = sysobj_svc->start(y, dpp); + if (ret < 0) { + return ret; + } + + assert(sysobj_svc->is_started()); /* if not then there's ordering issue */ + + ret = rados_svc->start(y, dpp); + if (ret < 0) { + return ret; + } + + ret = realm->init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "failed reading realm info: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } else if (ret != -ENOENT) { + ldpp_dout(dpp, 20) << "realm " << realm->get_name() << " " << realm->get_id() << dendl; + ret = current_period->init(dpp, cct, sysobj_svc, realm->get_id(), y, + realm->get_name()); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "failed reading current period info: " << " " << cpp_strerror(-ret) << dendl; + return ret; + } + ldpp_dout(dpp, 20) << "current period " << current_period->get_id() << dendl; + } + + ret = replace_region_with_zonegroup(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, -1) << "failed converting region to zonegroup : ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } + + ret = convert_regionmap(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, -1) << "failed converting regionmap: " << cpp_strerror(-ret) << dendl; + return ret; + } + + bool zg_initialized = false; + + if (!current_period->get_id().empty()) { + ret = init_zg_from_period(dpp, &zg_initialized, y); + if (ret < 0) { + return ret; + } + } + + bool creating_defaults = false; + bool using_local = (!zg_initialized); + if (using_local) { + ldpp_dout(dpp, 10) << " cannot find current period zonegroup using local zonegroup" << dendl; + ret = init_zg_from_local(dpp, &creating_defaults, y); + if (ret < 0) { + return ret; + } + // read period_config into current_period + auto& period_config = current_period->get_config(); + ret = period_config.read(dpp, sysobj_svc, zonegroup->realm_id, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: failed to read period config: " + << cpp_strerror(ret) << dendl; + return ret; + } + } + + ldpp_dout(dpp, 10) << "Cannot find current period zone using local zone" << dendl; + if (creating_defaults && cct->_conf->rgw_zone.empty()) { + ldpp_dout(dpp, 10) << " Using default name "<< default_zone_name << dendl; + zone_params->set_name(default_zone_name); + } + + ret = zone_params->init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, -1) << "failed reading zone info: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } + + cur_zone_id = rgw_zone_id(zone_params->get_id()); + + auto zone_iter = zonegroup->zones.find(zone_params->get_id()); + if (zone_iter == zonegroup->zones.end()) { + if (using_local) { + ldpp_dout(dpp, -1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << ")" << dendl; + return -EINVAL; + } + ldpp_dout(dpp, 1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << "), switching to local zonegroup configuration" << dendl; + ret = init_zg_from_local(dpp, &creating_defaults, y); + if (ret < 0) { + return ret; + } + zone_iter = zonegroup->zones.find(zone_params->get_id()); + } + if (zone_iter != zonegroup->zones.end()) { + *zone_public_config = zone_iter->second; + ldpp_dout(dpp, 20) << "zone " << zone_params->get_name() << " found" << dendl; + } else { + ldpp_dout(dpp, -1) << "Cannot find zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << ")" << dendl; + return -EINVAL; + } + + zone_short_id = current_period->get_map().get_zone_short_id(zone_params->get_id()); + + for (auto ziter : zonegroup->zones) { + auto zone_handler = std::make_shared<RGWBucketSyncPolicyHandler>(this, sync_modules_svc, bucket_sync_svc, ziter.second.id); + ret = zone_handler->init(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, -1) << "ERROR: could not initialize zone policy handler for zone=" << ziter.second.name << dendl; + return ret; + } + sync_policy_handlers[ziter.second.id] = zone_handler; + } + + sync_policy_handler = sync_policy_handlers[zone_id()]; /* we made sure earlier that zonegroup->zones has our zone */ + + set<rgw_zone_id> source_zones; + set<rgw_zone_id> target_zones; + + sync_policy_handler->reflect(nullptr, nullptr, + nullptr, nullptr, + &source_zones, + &target_zones, + false); /* relaxed: also get all zones that we allow to sync to/from */ + + ret = sync_modules_svc->start(y, dpp); + if (ret < 0) { + return ret; + } + + auto sync_modules = sync_modules_svc->get_manager(); + RGWSyncModuleRef sm; + if (!sync_modules->get_module(zone_public_config->tier_type, &sm)) { + ldpp_dout(dpp, -1) << "ERROR: tier type not found: " << zone_public_config->tier_type << dendl; + return -EINVAL; + } + + writeable_zone = sm->supports_writes(); + exports_data = sm->supports_data_export(); + + /* first build all zones index */ + for (auto ziter : zonegroup->zones) { + const rgw_zone_id& id = ziter.first; + RGWZone& z = ziter.second; + zone_id_by_name[z.name] = id; + zone_by_id[id] = z; + } + + if (zone_by_id.find(zone_id()) == zone_by_id.end()) { + ldpp_dout(dpp, 0) << "WARNING: could not find zone config in zonegroup for local zone (" << zone_id() << "), will use defaults" << dendl; + } + + for (const auto& ziter : zonegroup->zones) { + const rgw_zone_id& id = ziter.first; + const RGWZone& z = ziter.second; + if (id == zone_id()) { + continue; + } + if (z.endpoints.empty()) { + ldpp_dout(dpp, 0) << "WARNING: can't generate connection for zone " << z.id << " id " << z.name << ": no endpoints defined" << dendl; + continue; + } + ldpp_dout(dpp, 20) << "generating connection object for zone " << z.name << " id " << z.id << dendl; + RGWRESTConn *conn = new RGWRESTConn(cct, this, z.id, z.endpoints); + zone_conn_map[id] = conn; + + bool zone_is_source = source_zones.find(z.id) != source_zones.end(); + bool zone_is_target = target_zones.find(z.id) != target_zones.end(); + + if (zone_is_source || zone_is_target) { + if (zone_is_source && sync_modules->supports_data_export(z.tier_type)) { + data_sync_source_zones.push_back(&z); + } + if (zone_is_target) { + zone_data_notify_to_map[id] = conn; + } + } else { + ldpp_dout(dpp, 20) << "NOTICE: not syncing to/from zone " << z.name << " id " << z.id << dendl; + } + } + + ldpp_dout(dpp, 20) << "started zone id=" << zone_params->get_id() << " (name=" << zone_params->get_name() << + ") with tier type = " << zone_public_config->tier_type << dendl; + + return 0; +} + +void RGWSI_Zone::shutdown() +{ + delete rest_master_conn; + + for (auto& item : zone_conn_map) { + auto conn = item.second; + delete conn; + } + + for (auto& item : zonegroup_conn_map) { + auto conn = item.second; + delete conn; + } +} + +int RGWSI_Zone::list_regions(const DoutPrefixProvider *dpp, list<string>& regions) +{ + RGWZoneGroup zonegroup; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(zonegroup.get_pool(cct)); + + return syspool.list_prefixed_objs(dpp, region_info_oid_prefix, ®ions); +} + +int RGWSI_Zone::list_zonegroups(const DoutPrefixProvider *dpp, list<string>& zonegroups) +{ + RGWZoneGroup zonegroup; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(zonegroup.get_pool(cct)); + + return syspool.list_prefixed_objs(dpp, zonegroup_names_oid_prefix, &zonegroups); +} + +int RGWSI_Zone::list_zones(const DoutPrefixProvider *dpp, list<string>& zones) +{ + RGWZoneParams zoneparams; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(zoneparams.get_pool(cct)); + + return syspool.list_prefixed_objs(dpp, zone_names_oid_prefix, &zones); +} + +int RGWSI_Zone::list_realms(const DoutPrefixProvider *dpp, list<string>& realms) +{ + RGWRealm realm(cct, sysobj_svc); + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(realm.get_pool(cct)); + + return syspool.list_prefixed_objs(dpp, realm_names_oid_prefix, &realms); +} + +int RGWSI_Zone::list_periods(const DoutPrefixProvider *dpp, list<string>& periods) +{ + RGWPeriod period; + list<string> raw_periods; + RGWSI_SysObj::Pool syspool = sysobj_svc->get_pool(period.get_pool(cct)); + int ret = syspool.list_prefixed_objs(dpp, period.get_info_oid_prefix(), &raw_periods); + if (ret < 0) { + return ret; + } + for (const auto& oid : raw_periods) { + size_t pos = oid.find("."); + if (pos != std::string::npos) { + periods.push_back(oid.substr(0, pos)); + } else { + periods.push_back(oid); + } + } + periods.sort(); // unique() only detects duplicates if they're adjacent + periods.unique(); + return 0; +} + + +int RGWSI_Zone::list_periods(const DoutPrefixProvider *dpp, const string& current_period, list<string>& periods, optional_yield y) +{ + int ret = 0; + string period_id = current_period; + while(!period_id.empty()) { + RGWPeriod period(period_id); + ret = period.init(dpp, cct, sysobj_svc, y); + if (ret < 0) { + return ret; + } + periods.push_back(period.get_id()); + period_id = period.get_predecessor(); + } + + return ret; +} + +/** + * Replace all region configuration with zonegroup for + * backward compatability + * Returns 0 on success, -ERR# on failure. + */ +int RGWSI_Zone::replace_region_with_zonegroup(const DoutPrefixProvider *dpp, optional_yield y) +{ + /* copy default region */ + /* convert default region to default zonegroup */ + string default_oid = cct->_conf->rgw_default_region_info_oid; + if (default_oid.empty()) { + default_oid = default_region_info_oid; + } + + RGWZoneGroup default_zonegroup; + rgw_pool pool{default_zonegroup.get_pool(cct)}; + string oid = "converted"; + bufferlist bl; + + RGWSysObjectCtx obj_ctx = sysobj_svc->init_obj_ctx(); + RGWSysObj sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid)); + + int ret = sysobj.rop().read(dpp, &bl, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << " failed to read converted: ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } else if (ret != -ENOENT) { + ldpp_dout(dpp, 20) << "System already converted " << dendl; + return 0; + } + + string default_region; + ret = default_zonegroup.init(dpp, cct, sysobj_svc, y, false, true); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed init default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = default_zonegroup.read_default_id(dpp, default_region, y, true); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << " failed reading old default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } + + /* convert regions to zonegroups */ + list<string> regions; + ret = list_regions(dpp, regions); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << " failed to list regions: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } else if (ret == -ENOENT || regions.empty()) { + RGWZoneParams zoneparams(default_zone_name); + int ret = zoneparams.init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << ": error initializing default zone params: " << cpp_strerror(-ret) << dendl; + return ret; + } + /* update master zone */ + RGWZoneGroup default_zg(default_zonegroup_name); + ret = default_zg.init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << ": error in initializing default zonegroup: " << cpp_strerror(-ret) << dendl; + return ret; + } + if (ret != -ENOENT && default_zg.master_zone.empty()) { + default_zg.master_zone = zoneparams.get_id(); + return default_zg.update(dpp, y); + } + return 0; + } + + string master_region; + rgw_zone_id master_zone; + for (list<string>::iterator iter = regions.begin(); iter != regions.end(); ++iter) { + if (*iter != default_zonegroup_name){ + RGWZoneGroup region(*iter); + int ret = region.init(dpp, cct, sysobj_svc, y, true, true); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed init region "<< *iter << ": " << cpp_strerror(-ret) << dendl; + return ret; + } + if (region.is_master_zonegroup()) { + master_region = region.get_id(); + master_zone = region.master_zone; + } + } + } + + /* create realm if there is none. + The realm name will be the region and zone concatenated + realm id will be mds of its name */ + if (realm->get_id().empty() && !master_region.empty() && !master_zone.empty()) { + string new_realm_name = master_region + "." + master_zone.id; + unsigned char md5[CEPH_CRYPTO_MD5_DIGESTSIZE]; + char md5_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1]; + MD5 hash; + // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes + hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + hash.Update((const unsigned char *)new_realm_name.c_str(), new_realm_name.length()); + hash.Final(md5); + buf_to_hex(md5, CEPH_CRYPTO_MD5_DIGESTSIZE, md5_str); + string new_realm_id(md5_str); + RGWRealm new_realm(new_realm_id,new_realm_name); + ret = new_realm.init(dpp, cct, sysobj_svc, y, false); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " Error initing new realm: " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = new_realm.create(dpp, y); + if (ret < 0 && ret != -EEXIST) { + ldpp_dout(dpp, 0) << __func__ << " Error creating new realm: " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = new_realm.set_as_default(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " Error setting realm as default: " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = realm->init(dpp, cct, sysobj_svc, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " Error initing realm: " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = current_period->init(dpp, cct, sysobj_svc, realm->get_id(), y, + realm->get_name()); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " Error initing current period: " << cpp_strerror(-ret) << dendl; + return ret; + } + } + + list<string>::iterator iter; + /* create zonegroups */ + for (iter = regions.begin(); iter != regions.end(); ++iter) + { + ldpp_dout(dpp, 0) << __func__ << " Converting " << *iter << dendl; + /* check to see if we don't have already a zonegroup with this name */ + RGWZoneGroup new_zonegroup(*iter); + ret = new_zonegroup.init(dpp, cct , sysobj_svc, y); + if (ret == 0 && new_zonegroup.get_id() != *iter) { + ldpp_dout(dpp, 0) << __func__ << " zonegroup "<< *iter << " already exists id " << new_zonegroup.get_id () << + " skipping conversion " << dendl; + continue; + } + RGWZoneGroup zonegroup(*iter); + zonegroup.set_id(*iter); + int ret = zonegroup.init(dpp, cct, sysobj_svc, y, true, true); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed init zonegroup: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } + zonegroup.realm_id = realm->get_id(); + /* fix default region master zone */ + if (*iter == default_zonegroup_name && zonegroup.master_zone.empty()) { + ldpp_dout(dpp, 0) << __func__ << " Setting default zone as master for default region" << dendl; + zonegroup.master_zone = default_zone_name; + } + ret = zonegroup.update(dpp, y); + if (ret < 0 && ret != -EEXIST) { + ldpp_dout(dpp, 0) << __func__ << " failed to update zonegroup " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + ret = zonegroup.update_name(dpp, y); + if (ret < 0 && ret != -EEXIST) { + ldpp_dout(dpp, 0) << __func__ << " failed to update_name for zonegroup " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + if (zonegroup.get_name() == default_region) { + ret = zonegroup.set_as_default(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to set_as_default " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + } + for (auto iter = zonegroup.zones.begin(); iter != zonegroup.zones.end(); + ++iter) { + ldpp_dout(dpp, 0) << __func__ << " Converting zone" << iter->first << dendl; + RGWZoneParams zoneparams(iter->first, iter->second.name); + zoneparams.set_id(iter->first.id); + zoneparams.realm_id = realm->get_id(); + ret = zoneparams.init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << " failed to init zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; + return ret; + } else if (ret == -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << " zone is part of another cluster " << iter->first << " skipping " << dendl; + continue; + } + zonegroup.realm_id = realm->get_id(); + ret = zoneparams.update(dpp, y); + if (ret < 0 && ret != -EEXIST) { + ldpp_dout(dpp, 0) << __func__ << " failed to update zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = zoneparams.update_name(dpp, y); + if (ret < 0 && ret != -EEXIST) { + ldpp_dout(dpp, 0) << __func__ << " failed to init zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; + return ret; + } + } + + if (!current_period->get_id().empty()) { + ret = current_period->add_zonegroup(dpp, zonegroup, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to add zonegroup to current_period: " << cpp_strerror(-ret) << dendl; + return ret; + } + } + } + + if (!current_period->get_id().empty()) { + ret = current_period->update(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to update new period: " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = current_period->store_info(dpp, false, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to store new period: " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = current_period->reflect(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed to update local objects: " << cpp_strerror(-ret) << dendl; + return ret; + } + } + + for (auto const& iter : regions) { + RGWZoneGroup zonegroup(iter); + int ret = zonegroup.init(dpp, cct, sysobj_svc, y, true, true); + if (ret < 0) { + ldpp_dout(dpp, 0) << __func__ << " failed init zonegroup" << iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = zonegroup.delete_obj(dpp, y, true); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << __func__ << " failed to delete region " << iter << ": ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + } + + /* mark as converted */ + ret = sysobj.wop() + .set_exclusive(true) + .write(dpp, bl, y); + if (ret < 0 ) { + ldpp_dout(dpp, 0) << __func__ << " failed to mark cluster as converted: ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + + return 0; +} + +/** + * Add new connection to connections map + * @param zonegroup_conn_map map which new connection will be added to + * @param zonegroup zonegroup which new connection will connect to + * @param new_connection pointer to new connection instance + */ +static void add_new_connection_to_map(map<string, RGWRESTConn *> &zonegroup_conn_map, + const RGWZoneGroup &zonegroup, RGWRESTConn *new_connection) +{ + // Delete if connection is already exists + map<string, RGWRESTConn *>::iterator iterZoneGroup = zonegroup_conn_map.find(zonegroup.get_id()); + if (iterZoneGroup != zonegroup_conn_map.end()) { + delete iterZoneGroup->second; + } + + // Add new connection to connections map + zonegroup_conn_map[zonegroup.get_id()] = new_connection; +} + +int RGWSI_Zone::init_zg_from_period(const DoutPrefixProvider *dpp, bool *initialized, optional_yield y) +{ + *initialized = false; + + if (current_period->get_id().empty()) { + return 0; + } + + int ret = zonegroup->init(dpp, cct, sysobj_svc, y); + ldpp_dout(dpp, 20) << "period zonegroup init ret " << ret << dendl; + if (ret == -ENOENT) { + return 0; + } + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed reading zonegroup info: " << cpp_strerror(-ret) << dendl; + return ret; + } + ldpp_dout(dpp, 20) << "period zonegroup name " << zonegroup->get_name() << dendl; + + map<string, RGWZoneGroup>::const_iterator iter = + current_period->get_map().zonegroups.find(zonegroup->get_id()); + + if (iter != current_period->get_map().zonegroups.end()) { + ldpp_dout(dpp, 20) << "using current period zonegroup " << zonegroup->get_name() << dendl; + *zonegroup = iter->second; + ret = zonegroup->init(dpp, cct, sysobj_svc, y, false); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failed init zonegroup: " << " " << cpp_strerror(-ret) << dendl; + return ret; + } + ret = zone_params->init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "failed reading zone params info: " << " " << cpp_strerror(-ret) << dendl; + return ret; + } if (ret ==-ENOENT && zonegroup->get_name() == default_zonegroup_name) { + ldpp_dout(dpp, 10) << " Using default name "<< default_zone_name << dendl; + zone_params->set_name(default_zone_name); + ret = zone_params->init(dpp, cct, sysobj_svc, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "failed reading zone params info: " << " " << cpp_strerror(-ret) << dendl; + return ret; + } + } + } + for (iter = current_period->get_map().zonegroups.begin(); + iter != current_period->get_map().zonegroups.end(); ++iter){ + const RGWZoneGroup& zg = iter->second; + // use endpoints from the zonegroup's master zone + auto master = zg.zones.find(zg.master_zone); + if (master == zg.zones.end()) { + // Check for empty zonegroup which can happen if zone was deleted before removal + if (zg.zones.size() == 0) + continue; + // fix missing master zone for a single zone zonegroup + if (zg.master_zone.empty() && zg.zones.size() == 1) { + master = zg.zones.begin(); + ldpp_dout(dpp, 0) << "zonegroup " << zg.get_name() << " missing master_zone, setting zone " << + master->second.name << " id:" << master->second.id << " as master" << dendl; + if (zonegroup->get_id() == zg.get_id()) { + zonegroup->master_zone = master->second.id; + ret = zonegroup->update(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "error updating zonegroup : " << cpp_strerror(-ret) << dendl; + return ret; + } + } else { + RGWZoneGroup fixed_zg(zg.get_id(),zg.get_name()); + ret = fixed_zg.init(dpp, cct, sysobj_svc, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "error initializing zonegroup : " << cpp_strerror(-ret) << dendl; + return ret; + } + fixed_zg.master_zone = master->second.id; + ret = fixed_zg.update(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "error initializing zonegroup : " << cpp_strerror(-ret) << dendl; + return ret; + } + } + } else { + ldpp_dout(dpp, 0) << "zonegroup " << zg.get_name() << " missing zone for master_zone=" << + zg.master_zone << dendl; + return -EINVAL; + } + } + const auto& endpoints = master->second.endpoints; + add_new_connection_to_map(zonegroup_conn_map, zg, new RGWRESTConn(cct, this, zg.get_id(), endpoints)); + if (!current_period->get_master_zonegroup().empty() && + zg.get_id() == current_period->get_master_zonegroup()) { + rest_master_conn = new RGWRESTConn(cct, this, zg.get_id(), endpoints); + } + } + + *initialized = true; + + return 0; +} + +int RGWSI_Zone::init_zg_from_local(const DoutPrefixProvider *dpp, bool *creating_defaults, optional_yield y) +{ + int ret = zonegroup->init(dpp, cct, sysobj_svc, y); + if ( (ret < 0 && ret != -ENOENT) || (ret == -ENOENT && !cct->_conf->rgw_zonegroup.empty())) { + ldpp_dout(dpp, 0) << "failed reading zonegroup info: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + return ret; + } else if (ret == -ENOENT) { + *creating_defaults = true; + ldpp_dout(dpp, 10) << "Creating default zonegroup " << dendl; + ret = zonegroup->create_default(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failure in zonegroup create_default: ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + ret = zonegroup->init(dpp, cct, sysobj_svc, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "failure in zonegroup create_default: ret "<< ret << " " << cpp_strerror(-ret) + << dendl; + return ret; + } + } + ldpp_dout(dpp, 20) << "zonegroup " << zonegroup->get_name() << dendl; + if (zonegroup->is_master_zonegroup()) { + // use endpoints from the zonegroup's master zone + auto master = zonegroup->zones.find(zonegroup->master_zone); + if (master == zonegroup->zones.end()) { + // fix missing master zone for a single zone zonegroup + if (zonegroup->master_zone.empty() && zonegroup->zones.size() == 1) { + master = zonegroup->zones.begin(); + ldpp_dout(dpp, 0) << "zonegroup " << zonegroup->get_name() << " missing master_zone, setting zone " << + master->second.name << " id:" << master->second.id << " as master" << dendl; + zonegroup->master_zone = master->second.id; + ret = zonegroup->update(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "error initializing zonegroup : " << cpp_strerror(-ret) << dendl; + return ret; + } + } else { + ldpp_dout(dpp, 0) << "zonegroup " << zonegroup->get_name() << " missing zone for " + "master_zone=" << zonegroup->master_zone << dendl; + return -EINVAL; + } + } + const auto& endpoints = master->second.endpoints; + rest_master_conn = new RGWRESTConn(cct, this, zonegroup->get_id(), endpoints); + } + + return 0; +} + +int RGWSI_Zone::convert_regionmap(const DoutPrefixProvider *dpp, optional_yield y) +{ + RGWZoneGroupMap zonegroupmap; + + string pool_name = cct->_conf->rgw_zone_root_pool; + if (pool_name.empty()) { + pool_name = RGW_DEFAULT_ZONE_ROOT_POOL; + } + string oid = region_map_oid; + + rgw_pool pool(pool_name); + bufferlist bl; + + RGWSysObjectCtx obj_ctx = sysobj_svc->init_obj_ctx(); + RGWSysObj sysobj = sysobj_svc->get_obj(obj_ctx, rgw_raw_obj(pool, oid)); + + int ret = sysobj.rop().read(dpp, &bl, y); + if (ret < 0 && ret != -ENOENT) { + return ret; + } else if (ret == -ENOENT) { + return 0; + } + + try { + auto iter = bl.cbegin(); + decode(zonegroupmap, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "error decoding regionmap from " << pool << ":" << oid << dendl; + return -EIO; + } + + for (map<string, RGWZoneGroup>::iterator iter = zonegroupmap.zonegroups.begin(); + iter != zonegroupmap.zonegroups.end(); ++iter) { + RGWZoneGroup& zonegroup = iter->second; + ret = zonegroup.init(dpp, cct, sysobj_svc, y, false); + ret = zonegroup.update(dpp, y); + if (ret < 0 && ret != -ENOENT) { + ldpp_dout(dpp, 0) << "Error could not update zonegroup " << zonegroup.get_name() << ": " << + cpp_strerror(-ret) << dendl; + return ret; + } else if (ret == -ENOENT) { + ret = zonegroup.create(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "Error could not create " << zonegroup.get_name() << ": " << + cpp_strerror(-ret) << dendl; + return ret; + } + } + } + + current_period->set_user_quota(zonegroupmap.user_quota); + current_period->set_bucket_quota(zonegroupmap.bucket_quota); + + // remove the region_map so we don't try to convert again + ret = sysobj.wop().remove(dpp, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "Error could not remove " << sysobj.get_obj() + << " after upgrading to zonegroup map: " << cpp_strerror(ret) << dendl; + return ret; + } + + return 0; +} + +const RGWZoneParams& RGWSI_Zone::get_zone_params() const +{ + return *zone_params; +} + +const RGWZone& RGWSI_Zone::get_zone() const +{ + return *zone_public_config; +} + +const RGWZoneGroup& RGWSI_Zone::get_zonegroup() const +{ + return *zonegroup; +} + +int RGWSI_Zone::get_zonegroup(const string& id, RGWZoneGroup& zg) const +{ + int ret = 0; + if (id == zonegroup->get_id()) { + zg = *zonegroup; + } else if (!current_period->get_id().empty()) { + ret = current_period->get_zonegroup(zg, id); + } + return ret; +} + +const RGWRealm& RGWSI_Zone::get_realm() const +{ + return *realm; +} + +const RGWPeriod& RGWSI_Zone::get_current_period() const +{ + return *current_period; +} + +const string& RGWSI_Zone::get_current_period_id() const +{ + return current_period->get_id(); +} + +bool RGWSI_Zone::has_zonegroup_api(const std::string& api) const +{ + if (!current_period->get_id().empty()) { + const auto& zonegroups_by_api = current_period->get_map().zonegroups_by_api; + if (zonegroups_by_api.find(api) != zonegroups_by_api.end()) + return true; + } else if (zonegroup->api_name == api) { + return true; + } + return false; +} + +bool RGWSI_Zone::zone_is_writeable() +{ + return writeable_zone && !get_zone().is_read_only(); +} + +uint32_t RGWSI_Zone::get_zone_short_id() const +{ + return zone_short_id; +} + +const string& RGWSI_Zone::zone_name() const +{ + return get_zone_params().get_name(); +} + +bool RGWSI_Zone::find_zone(const rgw_zone_id& id, RGWZone **zone) +{ + auto iter = zone_by_id.find(id); + if (iter == zone_by_id.end()) { + return false; + } + *zone = &(iter->second); + return true; +} + +RGWRESTConn *RGWSI_Zone::get_zone_conn(const rgw_zone_id& zone_id) { + auto citer = zone_conn_map.find(zone_id.id); + if (citer == zone_conn_map.end()) { + return NULL; + } + + return citer->second; +} + +RGWRESTConn *RGWSI_Zone::get_zone_conn_by_name(const string& name) { + auto i = zone_id_by_name.find(name); + if (i == zone_id_by_name.end()) { + return NULL; + } + + return get_zone_conn(i->second); +} + +bool RGWSI_Zone::find_zone_id_by_name(const string& name, rgw_zone_id *id) { + auto i = zone_id_by_name.find(name); + if (i == zone_id_by_name.end()) { + return false; + } + *id = i->second; + return true; +} + +bool RGWSI_Zone::need_to_sync() const +{ + return !(zonegroup->master_zone.empty() || + !rest_master_conn || + current_period->get_id().empty()); +} + +bool RGWSI_Zone::need_to_log_data() const +{ + return zone_public_config->log_data; +} + +bool RGWSI_Zone::is_meta_master() const +{ + if (!zonegroup->is_master_zonegroup()) { + return false; + } + + return (zonegroup->master_zone == zone_public_config->id); +} + +bool RGWSI_Zone::need_to_log_metadata() const +{ + return is_meta_master() && + (zonegroup->zones.size() > 1 || current_period->is_multi_zonegroups_with_zones()); +} + +bool RGWSI_Zone::can_reshard() const +{ + return current_period->get_id().empty() || + (zonegroup->zones.size() == 1 && current_period->is_single_zonegroup()); +} + +/** + * Check to see if the bucket metadata could be synced + * bucket: the bucket to check + * Returns false is the bucket is not synced + */ +bool RGWSI_Zone::is_syncing_bucket_meta(const rgw_bucket& bucket) +{ + + /* no current period */ + if (current_period->get_id().empty()) { + return false; + } + + /* zonegroup is not master zonegroup */ + if (!zonegroup->is_master_zonegroup()) { + return false; + } + + /* single zonegroup and a single zone */ + if (current_period->is_single_zonegroup() && zonegroup->zones.size() == 1) { + return false; + } + + /* zone is not master */ + if (zonegroup->master_zone != zone_public_config->id) { + return false; + } + + return true; +} + + +int RGWSI_Zone::select_new_bucket_location(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const string& zonegroup_id, + const rgw_placement_rule& request_rule, + rgw_placement_rule *pselected_rule_name, RGWZonePlacementInfo *rule_info, + optional_yield y) +{ + /* first check that zonegroup exists within current period. */ + RGWZoneGroup zonegroup; + int ret = get_zonegroup(zonegroup_id, zonegroup); + if (ret < 0) { + ldpp_dout(dpp, 0) << "could not find zonegroup " << zonegroup_id << " in current period" << dendl; + return ret; + } + + const rgw_placement_rule *used_rule; + + /* find placement rule. Hierarchy: request rule > user default rule > zonegroup default rule */ + std::map<std::string, RGWZoneGroupPlacementTarget>::const_iterator titer; + + if (!request_rule.name.empty()) { + used_rule = &request_rule; + titer = zonegroup.placement_targets.find(request_rule.name); + if (titer == zonegroup.placement_targets.end()) { + ldpp_dout(dpp, 0) << "could not find requested placement id " << request_rule + << " within zonegroup " << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + } else if (!user_info.default_placement.name.empty()) { + used_rule = &user_info.default_placement; + titer = zonegroup.placement_targets.find(user_info.default_placement.name); + if (titer == zonegroup.placement_targets.end()) { + ldpp_dout(dpp, 0) << "could not find user default placement id " << user_info.default_placement + << " within zonegroup " << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + } else { + if (zonegroup.default_placement.name.empty()) { // zonegroup default rule as fallback, it should not be empty. + ldpp_dout(dpp, 0) << "misconfiguration, zonegroup default placement id should not be empty." << dendl; + return -ERR_ZONEGROUP_DEFAULT_PLACEMENT_MISCONFIGURATION; + } else { + used_rule = &zonegroup.default_placement; + titer = zonegroup.placement_targets.find(zonegroup.default_placement.name); + if (titer == zonegroup.placement_targets.end()) { + ldpp_dout(dpp, 0) << "could not find zonegroup default placement id " << zonegroup.default_placement + << " within zonegroup " << dendl; + return -ERR_INVALID_LOCATION_CONSTRAINT; + } + } + } + + /* now check tag for the rule, whether user is permitted to use rule */ + const auto& target_rule = titer->second; + if (!target_rule.user_permitted(user_info.placement_tags)) { + ldpp_dout(dpp, 0) << "user not permitted to use placement rule " << titer->first << dendl; + return -EPERM; + } + + const string *storage_class = &request_rule.storage_class; + + if (storage_class->empty()) { + storage_class = &used_rule->storage_class; + } + + rgw_placement_rule rule(titer->first, *storage_class); + + if (pselected_rule_name) { + *pselected_rule_name = rule; + } + + return select_bucket_location_by_rule(dpp, rule, rule_info, y); +} + +int RGWSI_Zone::select_bucket_location_by_rule(const DoutPrefixProvider *dpp, const rgw_placement_rule& location_rule, RGWZonePlacementInfo *rule_info, optional_yield y) +{ + if (location_rule.name.empty()) { + /* we can only reach here if we're trying to set a bucket location from a bucket + * created on a different zone, using a legacy / default pool configuration + */ + if (rule_info) { + return select_legacy_bucket_placement(dpp, rule_info, y); + } + + return 0; + } + + /* + * make sure that zone has this rule configured. We're + * checking it for the local zone, because that's where this bucket object is going to + * reside. + */ + auto piter = zone_params->placement_pools.find(location_rule.name); + if (piter == zone_params->placement_pools.end()) { + /* couldn't find, means we cannot really place data for this bucket in this zone */ + ldpp_dout(dpp, 0) << "ERROR: This zone does not contain placement rule " + << location_rule << " present in the zonegroup!" << dendl; + return -EINVAL; + } + + auto storage_class = location_rule.get_storage_class(); + if (!piter->second.storage_class_exists(storage_class)) { + ldpp_dout(dpp, 5) << "requested storage class does not exist: " << storage_class << dendl; + return -EINVAL; + } + + + RGWZonePlacementInfo& placement_info = piter->second; + + if (rule_info) { + *rule_info = placement_info; + } + + return 0; +} + +int RGWSI_Zone::select_bucket_placement(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const string& zonegroup_id, + const rgw_placement_rule& placement_rule, + rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info, + optional_yield y) +{ + if (!zone_params->placement_pools.empty()) { + return select_new_bucket_location(dpp, user_info, zonegroup_id, placement_rule, + pselected_rule, rule_info, y); + } + + if (pselected_rule) { + pselected_rule->clear(); + } + + if (rule_info) { + return select_legacy_bucket_placement(dpp, rule_info, y); + } + + return 0; +} + +int RGWSI_Zone::select_legacy_bucket_placement(const DoutPrefixProvider *dpp, RGWZonePlacementInfo *rule_info, + optional_yield y) +{ + bufferlist map_bl; + map<string, bufferlist> m; + string pool_name; + bool write_map = false; + + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + int ret = sysobj.rop().read(dpp, &map_bl, y); + if (ret < 0) { + goto read_omap; + } + + try { + auto iter = map_bl.cbegin(); + decode(m, iter); + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: couldn't decode avail_pools" << dendl; + } + +read_omap: + if (m.empty()) { + ret = sysobj.omap().get_all(dpp, &m, y); + + write_map = true; + } + + if (ret < 0 || m.empty()) { + vector<rgw_pool> pools; + string s = string("default.") + default_storage_pool_suffix; + pools.push_back(rgw_pool(s)); + vector<int> retcodes; + bufferlist bl; + ret = rados_svc->pool().create(pools, &retcodes); + if (ret < 0) + return ret; + ret = sysobj.omap().set(dpp, s, bl, y); + if (ret < 0) + return ret; + m[s] = bl; + } + + if (write_map) { + bufferlist new_bl; + encode(m, new_bl); + ret = sysobj.wop().write(dpp, new_bl, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl; + } + } + + auto miter = m.begin(); + if (m.size() > 1) { + // choose a pool at random + auto r = ceph::util::generate_random_number<size_t>(0, m.size() - 1); + std::advance(miter, r); + } + pool_name = miter->first; + + rgw_pool pool = pool_name; + + rule_info->storage_classes.set_storage_class(RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); + rule_info->data_extra_pool = pool_name; + rule_info->index_pool = pool_name; + rule_info->index_type = rgw::BucketIndexType::Normal; + + return 0; +} + +int RGWSI_Zone::update_placement_map(const DoutPrefixProvider *dpp, optional_yield y) +{ + bufferlist header; + map<string, bufferlist> m; + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + int ret = sysobj.omap().get_all(dpp, &m, y); + if (ret < 0) + return ret; + + bufferlist new_bl; + encode(m, new_bl); + ret = sysobj.wop().write(dpp, new_bl, y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "WARNING: could not save avail pools map info ret=" << ret << dendl; + } + + return ret; +} + +int RGWSI_Zone::add_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& new_pool, optional_yield y) +{ + int ret = rados_svc->pool(new_pool).lookup(); + if (ret < 0) { // DNE, or something + return ret; + } + + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + bufferlist empty_bl; + ret = sysobj.omap().set(dpp, new_pool.to_str(), empty_bl, y); + + // don't care about return value + update_placement_map(dpp, y); + + return ret; +} + +int RGWSI_Zone::remove_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& old_pool, optional_yield y) +{ + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + + int ret = sysobj.omap().del(dpp, old_pool.to_str(), y); + + // don't care about return value + update_placement_map(dpp, y); + + return ret; +} + +int RGWSI_Zone::list_placement_set(const DoutPrefixProvider *dpp, set<rgw_pool>& names, optional_yield y) +{ + bufferlist header; + map<string, bufferlist> m; + + rgw_raw_obj obj(zone_params->domain_root, avail_pools); + auto obj_ctx = sysobj_svc->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + int ret = sysobj.omap().get_all(dpp, &m, y); + if (ret < 0) + return ret; + + names.clear(); + map<string, bufferlist>::iterator miter; + for (miter = m.begin(); miter != m.end(); ++miter) { + names.insert(rgw_pool(miter->first)); + } + + return names.size(); +} + +bool RGWSI_Zone::get_redirect_zone_endpoint(string *endpoint) +{ + if (zone_public_config->redirect_zone.empty()) { + return false; + } + + auto iter = zone_conn_map.find(zone_public_config->redirect_zone); + if (iter == zone_conn_map.end()) { + ldout(cct, 0) << "ERROR: cannot find entry for redirect zone: " << zone_public_config->redirect_zone << dendl; + return false; + } + + RGWRESTConn *conn = iter->second; + + int ret = conn->get_url(*endpoint); + if (ret < 0) { + ldout(cct, 0) << "ERROR: redirect zone, conn->get_endpoint() returned ret=" << ret << dendl; + return false; + } + + return true; +} + diff --git a/src/rgw/services/svc_zone.h b/src/rgw/services/svc_zone.h new file mode 100644 index 000000000..9f503df5e --- /dev/null +++ b/src/rgw/services/svc_zone.h @@ -0,0 +1,155 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + + +class RGWSI_RADOS; +class RGWSI_SysObj; +class RGWSI_SyncModules; +class RGWSI_Bucket_Sync; + +class RGWRealm; +class RGWZoneGroup; +class RGWZone; +class RGWZoneParams; +class RGWPeriod; +class RGWZonePlacementInfo; + +class RGWBucketSyncPolicyHandler; + +class RGWRESTConn; + +struct rgw_sync_policy_info; + +class RGWSI_Zone : public RGWServiceInstance +{ + friend struct RGWServices_Def; + + RGWSI_SysObj *sysobj_svc{nullptr}; + RGWSI_RADOS *rados_svc{nullptr}; + RGWSI_SyncModules *sync_modules_svc{nullptr}; + RGWSI_Bucket_Sync *bucket_sync_svc{nullptr}; + + RGWRealm *realm{nullptr}; + RGWZoneGroup *zonegroup{nullptr}; + RGWZone *zone_public_config{nullptr}; /* external zone params, e.g., entrypoints, log flags, etc. */ + RGWZoneParams *zone_params{nullptr}; /* internal zone params, e.g., rados pools */ + RGWPeriod *current_period{nullptr}; + rgw_zone_id cur_zone_id; + uint32_t zone_short_id{0}; + bool writeable_zone{false}; + bool exports_data{false}; + + std::shared_ptr<RGWBucketSyncPolicyHandler> sync_policy_handler; + std::map<rgw_zone_id, std::shared_ptr<RGWBucketSyncPolicyHandler> > sync_policy_handlers; + + RGWRESTConn *rest_master_conn{nullptr}; + map<rgw_zone_id, RGWRESTConn *> zone_conn_map; + std::vector<const RGWZone*> data_sync_source_zones; + map<rgw_zone_id, RGWRESTConn *> zone_data_notify_to_map; + map<string, RGWRESTConn *> zonegroup_conn_map; + + map<string, rgw_zone_id> zone_id_by_name; + map<rgw_zone_id, RGWZone> zone_by_id; + + std::unique_ptr<rgw_sync_policy_info> sync_policy; + + void init(RGWSI_SysObj *_sysobj_svc, + RGWSI_RADOS *_rados_svc, + RGWSI_SyncModules *_sync_modules_svc, + RGWSI_Bucket_Sync *_bucket_sync_svc); + int do_start(optional_yield y, const DoutPrefixProvider *dpp) override; + void shutdown() override; + + int replace_region_with_zonegroup(const DoutPrefixProvider *dpp, optional_yield y); + int init_zg_from_period(const DoutPrefixProvider *dpp, bool *initialized, optional_yield y); + int init_zg_from_local(const DoutPrefixProvider *dpp, bool *creating_defaults, optional_yield y); + int convert_regionmap(const DoutPrefixProvider *dpp, optional_yield y); + + int update_placement_map(const DoutPrefixProvider *dpp, optional_yield y); +public: + RGWSI_Zone(CephContext *cct); + ~RGWSI_Zone(); + + const RGWZoneParams& get_zone_params() const; + const RGWPeriod& get_current_period() const; + const RGWRealm& get_realm() const; + const RGWZoneGroup& get_zonegroup() const; + int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) const; + const RGWZone& get_zone() const; + + std::shared_ptr<RGWBucketSyncPolicyHandler> get_sync_policy_handler(std::optional<rgw_zone_id> zone = nullopt) const; + + const string& zone_name() const; + const rgw_zone_id& zone_id() const { + return cur_zone_id; + } + uint32_t get_zone_short_id() const; + + const string& get_current_period_id() const; + bool has_zonegroup_api(const std::string& api) const; + + bool zone_is_writeable(); + bool zone_syncs_from(const RGWZone& target_zone, const RGWZone& source_zone) const; + bool get_redirect_zone_endpoint(string *endpoint); + bool sync_module_supports_writes() const { return writeable_zone; } + bool sync_module_exports_data() const { return exports_data; } + + RGWRESTConn *get_master_conn() { + return rest_master_conn; + } + + map<string, RGWRESTConn *>& get_zonegroup_conn_map() { + return zonegroup_conn_map; + } + + map<rgw_zone_id, RGWRESTConn *>& get_zone_conn_map() { + return zone_conn_map; + } + + std::vector<const RGWZone*>& get_data_sync_source_zones() { + return data_sync_source_zones; + } + + map<rgw_zone_id, RGWRESTConn *>& get_zone_data_notify_to_map() { + return zone_data_notify_to_map; + } + + bool find_zone(const rgw_zone_id& id, RGWZone **zone); + + RGWRESTConn *get_zone_conn(const rgw_zone_id& zone_id); + RGWRESTConn *get_zone_conn_by_name(const string& name); + bool find_zone_id_by_name(const string& name, rgw_zone_id *id); + + int select_bucket_placement(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const string& zonegroup_id, + const rgw_placement_rule& rule, + rgw_placement_rule *pselected_rule, RGWZonePlacementInfo *rule_info, optional_yield y); + int select_legacy_bucket_placement(const DoutPrefixProvider *dpp, RGWZonePlacementInfo *rule_info, optional_yield y); + int select_new_bucket_location(const DoutPrefixProvider *dpp, const RGWUserInfo& user_info, const string& zonegroup_id, + const rgw_placement_rule& rule, + rgw_placement_rule *pselected_rule_name, RGWZonePlacementInfo *rule_info, + optional_yield y); + int select_bucket_location_by_rule(const DoutPrefixProvider *dpp, const rgw_placement_rule& location_rule, RGWZonePlacementInfo *rule_info, optional_yield y); + + int add_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& new_pool, optional_yield y); + int remove_bucket_placement(const DoutPrefixProvider *dpp, const rgw_pool& old_pool, optional_yield y); + int list_placement_set(const DoutPrefixProvider *dpp, set<rgw_pool>& names, optional_yield y); + + bool is_meta_master() const; + + bool need_to_sync() const; + bool need_to_log_data() const; + bool need_to_log_metadata() const; + bool can_reshard() const; + bool is_syncing_bucket_meta(const rgw_bucket& bucket); + + int list_zonegroups(const DoutPrefixProvider *dpp, list<string>& zonegroups); + int list_regions(const DoutPrefixProvider *dpp, list<string>& regions); + int list_zones(const DoutPrefixProvider *dpp, list<string>& zones); + int list_realms(const DoutPrefixProvider *dpp, list<string>& realms); + int list_periods(const DoutPrefixProvider *dpp, list<string>& periods); + int list_periods(const DoutPrefixProvider *dpp, const string& current_period, list<string>& periods, optional_yield y); +}; diff --git a/src/rgw/services/svc_zone_utils.cc b/src/rgw/services/svc_zone_utils.cc new file mode 100644 index 000000000..b292d7b99 --- /dev/null +++ b/src/rgw/services/svc_zone_utils.cc @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "svc_zone_utils.h" +#include "svc_rados.h" +#include "svc_zone.h" + +#include "rgw/rgw_zone.h" + +int RGWSI_ZoneUtils::do_start(optional_yield, const DoutPrefixProvider *dpp) +{ + init_unique_trans_id_deps(); + + return 0; +} + +string RGWSI_ZoneUtils::gen_host_id() { + /* uint64_t needs 16, two '-' separators and a trailing null */ + const string& zone_name = zone_svc->get_zone().name; + const string& zonegroup_name = zone_svc->get_zonegroup().get_name(); + char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1]; + snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)rados_svc->instance_id(), zone_name.c_str(), zonegroup_name.c_str()); + return string(charbuf); +} + +string RGWSI_ZoneUtils::unique_id(uint64_t unique_num) +{ + char buf[32]; + snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)rados_svc->instance_id(), (unsigned long long)unique_num); + string s = zone_svc->get_zone_params().get_id() + buf; + return s; +} + +void RGWSI_ZoneUtils::init_unique_trans_id_deps() { + char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */ + + snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)rados_svc->instance_id()); + url_encode(string(buf) + zone_svc->get_zone().name, trans_id_suffix); +} + +/* In order to preserve compatibility with Swift API, transaction ID + * should contain at least 32 characters satisfying following spec: + * - first 21 chars must be in range [0-9a-f]. Swift uses this + * space for storing fragment of UUID obtained through a call to + * uuid4() function of Python's uuid module; + * - char no. 22 must be a hyphen; + * - at least 10 next characters constitute hex-formatted timestamp + * padded with zeroes if necessary. All bytes must be in [0-9a-f] + * range; + * - last, optional part of transaction ID is any url-encoded string + * without restriction on length. */ +string RGWSI_ZoneUtils::unique_trans_id(const uint64_t unique_num) { + char buf[41]; /* 2 + 21 + 1 + 16 (timestamp can consume up to 16) + 1 */ + time_t timestamp = time(NULL); + + snprintf(buf, sizeof(buf), "tx%021llx-%010llx", + (unsigned long long)unique_num, + (unsigned long long)timestamp); + + return string(buf) + trans_id_suffix; +} + diff --git a/src/rgw/services/svc_zone_utils.h b/src/rgw/services/svc_zone_utils.h new file mode 100644 index 000000000..24abe528e --- /dev/null +++ b/src/rgw/services/svc_zone_utils.h @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#pragma once + +#include "rgw/rgw_service.h" + + +class RGWSI_RADOS; +class RGWSI_Zone; + +class RGWSI_ZoneUtils : public RGWServiceInstance +{ + friend struct RGWServices_Def; + + RGWSI_RADOS *rados_svc{nullptr}; + RGWSI_Zone *zone_svc{nullptr}; + + string trans_id_suffix; + + void init(RGWSI_RADOS *_rados_svc, + RGWSI_Zone *_zone_svc) { + rados_svc = _rados_svc; + zone_svc = _zone_svc; + } + + int do_start(optional_yield, const DoutPrefixProvider *dpp) override; + + void init_unique_trans_id_deps(); + +public: + RGWSI_ZoneUtils(CephContext *cct): RGWServiceInstance(cct) {} + + string gen_host_id(); + string unique_id(uint64_t unique_num); + + string unique_trans_id(const uint64_t unique_num); +}; |