diff options
Diffstat (limited to 'src/rgw/rgw_metadata.cc')
-rw-r--r-- | src/rgw/rgw_metadata.cc | 1178 |
1 files changed, 1178 insertions, 0 deletions
diff --git a/src/rgw/rgw_metadata.cc b/src/rgw/rgw_metadata.cc new file mode 100644 index 00000000..9741cba9 --- /dev/null +++ b/src/rgw/rgw_metadata.cc @@ -0,0 +1,1178 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <boost/intrusive_ptr.hpp> +#include "common/ceph_json.h" +#include "common/errno.h" +#include "rgw_metadata.h" +#include "rgw_coroutine.h" +#include "cls/version/cls_version_types.h" + +#include "rgw_rados.h" +#include "rgw_zone.h" +#include "rgw_tools.h" + +#include "rgw_cr_rados.h" + +#include "services/svc_zone.h" + +#include "include/ceph_assert.h" + +#include <boost/asio/yield.hpp> + +#define dout_subsys ceph_subsys_rgw + +void LogStatusDump::dump(Formatter *f) const { + string s; + switch (status) { + case MDLOG_STATUS_WRITE: + s = "write"; + break; + case MDLOG_STATUS_SETATTRS: + s = "set_attrs"; + break; + case MDLOG_STATUS_REMOVE: + s = "remove"; + break; + case MDLOG_STATUS_COMPLETE: + s = "complete"; + break; + case MDLOG_STATUS_ABORT: + s = "abort"; + break; + default: + s = "unknown"; + break; + } + encode_json("status", s, f); +} + +void RGWMetadataLogData::encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + encode(read_version, bl); + encode(write_version, bl); + uint32_t s = (uint32_t)status; + encode(s, bl); + ENCODE_FINISH(bl); +} + +void RGWMetadataLogData::decode(bufferlist::const_iterator& bl) { + DECODE_START(1, bl); + decode(read_version, bl); + decode(write_version, bl); + uint32_t s; + decode(s, bl); + status = (RGWMDLogStatus)s; + DECODE_FINISH(bl); +} + +void RGWMetadataLogData::dump(Formatter *f) const { + encode_json("read_version", read_version, f); + encode_json("write_version", write_version, f); + encode_json("status", LogStatusDump(status), f); +} + +void decode_json_obj(RGWMDLogStatus& status, JSONObj *obj) { + string s; + JSONDecoder::decode_json("status", s, obj); + if (s == "complete") { + status = MDLOG_STATUS_COMPLETE; + } else if (s == "write") { + status = MDLOG_STATUS_WRITE; + } else if (s == "remove") { + status = MDLOG_STATUS_REMOVE; + } else if (s == "set_attrs") { + status = MDLOG_STATUS_SETATTRS; + } else if (s == "abort") { + status = MDLOG_STATUS_ABORT; + } else { + status = MDLOG_STATUS_UNKNOWN; + } +} + +void RGWMetadataLogData::decode_json(JSONObj *obj) { + JSONDecoder::decode_json("read_version", read_version, obj); + JSONDecoder::decode_json("write_version", write_version, obj); + JSONDecoder::decode_json("status", status, obj); +} + + +int RGWMetadataLog::add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl) { + if (!store->svc.zone->need_to_log_metadata()) + return 0; + + string oid; + + string hash_key; + handler->get_hash_key(section, key, hash_key); + + int shard_id; + store->shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id); + mark_modified(shard_id); + real_time now = real_clock::now(); + return store->time_log_add(oid, now, section, key, bl); +} + +int RGWMetadataLog::store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion) +{ + string oid; + + mark_modified(shard_id); + store->shard_name(prefix, shard_id, oid); + return store->time_log_add(oid, entries, completion, false); +} + +void RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, + string& marker, void **handle) +{ + LogListCtx *ctx = new LogListCtx(); + + ctx->cur_shard = shard_id; + ctx->from_time = from_time; + ctx->end_time = end_time; + ctx->marker = marker; + + get_shard_oid(ctx->cur_shard, ctx->cur_oid); + + *handle = (void *)ctx; +} + +void RGWMetadataLog::complete_list_entries(void *handle) { + LogListCtx *ctx = static_cast<LogListCtx *>(handle); + delete ctx; +} + +int RGWMetadataLog::list_entries(void *handle, + int max_entries, + list<cls_log_entry>& entries, + string *last_marker, + bool *truncated) { + LogListCtx *ctx = static_cast<LogListCtx *>(handle); + + if (!max_entries) { + *truncated = false; + return 0; + } + + std::string next_marker; + int ret = store->time_log_list(ctx->cur_oid, ctx->from_time, ctx->end_time, + max_entries, entries, ctx->marker, + &next_marker, truncated); + if ((ret < 0) && (ret != -ENOENT)) + return ret; + + ctx->marker = std::move(next_marker); + if (last_marker) { + *last_marker = ctx->marker; + } + + if (ret == -ENOENT) + *truncated = false; + + return 0; +} + +int RGWMetadataLog::get_info(int shard_id, RGWMetadataLogInfo *info) +{ + string oid; + get_shard_oid(shard_id, oid); + + cls_log_header header; + + int ret = store->time_log_info(oid, &header); + if ((ret < 0) && (ret != -ENOENT)) + return ret; + + info->marker = header.max_marker; + info->last_update = header.max_time.to_real_time(); + + return 0; +} + +static void _mdlog_info_completion(librados::completion_t cb, void *arg) +{ + auto infoc = static_cast<RGWMetadataLogInfoCompletion *>(arg); + infoc->finish(cb); + infoc->put(); // drop the ref from get_info_async() +} + +RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb) + : completion(librados::Rados::aio_create_completion((void *)this, nullptr, + _mdlog_info_completion)), + callback(cb) +{ +} + +RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion() +{ + completion->release(); +} + +int RGWMetadataLog::get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion) +{ + string oid; + get_shard_oid(shard_id, oid); + + completion->get(); // hold a ref until the completion fires + + return store->time_log_info_async(completion->get_io_ctx(), oid, + &completion->get_header(), + completion->get_completion()); +} + +int RGWMetadataLog::trim(int shard_id, const real_time& from_time, const real_time& end_time, + const string& start_marker, const string& end_marker) +{ + string oid; + get_shard_oid(shard_id, oid); + + return store->time_log_trim(oid, from_time, end_time, start_marker, + end_marker, nullptr); +} + +int RGWMetadataLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id) { + string oid; + get_shard_oid(shard_id, oid); + + return store->lock_exclusive(store->svc.zone->get_zone_params().log_pool, oid, duration, zone_id, owner_id); +} + +int RGWMetadataLog::unlock(int shard_id, string& zone_id, string& owner_id) { + string oid; + get_shard_oid(shard_id, oid); + + return store->unlock(store->svc.zone->get_zone_params().log_pool, oid, zone_id, owner_id); +} + +void RGWMetadataLog::mark_modified(int shard_id) +{ + lock.get_read(); + if (modified_shards.find(shard_id) != modified_shards.end()) { + lock.unlock(); + return; + } + lock.unlock(); + + RWLock::WLocker wl(lock); + modified_shards.insert(shard_id); +} + +void RGWMetadataLog::read_clear_modified(set<int> &modified) +{ + RWLock::WLocker wl(lock); + modified.swap(modified_shards); + modified_shards.clear(); +} + +obj_version& RGWMetadataObject::get_version() +{ + return objv; +} + +class RGWMetadataTopHandler : public RGWMetadataHandler { + struct iter_data { + set<string> sections; + set<string>::iterator iter; + }; + +public: + RGWMetadataTopHandler() {} + + string get_type() override { return string(); } + + int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override { return -ENOTSUP; } + int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, + real_time mtime, JSONObj *obj, sync_type_t sync_type) override { return -ENOTSUP; } + + virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {} + + int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override { return -ENOTSUP; } + + int list_keys_init(RGWRados *store, const string& marker, void **phandle) override { + iter_data *data = new iter_data; + list<string> sections; + store->meta_mgr->get_sections(sections); + for (auto& s : sections) { + data->sections.insert(s); + } + data->iter = data->sections.lower_bound(marker); + + *phandle = data; + + return 0; + } + int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override { + iter_data *data = static_cast<iter_data *>(handle); + for (int i = 0; i < max && data->iter != data->sections.end(); ++i, ++(data->iter)) { + keys.push_back(*data->iter); + } + + *truncated = (data->iter != data->sections.end()); + + return 0; + } + void list_keys_complete(void *handle) override { + iter_data *data = static_cast<iter_data *>(handle); + + delete data; + } + + virtual string get_marker(void *handle) override { + iter_data *data = static_cast<iter_data *>(handle); + + if (data->iter != data->sections.end()) { + return *(data->iter); + } + + return string(); + } +}; + +static RGWMetadataTopHandler md_top_handler; + + +RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store) + : cct(_cct), store(_store) +{ +} + +RGWMetadataManager::~RGWMetadataManager() +{ + map<string, RGWMetadataHandler *>::iterator iter; + + for (iter = handlers.begin(); iter != handlers.end(); ++iter) { + delete iter->second; + } + + handlers.clear(); +} + +const std::string RGWMetadataLogHistory::oid = "meta.history"; + +namespace { + +int read_history(RGWRados *store, RGWMetadataLogHistory *state, + RGWObjVersionTracker *objv_tracker) +{ + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + auto& pool = store->svc.zone->get_zone_params().log_pool; + const auto& oid = RGWMetadataLogHistory::oid; + bufferlist bl; + int ret = rgw_get_system_obj(store, obj_ctx, pool, oid, bl, objv_tracker, nullptr); + if (ret < 0) { + return ret; + } + if (bl.length() == 0) { + /* bad history object, remove it */ + rgw_raw_obj obj(pool, oid); + auto sysobj = obj_ctx.get_obj(obj); + ret = sysobj.wop().remove(); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl; + return ret; + } + return -ENOENT; + } + try { + auto p = bl.cbegin(); + state->decode(p); + } catch (buffer::error& e) { + ldout(store->ctx(), 1) << "failed to decode the mdlog history: " + << e.what() << dendl; + return -EIO; + } + return 0; +} + +int write_history(RGWRados *store, const RGWMetadataLogHistory& state, + RGWObjVersionTracker *objv_tracker, bool exclusive = false) +{ + bufferlist bl; + state.encode(bl); + + auto& pool = store->svc.zone->get_zone_params().log_pool; + const auto& oid = RGWMetadataLogHistory::oid; + return rgw_put_system_obj(store, pool, oid, bl, + exclusive, objv_tracker, real_time{}); +} + +using Cursor = RGWPeriodHistory::Cursor; + +/// read the mdlog history and use it to initialize the given cursor +class ReadHistoryCR : public RGWCoroutine { + RGWRados *store; + Cursor *cursor; + RGWObjVersionTracker *objv_tracker; + RGWMetadataLogHistory state; + public: + ReadHistoryCR(RGWRados *store, Cursor *cursor, + RGWObjVersionTracker *objv_tracker) + : RGWCoroutine(store->ctx()), store(store), cursor(cursor), + objv_tracker(objv_tracker) + {} + + int operate() { + reenter(this) { + yield { + rgw_raw_obj obj{store->svc.zone->get_zone_params().log_pool, + RGWMetadataLogHistory::oid}; + constexpr bool empty_on_enoent = false; + + using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>; + call(new ReadCR(store->get_async_rados(), store->svc.sysobj, obj, + &state, empty_on_enoent, objv_tracker)); + } + if (retcode < 0) { + ldout(cct, 1) << "failed to read mdlog history: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + *cursor = store->period_history->lookup(state.oldest_realm_epoch); + if (!*cursor) { + return set_cr_error(cursor->get_error()); + } + + ldout(cct, 10) << "read mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + return set_cr_done(); + } + return 0; + } +}; + +/// write the given cursor to the mdlog history +class WriteHistoryCR : public RGWCoroutine { + RGWRados *store; + Cursor cursor; + RGWObjVersionTracker *objv; + RGWMetadataLogHistory state; + public: + WriteHistoryCR(RGWRados *store, const Cursor& cursor, + RGWObjVersionTracker *objv) + : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv) + {} + + int operate() { + reenter(this) { + state.oldest_period_id = cursor.get_period().get_id(); + state.oldest_realm_epoch = cursor.get_epoch(); + + yield { + rgw_raw_obj obj{store->svc.zone->get_zone_params().log_pool, + RGWMetadataLogHistory::oid}; + + using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>; + call(new WriteCR(store->get_async_rados(), store->svc.sysobj, obj, state, objv)); + } + if (retcode < 0) { + ldout(cct, 1) << "failed to write mdlog history: " + << cpp_strerror(retcode) << dendl; + return set_cr_error(retcode); + } + + ldout(cct, 10) << "wrote mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + return set_cr_done(); + } + return 0; + } +}; + +/// update the mdlog history to reflect trimmed logs +class TrimHistoryCR : public RGWCoroutine { + RGWRados *store; + const Cursor cursor; //< cursor to trimmed period + RGWObjVersionTracker *objv; //< to prevent racing updates + Cursor next; //< target cursor for oldest log period + Cursor existing; //< existing cursor read from disk + + public: + TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv) + : RGWCoroutine(store->ctx()), + store(store), cursor(cursor), objv(objv), next(cursor) + { + next.next(); // advance past cursor + } + + int operate() { + reenter(this) { + // read an existing history, and write the new history if it's newer + yield call(new ReadHistoryCR(store, &existing, objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + // reject older trims with ECANCELED + if (cursor.get_epoch() < existing.get_epoch()) { + ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch() + << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl; + return set_cr_error(-ECANCELED); + } + // overwrite with updated history + yield call(new WriteHistoryCR(store, next, objv)); + if (retcode < 0) { + return set_cr_error(retcode); + } + return set_cr_done(); + } + return 0; + } +}; + +// traverse all the way back to the beginning of the period history, and +// return a cursor to the first period in a fully attached history +Cursor find_oldest_period(RGWRados *store) +{ + auto cct = store->ctx(); + auto cursor = store->period_history->get_current(); + + while (cursor) { + // advance to the period's predecessor + if (!cursor.has_prev()) { + auto& predecessor = cursor.get_period().get_predecessor(); + if (predecessor.empty()) { + // this is the first period, so our logs must start here + ldout(cct, 10) << "find_oldest_period returning first " + "period " << cursor.get_period().get_id() << dendl; + return cursor; + } + // pull the predecessor and add it to our history + RGWPeriod period; + int r = store->period_puller->pull(predecessor, period); + if (r < 0) { + return cursor; + } + auto prev = store->period_history->insert(std::move(period)); + if (!prev) { + return prev; + } + ldout(cct, 20) << "find_oldest_period advancing to " + "predecessor period " << predecessor << dendl; + ceph_assert(cursor.has_prev()); + } + cursor.prev(); + } + ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl; + return cursor; +} + +} // anonymous namespace + +Cursor RGWMetadataManager::init_oldest_log_period() +{ + // read the mdlog history + RGWMetadataLogHistory state; + RGWObjVersionTracker objv; + int ret = read_history(store, &state, &objv); + + if (ret == -ENOENT) { + // initialize the mdlog history and write it + ldout(cct, 10) << "initializing mdlog history" << dendl; + auto cursor = find_oldest_period(store); + if (!cursor) { + return cursor; + } + + // write the initial history + state.oldest_realm_epoch = cursor.get_epoch(); + state.oldest_period_id = cursor.get_period().get_id(); + + constexpr bool exclusive = true; // don't overwrite + int ret = write_history(store, state, &objv, exclusive); + if (ret < 0 && ret != -EEXIST) { + ldout(cct, 1) << "failed to write mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + return cursor; + } else if (ret < 0) { + ldout(cct, 1) << "failed to read mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + + // if it's already in the history, return it + auto cursor = store->period_history->lookup(state.oldest_realm_epoch); + if (cursor) { + return cursor; + } else { + cursor = find_oldest_period(store); + state.oldest_realm_epoch = cursor.get_epoch(); + state.oldest_period_id = cursor.get_period().get_id(); + ldout(cct, 10) << "rewriting mdlog history" << dendl; + ret = write_history(store, state, &objv); + if (ret < 0 && ret != -ECANCELED) { + ldout(cct, 1) << "failed to write mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + return cursor; + } + + // pull the oldest period by id + RGWPeriod period; + ret = store->period_puller->pull(state.oldest_period_id, period); + if (ret < 0) { + ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id + << " for mdlog history: " << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + // verify its realm_epoch + if (period.get_realm_epoch() != state.oldest_realm_epoch) { + ldout(cct, 1) << "inconsistent mdlog history: read period id=" + << period.get_id() << " with realm_epoch=" << period.get_realm_epoch() + << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl; + return Cursor{-EINVAL}; + } + // attach the period to our history + return store->period_history->attach(std::move(period)); +} + +Cursor RGWMetadataManager::read_oldest_log_period() const +{ + RGWMetadataLogHistory state; + int ret = read_history(store, &state, nullptr); + if (ret < 0) { + ldout(store->ctx(), 1) << "failed to read mdlog history: " + << cpp_strerror(ret) << dendl; + return Cursor{ret}; + } + + ldout(store->ctx(), 10) << "read mdlog history with oldest period id=" + << state.oldest_period_id << " realm_epoch=" + << state.oldest_realm_epoch << dendl; + + return store->period_history->lookup(state.oldest_realm_epoch); +} + +RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period, + RGWObjVersionTracker *objv) const +{ + return new ReadHistoryCR(store, period, objv); +} + +RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period, + RGWObjVersionTracker *objv) const +{ + return new TrimHistoryCR(store, period, objv); +} + +int RGWMetadataManager::init(const std::string& current_period) +{ + // open a log for the current period + current_log = get_log(current_period); + return 0; +} + +RGWMetadataLog* RGWMetadataManager::get_log(const std::string& period) +{ + // construct the period's log in place if it doesn't exist + auto insert = md_logs.emplace(std::piecewise_construct, + std::forward_as_tuple(period), + std::forward_as_tuple(cct, store, period)); + return &insert.first->second; +} + +int RGWMetadataManager::register_handler(RGWMetadataHandler *handler) +{ + string type = handler->get_type(); + + if (handlers.find(type) != handlers.end()) + return -EINVAL; + + handlers[type] = handler; + + return 0; +} + +RGWMetadataHandler *RGWMetadataManager::get_handler(const string& type) +{ + map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type); + if (iter == handlers.end()) + return NULL; + + return iter->second; +} + +void RGWMetadataManager::parse_metadata_key(const string& metadata_key, string& type, string& entry) +{ + auto pos = metadata_key.find(':'); + if (pos == string::npos) { + type = metadata_key; + } else { + type = metadata_key.substr(0, pos); + entry = metadata_key.substr(pos + 1); + } +} + +int RGWMetadataManager::find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry) +{ + string type; + + parse_metadata_key(metadata_key, type, entry); + + if (type.empty()) { + *handler = &md_top_handler; + return 0; + } + + map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type); + if (iter == handlers.end()) + return -ENOENT; + + *handler = iter->second; + + return 0; + +} + +int RGWMetadataManager::get(string& metadata_key, Formatter *f) +{ + RGWMetadataHandler *handler; + string entry; + int ret = find_handler(metadata_key, &handler, entry); + if (ret < 0) { + return ret; + } + + RGWMetadataObject *obj; + + ret = handler->get(store, entry, &obj); + if (ret < 0) { + return ret; + } + + f->open_object_section("metadata_info"); + encode_json("key", metadata_key, f); + encode_json("ver", obj->get_version(), f); + real_time mtime = obj->get_mtime(); + if (!real_clock::is_zero(mtime)) { + utime_t ut(mtime); + encode_json("mtime", ut, f); + } + encode_json("data", *obj, f); + f->close_section(); + + delete obj; + + return 0; +} + +int RGWMetadataManager::put(string& metadata_key, bufferlist& bl, + RGWMetadataHandler::sync_type_t sync_type, + obj_version *existing_version) +{ + RGWMetadataHandler *handler; + string entry; + + int ret = find_handler(metadata_key, &handler, entry); + if (ret < 0) { + return ret; + } + + JSONParser parser; + if (!parser.parse(bl.c_str(), bl.length())) { + return -EINVAL; + } + + RGWObjVersionTracker objv_tracker; + + obj_version *objv = &objv_tracker.write_version; + + utime_t mtime; + + try { + JSONDecoder::decode_json("key", metadata_key, &parser); + JSONDecoder::decode_json("ver", *objv, &parser); + JSONDecoder::decode_json("mtime", mtime, &parser); + } catch (JSONDecoder::err& e) { + return -EINVAL; + } + + JSONObj *jo = parser.find_obj("data"); + if (!jo) { + return -EINVAL; + } + + ret = handler->put(store, entry, objv_tracker, mtime.to_real_time(), jo, sync_type); + if (existing_version) { + *existing_version = objv_tracker.read_version; + } + return ret; +} + +int RGWMetadataManager::prepare_mutate(RGWRados *store, + rgw_pool& pool, const string& oid, + const real_time& mtime, + RGWObjVersionTracker *objv_tracker, + RGWMetadataHandler::sync_type_t sync_mode) +{ + bufferlist bl; + real_time orig_mtime; + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + int ret = rgw_get_system_obj(store, obj_ctx, pool, oid, + bl, objv_tracker, &orig_mtime, + nullptr, nullptr); + if (ret < 0 && ret != -ENOENT) { + return ret; + } + if (ret != -ENOENT && + !RGWMetadataHandler::check_versions(objv_tracker->read_version, orig_mtime, + objv_tracker->write_version, mtime, sync_mode)) { + return STATUS_NO_APPLY; + } + + if (objv_tracker->write_version.tag.empty()) { + if (objv_tracker->read_version.tag.empty()) { + objv_tracker->generate_new_write_ver(store->ctx()); + } else { + objv_tracker->write_version = objv_tracker->read_version; + objv_tracker->write_version.ver++; + } + } + return 0; +} + +int RGWMetadataManager::remove(string& metadata_key) +{ + RGWMetadataHandler *handler; + string entry; + + int ret = find_handler(metadata_key, &handler, entry); + if (ret < 0) { + return ret; + } + + RGWMetadataObject *obj; + ret = handler->get(store, entry, &obj); + if (ret < 0) { + return ret; + } + RGWObjVersionTracker objv_tracker; + objv_tracker.read_version = obj->get_version(); + delete obj; + + return handler->remove(store, entry, objv_tracker); +} + +int RGWMetadataManager::lock_exclusive(string& metadata_key, timespan duration, string& owner_id) { + RGWMetadataHandler *handler; + string entry; + string zone_id; + + int ret = find_handler(metadata_key, &handler, entry); + if (ret < 0) + return ret; + + rgw_pool pool; + string oid; + + handler->get_pool_and_oid(store, entry, pool, oid); + + return store->lock_exclusive(pool, oid, duration, zone_id, owner_id); +} + +int RGWMetadataManager::unlock(string& metadata_key, string& owner_id) { + librados::IoCtx io_ctx; + RGWMetadataHandler *handler; + string entry; + string zone_id; + + int ret = find_handler(metadata_key, &handler, entry); + if (ret < 0) + return ret; + + rgw_pool pool; + string oid; + + handler->get_pool_and_oid(store, entry, pool, oid); + + return store->unlock(pool, oid, zone_id, owner_id); +} + +struct list_keys_handle { + void *handle; + RGWMetadataHandler *handler; +}; + +int RGWMetadataManager::list_keys_init(const string& section, void **handle) +{ + return list_keys_init(section, string(), handle); +} + +int RGWMetadataManager::list_keys_init(const string& section, + const string& marker, void **handle) +{ + string entry; + RGWMetadataHandler *handler; + + int ret; + + ret = find_handler(section, &handler, entry); + if (ret < 0) { + return -ENOENT; + } + + list_keys_handle *h = new list_keys_handle; + h->handler = handler; + ret = handler->list_keys_init(store, marker, &h->handle); + if (ret < 0) { + delete h; + return ret; + } + + *handle = (void *)h; + + return 0; +} + +int RGWMetadataManager::list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) +{ + list_keys_handle *h = static_cast<list_keys_handle *>(handle); + + RGWMetadataHandler *handler = h->handler; + + return handler->list_keys_next(h->handle, max, keys, truncated); +} + +void RGWMetadataManager::list_keys_complete(void *handle) +{ + list_keys_handle *h = static_cast<list_keys_handle *>(handle); + + RGWMetadataHandler *handler = h->handler; + + handler->list_keys_complete(h->handle); + delete h; +} + +string RGWMetadataManager::get_marker(void *handle) +{ + list_keys_handle *h = static_cast<list_keys_handle *>(handle); + + return h->handler->get_marker(h->handle); +} + +void RGWMetadataManager::dump_log_entry(cls_log_entry& entry, Formatter *f) +{ + f->open_object_section("entry"); + f->dump_string("id", entry.id); + f->dump_string("section", entry.section); + f->dump_string("name", entry.name); + entry.timestamp.gmtime_nsec(f->dump_stream("timestamp")); + + try { + RGWMetadataLogData log_data; + auto iter = entry.data.cbegin(); + decode(log_data, iter); + + encode_json("data", log_data, f); + } catch (buffer::error& err) { + lderr(cct) << "failed to decode log entry: " << entry.section << ":" << entry.name<< " ts=" << entry.timestamp << dendl; + } + f->close_section(); +} + +void RGWMetadataManager::get_sections(list<string>& sections) +{ + for (map<string, RGWMetadataHandler *>::iterator iter = handlers.begin(); iter != handlers.end(); ++iter) { + sections.push_back(iter->first); + } +} + +int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, const string& key, + RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker, + RGWMDLogStatus op_type) +{ + section = handler->get_type(); + + /* if write version has not been set, and there's a read version, set it so that we can + * log it + */ + if (objv_tracker) { + if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) { + objv_tracker->write_version = objv_tracker->read_version; + objv_tracker->write_version.ver++; + } + log_data.read_version = objv_tracker->read_version; + log_data.write_version = objv_tracker->write_version; + } + + log_data.status = op_type; + + bufferlist logbl; + encode(log_data, logbl); + + ceph_assert(current_log); // must have called init() + int ret = current_log->add_entry(handler, section, key, logbl); + if (ret < 0) + return ret; + + return 0; +} + +int RGWMetadataManager::post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data, + RGWObjVersionTracker *objv_tracker, int ret) +{ + if (ret >= 0) + log_data.status = MDLOG_STATUS_COMPLETE; + else + log_data.status = MDLOG_STATUS_ABORT; + + bufferlist logbl; + encode(log_data, logbl); + + ceph_assert(current_log); // must have called init() + int r = current_log->add_entry(handler, section, key, logbl); + if (ret < 0) + return ret; + + if (r < 0) + return r; + + return 0; +} + +string RGWMetadataManager::heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv) +{ + char buf[objv.tag.size() + 32]; + snprintf(buf, sizeof(buf), "%s:%lld", objv.tag.c_str(), (long long)objv.ver); + return string(".meta:") + handler->get_type() + ":" + key + ":" + buf; +} + +int RGWMetadataManager::store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl, + RGWObjVersionTracker *objv_tracker, real_time mtime, + map<string, bufferlist> *pattrs) +{ + if (!objv_tracker) { + return -EINVAL; + } + + rgw_pool heap_pool(store->svc.zone->get_zone_params().metadata_heap); + + if (heap_pool.empty()) { + return 0; + } + + RGWObjVersionTracker otracker; + otracker.write_version = objv_tracker->write_version; + string oid = heap_oid(handler, key, objv_tracker->write_version); + int ret = rgw_put_system_obj(store, heap_pool, oid, + bl, false, &otracker, mtime, pattrs); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: rgw_put_system_obj() oid=" << oid << " returned ret=" << ret << dendl; + return ret; + } + + return 0; +} + +int RGWMetadataManager::remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker) +{ + if (!objv_tracker) { + return -EINVAL; + } + + rgw_pool heap_pool(store->svc.zone->get_zone_params().metadata_heap); + + if (heap_pool.empty()) { + return 0; + } + + string oid = heap_oid(handler, key, objv_tracker->write_version); + rgw_raw_obj obj(heap_pool, oid); + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + int ret = sysobj.wop().remove(); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: sysobj.wop().remove() oid=" << oid << " returned ret=" << ret << dendl; + return ret; + } + + return 0; +} + +int RGWMetadataManager::put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive, + RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs) +{ + string section; + RGWMetadataLogData log_data; + int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE); + if (ret < 0) + return ret; + + string oid; + rgw_pool pool; + + handler->get_pool_and_oid(store, key, pool, oid); + + ret = store_in_heap(handler, key, bl, objv_tracker, mtime, pattrs); + if (ret < 0) { + ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": store_in_heap() key=" << key << " returned ret=" << ret << dendl; + goto done; + } + + ret = rgw_put_system_obj(store, pool, oid, bl, exclusive, + objv_tracker, mtime, pattrs); + + if (ret < 0) { + int r = remove_from_heap(handler, key, objv_tracker); + if (r < 0) { + ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": remove_from_heap() key=" << key << " returned ret=" << r << dendl; + } + } +done: + /* cascading ret into post_modify() */ + + ret = post_modify(handler, section, key, log_data, objv_tracker, ret); + if (ret < 0) + return ret; + + return 0; +} + +int RGWMetadataManager::remove_entry(RGWMetadataHandler *handler, + const string& key, + RGWObjVersionTracker *objv_tracker) +{ + string section; + RGWMetadataLogData log_data; + int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_REMOVE); + if (ret < 0) { + return ret; + } + + string oid; + rgw_pool pool; + + handler->get_pool_and_oid(store, key, pool, oid); + + rgw_raw_obj obj(pool, oid); + + auto obj_ctx = store->svc.sysobj->init_obj_ctx(); + auto sysobj = obj_ctx.get_obj(obj); + ret = sysobj.wop() + .set_objv_tracker(objv_tracker) + .remove(); + /* cascading ret into post_modify() */ + + ret = post_modify(handler, section, key, log_data, objv_tracker, ret); + if (ret < 0) { + return ret; + } + + return 0; +} + +int RGWMetadataManager::get_log_shard_id(const string& section, + const string& key, int *shard_id) +{ + RGWMetadataHandler *handler = get_handler(section); + if (!handler) { + return -EINVAL; + } + string hash_key; + handler->get_hash_key(section, key, hash_key); + *shard_id = store->key_to_shard_id(hash_key, cct->_conf->rgw_md_log_max_shards); + return 0; +} |