From 17d6a993fc17d533460c5f40f3908c708e057c18 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 23 May 2024 18:45:17 +0200 Subject: Merging upstream version 18.2.3. Signed-off-by: Daniel Baumann --- src/rgw/driver/rados/rgw_bucket.cc | 2 +- src/rgw/driver/rados/rgw_cr_rados.cc | 2 +- src/rgw/driver/rados/rgw_d3n_datacache.cc | 2 +- src/rgw/driver/rados/rgw_datalog.cc | 3 +- src/rgw/driver/rados/rgw_object_expirer_core.cc | 2 +- src/rgw/driver/rados/rgw_pubsub_push.cc | 169 +++++++------------- src/rgw/driver/rados/rgw_putobj_processor.cc | 24 ++- src/rgw/driver/rados/rgw_putobj_processor.h | 14 +- src/rgw/driver/rados/rgw_rados.cc | 196 +++++++++++++++--------- src/rgw/driver/rados/rgw_rados.h | 50 +++--- src/rgw/driver/rados/rgw_reshard.cc | 8 +- src/rgw/driver/rados/rgw_reshard.h | 4 +- src/rgw/driver/rados/rgw_sal_rados.cc | 40 +++-- src/rgw/driver/rados/rgw_sal_rados.h | 16 +- src/rgw/driver/rados/rgw_sync_module_aws.cc | 2 +- src/rgw/driver/rados/rgw_tools.cc | 3 +- src/rgw/driver/rados/rgw_zone.cc | 4 +- 17 files changed, 289 insertions(+), 252 deletions(-) (limited to 'src/rgw/driver/rados') diff --git a/src/rgw/driver/rados/rgw_bucket.cc b/src/rgw/driver/rados/rgw_bucket.cc index 32cd1ccf9..d2b4a6b34 100644 --- a/src/rgw/driver/rados/rgw_bucket.cc +++ b/src/rgw/driver/rados/rgw_bucket.cc @@ -158,7 +158,7 @@ int rgw_remove_object(const DoutPrefixProvider *dpp, rgw::sal::Driver* driver, r std::unique_ptr object = bucket->get_object(key); - return object->delete_object(dpp, null_yield); + return object->delete_object(dpp, null_yield, rgw::sal::FLAG_LOG_OP); } static void set_err_msg(std::string *sink, std::string msg) diff --git a/src/rgw/driver/rados/rgw_cr_rados.cc b/src/rgw/driver/rados/rgw_cr_rados.cc index d8e0ecba6..996a3758f 100644 --- a/src/rgw/driver/rados/rgw_cr_rados.cc +++ b/src/rgw/driver/rados/rgw_cr_rados.cc @@ -922,7 +922,7 @@ int RGWAsyncRemoveObj::_send_request(const DoutPrefixProvider *dpp) del_op->params.high_precision_time = true; del_op->params.zones_trace = &zones_trace; - ret = del_op->delete_obj(dpp, null_yield); + ret = del_op->delete_obj(dpp, null_yield, true); if (ret < 0) { ldpp_dout(dpp, 20) << __func__ << "(): delete_obj() obj=" << obj << " returned ret=" << ret << dendl; } diff --git a/src/rgw/driver/rados/rgw_d3n_datacache.cc b/src/rgw/driver/rados/rgw_d3n_datacache.cc index f1bf731ae..b744a16a3 100644 --- a/src/rgw/driver/rados/rgw_d3n_datacache.cc +++ b/src/rgw/driver/rados/rgw_d3n_datacache.cc @@ -105,7 +105,7 @@ void D3nDataCache::init(CephContext *_cct) { struct aioinit ainit{0}; ainit.aio_threads = cct->_conf.get_val("rgw_d3n_libaio_aio_threads"); ainit.aio_num = cct->_conf.get_val("rgw_d3n_libaio_aio_num"); - ainit.aio_idle_time = 10; + ainit.aio_idle_time = 5; aio_init(&ainit); #endif } diff --git a/src/rgw/driver/rados/rgw_datalog.cc b/src/rgw/driver/rados/rgw_datalog.cc index 7ca37abf6..14994a615 100644 --- a/src/rgw/driver/rados/rgw_datalog.cc +++ b/src/rgw/driver/rados/rgw_datalog.cc @@ -720,7 +720,8 @@ int RGWDataChangesLog::add_entry(const DoutPrefixProvider *dpp, ldpp_dout(dpp, 20) << "RGWDataChangesLog::add_entry() sending update with now=" << now << " cur_expiration=" << expiration << dendl; auto be = bes->head(); - ret = be->push(dpp, index, now, change.key, std::move(bl), y); + // TODO: pass y once we fix the deadlock from https://tracker.ceph.com/issues/63373 + ret = be->push(dpp, index, now, change.key, std::move(bl), null_yield); now = real_clock::now(); diff --git a/src/rgw/driver/rados/rgw_object_expirer_core.cc b/src/rgw/driver/rados/rgw_object_expirer_core.cc index ec1bf3fb6..d6beeeb06 100644 --- a/src/rgw/driver/rados/rgw_object_expirer_core.cc +++ b/src/rgw/driver/rados/rgw_object_expirer_core.cc @@ -219,7 +219,7 @@ int RGWObjectExpirer::garbage_single_object(const DoutPrefixProvider *dpp, objex std::unique_ptr obj = bucket->get_object(key); obj->set_atomic(); - ret = obj->delete_object(dpp, null_yield); + ret = obj->delete_object(dpp, null_yield, rgw::sal::FLAG_LOG_OP); return ret; } diff --git a/src/rgw/driver/rados/rgw_pubsub_push.cc b/src/rgw/driver/rados/rgw_pubsub_push.cc index bdb24ce9a..05dc9e65d 100644 --- a/src/rgw/driver/rados/rgw_pubsub_push.cc +++ b/src/rgw/driver/rados/rgw_pubsub_push.cc @@ -115,6 +115,55 @@ public: } }; +namespace { +// this allows waiting untill "finish()" is called from a different thread +// waiting could be blocking the waiting thread or yielding, depending +// with compilation flag support and whether the optional_yield is set +class Waiter { + using Signature = void(boost::system::error_code); + using Completion = ceph::async::Completion; + using CompletionInit = boost::asio::async_completion; + std::unique_ptr completion = nullptr; + int ret; + + bool done = false; + mutable std::mutex lock; + mutable std::condition_variable cond; + +public: + int wait(optional_yield y) { + std::unique_lock l{lock}; + if (done) { + return ret; + } + if (y) { + boost::system::error_code ec; + auto&& token = y.get_yield_context()[ec]; + CompletionInit init(token); + completion = Completion::create(y.get_io_context().get_executor(), + std::move(init.completion_handler)); + l.unlock(); + init.result.get(); + return -ec.value(); + } + cond.wait(l, [this]{return (done==true);}); + return ret; + } + + void finish(int r) { + std::unique_lock l{lock}; + ret = r; + done = true; + if (completion) { + boost::system::error_code ec(-ret, boost::system::system_category()); + Completion::post(std::move(completion), ec); + } else { + cond.notify_all(); + } + } +}; +} // namespace + #ifdef WITH_RADOSGW_AMQP_ENDPOINT class RGWPubSubAMQPEndpoint : public RGWPubSubEndpoint { private: @@ -187,71 +236,17 @@ public: } } - // this allows waiting untill "finish()" is called from a different thread - // waiting could be blocking the waiting thread or yielding, depending - // with compilation flag support and whether the optional_yield is set - class Waiter { - using Signature = void(boost::system::error_code); - using Completion = ceph::async::Completion; - std::unique_ptr completion = nullptr; - int ret; - - mutable std::atomic done = false; - mutable std::mutex lock; - mutable std::condition_variable cond; - - template - auto async_wait(ExecutionContext& ctx, CompletionToken&& token) { - boost::asio::async_completion init(token); - auto& handler = init.completion_handler; - { - std::unique_lock l{lock}; - completion = Completion::create(ctx.get_executor(), std::move(handler)); - } - return init.result.get(); - } - - public: - int wait(optional_yield y) { - if (done) { - return ret; - } - if (y) { - auto& io_ctx = y.get_io_context(); - auto& yield_ctx = y.get_yield_context(); - boost::system::error_code ec; - async_wait(io_ctx, yield_ctx[ec]); - return -ec.value(); - } - std::unique_lock l(lock); - cond.wait(l, [this]{return (done==true);}); - return ret; - } - - void finish(int r) { - std::unique_lock l{lock}; - ret = r; - done = true; - if (completion) { - boost::system::error_code ec(-ret, boost::system::system_category()); - Completion::post(std::move(completion), ec); - } else { - cond.notify_all(); - } - } - }; - int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_event& event, optional_yield y) override { if (ack_level == ack_level_t::None) { return amqp::publish(conn_id, topic, json_format_pubsub_event(event)); } else { // TODO: currently broker and routable are the same - this will require different flags but the same mechanism - // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine - auto w = std::unique_ptr(new Waiter); + auto w = std::make_unique(); const auto rc = amqp::publish_with_confirm(conn_id, topic, json_format_pubsub_event(event), - std::bind(&Waiter::finish, w.get(), std::placeholders::_1)); + [wp = w.get()](int r) { wp->finish(r);} + ); if (rc < 0) { // failed to publish, does not wait for reply return rc; @@ -314,70 +309,16 @@ public: } } - // this allows waiting untill "finish()" is called from a different thread - // waiting could be blocking the waiting thread or yielding, depending - // with compilation flag support and whether the optional_yield is set - class Waiter { - using Signature = void(boost::system::error_code); - using Completion = ceph::async::Completion; - std::unique_ptr completion = nullptr; - int ret; - - mutable std::atomic done = false; - mutable std::mutex lock; - mutable std::condition_variable cond; - - template - auto async_wait(ExecutionContext& ctx, CompletionToken&& token) { - boost::asio::async_completion init(token); - auto& handler = init.completion_handler; - { - std::unique_lock l{lock}; - completion = Completion::create(ctx.get_executor(), std::move(handler)); - } - return init.result.get(); - } - - public: - int wait(optional_yield y) { - if (done) { - return ret; - } - if (y) { - auto& io_ctx = y.get_io_context(); - auto& yield_ctx = y.get_yield_context(); - boost::system::error_code ec; - async_wait(io_ctx, yield_ctx[ec]); - return -ec.value(); - } - std::unique_lock l(lock); - cond.wait(l, [this]{return (done==true);}); - return ret; - } - - void finish(int r) { - std::unique_lock l{lock}; - ret = r; - done = true; - if (completion) { - boost::system::error_code ec(-ret, boost::system::system_category()); - Completion::post(std::move(completion), ec); - } else { - cond.notify_all(); - } - } - }; - int send_to_completion_async(CephContext* cct, const rgw_pubsub_s3_event& event, optional_yield y) override { if (ack_level == ack_level_t::None) { return kafka::publish(conn_name, topic, json_format_pubsub_event(event)); } else { - // note: dynamic allocation of Waiter is needed when this is invoked from a beast coroutine - auto w = std::unique_ptr(new Waiter); + auto w = std::make_unique(); const auto rc = kafka::publish_with_confirm(conn_name, topic, json_format_pubsub_event(event), - std::bind(&Waiter::finish, w.get(), std::placeholders::_1)); + [wp = w.get()](int r) { wp->finish(r); } + ); if (rc < 0) { // failed to publish, does not wait for reply return rc; diff --git a/src/rgw/driver/rados/rgw_putobj_processor.cc b/src/rgw/driver/rados/rgw_putobj_processor.cc index e453db5a9..dc3b5c9ee 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.cc +++ b/src/rgw/driver/rados/rgw_putobj_processor.cc @@ -124,6 +124,11 @@ void RadosWriter::add_write_hint(librados::ObjectWriteOperation& op) { op.set_alloc_hint2(0, 0, alloc_hint_flags); } +void RadosWriter::set_head_obj(const rgw_obj& head) +{ + head_obj = head; +} + int RadosWriter::set_stripe_obj(const rgw_raw_obj& raw_obj) { stripe_obj = store->svc.rados->obj(raw_obj); @@ -339,7 +344,8 @@ int AtomicObjectProcessor::complete(size_t accounted_size, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, - bool *pcanceled, optional_yield y) + bool *pcanceled, optional_yield y, + uint32_t flags) { int r = writer.drain(); if (r < 0) { @@ -376,7 +382,8 @@ int AtomicObjectProcessor::complete(size_t accounted_size, read_cloudtier_info_from_attrs(attrs, obj_op.meta.category, manifest); - r = obj_op.write_meta(dpp, actual_size, accounted_size, attrs, y); + r = obj_op.write_meta(dpp, actual_size, accounted_size, attrs, y, + flags & rgw::sal::FLAG_LOG_OP); if (r < 0) { if (r == -ETIMEDOUT) { // The head object write may eventually succeed, clear the set of objects for deletion. if it @@ -451,6 +458,9 @@ int MultipartObjectProcessor::prepare_head() RGWSI_Tier_RADOS::raw_obj_to_obj(head_obj.bucket, stripe_obj, &head_obj); head_obj.index_hash_source = target_obj.key.name; + // point part uploads at the part head instead of the final multipart head + writer.set_head_obj(head_obj); + r = writer.set_stripe_obj(stripe_obj); if (r < 0) { return r; @@ -480,7 +490,8 @@ int MultipartObjectProcessor::complete(size_t accounted_size, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, - bool *pcanceled, optional_yield y) + bool *pcanceled, optional_yield y, + uint32_t flags) { int r = writer.drain(); if (r < 0) { @@ -504,7 +515,8 @@ int MultipartObjectProcessor::complete(size_t accounted_size, obj_op.meta.zones_trace = zones_trace; obj_op.meta.modify_tail = true; - r = obj_op.write_meta(dpp, actual_size, accounted_size, attrs, y); + r = obj_op.write_meta(dpp, actual_size, accounted_size, attrs, y, + flags & rgw::sal::FLAG_LOG_OP); if (r < 0) return r; @@ -684,7 +696,7 @@ int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, c ceph::real_time set_mtime, rgw::sal::Attrs& attrs, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const string *user_data, rgw_zone_set *zones_trace, bool *pcanceled, - optional_yield y) + optional_yield y, uint32_t flags) { int r = writer.drain(); if (r < 0) @@ -742,7 +754,7 @@ int AppendObjectProcessor::complete(size_t accounted_size, const string &etag, c } r = obj_op.write_meta(dpp, actual_size + cur_size, accounted_size + *cur_accounted_size, - attrs, y); + attrs, y, flags & rgw::sal::FLAG_LOG_OP); if (r < 0) { return r; } diff --git a/src/rgw/driver/rados/rgw_putobj_processor.h b/src/rgw/driver/rados/rgw_putobj_processor.h index fa9200f32..b1946c049 100644 --- a/src/rgw/driver/rados/rgw_putobj_processor.h +++ b/src/rgw/driver/rados/rgw_putobj_processor.h @@ -69,7 +69,7 @@ class RadosWriter : public rgw::sal::DataProcessor { RGWRados *const store; const RGWBucketInfo& bucket_info; RGWObjectCtx& obj_ctx; - const rgw_obj head_obj; + rgw_obj head_obj; RGWSI_RADOS::Obj stripe_obj; // current stripe object RawObjSet written; // set of written objects for deletion const DoutPrefixProvider *dpp; @@ -88,6 +88,9 @@ class RadosWriter : public rgw::sal::DataProcessor { // add alloc hint to osd void add_write_hint(librados::ObjectWriteOperation& op); + // change the head object + void set_head_obj(const rgw_obj& head); + // change the current stripe object int set_stripe_obj(const rgw_raw_obj& obj); @@ -191,7 +194,8 @@ class AtomicObjectProcessor : public ManifestObjectProcessor { const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; }; @@ -238,7 +242,8 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; }; @@ -274,7 +279,8 @@ class MultipartObjectProcessor : public ManifestObjectProcessor { std::map& attrs, ceph::real_time delete_at, const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; }; } // namespace putobj diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 10018d4a6..566b7d6c2 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -900,9 +900,11 @@ void RGWIndexCompletionManager::process() continue; } - // This null_yield can stay, for now, since we're in our own thread - add_datalog_entry(&dpp, store->svc.datalog_rados, bucket_info, - bs.shard_id, null_yield); + if (c->log_op) { + // This null_yield can stay, for now, since we're in our own thread + add_datalog_entry(&dpp, store->svc.datalog_rados, bucket_info, + bs.shard_id, null_yield); + } } } } @@ -3014,7 +3016,8 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, uint64_t size, uint64_t accounted_size, map& attrs, bool assume_noent, bool modify_tail, - void *_index_op, optional_yield y) + void *_index_op, optional_yield y, + bool log_op) { RGWRados::Bucket::UpdateIndex *index_op = static_cast(_index_op); RGWRados *store = target->get_store(); @@ -3180,7 +3183,7 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, if (!index_op->is_prepared()) { tracepoint(rgw_rados, prepare_enter, req_id.c_str()); - r = index_op->prepare(dpp, CLS_RGW_OP_ADD, &state->write_tag, y); + r = index_op->prepare(dpp, CLS_RGW_OP_ADD, &state->write_tag, y, log_op); tracepoint(rgw_rados, prepare_exit, req_id.c_str()); if (r < 0) return r; @@ -3214,7 +3217,7 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, meta.set_mtime, etag, content_type, storage_class, &acl_bl, meta.category, meta.remove_objs, y, - meta.user_data, meta.appendable); + meta.user_data, meta.appendable, log_op); tracepoint(rgw_rados, complete_exit, req_id.c_str()); if (r < 0) goto done_cancel; @@ -3228,7 +3231,7 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, state = NULL; if (versioned_op && meta.olh_epoch) { - r = store->set_olh(dpp, target->get_ctx(), target->get_bucket_info(), obj, false, NULL, *meta.olh_epoch, real_time(), false, y, meta.zones_trace); + r = store->set_olh(dpp, target->get_ctx(), target->get_bucket_info(), obj, false, NULL, *meta.olh_epoch, real_time(), false, y, meta.zones_trace, log_op); if (r < 0) { return r; } @@ -3259,7 +3262,7 @@ int RGWRados::Object::Write::_do_write_meta(const DoutPrefixProvider *dpp, return 0; done_cancel: - int ret = index_op->cancel(dpp, meta.remove_objs, y); + int ret = index_op->cancel(dpp, meta.remove_objs, y, log_op); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl; } @@ -3304,7 +3307,7 @@ done_cancel: } int RGWRados::Object::Write::write_meta(const DoutPrefixProvider *dpp, uint64_t size, uint64_t accounted_size, - map& attrs, optional_yield y) + map& attrs, optional_yield y, bool log_op) { RGWBucketInfo& bucket_info = target->get_bucket_info(); @@ -3315,13 +3318,13 @@ int RGWRados::Object::Write::write_meta(const DoutPrefixProvider *dpp, uint64_t bool assume_noent = (meta.if_match == NULL && meta.if_nomatch == NULL); int r; if (assume_noent) { - r = _do_write_meta(dpp, size, accounted_size, attrs, assume_noent, meta.modify_tail, (void *)&index_op, y); + r = _do_write_meta(dpp, size, accounted_size, attrs, assume_noent, meta.modify_tail, (void *)&index_op, y, log_op); if (r == -EEXIST) { assume_noent = false; } } if (!assume_noent) { - r = _do_write_meta(dpp, size, accounted_size, attrs, assume_noent, meta.modify_tail, (void *)&index_op, y); + r = _do_write_meta(dpp, size, accounted_size, attrs, assume_noent, meta.modify_tail, (void *)&index_op, y, log_op); } return r; } @@ -4198,7 +4201,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx, bool canceled = false; ret = processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, nullptr, nullptr, nullptr, - zones_trace, &canceled, null_yield); + zones_trace, &canceled, null_yield, rgw::sal::FLAG_LOG_OP); if (ret < 0) { goto set_err_state; } @@ -4409,6 +4412,12 @@ int RGWRados::copy_obj(RGWObjectCtx& obj_ctx, if (lh != attrs.end()) src_attrs[RGW_ATTR_OBJECT_LEGAL_HOLD] = lh->second; + if (dest_bucket_info.flags & BUCKET_VERSIONS_SUSPENDED) { + src_attrs.erase(RGW_ATTR_OLH_ID_TAG); + src_attrs.erase(RGW_ATTR_OLH_INFO); + src_attrs.erase(RGW_ATTR_OLH_VER); + } + set_copy_attrs(src_attrs, attrs, attrs_mod); attrs.erase(RGW_ATTR_ID_TAG); attrs.erase(RGW_ATTR_PG_VER); @@ -4652,7 +4661,8 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, real_time delete_at, string *petag, const DoutPrefixProvider *dpp, - optional_yield y) + optional_yield y, + bool log_op) { string tag; append_rand_alpha(cct, tag, tag, 32); @@ -4718,7 +4728,8 @@ int RGWRados::copy_obj_data(RGWObjectCtx& obj_ctx, } return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - nullptr, nullptr, nullptr, nullptr, nullptr, y); + nullptr, nullptr, nullptr, nullptr, nullptr, y, + log_op ? rgw::sal::FLAG_LOG_OP : 0); } int RGWRados::transition_obj(RGWObjectCtx& obj_ctx, @@ -4728,7 +4739,8 @@ int RGWRados::transition_obj(RGWObjectCtx& obj_ctx, const real_time& mtime, uint64_t olh_epoch, const DoutPrefixProvider *dpp, - optional_yield y) + optional_yield y, + bool log_op) { rgw::sal::Attrs attrs; real_time read_mtime; @@ -4769,7 +4781,8 @@ int RGWRados::transition_obj(RGWObjectCtx& obj_ctx, real_time(), nullptr /* petag */, dpp, - y); + y, + log_op); if (ret < 0) { return ret; } @@ -5340,7 +5353,7 @@ struct tombstone_entry { * obj: name of the object to delete * Returns: 0 on success, -ERR# otherwise. */ -int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvider *dpp) +int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvider *dpp, bool log_op) { RGWRados *store = target->get_store(); const rgw_obj& src_obj = target->get_obj(); @@ -5354,6 +5367,8 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi bool explicit_marker_version = (!params.marker_version_id.empty()); if (params.versioning_status & BUCKET_VERSIONED || explicit_marker_version) { + bool add_log = log_op && store->svc.zone->need_to_log_data(); + if (instance.empty() || explicit_marker_version) { rgw_obj marker = obj; marker.key.instance.clear(); @@ -5382,7 +5397,9 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi meta.mtime = params.mtime; } - int r = store->set_olh(dpp, target->get_ctx(), target->get_bucket_info(), marker, true, &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, y, params.zones_trace); + int r = store->set_olh(dpp, target->get_ctx(), target->get_bucket_info(), marker, true, + &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, + y, params.zones_trace, add_log); if (r < 0) { return r; } @@ -5394,7 +5411,8 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi return r; } result.delete_marker = dirent.is_delete_marker(); - r = store->unlink_obj_instance(dpp, target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, y, params.zones_trace); + r = store->unlink_obj_instance(dpp, target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, + y, params.zones_trace, add_log); if (r < 0) { return r; } @@ -5408,8 +5426,10 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi return r; } - add_datalog_entry(dpp, store->svc.datalog_rados, - target->get_bucket_info(), bs->shard_id, y); + if (add_log) { + add_datalog_entry(dpp, store->svc.datalog_rados, + target->get_bucket_info(), bs->shard_id, y); + } return 0; } @@ -5488,7 +5508,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi index_op.set_zones_trace(params.zones_trace); index_op.set_bilog_flags(params.bilog_flags); - r = index_op.prepare(dpp, CLS_RGW_OP_DEL, &state->write_tag, y); + r = index_op.prepare(dpp, CLS_RGW_OP_DEL, &state->write_tag, y, log_op); if (r < 0) return r; @@ -5507,7 +5527,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi tombstone_entry entry{*state}; obj_tombstone_cache->add(obj, entry); } - r = index_op.complete_del(dpp, poolid, ioctx.get_last_version(), state->mtime, params.remove_objs, y); + r = index_op.complete_del(dpp, poolid, ioctx.get_last_version(), state->mtime, params.remove_objs, y, log_op); int ret = target->complete_atomic_modification(dpp); if (ret < 0) { @@ -5515,7 +5535,7 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi } /* other than that, no need to propagate error */ } else { - int ret = index_op.cancel(dpp, params.remove_objs, y); + int ret = index_op.cancel(dpp, params.remove_objs, y, log_op); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: index_op.cancel() returned ret=" << ret << dendl; } @@ -5541,7 +5561,8 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, int versioning_status, // versioning flags defined in enum RGWBucketFlags uint16_t bilog_flags, const real_time& expiration_time, - rgw_zone_set *zones_trace) + rgw_zone_set *zones_trace, + bool log_op) { RGWRados::Object del_target(this, bucket_info, obj_ctx, obj); RGWRados::Object::Delete del_op(&del_target); @@ -5552,7 +5573,7 @@ int RGWRados::delete_obj(const DoutPrefixProvider *dpp, del_op.params.expiration_time = expiration_time; del_op.params.zones_trace = zones_trace; - return del_op.delete_obj(null_yield, dpp); + return del_op.delete_obj(null_yield, dpp, log_op ? rgw::sal::FLAG_LOG_OP : 0); } int RGWRados::delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj) @@ -6308,6 +6329,10 @@ int RGWRados::Object::Read::prepare(optional_yield y, const DoutPrefixProvider * } } + if (params.lastmod) { + *params.lastmod = astate->mtime; + } + /* Convert all times go GMT to make them compatible */ if (conds.mod_ptr || conds.unmod_ptr) { obj_time_weight src_weight; @@ -6357,8 +6382,6 @@ int RGWRados::Object::Read::prepare(optional_yield y, const DoutPrefixProvider * if (params.obj_size) *params.obj_size = astate->size; - if (params.lastmod) - *params.lastmod = astate->mtime; return 0; } @@ -6440,7 +6463,8 @@ int RGWRados::Bucket::UpdateIndex::guard_reshard(const DoutPrefixProvider *dpp, return 0; } -int RGWRados::Bucket::UpdateIndex::prepare(const DoutPrefixProvider *dpp, RGWModifyOp op, const string *write_tag, optional_yield y) +int RGWRados::Bucket::UpdateIndex::prepare(const DoutPrefixProvider *dpp, RGWModifyOp op, const string *write_tag, + optional_yield y, bool log_op) { if (blind) { return 0; @@ -6455,8 +6479,10 @@ int RGWRados::Bucket::UpdateIndex::prepare(const DoutPrefixProvider *dpp, RGWMod } } + bool add_log = log_op && store->svc.zone->need_to_log_data(); + int r = guard_reshard(dpp, obj, nullptr, [&](BucketShard *bs) -> int { - return store->cls_obj_prepare_op(dpp, *bs, op, optag, obj, bilog_flags, y, zones_trace); + return store->cls_obj_prepare_op(dpp, *bs, op, optag, obj, bilog_flags, y, zones_trace, add_log); }); if (r < 0) { @@ -6476,7 +6502,8 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64 list *remove_objs, optional_yield y, const string *user_data, - bool appendable) + bool appendable, + bool log_op) { if (blind) { return 0; @@ -6512,10 +6539,13 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64 ent.meta.content_type = content_type; ent.meta.appendable = appendable; - ret = store->cls_obj_complete_add(*bs, obj, optag, poolid, epoch, ent, category, remove_objs, bilog_flags, zones_trace); + bool add_log = log_op && store->svc.zone->need_to_log_data(); - add_datalog_entry(dpp, store->svc.datalog_rados, - target->bucket_info, bs->shard_id, y); + ret = store->cls_obj_complete_add(*bs, obj, optag, poolid, epoch, ent, category, remove_objs, bilog_flags, zones_trace, add_log); + if (add_log) { + add_datalog_entry(dpp, store->svc.datalog_rados, + target->bucket_info, bs->shard_id, y); + } return ret; } @@ -6524,7 +6554,8 @@ int RGWRados::Bucket::UpdateIndex::complete_del(const DoutPrefixProvider *dpp, int64_t poolid, uint64_t epoch, real_time& removed_mtime, list *remove_objs, - optional_yield y) + optional_yield y, + bool log_op) { if (blind) { return 0; @@ -6538,10 +6569,14 @@ int RGWRados::Bucket::UpdateIndex::complete_del(const DoutPrefixProvider *dpp, return ret; } - ret = store->cls_obj_complete_del(*bs, optag, poolid, epoch, obj, removed_mtime, remove_objs, bilog_flags, zones_trace); + bool add_log = log_op && store->svc.zone->need_to_log_data(); - add_datalog_entry(dpp, store->svc.datalog_rados, - target->bucket_info, bs->shard_id, y); + ret = store->cls_obj_complete_del(*bs, optag, poolid, epoch, obj, removed_mtime, remove_objs, bilog_flags, zones_trace, add_log); + + if (add_log) { + add_datalog_entry(dpp, store->svc.datalog_rados, + target->bucket_info, bs->shard_id, y); + } return ret; } @@ -6549,7 +6584,8 @@ int RGWRados::Bucket::UpdateIndex::complete_del(const DoutPrefixProvider *dpp, int RGWRados::Bucket::UpdateIndex::cancel(const DoutPrefixProvider *dpp, list *remove_objs, - optional_yield y) + optional_yield y, + bool log_op) { if (blind) { return 0; @@ -6557,17 +6593,21 @@ int RGWRados::Bucket::UpdateIndex::cancel(const DoutPrefixProvider *dpp, RGWRados *store = target->get_store(); BucketShard *bs; + bool add_log = log_op && store->svc.zone->need_to_log_data(); + int ret = guard_reshard(dpp, obj, &bs, [&](BucketShard *bs) -> int { - return store->cls_obj_complete_cancel(*bs, optag, obj, remove_objs, bilog_flags, zones_trace); + return store->cls_obj_complete_cancel(*bs, optag, obj, remove_objs, bilog_flags, zones_trace, add_log); }); - /* - * need to update data log anyhow, so that whoever follows needs to update its internal markers - * for following the specific bucket shard log. Otherwise they end up staying behind, and users - * have no way to tell that they're all caught up - */ - add_datalog_entry(dpp, store->svc.datalog_rados, - target->bucket_info, bs->shard_id, y); + if (add_log) { + /* + * need to update data log anyhow, so that whoever follows needs to update its internal markers + * for following the specific bucket shard log. Otherwise they end up staying behind, and users + * have no way to tell that they're all caught up + */ + add_datalog_entry(dpp, store->svc.datalog_rados, + target->bucket_info, bs->shard_id, y); + } return ret; } @@ -7304,7 +7344,7 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo cls_rgw_bucket_link_olh(op, key, olh_state.olh_tag, delete_marker, op_tag, meta, olh_epoch, unmod_since, high_precision_time, - svc.zone->need_to_log_data(), zones_trace); + log_data_change, zones_trace); return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, null_yield); }); if (r < 0) { @@ -7329,7 +7369,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, - uint64_t olh_epoch, rgw_zone_set *_zones_trace) + uint64_t olh_epoch, rgw_zone_set *_zones_trace, bool log_op) { rgw_rados_ref ref; int r = get_obj_head_ref(dpp, bucket_info, obj_instance, &ref); @@ -7353,7 +7393,7 @@ int RGWRados::bucket_index_unlink_instance(const DoutPrefixProvider *dpp, op.assert_exists(); // bucket index shard must exist cls_rgw_guard_bucket_resharding(op, -ERR_BUSY_RESHARDING); cls_rgw_bucket_unlink_instance(op, key, op_tag, - olh_tag, olh_epoch, svc.zone->need_to_log_data(), zones_trace); + olh_tag, olh_epoch, log_op, zones_trace); return rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, null_yield); }); if (r < 0) { @@ -7556,7 +7596,8 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, bufferlist& olh_tag, std::map >& log, uint64_t *plast_ver, - rgw_zone_set* zones_trace) + rgw_zone_set* zones_trace, + bool log_op) { if (log.empty()) { return 0; @@ -7669,7 +7710,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, liter != remove_instances.end(); ++liter) { cls_rgw_obj_key& key = *liter; rgw_obj obj_instance(bucket, key); - int ret = delete_obj(dpp, obj_ctx, bucket_info, obj_instance, 0, RGW_BILOG_FLAG_VERSIONED_OP, ceph::real_time(), zones_trace); + int ret = delete_obj(dpp, obj_ctx, bucket_info, obj_instance, 0, RGW_BILOG_FLAG_VERSIONED_OP, ceph::real_time(), zones_trace, log_op); if (ret < 0 && ret != -ENOENT) { ldpp_dout(dpp, 0) << "ERROR: delete_obj() returned " << ret << " obj_instance=" << obj_instance << dendl; return ret; @@ -7773,7 +7814,7 @@ int RGWRados::clear_olh(const DoutPrefixProvider *dpp, /* * read olh log and apply it */ -int RGWRados::update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace) +int RGWRados::update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace, bool log_op) { map > log; bool is_truncated; @@ -7784,7 +7825,7 @@ int RGWRados::update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, R if (ret < 0) { return ret; } - ret = apply_olh_log(dpp, obj_ctx, *state, bucket_info, obj, state->olh_tag, log, &ver_marker, zones_trace); + ret = apply_olh_log(dpp, obj_ctx, *state, bucket_info, obj, state->olh_tag, log, &ver_marker, zones_trace, log_op); if (ret < 0) { return ret; } @@ -7853,7 +7894,7 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. - int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj); + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, zones_trace, log_data_change); if (r < 0 && r != -ECANCELED) { ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; } @@ -7867,7 +7908,7 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, return -EIO; } - ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj); + ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, zones_trace, log_data_change); if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */ ret = 0; } @@ -7880,7 +7921,7 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, } int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, - uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace) + uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace, bool log_op) { string op_tag; @@ -7913,7 +7954,7 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o string olh_tag(state->olh_tag.c_str(), state->olh_tag.length()); - ret = bucket_index_unlink_instance(dpp, bucket_info, target_obj, op_tag, olh_tag, olh_epoch, zones_trace); + ret = bucket_index_unlink_instance(dpp, bucket_info, target_obj, op_tag, olh_tag, olh_epoch, zones_trace, log_op); if (ret < 0) { olh_cancel_modification(dpp, bucket_info, *state, olh_obj, op_tag, y); ldpp_dout(dpp, 20) << "bucket_index_unlink_instance() target_obj=" << target_obj << " returned " << ret << dendl; @@ -7923,7 +7964,7 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o // it's possible that the pending xattr from this op prevented the olh // object from being cleaned by another thread that was deleting the last // existing version. We invoke a best-effort update_olh here to handle this case. - int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, zones_trace); + int r = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, zones_trace, log_op); if (r < 0 && r != -ECANCELED) { ldpp_dout(dpp, 20) << "update_olh() target_obj=" << olh_obj << " returned " << r << dendl; } @@ -7937,7 +7978,7 @@ int RGWRados::unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& o return -EIO; } - ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, zones_trace); + ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, zones_trace, log_op); if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */ return 0; } @@ -8781,7 +8822,8 @@ bool RGWRados::process_expire_objects(const DoutPrefixProvider *dpp) } int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs, RGWModifyOp op, string& tag, - rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *_zones_trace) + rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *_zones_trace, + bool log_op) { const bool bitx = cct->_conf->rgw_bucket_index_transaction_instrumentation; ldout_bitx(bitx, dpp, 10) << "ENTERING " << __func__ << ": bucket-shard=" << bs << " obj=" << obj << " tag=" << tag << " op=" << op << dendl_bitx; @@ -8798,7 +8840,7 @@ int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs, cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance); cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING); - cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), svc.zone->need_to_log_data(), bilog_flags, zones_trace); + cls_rgw_bucket_prepare_op(o, op, tag, key, obj.key.get_loc(), log_op, bilog_flags, zones_trace); int ret = bs.bucket_obj.operate(dpp, &o, y); ldout_bitx(bitx, dpp, 10) << "EXITING " << __func__ << ": ret=" << ret << dendl_bitx; return ret; @@ -8807,12 +8849,14 @@ int RGWRados::cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs, int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent, RGWObjCategory category, - list *remove_objs, uint16_t bilog_flags, rgw_zone_set *_zones_trace) + list *remove_objs, uint16_t bilog_flags, + rgw_zone_set *_zones_trace, bool log_op) { const bool bitx = cct->_conf->rgw_bucket_index_transaction_instrumentation; ldout_bitx_c(bitx, cct, 10) << "ENTERING " << __func__ << ": bucket-shard=" << bs << " obj=" << obj << " tag=" << tag << " op=" << op << - ", remove_objs=" << (remove_objs ? *remove_objs : std::list()) << dendl_bitx; + ", remove_objs=" << (remove_objs ? *remove_objs : std::list()) << + ", log_op=" << log_op << dendl_bitx; ldout_bitx_c(bitx, cct, 25) << "BACKTRACE: " << __func__ << ": " << ClibBackTrace(0) << dendl_bitx; ObjectWriteOperation o; @@ -8834,10 +8878,10 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify cls_rgw_obj_key key(ent.key.name, ent.key.instance); cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING); cls_rgw_bucket_complete_op(o, op, tag, ver, key, dir_meta, remove_objs, - svc.zone->need_to_log_data(), bilog_flags, &zones_trace); + log_op, bilog_flags, &zones_trace); complete_op_data *arg; index_completion_manager->create_completion(obj, op, tag, ver, key, dir_meta, remove_objs, - svc.zone->need_to_log_data(), bilog_flags, &zones_trace, &arg); + log_op, bilog_flags, &zones_trace, &arg); librados::AioCompletion *completion = arg->rados_completion; int ret = bs.bucket_obj.aio_operate(arg->rados_completion, &o); completion->release(); /* can't reference arg here, as it might have already been released */ @@ -8849,9 +8893,12 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModify int RGWRados::cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent, RGWObjCategory category, - list *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace) + list *remove_objs, uint16_t bilog_flags, + rgw_zone_set *zones_trace, bool log_op) { - return cls_obj_complete_op(bs, obj, CLS_RGW_OP_ADD, tag, pool, epoch, ent, category, remove_objs, bilog_flags, zones_trace); + return cls_obj_complete_op(bs, obj, CLS_RGW_OP_ADD, tag, pool, epoch, + ent, category, remove_objs, bilog_flags, + zones_trace, log_op); } int RGWRados::cls_obj_complete_del(BucketShard& bs, string& tag, @@ -8860,26 +8907,27 @@ int RGWRados::cls_obj_complete_del(BucketShard& bs, string& tag, real_time& removed_mtime, list *remove_objs, uint16_t bilog_flags, - rgw_zone_set *zones_trace) + rgw_zone_set *zones_trace, + bool log_op) { rgw_bucket_dir_entry ent; ent.meta.mtime = removed_mtime; obj.key.get_index_key(&ent.key); return cls_obj_complete_op(bs, obj, CLS_RGW_OP_DEL, tag, pool, epoch, ent, RGWObjCategory::None, remove_objs, - bilog_flags, zones_trace); + bilog_flags, zones_trace, log_op); } int RGWRados::cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, list *remove_objs, - uint16_t bilog_flags, rgw_zone_set *zones_trace) + uint16_t bilog_flags, rgw_zone_set *zones_trace, bool log_op) { rgw_bucket_dir_entry ent; obj.key.get_index_key(&ent.key); return cls_obj_complete_op(bs, obj, CLS_RGW_OP_CANCEL, tag, -1 /* pool id */, 0, ent, RGWObjCategory::None, remove_objs, bilog_flags, - zones_trace); + zones_trace, log_op); } int RGWRados::cls_obj_set_bucket_tag_timeout(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, uint64_t timeout) @@ -9846,6 +9894,10 @@ int RGWRados::check_bucket_shards(const RGWBucketInfo& bucket_info, return 0; } + if (! is_layout_reshardable(bucket_info.layout)) { + return 0; + } + bool need_resharding = false; uint32_t num_source_shards = rgw::current_num_shards(bucket_info.layout); const uint32_t max_dynamic_shards = diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index 75a5e1b54..d77dd5c5f 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -812,9 +812,10 @@ public: uint64_t size, uint64_t accounted_size, std::map& attrs, bool modify_tail, bool assume_noent, - void *index_op, optional_yield y); + void *index_op, optional_yield y, + bool log_op = true); int write_meta(const DoutPrefixProvider *dpp, uint64_t size, uint64_t accounted_size, - std::map& attrs, optional_yield y); + std::map& attrs, optional_yield y, bool log_op = true); int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive); const req_state* get_req_state() { return nullptr; /* XXX dang Only used by LTTng, and it handles null anyway */ @@ -852,7 +853,7 @@ public: explicit Delete(RGWRados::Object *_target) : target(_target) {} - int delete_obj(optional_yield y, const DoutPrefixProvider *dpp); + int delete_obj(optional_yield y, const DoutPrefixProvider *dpp, bool log_op = true); }; struct Stat { @@ -957,7 +958,7 @@ public: zones_trace = _zones_trace; } - int prepare(const DoutPrefixProvider *dpp, RGWModifyOp, const std::string *write_tag, optional_yield y); + int prepare(const DoutPrefixProvider *dpp, RGWModifyOp, const std::string *write_tag, optional_yield y, bool log_op = true); int complete(const DoutPrefixProvider *dpp, int64_t poolid, uint64_t epoch, uint64_t size, uint64_t accounted_size, ceph::real_time& ut, const std::string& etag, const std::string& content_type, @@ -966,15 +967,18 @@ public: std::list *remove_objs, optional_yield y, const std::string *user_data = nullptr, - bool appendable = false); + bool appendable = false, + bool log_op = true); int complete_del(const DoutPrefixProvider *dpp, int64_t poolid, uint64_t epoch, ceph::real_time& removed_mtime, /* mtime of removed object */ std::list *remove_objs, - optional_yield y); + optional_yield y, + bool log_op = true); int cancel(const DoutPrefixProvider *dpp, std::list *remove_objs, - optional_yield y); + optional_yield y, + bool log_op = true); const std::string *get_optag() { return &optag; } @@ -1195,7 +1199,8 @@ public: ceph::real_time delete_at, std::string *petag, const DoutPrefixProvider *dpp, - optional_yield y); + optional_yield y, + bool log_op = true); int transition_obj(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, @@ -1204,7 +1209,8 @@ public: const real_time& mtime, uint64_t olh_epoch, const DoutPrefixProvider *dpp, - optional_yield y); + optional_yield y, + bool log_op = true); int check_bucket_empty(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, optional_yield y); @@ -1234,7 +1240,8 @@ public: int versioning_status, // versioning flags defined in enum RGWBucketFlags uint16_t bilog_flags = 0, const ceph::real_time& expiration_time = ceph::real_time(), - rgw_zone_set *zones_trace = nullptr); + rgw_zone_set *zones_trace = nullptr, + bool log_op = true); int delete_raw_obj(const DoutPrefixProvider *dpp, const rgw_raw_obj& obj); @@ -1319,7 +1326,8 @@ public: RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const std::string& op_tag, const std::string& olh_tag, - uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr); + uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr, + bool log_op = true); int bucket_index_read_olh_log(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker, @@ -1328,8 +1336,8 @@ public: int bucket_index_clear_olh(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, const std::string& olh_tag, const rgw_obj& obj_instance); int apply_olh_log(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, const rgw_obj& obj, bufferlist& obj_tag, std::map >& log, - uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr); - int update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr); + uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr, bool log_op = true); + int update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int clear_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, const rgw_obj& obj, @@ -1343,7 +1351,7 @@ public: int repair_olh(const DoutPrefixProvider *dpp, RGWObjState* state, const RGWBucketInfo& bucket_info, const rgw_obj& obj); int unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, - uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace = nullptr); + uint64_t olh_epoch, optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_op = true); void check_pending_olh_entries(const DoutPrefixProvider *dpp, std::map& pending_entries, std::map *rm_pending_entries); int remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, std::map& pending_attrs); @@ -1400,16 +1408,20 @@ public: std::map *pattrs, bool create_entry_point, const DoutPrefixProvider *dpp, optional_yield y); - int cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs, RGWModifyOp op, std::string& tag, rgw_obj& obj, uint16_t bilog_flags, optional_yield y, rgw_zone_set *zones_trace = nullptr); + int cls_obj_prepare_op(const DoutPrefixProvider *dpp, BucketShard& bs, RGWModifyOp op, std::string& tag, rgw_obj& obj, + uint16_t bilog_flags, optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, std::string& tag, int64_t pool, uint64_t epoch, - rgw_bucket_dir_entry& ent, RGWObjCategory category, std::list *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); + rgw_bucket_dir_entry& ent, RGWObjCategory category, std::list *remove_objs, + uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, std::string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent, - RGWObjCategory category, std::list *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); + RGWObjCategory category, std::list *remove_objs, uint16_t bilog_flags, + rgw_zone_set *zones_trace = nullptr, bool log_op = true); int cls_obj_complete_del(BucketShard& bs, std::string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj, - ceph::real_time& removed_mtime, std::list *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); + ceph::real_time& removed_mtime, std::list *remove_objs, + uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int cls_obj_complete_cancel(BucketShard& bs, std::string& tag, rgw_obj& obj, std::list *remove_objs, - uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr); + uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr, bool log_op = true); int cls_obj_set_bucket_tag_timeout(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, uint64_t timeout); using ent_map_t = diff --git a/src/rgw/driver/rados/rgw_reshard.cc b/src/rgw/driver/rados/rgw_reshard.cc index 2abf02908..25cb4df3c 100644 --- a/src/rgw/driver/rados/rgw_reshard.cc +++ b/src/rgw/driver/rados/rgw_reshard.cc @@ -994,11 +994,11 @@ int RGWBucketReshard::execute(int num_shards, return 0; } // execute -bool RGWBucketReshard::can_reshard(const RGWBucketInfo& bucket, - const RGWSI_Zone* zone_svc) +bool RGWBucketReshard::should_zone_reshard_now(const RGWBucketInfo& bucket, + const RGWSI_Zone* zone_svc) { return !zone_svc->need_to_log_data() || - bucket.layout.logs.size() < max_bilog_history; + bucket.layout.logs.size() < max_bilog_history; } @@ -1240,7 +1240,7 @@ int RGWReshard::process_entry(const cls_rgw_reshard_entry& entry, return 0; } - if (!RGWBucketReshard::can_reshard(bucket_info, store->svc()->zone)) { + if (!RGWBucketReshard::should_zone_reshard_now(bucket_info, store->svc()->zone)) { ldpp_dout(dpp, 1) << "Bucket " << bucket_info.bucket << " is not " "eligible for resharding until peer zones finish syncing one " "or more of its old log generations" << dendl; diff --git a/src/rgw/driver/rados/rgw_reshard.h b/src/rgw/driver/rados/rgw_reshard.h index 59819f3a5..7388c459e 100644 --- a/src/rgw/driver/rados/rgw_reshard.h +++ b/src/rgw/driver/rados/rgw_reshard.h @@ -175,8 +175,8 @@ public: // too large by refusing to reshard the bucket until the old logs get trimmed static constexpr size_t max_bilog_history = 4; - static bool can_reshard(const RGWBucketInfo& bucket, - const RGWSI_Zone* zone_svc); + static bool should_zone_reshard_now(const RGWBucketInfo& bucket, + const RGWSI_Zone* zone_svc); }; // RGWBucketReshard diff --git a/src/rgw/driver/rados/rgw_sal_rados.cc b/src/rgw/driver/rados/rgw_sal_rados.cc index 9acdb79d3..414f44bb8 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.cc +++ b/src/rgw/driver/rados/rgw_sal_rados.cc @@ -1972,9 +1972,11 @@ int RadosObject::transition(Bucket* bucket, const real_time& mtime, uint64_t olh_epoch, const DoutPrefixProvider* dpp, - optional_yield y) + optional_yield y, + uint32_t flags) { - return store->getRados()->transition_obj(*rados_ctx, bucket->get_info(), get_obj(), placement_rule, mtime, olh_epoch, dpp, y); + return store->getRados()->transition_obj(*rados_ctx, bucket->get_info(), get_obj(), placement_rule, + mtime, olh_epoch, dpp, y, flags & FLAG_LOG_OP); } int RadosObject::transition_to_cloud(Bucket* bucket, @@ -2253,7 +2255,7 @@ RadosObject::RadosDeleteOp::RadosDeleteOp(RadosObject *_source) : parent_op(&op_target) { } -int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y) +int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, optional_yield y, uint32_t flags) { parent_op.params.bucket_owner = params.bucket_owner.get_id(); parent_op.params.versioning_status = params.versioning_status; @@ -2270,7 +2272,7 @@ int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, option parent_op.params.abortmp = params.abortmp; parent_op.params.parts_accounted_size = params.parts_accounted_size; - int ret = parent_op.delete_obj(y, dpp); + int ret = parent_op.delete_obj(y, dpp, flags & FLAG_LOG_OP); if (ret < 0) return ret; @@ -2282,15 +2284,16 @@ int RadosObject::RadosDeleteOp::delete_obj(const DoutPrefixProvider* dpp, option int RadosObject::delete_object(const DoutPrefixProvider* dpp, optional_yield y, - bool prevent_versioning) + uint32_t flags) { RGWRados::Object del_target(store->getRados(), bucket->get_info(), *rados_ctx, get_obj()); RGWRados::Object::Delete del_op(&del_target); del_op.params.bucket_owner = bucket->get_info().owner; - del_op.params.versioning_status = prevent_versioning ? 0 : bucket->get_info().versioning_status(); + del_op.params.versioning_status = (flags & FLAG_PREVENT_VERSIONING) + ? 0 : bucket->get_info().versioning_status(); - return del_op.delete_obj(y, dpp); + return del_op.delete_obj(y, dpp, flags & FLAG_LOG_OP); } int RadosObject::delete_obj_aio(const DoutPrefixProvider* dpp, RGWObjState* astate, @@ -2459,7 +2462,7 @@ int RadosMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct) std::unique_ptr obj = bucket->get_object( rgw_obj_key(obj_part->oid, std::string(), RGW_OBJ_NS_MULTIPART)); obj->set_hash_source(mp_obj.get_key()); - ret = obj->delete_object(dpp, null_yield); + ret = obj->delete_object(dpp, null_yield, 0); if (ret < 0 && ret != -ENOENT) return ret; } else { @@ -2509,7 +2512,7 @@ int RadosMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct) del_op->params.parts_accounted_size = parts_accounted_size; // and also remove the metadata obj - ret = del_op->delete_obj(dpp, null_yield); + ret = del_op->delete_obj(dpp, null_yield, 0); if (ret < 0) { ldpp_dout(dpp, 20) << __func__ << ": del_op.delete_obj returned " << ret << dendl; @@ -2694,7 +2697,7 @@ int RadosMultipartUpload::complete(const DoutPrefixProvider *dpp, int marker = 0; uint64_t min_part_size = cct->_conf->rgw_multipart_min_part_size; auto etags_iter = part_etags.begin(); - rgw::sal::Attrs attrs = target_obj->get_attrs(); + rgw::sal::Attrs& attrs = target_obj->get_attrs(); do { ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated); @@ -3120,10 +3123,11 @@ int RadosAtomicWriter::complete(size_t accounted_size, const std::string& etag, const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) + optional_yield y, + uint32_t flags) { return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, y); + if_match, if_nomatch, user_data, zones_trace, canceled, y, flags); } int RadosAppendWriter::prepare(optional_yield y) @@ -3143,10 +3147,11 @@ int RadosAppendWriter::complete(size_t accounted_size, const std::string& etag, const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) + optional_yield y, + uint32_t flags) { return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, y); + if_match, if_nomatch, user_data, zones_trace, canceled, y, flags); } int RadosMultipartWriter::prepare(optional_yield y) @@ -3166,10 +3171,11 @@ int RadosMultipartWriter::complete(size_t accounted_size, const std::string& eta const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) + optional_yield y, + uint32_t flags) { return processor.complete(accounted_size, etag, mtime, set_mtime, attrs, delete_at, - if_match, if_nomatch, user_data, zones_trace, canceled, y); + if_match, if_nomatch, user_data, zones_trace, canceled, y, flags); } const std::string& RadosZoneGroup::get_endpoint() const @@ -3331,7 +3337,7 @@ const std::string_view RadosZone::get_tier_type() if (local_zone) return store->svc()->zone->get_zone().tier_type; - return rgw_zone.id; + return rgw_zone.tier_type; } RGWBucketSyncPolicyHandlerRef RadosZone::get_sync_policy_handler() diff --git a/src/rgw/driver/rados/rgw_sal_rados.h b/src/rgw/driver/rados/rgw_sal_rados.h index 4d2dc9709..299be16e6 100644 --- a/src/rgw/driver/rados/rgw_sal_rados.h +++ b/src/rgw/driver/rados/rgw_sal_rados.h @@ -370,7 +370,7 @@ class RadosObject : public StoreObject { public: RadosDeleteOp(RadosObject* _source); - virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int delete_obj(const DoutPrefixProvider* dpp, optional_yield y, uint32_t flags) override; }; RadosObject(RadosStore *_st, const rgw_obj_key& _k) @@ -402,7 +402,7 @@ class RadosObject : public StoreObject { rados_ctx->invalidate(get_obj()); } virtual int delete_object(const DoutPrefixProvider* dpp, - optional_yield y, bool prevent_versioning) override; + optional_yield y, uint32_t flags) override; virtual int delete_obj_aio(const DoutPrefixProvider* dpp, RGWObjState* astate, Completions* aio, bool keep_index_consistent, optional_yield y) override; virtual int copy_object(User* user, @@ -453,7 +453,8 @@ class RadosObject : public StoreObject { const real_time& mtime, uint64_t olh_epoch, const DoutPrefixProvider* dpp, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; virtual int transition_to_cloud(Bucket* bucket, rgw::sal::PlacementTier* tier, rgw_bucket_dir_entry& o, @@ -822,7 +823,8 @@ public: const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; }; class RadosAppendWriter : public StoreWriter { @@ -869,7 +871,8 @@ public: const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; }; class RadosMultipartWriter : public StoreWriter { @@ -914,7 +917,8 @@ public: const char *if_match, const char *if_nomatch, const std::string *user_data, rgw_zone_set *zones_trace, bool *canceled, - optional_yield y) override; + optional_yield y, + uint32_t flags) override; }; class RadosLuaManager : public StoreLuaManager { diff --git a/src/rgw/driver/rados/rgw_sync_module_aws.cc b/src/rgw/driver/rados/rgw_sync_module_aws.cc index cefcd9dd1..cdcd831e9 100644 --- a/src/rgw/driver/rados/rgw_sync_module_aws.cc +++ b/src/rgw/driver/rados/rgw_sync_module_aws.cc @@ -487,7 +487,7 @@ struct AWSSyncConfig { } bool do_find_profile(const rgw_bucket bucket, std::shared_ptr *result) { - const string& name = bucket.name; + const string& name = bucket.get_namespaced_name(); auto iter = explicit_profiles.upper_bound(name); if (iter == explicit_profiles.begin()) { return false; diff --git a/src/rgw/driver/rados/rgw_tools.cc b/src/rgw/driver/rados/rgw_tools.cc index 66651da5c..bc58c71ea 100644 --- a/src/rgw/driver/rados/rgw_tools.cc +++ b/src/rgw/driver/rados/rgw_tools.cc @@ -422,7 +422,8 @@ int RGWDataAccess::Object::put(bufferlist& data, attrs, delete_at, nullptr, nullptr, puser_data, - nullptr, nullptr, y); + nullptr, nullptr, y, + rgw::sal::FLAG_LOG_OP); } void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy) diff --git a/src/rgw/driver/rados/rgw_zone.cc b/src/rgw/driver/rados/rgw_zone.cc index ed09f24f6..5c3f55b3d 100644 --- a/src/rgw/driver/rados/rgw_zone.cc +++ b/src/rgw/driver/rados/rgw_zone.cc @@ -1005,10 +1005,12 @@ int create_zone(const DoutPrefixProvider* dpp, optional_yield y, } // add default placement with empty pool name + RGWZonePlacementInfo placement; rgw_pool pool; - auto& placement = info.placement_pools["default-placement"]; placement.storage_classes.set_storage_class( RGW_STORAGE_CLASS_STANDARD, &pool, nullptr); + // don't overwrite if it already exists + info.placement_pools.emplace("default-placement", std::move(placement)); // build a set of all pool names used by other zones std::set pools; -- cgit v1.2.3