diff options
Diffstat (limited to '')
-rw-r--r-- | src/rgw/rgw_realm_reloader.cc | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/src/rgw/rgw_realm_reloader.cc b/src/rgw/rgw_realm_reloader.cc new file mode 100644 index 000000000..182cf1639 --- /dev/null +++ b/src/rgw/rgw_realm_reloader.cc @@ -0,0 +1,188 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab ft=cpp + +#include "rgw_realm_reloader.h" + +#include "rgw_auth_registry.h" +#include "rgw_bucket.h" +#include "rgw_log.h" +#include "rgw_rest.h" +#include "rgw_user.h" +#include "rgw_process_env.h" +#include "rgw_sal.h" +#include "rgw_sal_rados.h" + +#include "services/svc_zone.h" + +#include "common/errno.h" + +#define dout_subsys ceph_subsys_rgw + +#undef dout_prefix +#define dout_prefix (*_dout << "rgw realm reloader: ") + + +// safe callbacks from SafeTimer are unneccessary. reload() can take a long +// time, so we don't want to hold the mutex and block handle_notify() for the +// duration +static constexpr bool USE_SAFE_TIMER_CALLBACKS = false; + + +RGWRealmReloader::RGWRealmReloader(RGWProcessEnv& env, + const rgw::auth::ImplicitTenants& implicit_tenants, + std::map<std::string, std::string>& service_map_meta, + Pauser* frontends) + : env(env), + implicit_tenants(implicit_tenants), + service_map_meta(service_map_meta), + frontends(frontends), + timer(env.driver->ctx(), mutex, USE_SAFE_TIMER_CALLBACKS), + mutex(ceph::make_mutex("RGWRealmReloader")), + reload_scheduled(nullptr) +{ + timer.init(); +} + +RGWRealmReloader::~RGWRealmReloader() +{ + std::lock_guard lock{mutex}; + timer.shutdown(); +} + +class RGWRealmReloader::C_Reload : public Context { + RGWRealmReloader* reloader; + public: + explicit C_Reload(RGWRealmReloader* reloader) : reloader(reloader) {} + void finish(int r) override { reloader->reload(); } +}; + +void RGWRealmReloader::handle_notify(RGWRealmNotify type, + bufferlist::const_iterator& p) +{ + if (!env.driver) { + /* we're in the middle of reload */ + return; + } + + CephContext *const cct = env.driver->ctx(); + + std::lock_guard lock{mutex}; + if (reload_scheduled) { + ldout(cct, 4) << "Notification on realm, reconfiguration " + "already scheduled" << dendl; + return; + } + + reload_scheduled = new C_Reload(this); + cond.notify_one(); // wake reload() if it blocked on a bad configuration + + // schedule reload() without delay + timer.add_event_after(0, reload_scheduled); + + ldout(cct, 4) << "Notification on realm, reconfiguration scheduled" << dendl; +} + +void RGWRealmReloader::reload() +{ + CephContext *const cct = env.driver->ctx(); + const DoutPrefix dp(cct, dout_subsys, "rgw realm reloader: "); + ldpp_dout(&dp, 1) << "Pausing frontends for realm update..." << dendl; + + frontends->pause(); + + ldpp_dout(&dp, 1) << "Frontends paused" << dendl; + + // TODO: make RGWRados responsible for rgw_log_usage lifetime + rgw_log_usage_finalize(); + + // destroy the existing driver + DriverManager::close_storage(env.driver); + env.driver = nullptr; + + ldpp_dout(&dp, 1) << "driver closed" << dendl; + { + // allow a new notify to reschedule us. it's important that we do this + // before we start loading the new realm, or we could miss some updates + std::lock_guard lock{mutex}; + reload_scheduled = nullptr; + } + + + while (!env.driver) { + // recreate and initialize a new driver + DriverManager::Config cfg; + cfg.store_name = "rados"; + cfg.filter_name = "none"; + env.driver = + DriverManager::get_storage(&dp, cct, + cfg, + cct->_conf->rgw_enable_gc_threads, + cct->_conf->rgw_enable_lc_threads, + cct->_conf->rgw_enable_quota_threads, + cct->_conf->rgw_run_sync_thread, + cct->_conf.get_val<bool>("rgw_dynamic_resharding"), + cct->_conf->rgw_cache_enabled); + + ldpp_dout(&dp, 1) << "Creating new driver" << dendl; + + rgw::sal::Driver* store_cleanup = nullptr; + { + std::unique_lock lock{mutex}; + + // failure to recreate RGWRados is not a recoverable error, but we + // don't want to assert or abort the entire cluster. instead, just + // sleep until we get another notification, and retry until we get + // a working configuration + if (env.driver == nullptr) { + ldpp_dout(&dp, -1) << "Failed to reinitialize RGWRados after a realm " + "configuration update. Waiting for a new update." << dendl; + + // sleep until another event is scheduled + cond.wait(lock, [this] { return reload_scheduled; }); + ldout(cct, 1) << "Woke up with a new configuration, retrying " + "RGWRados initialization." << dendl; + } + + if (reload_scheduled) { + // cancel the event; we'll handle it now + timer.cancel_event(reload_scheduled); + reload_scheduled = nullptr; + + // if we successfully created a driver, clean it up outside of the lock, + // then continue to loop and recreate another + std::swap(env.driver, store_cleanup); + } + } + + if (store_cleanup) { + ldpp_dout(&dp, 4) << "Got another notification, restarting RGWRados " + "initialization." << dendl; + + DriverManager::close_storage(store_cleanup); + } + } + + int r = env.driver->register_to_service_map(&dp, "rgw", service_map_meta); + if (r < 0) { + ldpp_dout(&dp, -1) << "ERROR: failed to register to service map: " << cpp_strerror(-r) << dendl; + + /* ignore error */ + } + + ldpp_dout(&dp, 1) << "Finishing initialization of new driver" << dendl; + // finish initializing the new driver + ldpp_dout(&dp, 1) << " - REST subsystem init" << dendl; + rgw_rest_init(cct, env.driver->get_zone()->get_zonegroup()); + ldpp_dout(&dp, 1) << " - usage subsystem init" << dendl; + rgw_log_usage_init(cct, env.driver); + + /* Initialize the registry of auth strategies which will coordinate + * the dynamic reconfiguration. */ + env.auth_registry = rgw::auth::StrategyRegistry::create( + cct, implicit_tenants, env.driver); + env.lua.manager = env.driver->get_lua_manager(); + + ldpp_dout(&dp, 1) << "Resuming frontends with new realm configuration." << dendl; + + frontends->resume(env.driver); +} |