summaryrefslogtreecommitdiffstats
path: root/src/rgw/rgw_realm_reloader.cc
blob: af599d2a90d75b3ec7741c0e0c484cb4d2ca95fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab ft=cpp

#include "rgw_realm_reloader.h"

#include "rgw_bucket.h"
#include "rgw_log.h"
#include "rgw_rest.h"
#include "rgw_user.h"
#include "rgw_sal.h"
#include "rgw_sal_rados.h"

#include "services/svc_zone.h"

#include "common/errno.h"

#define dout_subsys ceph_subsys_rgw

#undef dout_prefix
#define dout_prefix (*_dout << "rgw realm reloader: ")


// Safe callbacks from SafeTimer are unnecessary: reload() can take a long
// time, so we don't want to hold the mutex and block handle_notify() for the
// duration.
static constexpr bool USE_SAFE_TIMER_CALLBACKS = false;


// Construct the reloader around the caller's store pointer. 'store' is held
// by reference because reload() destroys and recreates the store, and the
// caller must observe the replacement pointer.
// The timer is created with safe callbacks disabled (see
// USE_SAFE_TIMER_CALLBACKS above) so a long-running reload() does not have
// to hold 'mutex' and block handle_notify() for its duration.
// NOTE(review): members are initialized in declaration order, not the order
// written here; 'timer' is handed 'mutex', which is listed after it. This is
// safe if SafeTimer only stores the reference, but confirm that 'mutex' is
// declared before 'timer' in the header.
RGWRealmReloader::RGWRealmReloader(rgw::sal::RGWRadosStore*& store, std::map<std::string, std::string>& service_map_meta,
                                   Pauser* frontends)
  : store(store),
    service_map_meta(service_map_meta),
    frontends(frontends),
    timer(store->ctx(), mutex, USE_SAFE_TIMER_CALLBACKS),
    mutex(ceph::make_mutex("RGWRealmReloader")),
    reload_scheduled(nullptr)
{
  // start the timer thread; events are added later by handle_notify()
  timer.init();
}

// Tear down the reloader: serialize with handle_notify() so no new reload
// event can be scheduled while the timer (and any pending C_Reload) is
// shut down.
RGWRealmReloader::~RGWRealmReloader()
{
  std::scoped_lock locker{mutex};
  timer.shutdown();
}

// Timer completion that forwards into RGWRealmReloader::reload(). The
// callback runs on the SafeTimer thread without holding the reloader mutex
// (safe callbacks are disabled), so reload() may block for a long time.
class RGWRealmReloader::C_Reload : public Context {
  RGWRealmReloader* parent;
 public:
  explicit C_Reload(RGWRealmReloader* r) : parent(r) {}
  // the completion code is irrelevant; always trigger the reload
  void finish(int) override { parent->reload(); }
};

// Handle a realm-change notification: schedule an asynchronous reload()
// unless one is already pending. The payload iterator 'p' is not consumed
// here; the notification itself is the trigger.
void RGWRealmReloader::handle_notify(RGWRealmNotify type,
                                     bufferlist::const_iterator& p)
{
  if (!store) {
    /* we're in the middle of reload */
    // NOTE(review): this read of 'store' is not under 'mutex', while
    // reload() clears/reassigns it — presumably a benign best-effort
    // check; confirm against reload()'s locking.
    return;
  }

  CephContext *const cct = store->ctx();

  std::lock_guard lock{mutex};
  // coalesce notifications: at most one C_Reload is ever outstanding
  if (reload_scheduled) {
    ldout(cct, 4) << "Notification on realm, reconfiguration "
        "already scheduled" << dendl;
    return;
  }

  reload_scheduled = new C_Reload(this);
  cond.notify_one(); // wake reload() if it blocked on a bad configuration

  // schedule reload() without delay
  timer.add_event_after(0, reload_scheduled);

  ldout(cct, 4) << "Notification on realm, reconfiguration scheduled" << dendl;
}

// Apply an updated realm configuration: pause the frontends, destroy the
// current store, recreate it from the new configuration, and resume.
// Runs on the SafeTimer thread (via C_Reload) with no locks held, since
// store initialization can block for a long time. If initialization fails,
// blocks until another notification arrives and retries.
void RGWRealmReloader::reload()
{
  CephContext *const cct = store->ctx();
  const DoutPrefix dp(cct, dout_subsys, "rgw realm reloader: ");
  ldpp_dout(&dp, 1) << "Pausing frontends for realm update..." << dendl;

  // stop accepting requests while the store is being swapped out
  frontends->pause();

  ldpp_dout(&dp, 1) << "Frontends paused" << dendl;

  // TODO: make RGWRados responsible for rgw_log_usage lifetime
  rgw_log_usage_finalize();

  // destroy the existing store
  RGWStoreManager::close_storage(store);
  store = nullptr;

  ldpp_dout(&dp, 1) << "Store closed" << dendl;
  {
    // allow a new notify to reschedule us. it's important that we do this
    // before we start loading the new realm, or we could miss some updates
    std::lock_guard lock{mutex};
    reload_scheduled = nullptr;
  }


  // loop until we hold a successfully-initialized store that was not
  // invalidated by another notification arriving mid-initialization
  while (!store) {
    // recreate and initialize a new store
    store =
      RGWStoreManager::get_storage(&dp, cct,
				   cct->_conf->rgw_enable_gc_threads,
				   cct->_conf->rgw_enable_lc_threads,
				   cct->_conf->rgw_enable_quota_threads,
				   cct->_conf->rgw_run_sync_thread,
				   cct->_conf.get_val<bool>("rgw_dynamic_resharding"),
				   cct->_conf->rgw_cache_enabled);

    ldpp_dout(&dp, 1) << "Creating new store" << dendl;

    // a store displaced by a newer notification; closed outside the lock
    rgw::sal::RGWRadosStore* store_cleanup = nullptr;
    {
      std::unique_lock lock{mutex};

      // failure to recreate RGWRados is not a recoverable error, but we
      // don't want to assert or abort the entire cluster.  instead, just
      // sleep until we get another notification, and retry until we get
      // a working configuration
      if (store == nullptr) {
        ldpp_dout(&dp, -1) << "Failed to reinitialize RGWRados after a realm "
            "configuration update. Waiting for a new update." << dendl;

        // sleep until another event is scheduled
	cond.wait(lock, [this] { return reload_scheduled; });
        ldout(cct, 1) << "Woke up with a new configuration, retrying "
            "RGWRados initialization." << dendl;
      }

      if (reload_scheduled) {
        // cancel the event; we'll handle it now
        timer.cancel_event(reload_scheduled);
        reload_scheduled = nullptr;

        // if we successfully created a store, clean it up outside of the lock,
        // then continue to loop and recreate another
        std::swap(store, store_cleanup);
      }
    }

    if (store_cleanup) {
      ldpp_dout(&dp, 4) << "Got another notification, restarting RGWRados "
          "initialization." << dendl;

      RGWStoreManager::close_storage(store_cleanup);
    }
  }

  // re-advertise this gateway in the cluster service map
  int r = store->getRados()->register_to_service_map("rgw", service_map_meta);
  if (r < 0) {
    ldpp_dout(&dp, -1) << "ERROR: failed to register to service map: " << cpp_strerror(-r) << dendl;

    /* ignore error */
  }

  ldpp_dout(&dp, 1) << "Finishing initialization of new store" << dendl;
  // finish initializing the new store
  ldpp_dout(&dp, 1) << " - REST subsystem init" << dendl;
  rgw_rest_init(cct, store->svc()->zone->get_zonegroup());
  ldpp_dout(&dp, 1) << " - usage subsystem init" << dendl;
  rgw_log_usage_init(cct, store->getRados());

  ldpp_dout(&dp, 1) << "Resuming frontends with new realm configuration." << dendl;

  // hand the new store to the frontends and start serving requests again
  frontends->resume(store);
}