summaryrefslogtreecommitdiffstats
path: root/src/mds/SnapServer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/mds/SnapServer.cc')
-rw-r--r--src/mds/SnapServer.cc510
1 files changed, 510 insertions, 0 deletions
diff --git a/src/mds/SnapServer.cc b/src/mds/SnapServer.cc
new file mode 100644
index 000000000..f16480035
--- /dev/null
+++ b/src/mds/SnapServer.cc
@@ -0,0 +1,510 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "SnapServer.h"
+#include "MDSRank.h"
+#include "osd/OSDMap.h"
+#include "osdc/Objecter.h"
+#include "mon/MonClient.h"
+
+#include "include/types.h"
+#include "messages/MMDSTableRequest.h"
+#include "messages/MRemoveSnaps.h"
+
+#include "msg/Messenger.h"
+
+#include "common/config.h"
+#include "include/ceph_assert.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_mds
+#undef dout_prefix
+#define dout_prefix *_dout << "mds." << rank << ".snap "
+
+
+void SnapServer::reset_state()
+{
+ last_snap = 1; /* snapid 1 reserved for initial root snaprealm */
+ snaps.clear();
+ need_to_purge.clear();
+ pending_update.clear();
+ pending_destroy.clear();
+ pending_noop.clear();
+
+ // find any removed snapshot in data pools
+ if (mds) { // only if I'm running in a live MDS
+ snapid_t first_free = 0;
+ mds->objecter->with_osdmap([&](const OSDMap& o) {
+ for (const auto p : mds->mdsmap->get_data_pools()) {
+ const pg_pool_t *pi = o.get_pg_pool(p);
+ if (!pi) {
+ // If pool isn't in OSDMap yet then can't have any snaps
+ // needing removal, skip.
+ continue;
+ }
+ if (pi->snap_seq > first_free) {
+ first_free = pi->snap_seq;
+ }
+ }
+ });
+ if (first_free > last_snap)
+ last_snap = first_free;
+ }
+ last_created = last_snap;
+ last_destroyed = last_snap;
+ snaprealm_v2_since = last_snap + 1;
+
+ MDSTableServer::reset_state();
+}
+
+
+// SERVER
+
+void SnapServer::_prepare(const bufferlist& bl, uint64_t reqid, mds_rank_t bymds, bufferlist& out)
+{
+ using ceph::decode;
+ using ceph::encode;
+ auto p = bl.cbegin();
+ __u32 op;
+ decode(op, p);
+
+ switch (op) {
+ case TABLE_OP_CREATE:
+ {
+ SnapInfo info;
+ decode(info.ino, p);
+ if (!p.end()) {
+ decode(info.name, p);
+ decode(info.stamp, p);
+ info.snapid = ++last_snap;
+ pending_update[version] = info;
+ dout(10) << "prepare v" << version << " create " << info << dendl;
+ } else {
+ pending_noop.insert(version);
+ dout(10) << "prepare v" << version << " noop" << dendl;
+ }
+
+ encode(last_snap, out);
+ }
+ break;
+
+ case TABLE_OP_DESTROY:
+ {
+ inodeno_t ino;
+ snapid_t snapid;
+ decode(ino, p); // not used, currently.
+ decode(snapid, p);
+
+ // bump last_snap... we use it as a version value on the snaprealm.
+ ++last_snap;
+
+ pending_destroy[version] = pair<snapid_t,snapid_t>(snapid, last_snap);
+ dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl;
+
+ encode(last_snap, out);
+ }
+ break;
+
+ case TABLE_OP_UPDATE:
+ {
+ SnapInfo info;
+ decode(info.ino, p);
+ decode(info.snapid, p);
+ decode(info.name, p);
+ decode(info.stamp, p);
+
+ pending_update[version] = info;
+ dout(10) << "prepare v" << version << " update " << info << dendl;
+ }
+ break;
+
+ default:
+ ceph_abort();
+ }
+ //dump();
+}
+
+void SnapServer::_get_reply_buffer(version_t tid, bufferlist *pbl) const
+{
+ using ceph::encode;
+ auto p = pending_update.find(tid);
+ if (p != pending_update.end()) {
+ if (pbl && !snaps.count(p->second.snapid)) // create
+ encode(p->second.snapid, *pbl);
+ return;
+ }
+ auto q = pending_destroy.find(tid);
+ if (q != pending_destroy.end()) {
+ if (pbl)
+ encode(q->second.second, *pbl);
+ return;
+ }
+ auto r = pending_noop.find(tid);
+ if (r != pending_noop.end()) {
+ if (pbl)
+ encode(last_snap, *pbl);
+ return;
+ }
+ assert (0 == "tid not found");
+}
+
+void SnapServer::_commit(version_t tid, cref_t<MMDSTableRequest> req)
+{
+ if (pending_update.count(tid)) {
+ SnapInfo &info = pending_update[tid];
+ string opname;
+ if (snaps.count(info.snapid)) {
+ opname = "update";
+ if (info.stamp == utime_t())
+ info.stamp = snaps[info.snapid].stamp;
+ } else {
+ opname = "create";
+ if (info.snapid > last_created)
+ last_created = info.snapid;
+ }
+ dout(7) << "commit " << tid << " " << opname << " " << info << dendl;
+ snaps[info.snapid] = info;
+ pending_update.erase(tid);
+ }
+
+ else if (pending_destroy.count(tid)) {
+ snapid_t sn = pending_destroy[tid].first;
+ snapid_t seq = pending_destroy[tid].second;
+ dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl;
+ snaps.erase(sn);
+ if (seq > last_destroyed)
+ last_destroyed = seq;
+
+ for (const auto p : mds->mdsmap->get_data_pools()) {
+ need_to_purge[p].insert(sn);
+ need_to_purge[p].insert(seq);
+ }
+
+ pending_destroy.erase(tid);
+ }
+ else if (pending_noop.count(tid)) {
+ dout(7) << "commit " << tid << " noop" << dendl;
+ pending_noop.erase(tid);
+ }
+ else
+ ceph_abort();
+
+ //dump();
+}
+
+void SnapServer::_rollback(version_t tid)
+{
+ if (pending_update.count(tid)) {
+ SnapInfo &info = pending_update[tid];
+ string opname;
+ if (snaps.count(info.snapid))
+ opname = "update";
+ else
+ opname = "create";
+ dout(7) << "rollback " << tid << " " << opname << " " << info << dendl;
+ pending_update.erase(tid);
+ }
+
+ else if (pending_destroy.count(tid)) {
+ dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl;
+ pending_destroy.erase(tid);
+ }
+
+ else if (pending_noop.count(tid)) {
+ dout(7) << "rollback " << tid << " noop" << dendl;
+ pending_noop.erase(tid);
+ }
+
+ else
+ ceph_abort();
+
+ //dump();
+}
+
+void SnapServer::_server_update(bufferlist& bl)
+{
+ using ceph::decode;
+ auto p = bl.cbegin();
+ map<int, vector<snapid_t> > purge;
+ decode(purge, p);
+
+ dout(7) << "_server_update purged " << purge << dendl;
+ for (map<int, vector<snapid_t> >::iterator p = purge.begin();
+ p != purge.end();
+ ++p) {
+ for (vector<snapid_t>::iterator q = p->second.begin();
+ q != p->second.end();
+ ++q)
+ need_to_purge[p->first].erase(*q);
+ if (need_to_purge[p->first].empty())
+ need_to_purge.erase(p->first);
+ }
+}
+
+bool SnapServer::_notify_prep(version_t tid)
+{
+ using ceph::encode;
+ bufferlist bl;
+ char type = 'F';
+ encode(type, bl);
+ encode(snaps, bl);
+ encode(pending_update, bl);
+ encode(pending_destroy, bl);
+ encode(last_created, bl);
+ encode(last_destroyed, bl);
+ ceph_assert(version == tid);
+
+ for (auto &p : active_clients) {
+ auto m = make_message<MMDSTableRequest>(table, TABLESERVER_OP_NOTIFY_PREP, 0, version);
+ m->bl = bl;
+ mds->send_message_mds(m, p);
+ }
+ return true;
+}
+
+void SnapServer::handle_query(const cref_t<MMDSTableRequest> &req)
+{
+ using ceph::encode;
+ using ceph::decode;
+ char op;
+ auto p = req->bl.cbegin();
+ decode(op, p);
+
+ auto reply = make_message<MMDSTableRequest>(table, TABLESERVER_OP_QUERY_REPLY, req->reqid, version);
+
+ switch (op) {
+ case 'F': // full
+ version_t have_version;
+ decode(have_version, p);
+ ceph_assert(have_version <= version);
+ if (have_version == version) {
+ char type = 'U';
+ encode(type, reply->bl);
+ } else {
+ char type = 'F';
+ encode(type, reply->bl);
+ encode(snaps, reply->bl);
+ encode(pending_update, reply->bl);
+ encode(pending_destroy, reply->bl);
+ encode(last_created, reply->bl);
+ encode(last_destroyed, reply->bl);
+ }
+ // FIXME: implement incremental change
+ break;
+ default:
+ ceph_abort();
+ };
+
+ mds->send_message(reply, req->get_connection());
+}
+
+void SnapServer::check_osd_map(bool force)
+{
+ if (!force && version == last_checked_osdmap) {
+ dout(10) << "check_osd_map - version unchanged" << dendl;
+ return;
+ }
+ dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl;
+
+ map<int32_t, vector<snapid_t> > all_purge;
+ map<int32_t, vector<snapid_t> > all_purged;
+
+ // NOTE: this is only needed for support during upgrades from pre-octopus,
+ // since starting with octopus we now get an explicit ack after we remove a
+ // snap.
+ mds->objecter->with_osdmap(
+ [this, &all_purged, &all_purge](const OSDMap& osdmap) {
+ for (const auto& p : need_to_purge) {
+ int id = p.first;
+ const pg_pool_t *pi = osdmap.get_pg_pool(id);
+ if (pi == NULL) {
+ // The pool is gone. So are the snapshots.
+ all_purged[id] = std::vector<snapid_t>(p.second.begin(),
+ p.second.end());
+ continue;
+ }
+
+ for (const auto& q : p.second) {
+ if (pi->is_removed_snap(q)) {
+ dout(10) << " osdmap marks " << q << " as removed" << dendl;
+ all_purged[id].push_back(q);
+ } else {
+ all_purge[id].push_back(q);
+ }
+ }
+ }
+ });
+
+ if (!all_purged.empty()) {
+ // prepare to remove from need_to_purge list
+ bufferlist bl;
+ using ceph::encode;
+ encode(all_purged, bl);
+ do_server_update(bl);
+ }
+
+ if (!all_purge.empty()) {
+ dout(10) << "requesting removal of " << all_purge << dendl;
+ auto m = make_message<MRemoveSnaps>(all_purge);
+ mon_client->send_mon_message(m.detach());
+ }
+
+ last_checked_osdmap = version;
+}
+
+void SnapServer::handle_remove_snaps(const cref_t<MRemoveSnaps> &m)
+{
+ dout(10) << __func__ << " " << *m << dendl;
+
+ map<int32_t, vector<snapid_t> > all_purged;
+ int num = 0;
+
+ for (const auto& [id, snaps] : need_to_purge) {
+ auto i = m->snaps.find(id);
+ if (i == m->snaps.end()) {
+ continue;
+ }
+ for (const auto& q : snaps) {
+ if (std::find(i->second.begin(), i->second.end(), q) != i->second.end()) {
+ dout(10) << " mon reports " << q << " is removed" << dendl;
+ all_purged[id].push_back(q);
+ ++num;
+ }
+ }
+ }
+
+ dout(10) << __func__ << " " << num << " now removed" << dendl;
+ if (num) {
+ bufferlist bl;
+ using ceph::encode;
+ encode(all_purged, bl);
+ do_server_update(bl);
+ }
+}
+
+
+void SnapServer::dump(Formatter *f) const
+{
+ f->open_object_section("snapserver");
+
+ f->dump_int("last_snap", last_snap);
+ f->dump_int("last_created", last_created);
+ f->dump_int("last_destroyed", last_destroyed);
+
+ f->open_array_section("pending_noop");
+ for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) {
+ f->dump_unsigned("version", *i);
+ }
+ f->close_section();
+
+ f->open_array_section("snaps");
+ for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) {
+ f->open_object_section("snap");
+ i->second.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+
+ f->open_object_section("need_to_purge");
+ for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) {
+ CachedStackStringStream css;
+ *css << i->first;
+ f->open_array_section(css->strv());
+ for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) {
+ f->dump_unsigned("snapid", s->val);
+ }
+ f->close_section();
+ }
+ f->close_section();
+
+ f->open_array_section("pending_update");
+ for(map<version_t, SnapInfo>::const_iterator i = pending_update.begin(); i != pending_update.end(); ++i) {
+ f->open_object_section("snap");
+ f->dump_unsigned("version", i->first);
+ f->open_object_section("snapinfo");
+ i->second.dump(f);
+ f->close_section();
+ f->close_section();
+ }
+ f->close_section();
+
+ f->open_array_section("pending_destroy");
+ for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) {
+ f->open_object_section("snap");
+ f->dump_unsigned("version", i->first);
+ f->dump_unsigned("removed_snap", i->second.first);
+ f->dump_unsigned("seq", i->second.second);
+ f->close_section();
+ }
+ f->close_section();
+
+ f->close_section();
+}
+
+void SnapServer::generate_test_instances(std::list<SnapServer*>& ls)
+{
+ list<SnapInfo*> snapinfo_instances;
+ SnapInfo::generate_test_instances(snapinfo_instances);
+ SnapInfo populated_snapinfo = *(snapinfo_instances.back());
+ for (auto& info : snapinfo_instances) {
+ delete info;
+ info = nullptr;
+ }
+
+ SnapServer *blank = new SnapServer();
+ ls.push_back(blank);
+ SnapServer *populated = new SnapServer();
+ populated->last_snap = 123;
+ populated->snaps[456] = populated_snapinfo;
+ populated->need_to_purge[2].insert(012);
+ populated->pending_update[234] = populated_snapinfo;
+ populated->pending_destroy[345].first = 567;
+ populated->pending_destroy[345].second = 768;
+ populated->pending_noop.insert(890);
+
+ ls.push_back(populated);
+}
+
+bool SnapServer::force_update(snapid_t last, snapid_t v2_since,
+ map<snapid_t, SnapInfo>& _snaps)
+{
+ bool modified = false;
+ if (last > last_snap) {
+ derr << " updating last_snap " << last_snap << " -> " << last << dendl;
+ last_snap = last;
+ last_created = last;
+ last_destroyed = last;
+ modified = true;
+ }
+ if (v2_since > snaprealm_v2_since) {
+ derr << " updating snaprealm_v2_since " << snaprealm_v2_since
+ << " -> " << v2_since << dendl;
+ snaprealm_v2_since = v2_since;
+ modified = true;
+ }
+ if (snaps != _snaps) {
+ derr << " updating snaps {" << snaps << "} -> {" << _snaps << "}" << dendl;
+ snaps = _snaps;
+ modified = true;
+ }
+
+ if (modified) {
+ need_to_purge.clear();
+ pending_update.clear();
+ pending_destroy.clear();
+ pending_noop.clear();
+ MDSTableServer::reset_state();
+ }
+ return modified;
+}