summaryrefslogtreecommitdiffstats
path: root/src/mds/events
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/mds/events
parentInitial commit. (diff)
downloadceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/mds/events/ECommitted.h43
-rw-r--r--src/mds/events/EExport.h58
-rw-r--r--src/mds/events/EFragment.h81
-rw-r--r--src/mds/events/EImportFinish.h53
-rw-r--r--src/mds/events/EImportStart.h61
-rw-r--r--src/mds/events/EMetaBlob.h600
-rw-r--r--src/mds/events/ENoOp.h35
-rw-r--r--src/mds/events/EOpen.h61
-rw-r--r--src/mds/events/EResetJournal.h39
-rw-r--r--src/mds/events/ESession.h67
-rw-r--r--src/mds/events/ESessions.h61
-rw-r--r--src/mds/events/ESlaveUpdate.h157
-rw-r--r--src/mds/events/ESubtreeMap.h48
-rw-r--r--src/mds/events/ETableClient.h49
-rw-r--r--src/mds/events/ETableServer.h59
-rw-r--r--src/mds/events/EUpdate.h55
16 files changed, 1527 insertions, 0 deletions
diff --git a/src/mds/events/ECommitted.h b/src/mds/events/ECommitted.h
new file mode 100644
index 00000000..0459f9d0
--- /dev/null
+++ b/src/mds/events/ECommitted.h
@@ -0,0 +1,43 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ECOMMITTED_H
+#define CEPH_MDS_ECOMMITTED_H
+
+#include "../LogEvent.h"
+#include "EMetaBlob.h"
+
+class ECommitted : public LogEvent {
+public:
+ metareqid_t reqid;
+
+ ECommitted() : LogEvent(EVENT_COMMITTED) { }
+ explicit ECommitted(metareqid_t r) :
+ LogEvent(EVENT_COMMITTED), reqid(r) { }
+
+ void print(ostream& out) const override {
+ out << "ECommitted " << reqid;
+ }
+
+ void encode(bufferlist &bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator &bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ECommitted*>& ls);
+
+ void update_segment() override {}
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ECommitted)
+
+#endif
diff --git a/src/mds/events/EExport.h b/src/mds/events/EExport.h
new file mode 100644
index 00000000..94e39a84
--- /dev/null
+++ b/src/mds/events/EExport.h
@@ -0,0 +1,58 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_EEXPORT_H
+#define CEPH_EEXPORT_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+#include "../MDSRank.h"
+
+#include "EMetaBlob.h"
+#include "../LogEvent.h"
+
+class EExport : public LogEvent {
+public:
+ EMetaBlob metablob; // exported dir
+protected:
+ dirfrag_t base;
+ set<dirfrag_t> bounds;
+ mds_rank_t target;
+
+public:
+ EExport() :
+ LogEvent(EVENT_EXPORT), target(MDS_RANK_NONE) { }
+ EExport(MDLog *mdlog, CDir *dir, mds_rank_t t) :
+ LogEvent(EVENT_EXPORT),
+ base(dir->dirfrag()), target(t) { }
+
+ set<dirfrag_t> &get_bounds() { return bounds; }
+
+ void print(ostream& out) const override {
+ out << "EExport " << base << " to mds." << target << " " << metablob;
+ }
+
+ EMetaBlob *get_metablob() override { return &metablob; }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator &bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EExport*>& ls);
+ void replay(MDSRank *mds) override;
+
+};
+WRITE_CLASS_ENCODER_FEATURES(EExport)
+
+#endif
diff --git a/src/mds/events/EFragment.h b/src/mds/events/EFragment.h
new file mode 100644
index 00000000..90d9238b
--- /dev/null
+++ b/src/mds/events/EFragment.h
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_EFRAGMENT_H
+#define CEPH_MDS_EFRAGMENT_H
+
+#include "../LogEvent.h"
+#include "EMetaBlob.h"
+
+struct dirfrag_rollback {
+ fnode_t fnode;
+ dirfrag_rollback() { }
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+};
+WRITE_CLASS_ENCODER(dirfrag_rollback)
+
+class EFragment : public LogEvent {
+public:
+ EMetaBlob metablob;
+ __u8 op{0};
+ inodeno_t ino;
+ frag_t basefrag;
+ __s32 bits{0}; // positive for split (from basefrag), negative for merge (to basefrag)
+ frag_vec_t orig_frags;
+ bufferlist rollback;
+
+ EFragment() : LogEvent(EVENT_FRAGMENT) { }
+ EFragment(MDLog *mdlog, int o, dirfrag_t df, int b) :
+ LogEvent(EVENT_FRAGMENT),
+ op(o), ino(df.ino), basefrag(df.frag), bits(b) { }
+
+ void print(ostream& out) const override {
+ out << "EFragment " << op_name(op) << " " << ino << " " << basefrag << " by " << bits << " " << metablob;
+ }
+
+ enum {
+ OP_PREPARE = 1,
+ OP_COMMIT = 2,
+ OP_ROLLBACK = 3,
+ OP_FINISH = 4 // finish deleting orphan dirfrags
+ };
+ static std::string_view op_name(int o) {
+ switch (o) {
+ case OP_PREPARE: return "prepare";
+ case OP_COMMIT: return "commit";
+ case OP_ROLLBACK: return "rollback";
+ case OP_FINISH: return "finish";
+ default: return "???";
+ }
+ }
+
+ void add_orig_frag(frag_t df, dirfrag_rollback *drb=NULL) {
+ using ceph::encode;
+ orig_frags.push_back(df);
+ if (drb)
+ encode(*drb, rollback);
+ }
+
+ EMetaBlob *get_metablob() override { return &metablob; }
+
+ void encode(bufferlist &bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator &bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EFragment*>& ls);
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(EFragment)
+
+#endif
diff --git a/src/mds/events/EImportFinish.h b/src/mds/events/EImportFinish.h
new file mode 100644
index 00000000..699c0527
--- /dev/null
+++ b/src/mds/events/EImportFinish.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_EIMPORTFINISH_H
+#define CEPH_EIMPORTFINISH_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+#include "../MDSRank.h"
+#include "../LogEvent.h"
+
+class EImportFinish : public LogEvent {
+ protected:
+ dirfrag_t base; // imported dir
+ bool success;
+
+ public:
+ EImportFinish(CDir *dir, bool s) : LogEvent(EVENT_IMPORTFINISH),
+ base(dir->dirfrag()),
+ success(s) { }
+ EImportFinish() : LogEvent(EVENT_IMPORTFINISH), base(), success(false) { }
+
+ void print(ostream& out) const override {
+ out << "EImportFinish " << base;
+ if (success)
+ out << " success";
+ else
+ out << " failed";
+ }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator &bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EImportFinish*>& ls);
+
+ void replay(MDSRank *mds) override;
+
+};
+WRITE_CLASS_ENCODER_FEATURES(EImportFinish)
+
+#endif
diff --git a/src/mds/events/EImportStart.h b/src/mds/events/EImportStart.h
new file mode 100644
index 00000000..276469e8
--- /dev/null
+++ b/src/mds/events/EImportStart.h
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_EIMPORTSTART_H
+#define CEPH_EIMPORTSTART_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+class MDLog;
+class MDSRank;
+
+#include "EMetaBlob.h"
+#include "../LogEvent.h"
+
+class EImportStart : public LogEvent {
+protected:
+ dirfrag_t base;
+ vector<dirfrag_t> bounds;
+ mds_rank_t from;
+
+public:
+ EMetaBlob metablob;
+ bufferlist client_map; // encoded map<__u32,entity_inst_t>
+ version_t cmapv{0};
+
+ EImportStart(MDLog *log, dirfrag_t di, const vector<dirfrag_t>& b, mds_rank_t f) :
+ LogEvent(EVENT_IMPORTSTART),
+ base(di), bounds(b), from(f) { }
+ EImportStart() :
+ LogEvent(EVENT_IMPORTSTART), from(MDS_RANK_NONE) { }
+
+ void print(ostream& out) const override {
+ out << "EImportStart " << base << " from mds." << from << " " << metablob;
+ }
+
+ EMetaBlob *get_metablob() override { return &metablob; }
+
+ void encode(bufferlist &bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator &bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EImportStart*>& ls);
+
+ void update_segment() override;
+ void replay(MDSRank *mds) override;
+
+};
+WRITE_CLASS_ENCODER_FEATURES(EImportStart)
+
+#endif
diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h
new file mode 100644
index 00000000..ac09a8fe
--- /dev/null
+++ b/src/mds/events/EMetaBlob.h
@@ -0,0 +1,600 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_EMETABLOB_H
+#define CEPH_MDS_EMETABLOB_H
+
+#include <string_view>
+
+#include <stdlib.h>
+
+#include "../CInode.h"
+#include "../CDir.h"
+#include "../CDentry.h"
+#include "../LogSegment.h"
+
+#include "include/interval_set.h"
+
+class MDSRank;
+class MDLog;
+class LogSegment;
+struct MDSlaveUpdate;
+
+/*
+ * a bunch of metadata in the journal
+ */
+
+/* notes:
+ *
+ * - make sure you adjust the inode.version for any modified inode you
+ * journal. CDir and CDentry maintain a projected_version, but CInode
+ * doesn't, since the journaled inode usually has to be modified
+ * manually anyway (to delay the change in the MDS's cache until after
+ * it is journaled).
+ *
+ */
+
+
+class EMetaBlob {
+
+public:
+ /* fullbit - a regular dentry + inode
+ *
+ * We encode this one a bit weirdly, just because (also, it's marginally faster
+ * on multiple encodes, which I think can happen):
+ * Encode a bufferlist on struct creation with all data members, without a struct_v.
+ * When encode is called, encode struct_v and then append the bufferlist.
+ * Decode straight into the appropriate variables.
+ *
+ * So, if you add members, encode them in the constructor and then change
+ * the struct_v in the encode function!
+ */
+ struct fullbit {
+ static const int STATE_DIRTY = (1<<0);
+ static const int STATE_DIRTYPARENT = (1<<1);
+ static const int STATE_DIRTYPOOL = (1<<2);
+ static const int STATE_NEED_SNAPFLUSH = (1<<3);
+ std::string dn; // dentry
+ snapid_t dnfirst, dnlast;
+ version_t dnv{0};
+ CInode::mempool_inode inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify
+ fragtree_t dirfragtree;
+ CInode::mempool_xattr_map xattrs;
+ std::string symlink;
+ snapid_t oldest_snap;
+ bufferlist snapbl;
+ __u8 state{0};
+ CInode::mempool_old_inode_map old_inodes; // XXX should not be part of mempool; wait for std::pmr to simplify
+
+ fullbit(std::string_view d, snapid_t df, snapid_t dl,
+ version_t v, const CInode::mempool_inode& i, const fragtree_t &dft,
+ const CInode::mempool_xattr_map &xa, std::string_view sym,
+ snapid_t os, const bufferlist &sbl, __u8 st,
+ const CInode::mempool_old_inode_map *oi = NULL) :
+ dn(d), dnfirst(df), dnlast(dl), dnv(v), inode(i), xattrs(xa),
+ oldest_snap(os), state(st)
+ {
+ if (i.is_symlink())
+ symlink = sym;
+ if (i.is_dir())
+ dirfragtree = dft;
+ if (oi)
+ old_inodes = *oi;
+ snapbl = sbl;
+ }
+ explicit fullbit(bufferlist::const_iterator &p) {
+ decode(p);
+ }
+ fullbit() {}
+ fullbit(const fullbit&) = delete;
+ ~fullbit() {}
+ fullbit& operator=(const fullbit&) = delete;
+
+ void encode(bufferlist& bl, uint64_t features) const;
+ void decode(bufferlist::const_iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EMetaBlob::fullbit*>& ls);
+
+ void update_inode(MDSRank *mds, CInode *in);
+ bool is_dirty() const { return (state & STATE_DIRTY); }
+ bool is_dirty_parent() const { return (state & STATE_DIRTYPARENT); }
+ bool is_dirty_pool() const { return (state & STATE_DIRTYPOOL); }
+ bool need_snapflush() const { return (state & STATE_NEED_SNAPFLUSH); }
+
+ void print(ostream& out) const {
+ out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
+ << " inode " << inode.ino
+ << " state=" << state << std::endl;
+ }
+ string state_string() const {
+ string state_string;
+ bool marked_already = false;
+ if (is_dirty()) {
+ state_string.append("dirty");
+ marked_already = true;
+ }
+ if (is_dirty_parent()) {
+ state_string.append(marked_already ? "+dirty_parent" : "dirty_parent");
+ if (is_dirty_pool())
+ state_string.append("+dirty_pool");
+ }
+ return state_string;
+ }
+ };
+ WRITE_CLASS_ENCODER_FEATURES(fullbit)
+
+ /* remotebit - a dentry + remote inode link (i.e. just an ino)
+ */
+ struct remotebit {
+ std::string dn;
+ snapid_t dnfirst, dnlast;
+ version_t dnv;
+ inodeno_t ino;
+ unsigned char d_type;
+ bool dirty;
+
+ remotebit(std::string_view d, snapid_t df, snapid_t dl, version_t v, inodeno_t i, unsigned char dt, bool dr) :
+ dn(d), dnfirst(df), dnlast(dl), dnv(v), ino(i), d_type(dt), dirty(dr) { }
+ explicit remotebit(bufferlist::const_iterator &p) { decode(p); }
+ remotebit(): dnfirst(0), dnlast(0), dnv(0), ino(0),
+ d_type('\0'), dirty(false) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator &bl);
+ void print(ostream& out) const {
+ out << " remotebit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
+ << " ino " << ino
+ << " dirty=" << dirty << std::endl;
+ }
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<remotebit*>& ls);
+ };
+ WRITE_CLASS_ENCODER(remotebit)
+
+ /*
+ * nullbit - a null dentry
+ */
+ struct nullbit {
+ std::string dn;
+ snapid_t dnfirst, dnlast;
+ version_t dnv;
+ bool dirty;
+
+ nullbit(std::string_view d, snapid_t df, snapid_t dl, version_t v, bool dr) :
+ dn(d), dnfirst(df), dnlast(dl), dnv(v), dirty(dr) { }
+ explicit nullbit(bufferlist::const_iterator &p) { decode(p); }
+ nullbit(): dnfirst(0), dnlast(0), dnv(0), dirty(false) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<nullbit*>& ls);
+ void print(ostream& out) const {
+ out << " nullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
+ << " dirty=" << dirty << std::endl;
+ }
+ };
+ WRITE_CLASS_ENCODER(nullbit)
+
+
+ /* dirlump - contains metadata for any dir we have contents for.
+ */
+public:
+ struct dirlump {
+ static const int STATE_COMPLETE = (1<<1);
+ static const int STATE_DIRTY = (1<<2); // dirty due to THIS journal item, that is!
+ static const int STATE_NEW = (1<<3); // new directory
+ static const int STATE_IMPORTING = (1<<4); // importing directory
+ static const int STATE_DIRTYDFT = (1<<5); // dirty dirfragtree
+
+ //version_t dirv;
+ fnode_t fnode;
+ __u32 state;
+ __u32 nfull, nremote, nnull;
+
+ private:
+ mutable bufferlist dnbl;
+ mutable bool dn_decoded;
+ mutable list<fullbit> dfull;
+ mutable vector<remotebit> dremote;
+ mutable vector<nullbit> dnull;
+
+ public:
+ dirlump() : state(0), nfull(0), nremote(0), nnull(0), dn_decoded(true) { }
+ dirlump(const dirlump&) = delete;
+ dirlump& operator=(const dirlump&) = delete;
+
+ bool is_complete() const { return state & STATE_COMPLETE; }
+ void mark_complete() { state |= STATE_COMPLETE; }
+ bool is_dirty() const { return state & STATE_DIRTY; }
+ void mark_dirty() { state |= STATE_DIRTY; }
+ bool is_new() const { return state & STATE_NEW; }
+ void mark_new() { state |= STATE_NEW; }
+ bool is_importing() { return state & STATE_IMPORTING; }
+ void mark_importing() { state |= STATE_IMPORTING; }
+ bool is_dirty_dft() { return state & STATE_DIRTYDFT; }
+ void mark_dirty_dft() { state |= STATE_DIRTYDFT; }
+
+ const list<fullbit> &get_dfull() const { return dfull; }
+ list<fullbit> &_get_dfull() { return dfull; }
+ const vector<remotebit> &get_dremote() const { return dremote; }
+ const vector<nullbit> &get_dnull() const { return dnull; }
+
+ template< class... Args>
+ void add_dfull(Args&&... args) {
+ dfull.emplace_back(std::forward<Args>(args)...);
+ }
+ template< class... Args>
+ void add_dremote(Args&&... args) {
+ dremote.emplace_back(std::forward<Args>(args)...);
+ }
+ template< class... Args>
+ void add_dnull(Args&&... args) {
+ dnull.emplace_back(std::forward<Args>(args)...);
+ }
+
+ void print(dirfrag_t dirfrag, ostream& out) const {
+ out << "dirlump " << dirfrag << " v " << fnode.version
+ << " state " << state
+ << " num " << nfull << "/" << nremote << "/" << nnull
+ << std::endl;
+ _decode_bits();
+ for (const auto& p : dfull)
+ p.print(out);
+ for (const auto& p : dremote)
+ p.print(out);
+ for (const auto& p : dnull)
+ p.print(out);
+ }
+
+ string state_string() const {
+ string state_string;
+ bool marked_already = false;
+ if (is_complete()) {
+ state_string.append("complete");
+ marked_already = true;
+ }
+ if (is_dirty()) {
+ state_string.append(marked_already ? "+dirty" : "dirty");
+ marked_already = true;
+ }
+ if (is_new()) {
+ state_string.append(marked_already ? "+new" : "new");
+ }
+ return state_string;
+ }
+
+ // if this changes, update the versioning in encode for it!
+ void _encode_bits(uint64_t features) const {
+ using ceph::encode;
+ if (!dn_decoded) return;
+ encode(dfull, dnbl, features);
+ encode(dremote, dnbl);
+ encode(dnull, dnbl);
+ }
+ void _decode_bits() const {
+ using ceph::decode;
+ if (dn_decoded) return;
+ auto p = dnbl.cbegin();
+ decode(dfull, p);
+ decode(dremote, p);
+ decode(dnull, p);
+ dn_decoded = true;
+ }
+
+ void encode(bufferlist& bl, uint64_t features) const;
+ void decode(bufferlist::const_iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<dirlump*>& ls);
+ };
+ WRITE_CLASS_ENCODER_FEATURES(dirlump)
+
+ // my lumps. preserve the order we added them in a list.
+ vector<dirfrag_t> lump_order;
+ map<dirfrag_t, dirlump> lump_map;
+ list<fullbit> roots;
+public:
+ vector<pair<__u8,version_t> > table_tids; // tableclient transactions
+
+ inodeno_t opened_ino;
+public:
+ inodeno_t renamed_dirino;
+ vector<frag_t> renamed_dir_frags;
+private:
+
+ // ino (pre)allocation. may involve both inotable AND session state.
+ version_t inotablev, sessionmapv;
+ inodeno_t allocated_ino; // inotable
+ interval_set<inodeno_t> preallocated_inos; // inotable + session
+ inodeno_t used_preallocated_ino; // session
+ entity_name_t client_name; // session
+
+ // inodes i've truncated
+ vector<inodeno_t> truncate_start; // start truncate
+ map<inodeno_t, LogSegment::seq_t> truncate_finish; // finished truncate (started in segment blah)
+
+public:
+ vector<inodeno_t> destroyed_inodes;
+private:
+
+ // idempotent op(s)
+ vector<pair<metareqid_t,uint64_t> > client_reqs;
+ vector<pair<metareqid_t,uint64_t> > client_flushes;
+
+ public:
+ void encode(bufferlist& bl, uint64_t features) const;
+ void decode(bufferlist::const_iterator& bl);
+ void get_inodes(std::set<inodeno_t> &inodes) const;
+ void get_paths(std::vector<std::string> &paths) const;
+ void get_dentries(std::map<dirfrag_t, std::set<std::string> > &dentries) const;
+ entity_name_t get_client_name() const {return client_name;}
+
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<EMetaBlob*>& ls);
+ // soft stateadd
+ uint64_t last_subtree_map;
+ uint64_t event_seq;
+
+ // for replay, in certain cases
+ //LogSegment *_segment;
+
+ EMetaBlob() : opened_ino(0), renamed_dirino(0),
+ inotablev(0), sessionmapv(0), allocated_ino(0),
+ last_subtree_map(0), event_seq(0)
+ {}
+ EMetaBlob(const EMetaBlob&) = delete;
+ ~EMetaBlob() { }
+ EMetaBlob& operator=(const EMetaBlob&) = delete;
+
+ void print(ostream& out) {
+ for (const auto &p : lump_order)
+ lump_map[p].print(p, out);
+ }
+
+ void add_client_req(metareqid_t r, uint64_t tid=0) {
+ client_reqs.push_back(pair<metareqid_t,uint64_t>(r, tid));
+ }
+ void add_client_flush(metareqid_t r, uint64_t tid=0) {
+ client_flushes.push_back(pair<metareqid_t,uint64_t>(r, tid));
+ }
+
+ void add_table_transaction(int table, version_t tid) {
+ table_tids.push_back(pair<__u8, version_t>(table, tid));
+ }
+
+ void add_opened_ino(inodeno_t ino) {
+ ceph_assert(!opened_ino);
+ opened_ino = ino;
+ }
+
+ void set_ino_alloc(inodeno_t alloc,
+ inodeno_t used_prealloc,
+ interval_set<inodeno_t>& prealloc,
+ entity_name_t client,
+ version_t sv, version_t iv) {
+ allocated_ino = alloc;
+ used_preallocated_ino = used_prealloc;
+ preallocated_inos = prealloc;
+ client_name = client;
+ sessionmapv = sv;
+ inotablev = iv;
+ }
+
+ void add_truncate_start(inodeno_t ino) {
+ truncate_start.push_back(ino);
+ }
+ void add_truncate_finish(inodeno_t ino, uint64_t segoff) {
+ truncate_finish[ino] = segoff;
+ }
+
+ bool rewrite_truncate_finish(MDSRank const *mds, std::map<uint64_t, uint64_t> const &old_to_new);
+
+ void add_destroyed_inode(inodeno_t ino) {
+ destroyed_inodes.push_back(ino);
+ }
+
+ void add_null_dentry(CDentry *dn, bool dirty) {
+ add_null_dentry(add_dir(dn->get_dir(), false), dn, dirty);
+ }
+ void add_null_dentry(dirlump& lump, CDentry *dn, bool dirty) {
+ // add the dir
+ lump.nnull++;
+ lump.add_dnull(dn->get_name(), dn->first, dn->last,
+ dn->get_projected_version(), dirty);
+ }
+
+ void add_remote_dentry(CDentry *dn, bool dirty) {
+ add_remote_dentry(add_dir(dn->get_dir(), false), dn, dirty, 0, 0);
+ }
+ void add_remote_dentry(CDentry *dn, bool dirty, inodeno_t rino, int rdt) {
+ add_remote_dentry(add_dir(dn->get_dir(), false), dn, dirty, rino, rdt);
+ }
+ void add_remote_dentry(dirlump& lump, CDentry *dn, bool dirty,
+ inodeno_t rino=0, unsigned char rdt=0) {
+ if (!rino) {
+ rino = dn->get_projected_linkage()->get_remote_ino();
+ rdt = dn->get_projected_linkage()->get_remote_d_type();
+ }
+ lump.nremote++;
+ lump.add_dremote(dn->get_name(), dn->first, dn->last,
+ dn->get_projected_version(), rino, rdt, dirty);
+ }
+
+ // return remote pointer to to-be-journaled inode
+ void add_primary_dentry(CDentry *dn, CInode *in, bool dirty,
+ bool dirty_parent=false, bool dirty_pool=false,
+ bool need_snapflush=false) {
+ __u8 state = 0;
+ if (dirty) state |= fullbit::STATE_DIRTY;
+ if (dirty_parent) state |= fullbit::STATE_DIRTYPARENT;
+ if (dirty_pool) state |= fullbit::STATE_DIRTYPOOL;
+ if (need_snapflush) state |= fullbit::STATE_NEED_SNAPFLUSH;
+ add_primary_dentry(add_dir(dn->get_dir(), false), dn, in, state);
+ }
+ void add_primary_dentry(dirlump& lump, CDentry *dn, CInode *in, __u8 state) {
+ if (!in)
+ in = dn->get_projected_linkage()->get_inode();
+
+ // make note of where this inode was last journaled
+ in->last_journaled = event_seq;
+ //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl;
+
+ const auto pi = in->get_projected_inode();
+ if ((state & fullbit::STATE_DIRTY) && pi->is_backtrace_updated())
+ state |= fullbit::STATE_DIRTYPARENT;
+
+ bufferlist snapbl;
+ const sr_t *sr = in->get_projected_srnode();
+ if (sr)
+ sr->encode(snapbl);
+
+ lump.nfull++;
+ lump.add_dfull(dn->get_name(), dn->first, dn->last, dn->get_projected_version(),
+ *pi, in->dirfragtree, *in->get_projected_xattrs(), in->symlink,
+ in->oldest_snap, snapbl, state, &in->old_inodes);
+ }
+
+ // convenience: primary or remote? figure it out.
+ void add_dentry(CDentry *dn, bool dirty) {
+ dirlump& lump = add_dir(dn->get_dir(), false);
+ add_dentry(lump, dn, dirty, false, false);
+ }
+ void add_import_dentry(CDentry *dn) {
+ bool dirty_parent = false;
+ bool dirty_pool = false;
+ if (dn->get_linkage()->is_primary()) {
+ dirty_parent = dn->get_linkage()->get_inode()->is_dirty_parent();
+ dirty_pool = dn->get_linkage()->get_inode()->is_dirty_pool();
+ }
+ dirlump& lump = add_dir(dn->get_dir(), false);
+ add_dentry(lump, dn, dn->is_dirty(), dirty_parent, dirty_pool);
+ }
+ void add_dentry(dirlump& lump, CDentry *dn, bool dirty, bool dirty_parent, bool dirty_pool) {
+ // primary or remote
+ if (dn->get_projected_linkage()->is_remote()) {
+ add_remote_dentry(dn, dirty);
+ return;
+ } else if (dn->get_projected_linkage()->is_null()) {
+ add_null_dentry(dn, dirty);
+ return;
+ }
+ ceph_assert(dn->get_projected_linkage()->is_primary());
+ add_primary_dentry(dn, 0, dirty, dirty_parent, dirty_pool);
+ }
+
+ void add_root(bool dirty, CInode *in) {
+ in->last_journaled = event_seq;
+ //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl;
+
+ const auto& pi = *(in->get_projected_inode());
+ const auto& pdft = in->dirfragtree;
+ const auto& px = *(in->get_projected_xattrs());
+
+ bufferlist snapbl;
+ const sr_t *sr = in->get_projected_srnode();
+ if (sr)
+ sr->encode(snapbl);
+
+ for (auto p = roots.begin(); p != roots.end(); ++p) {
+ if (p->inode.ino == in->ino()) {
+ roots.erase(p);
+ break;
+ }
+ }
+
+ string empty;
+ roots.emplace_back(empty, in->first, in->last, 0, pi, pdft, px, in->symlink,
+ in->oldest_snap, snapbl, (dirty ? fullbit::STATE_DIRTY : 0),
+ &in->old_inodes);
+ }
+
+ dirlump& add_dir(CDir *dir, bool dirty, bool complete=false) {
+ return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(),
+ dirty, complete);
+ }
+ dirlump& add_new_dir(CDir *dir) {
+ return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(),
+ true, true, true); // dirty AND complete AND new
+ }
+ dirlump& add_import_dir(CDir *dir) {
+ // dirty=false would be okay in some cases
+ return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(),
+ dir->is_dirty(), dir->is_complete(), false, true, dir->is_dirty_dft());
+ }
+ dirlump& add_fragmented_dir(CDir *dir, bool dirty, bool dirtydft) {
+ return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(),
+ dirty, false, false, false, dirtydft);
+ }
+ dirlump& add_dir(dirfrag_t df, const fnode_t *pf, version_t pv, bool dirty,
+ bool complete=false, bool isnew=false,
+ bool importing=false, bool dirty_dft=false) {
+ if (lump_map.count(df) == 0)
+ lump_order.push_back(df);
+
+ dirlump& l = lump_map[df];
+ l.fnode = *pf;
+ l.fnode.version = pv;
+ if (complete) l.mark_complete();
+ if (dirty) l.mark_dirty();
+ if (isnew) l.mark_new();
+ if (importing) l.mark_importing();
+ if (dirty_dft) l.mark_dirty_dft();
+ return l;
+ }
+
+ static const int TO_AUTH_SUBTREE_ROOT = 0; // default.
+ static const int TO_ROOT = 1;
+
+ void add_dir_context(CDir *dir, int mode = TO_AUTH_SUBTREE_ROOT);
+
+ bool empty() {
+ return roots.empty() && lump_order.empty() && table_tids.empty() &&
+ truncate_start.empty() && truncate_finish.empty() &&
+ destroyed_inodes.empty() && client_reqs.empty() &&
+ opened_ino == 0 && inotablev == 0 && sessionmapv == 0;
+ }
+
+ void print(ostream& out) const {
+ out << "[metablob";
+ if (!lump_order.empty())
+ out << " " << lump_order.front() << ", " << lump_map.size() << " dirs";
+ if (!table_tids.empty())
+ out << " table_tids=" << table_tids;
+ if (allocated_ino || preallocated_inos.size()) {
+ if (allocated_ino)
+ out << " alloc_ino=" << allocated_ino;
+ if (preallocated_inos.size())
+ out << " prealloc_ino=" << preallocated_inos;
+ if (used_preallocated_ino)
+ out << " used_prealloc_ino=" << used_preallocated_ino;
+ out << " v" << inotablev;
+ }
+ out << "]";
+ }
+
+ void update_segment(LogSegment *ls);
+ void replay(MDSRank *mds, LogSegment *ls, MDSlaveUpdate *su=NULL);
+};
+WRITE_CLASS_ENCODER_FEATURES(EMetaBlob)
+WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::fullbit)
+WRITE_CLASS_ENCODER(EMetaBlob::remotebit)
+WRITE_CLASS_ENCODER(EMetaBlob::nullbit)
+WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::dirlump)
+
+inline ostream& operator<<(ostream& out, const EMetaBlob& t) {
+ t.print(out);
+ return out;
+}
+
+#endif
diff --git a/src/mds/events/ENoOp.h b/src/mds/events/ENoOp.h
new file mode 100644
index 00000000..1bf5161e
--- /dev/null
+++ b/src/mds/events/ENoOp.h
@@ -0,0 +1,35 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ENOOP_H
+#define CEPH_MDS_ENOOP_H
+
+#include "../LogEvent.h"
+
+class ENoOp : public LogEvent {
+ uint32_t pad_size;
+
+public:
+ ENoOp() : LogEvent(EVENT_NOOP), pad_size(0) { }
+ explicit ENoOp(uint32_t size_) : LogEvent(EVENT_NOOP), pad_size(size_){ }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override {}
+
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ENoOp)
+
+#endif
diff --git a/src/mds/events/EOpen.h b/src/mds/events/EOpen.h
new file mode 100644
index 00000000..192745d9
--- /dev/null
+++ b/src/mds/events/EOpen.h
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_EOPEN_H
+#define CEPH_MDS_EOPEN_H
+
+#include "../LogEvent.h"
+#include "EMetaBlob.h"
+
+class EOpen : public LogEvent {
+public:
+ EMetaBlob metablob;
+ vector<inodeno_t> inos;
+ vector<vinodeno_t> snap_inos;
+
+ EOpen() : LogEvent(EVENT_OPEN) { }
+ explicit EOpen(MDLog *mdlog) :
+ LogEvent(EVENT_OPEN) { }
+
+ void print(ostream& out) const override {
+ out << "EOpen " << metablob << ", " << inos.size() << " open files";
+ }
+
+ EMetaBlob *get_metablob() override { return &metablob; }
+
+ void add_clean_inode(CInode *in) {
+ if (!in->is_base()) {
+ metablob.add_dir_context(in->get_projected_parent_dn()->get_dir());
+ metablob.add_primary_dentry(in->get_projected_parent_dn(), 0, false);
+ if (in->last == CEPH_NOSNAP)
+ inos.push_back(in->ino());
+ else
+ snap_inos.push_back(in->vino());
+ }
+ }
+ void add_ino(inodeno_t ino) {
+ inos.push_back(ino);
+ }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EOpen*>& ls);
+
+ void update_segment() override;
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(EOpen)
+
+#endif
diff --git a/src/mds/events/EResetJournal.h b/src/mds/events/EResetJournal.h
new file mode 100644
index 00000000..3004978a
--- /dev/null
+++ b/src/mds/events/EResetJournal.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef CEPH_MDS_ERESETJOURNAL_H
+#define CEPH_MDS_ERESETJOURNAL_H
+
+#include "../LogEvent.h"
+
+// generic log event
+class EResetJournal : public LogEvent {
+ public:
+ EResetJournal() : LogEvent(EVENT_RESETJOURNAL) { }
+ ~EResetJournal() override {}
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EResetJournal*>& ls);
+ void print(ostream& out) const override {
+ out << "EResetJournal";
+ }
+
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(EResetJournal)
+
+#endif
diff --git a/src/mds/events/ESession.h b/src/mds/events/ESession.h
new file mode 100644
index 00000000..0b65765e
--- /dev/null
+++ b/src/mds/events/ESession.h
@@ -0,0 +1,67 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ESESSION_H
+#define CEPH_MDS_ESESSION_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+#include "../LogEvent.h"
+
+class ESession : public LogEvent {
+ protected:
+ entity_inst_t client_inst;
+ bool open; // open or close
+ version_t cmapv{0}; // client map version
+
+ interval_set<inodeno_t> inos;
+ version_t inotablev{0};
+
+ // Client metadata stored during open
+ client_metadata_t client_metadata;
+
+ public:
+ ESession() : LogEvent(EVENT_SESSION), open(false) { }
+ ESession(const entity_inst_t& inst, bool o, version_t v,
+ const client_metadata_t& cm) :
+ LogEvent(EVENT_SESSION),
+ client_inst(inst), open(o), cmapv(v), inotablev(0),
+ client_metadata(cm) { }
+ ESession(const entity_inst_t& inst, bool o, version_t v,
+ const interval_set<inodeno_t>& i, version_t iv) :
+ LogEvent(EVENT_SESSION),
+ client_inst(inst), open(o), cmapv(v), inos(i), inotablev(iv) { }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ESession*>& ls);
+
+ void print(ostream& out) const override {
+ if (open)
+ out << "ESession " << client_inst << " open cmapv " << cmapv;
+ else
+ out << "ESession " << client_inst << " close cmapv " << cmapv;
+ if (inos.size())
+ out << " (" << inos.size() << " inos, v" << inotablev << ")";
+ }
+
+ void update_segment() override;
+ void replay(MDSRank *mds) override;
+ entity_inst_t get_client_inst() const {return client_inst;}
+};
+WRITE_CLASS_ENCODER_FEATURES(ESession)
+
+#endif
diff --git a/src/mds/events/ESessions.h b/src/mds/events/ESessions.h
new file mode 100644
index 00000000..aa0eeff8
--- /dev/null
+++ b/src/mds/events/ESessions.h
@@ -0,0 +1,61 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ESESSIONS_H
+#define CEPH_MDS_ESESSIONS_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+#include "../LogEvent.h"
+
+class ESessions : public LogEvent {
+protected:
+ version_t cmapv; // client map version
+ bool old_style_encode;
+
+public:
+ map<client_t,entity_inst_t> client_map;
+ map<client_t,client_metadata_t> client_metadata_map;
+
+ ESessions() : LogEvent(EVENT_SESSIONS), cmapv(0), old_style_encode(false) { }
+ ESessions(version_t pv, map<client_t,entity_inst_t>&& cm,
+ map<client_t,client_metadata_t>&& cmm) :
+ LogEvent(EVENT_SESSIONS),
+ cmapv(pv), old_style_encode(false),
+ client_map(std::move(cm)),
+ client_metadata_map(std::move(cmm)) {}
+
+ void mark_old_encoding() { old_style_encode = true; }
+
+ void encode(bufferlist &bl, uint64_t features) const override;
+ void decode_old(bufferlist::const_iterator &bl);
+ void decode_new(bufferlist::const_iterator &bl);
+ void decode(bufferlist::const_iterator &bl) override {
+ if (old_style_encode) decode_old(bl);
+ else decode_new(bl);
+ }
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ESessions*>& ls);
+
+ void print(ostream& out) const override {
+ out << "ESessions " << client_map.size() << " opens cmapv " << cmapv;
+ }
+
+ void update_segment() override;
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ESessions)
+
+#endif
diff --git a/src/mds/events/ESlaveUpdate.h b/src/mds/events/ESlaveUpdate.h
new file mode 100644
index 00000000..23ca430b
--- /dev/null
+++ b/src/mds/events/ESlaveUpdate.h
@@ -0,0 +1,157 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ESLAVEUPDATE_H
+#define CEPH_MDS_ESLAVEUPDATE_H
+
+#include <string_view>
+
+#include "../LogEvent.h"
+#include "EMetaBlob.h"
+
+/*
+ * rollback records, for remote/slave updates, which may need to be manually
+ * rolled back during journal replay. (or while active if master fails, but in
+ * that case these records aren't needed.)
+ */
+struct link_rollback {
+ metareqid_t reqid;
+ inodeno_t ino;
+ bool was_inc;
+ utime_t old_ctime;
+ utime_t old_dir_mtime;
+ utime_t old_dir_rctime;
+ bufferlist snapbl;
+
+ link_rollback() : ino(0), was_inc(false) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<link_rollback*>& ls);
+};
+WRITE_CLASS_ENCODER(link_rollback)
+
+/*
+ * this is only used on an empty dir with a dirfrag on a remote node.
+ * we are auth for nothing. all we need to do is relink the directory
+ * in the hierarchy properly during replay to avoid breaking the
+ * subtree map.
+ */
+struct rmdir_rollback {
+ metareqid_t reqid;
+ dirfrag_t src_dir;
+ string src_dname;
+ dirfrag_t dest_dir;
+ string dest_dname;
+ bufferlist snapbl;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<rmdir_rollback*>& ls);
+};
+WRITE_CLASS_ENCODER(rmdir_rollback)
+
+struct rename_rollback {
+ struct drec {
+ dirfrag_t dirfrag;
+ utime_t dirfrag_old_mtime;
+ utime_t dirfrag_old_rctime;
+ inodeno_t ino, remote_ino;
+ string dname;
+ char remote_d_type;
+ utime_t old_ctime;
+
+ drec() : remote_d_type((char)S_IFREG) {}
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<drec*>& ls);
+ };
+ WRITE_CLASS_MEMBER_ENCODER(drec)
+
+ metareqid_t reqid;
+ drec orig_src, orig_dest;
+ drec stray; // we know this is null, but we want dname, old mtime/rctime
+ utime_t ctime;
+ bufferlist srci_snapbl;
+ bufferlist desti_snapbl;
+
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<rename_rollback*>& ls);
+};
+WRITE_CLASS_ENCODER(rename_rollback::drec)
+WRITE_CLASS_ENCODER(rename_rollback)
+
+
+class ESlaveUpdate : public LogEvent {
+public:
+ const static int OP_PREPARE = 1;
+ const static int OP_COMMIT = 2;
+ const static int OP_ROLLBACK = 3;
+
+ const static int LINK = 1;
+ const static int RENAME = 2;
+ const static int RMDIR = 3;
+
+ /*
+ * we journal a rollback metablob that contains the unmodified metadata
+ * too, because we may be updating previously dirty metadata, which
+ * will allow old log segments to be trimmed. if we end of rolling back,
+ * those updates could be lost.. so we re-journal the unmodified metadata,
+ * and replay will apply _either_ commit or rollback.
+ */
+ EMetaBlob commit;
+ bufferlist rollback;
+ string type;
+ metareqid_t reqid;
+ mds_rank_t master;
+ __u8 op; // prepare, commit, abort
+ __u8 origop; // link | rename
+
+ ESlaveUpdate() : LogEvent(EVENT_SLAVEUPDATE), master(0), op(0), origop(0) { }
+ ESlaveUpdate(MDLog *mdlog, std::string_view s, metareqid_t ri, int mastermds, int o, int oo) :
+ LogEvent(EVENT_SLAVEUPDATE),
+ type(s),
+ reqid(ri),
+ master(mastermds),
+ op(o), origop(oo) { }
+
+ void print(ostream& out) const override {
+ if (type.length())
+ out << type << " ";
+ out << " " << (int)op;
+ if (origop == LINK) out << " link";
+ if (origop == RENAME) out << " rename";
+ out << " " << reqid;
+ out << " for mds." << master;
+ out << commit;
+ }
+
+ EMetaBlob *get_metablob() override { return &commit; }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ESlaveUpdate*>& ls);
+
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ESlaveUpdate)
+
+#endif
diff --git a/src/mds/events/ESubtreeMap.h b/src/mds/events/ESubtreeMap.h
new file mode 100644
index 00000000..08d4a581
--- /dev/null
+++ b/src/mds/events/ESubtreeMap.h
@@ -0,0 +1,48 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ESUBTREEMAP_H
+#define CEPH_MDS_ESUBTREEMAP_H
+
+#include "../LogEvent.h"
+#include "EMetaBlob.h"
+
+class ESubtreeMap : public LogEvent {
+public:
+ EMetaBlob metablob;
+ map<dirfrag_t, vector<dirfrag_t> > subtrees;
+ set<dirfrag_t> ambiguous_subtrees;
+ uint64_t expire_pos;
+ uint64_t event_seq;
+
+ ESubtreeMap() : LogEvent(EVENT_SUBTREEMAP), expire_pos(0), event_seq(0) { }
+
+ void print(ostream& out) const override {
+ out << "ESubtreeMap " << subtrees.size() << " subtrees "
+ << ", " << ambiguous_subtrees.size() << " ambiguous "
+ << metablob;
+ }
+
+ EMetaBlob *get_metablob() override { return &metablob; }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ESubtreeMap*>& ls);
+
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ESubtreeMap)
+
+#endif
diff --git a/src/mds/events/ETableClient.h b/src/mds/events/ETableClient.h
new file mode 100644
index 00000000..bf3e752d
--- /dev/null
+++ b/src/mds/events/ETableClient.h
@@ -0,0 +1,49 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ETABLECLIENT_H
+#define CEPH_MDS_ETABLECLIENT_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+#include "../mds_table_types.h"
+#include "../LogEvent.h"
+
+struct ETableClient : public LogEvent {
+ __u16 table;
+ __s16 op;
+ version_t tid;
+
+ ETableClient() : LogEvent(EVENT_TABLECLIENT), table(0), op(0), tid(0) { }
+ ETableClient(int t, int o, version_t ti) :
+ LogEvent(EVENT_TABLECLIENT),
+ table(t), op(o), tid(ti) { }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ETableClient*>& ls);
+
+ void print(ostream& out) const override {
+ out << "ETableClient " << get_mdstable_name(table) << " " << get_mdstableserver_opname(op);
+ if (tid) out << " tid " << tid;
+ }
+
+ //void update_segment();
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ETableClient)
+
+#endif
diff --git a/src/mds/events/ETableServer.h b/src/mds/events/ETableServer.h
new file mode 100644
index 00000000..0005b132
--- /dev/null
+++ b/src/mds/events/ETableServer.h
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_ETABLESERVER_H
+#define CEPH_MDS_ETABLESERVER_H
+
+#include "common/config.h"
+#include "include/types.h"
+
+#include "../mds_table_types.h"
+#include "../LogEvent.h"
+
+struct ETableServer : public LogEvent {
+ __u16 table;
+ __s16 op;
+ uint64_t reqid;
+ mds_rank_t bymds;
+ bufferlist mutation;
+ version_t tid;
+ version_t version;
+
+ ETableServer() : LogEvent(EVENT_TABLESERVER), table(0), op(0),
+ reqid(0), bymds(MDS_RANK_NONE), tid(0), version(0) { }
+ ETableServer(int t, int o, uint64_t ri, mds_rank_t m, version_t ti, version_t v) :
+ LogEvent(EVENT_TABLESERVER),
+ table(t), op(o), reqid(ri), bymds(m), tid(ti), version(v) { }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<ETableServer*>& ls);
+
+ void print(ostream& out) const override {
+ out << "ETableServer " << get_mdstable_name(table)
+ << " " << get_mdstableserver_opname(op);
+ if (reqid) out << " reqid " << reqid;
+ if (bymds >= 0) out << " mds." << bymds;
+ if (tid) out << " tid " << tid;
+ if (version) out << " version " << version;
+ if (mutation.length()) out << " mutation=" << mutation.length() << " bytes";
+ }
+
+ void update_segment() override;
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(ETableServer)
+
+#endif
diff --git a/src/mds/events/EUpdate.h b/src/mds/events/EUpdate.h
new file mode 100644
index 00000000..dc710d52
--- /dev/null
+++ b/src/mds/events/EUpdate.h
@@ -0,0 +1,55 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDS_EUPDATE_H
+#define CEPH_MDS_EUPDATE_H
+
+#include <string_view>
+
+#include "../LogEvent.h"
+#include "EMetaBlob.h"
+
+class EUpdate : public LogEvent {
+public:
+ EMetaBlob metablob;
+ string type;
+ bufferlist client_map;
+ version_t cmapv;
+ metareqid_t reqid;
+ bool had_slaves;
+
+ EUpdate() : LogEvent(EVENT_UPDATE), cmapv(0), had_slaves(false) { }
+ EUpdate(MDLog *mdlog, std::string_view s) :
+ LogEvent(EVENT_UPDATE),
+ type(s), cmapv(0), had_slaves(false) { }
+
+ void print(ostream& out) const override {
+ if (type.length())
+ out << "EUpdate " << type << " ";
+ out << metablob;
+ }
+
+ EMetaBlob *get_metablob() override { return &metablob; }
+
+ void encode(bufferlist& bl, uint64_t features) const override;
+ void decode(bufferlist::const_iterator& bl) override;
+ void dump(Formatter *f) const override;
+ static void generate_test_instances(list<EUpdate*>& ls);
+
+ void update_segment() override;
+ void replay(MDSRank *mds) override;
+};
+WRITE_CLASS_ENCODER_FEATURES(EUpdate)
+
+#endif