summaryrefslogtreecommitdiffstats
path: root/src/mds/CInode.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/mds/CInode.h
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/mds/CInode.h')
-rw-r--r--  src/mds/CInode.h  1252
1 file changed, 1252 insertions, 0 deletions
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
new file mode 100644
index 000000000..979b45174
--- /dev/null
+++ b/src/mds/CInode.h
@@ -0,0 +1,1252 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_CINODE_H
+#define CEPH_CINODE_H
+
+#include <list>
+#include <map>
+#include <set>
+#include <string_view>
+
+#include "common/config.h"
+#include "common/RefCountedObj.h"
+#include "include/compat.h"
+#include "include/counter.h"
+#include "include/elist.h"
+#include "include/types.h"
+#include "include/lru.h"
+#include "include/compact_set.h"
+
+#include "MDSCacheObject.h"
+#include "MDSContext.h"
+#include "flock.h"
+
+#include "BatchOp.h"
+#include "CDentry.h"
+#include "SimpleLock.h"
+#include "ScatterLock.h"
+#include "LocalLockC.h"
+#include "Capability.h"
+#include "SnapRealm.h"
+#include "Mutation.h"
+
+#include "messages/MClientCaps.h"
+
+#define dout_context g_ceph_context
+
+class Context;
+class CDir;
+class CInode;
+class MDCache;
+class LogSegment;
+struct SnapRealm;
+class Session;
+struct ObjectOperation;
+class EMetaBlob;
+
+/**
+ * Pairs a lock type with the capability bits relevant to it.
+ * Presumably maps CEPH_LOCK_* ids to the caps clients may use to
+ * write while that lock is held — confirm against the table that
+ * populates instances of this struct.
+ */
+struct cinode_lock_info_t {
+  int lock;     // lock type id
+  int wr_caps;  // associated writeable capability bits
+};
+
+/**
+ * Describes one backtrace commit against a single pool: the target
+ * pool id, the op priority, and optionally a layout/symlink update to
+ * apply alongside the backtrace (see update(), defined out of line).
+ *
+ * The two-arg constructor commits the backtrace only; the five-arg
+ * constructor additionally records layout, features and symlink and
+ * sets update_layout_symlink so update() knows to write them.
+ */
+struct CInodeCommitOperation {
+public:
+  CInodeCommitOperation(int prio, int64_t po)
+    : pool(po), priority(prio) {
+  }
+  CInodeCommitOperation(int prio, int64_t po, file_layout_t l, uint64_t f, std::string_view s)
+    : pool(po), priority(prio), _layout(l), _features(f), _symlink(s) {
+    update_layout_symlink = true;
+  }
+
+  void update(ObjectOperation &op, inode_backtrace_t &bt);
+  int64_t get_pool() const { return pool; }
+
+private:
+  int64_t pool; ///< pool id
+  int priority;
+  bool update_layout_symlink = false;
+  file_layout_t _layout;
+  uint64_t _features = 0; ///< defaulted: the two-arg ctor never sets it
+  // NOTE(review): non-owning view — the caller must keep the symlink
+  // string alive until update() has run.
+  std::string_view _symlink;
+};
+
+/**
+ * Aggregates everything needed to persist one inode's backtrace:
+ * the per-pool commit operations, the backtrace payload, the version
+ * being stored, and the owning inode.
+ */
+struct CInodeCommitOperations {
+  std::vector<CInodeCommitOperation> ops_vec; ///< one op per target pool
+  inode_backtrace_t bt;                       ///< backtrace being written
+  version_t version = 0;                      ///< initialized: was indeterminate
+  CInode *in = nullptr;                       ///< initialized: was indeterminate
+};
+
+/**
+ * Base class for CInode, containing the backing store data and
+ * serialization methods. This exists so that we can read and
+ * handle CInodes from the backing store without hitting all
+ * the business logic in CInode proper.
+ */
+class InodeStoreBase {
+public:
+  using mempool_inode = inode_t<mempool::mds_co::pool_allocator>;
+  using inode_ptr = std::shared_ptr<mempool_inode>;
+  using inode_const_ptr = std::shared_ptr<const mempool_inode>;
+
+  // Allocate an inode inside the mds_co mempool; the allocator is a
+  // shared function-local static, and allocate_shared keeps control
+  // block and object in a single mempool allocation.
+  template <typename ...Args>
+  static inode_ptr allocate_inode(Args && ...args) {
+    static mempool::mds_co::pool_allocator<mempool_inode> allocator;
+    return std::allocate_shared<mempool_inode>(allocator, std::forward<Args>(args)...);
+  }
+
+  using mempool_xattr_map = xattr_map<mempool::mds_co::pool_allocator>; // FIXME bufferptr not in mempool
+  using xattr_map_ptr = std::shared_ptr<mempool_xattr_map>;
+  using xattr_map_const_ptr = std::shared_ptr<const mempool_xattr_map>;
+
+  // Allocate an xattr map in the mds_co mempool (same pattern as
+  // allocate_inode above).
+  template <typename ...Args>
+  static xattr_map_ptr allocate_xattr_map(Args && ...args) {
+    static mempool::mds_co::pool_allocator<mempool_xattr_map> allocator;
+    return std::allocate_shared<mempool_xattr_map>(allocator, std::forward<Args>(args)...);
+  }
+
+  using mempool_old_inode = old_inode_t<mempool::mds_co::pool_allocator>;
+  using mempool_old_inode_map = mempool::mds_co::map<snapid_t, mempool_old_inode>;
+  using old_inode_map_ptr = std::shared_ptr<mempool_old_inode_map>;
+  using old_inode_map_const_ptr = std::shared_ptr<const mempool_old_inode_map>;
+
+  // Allocate the snapid -> old_inode map in the mds_co mempool.
+  template <typename ...Args>
+  static old_inode_map_ptr allocate_old_inode_map(Args && ...args) {
+    static mempool::mds_co::pool_allocator<mempool_old_inode_map> allocator;
+    return std::allocate_shared<mempool_old_inode_map>(allocator, std::forward<Args>(args)...);
+  }
+
+  // The three reset_* setters implement the "update" half of the
+  // read-copy-update scheme described on the protected members below:
+  // callers build a fresh const snapshot and move it in.
+  void reset_inode(inode_const_ptr&& ptr) {
+    inode = std::move(ptr);
+  }
+
+  void reset_xattrs(xattr_map_const_ptr&& ptr) {
+    xattrs = std::move(ptr);
+  }
+
+  void reset_old_inodes(old_inode_map_const_ptr&& ptr) {
+    old_inodes = std::move(ptr);
+  }
+
+  void encode_xattrs(bufferlist &bl) const;
+  void decode_xattrs(bufferlist::const_iterator &p);
+  void encode_old_inodes(bufferlist &bl, uint64_t features) const;
+  void decode_old_inodes(bufferlist::const_iterator &p);
+
+  /* Helpers */
+  static object_t get_object_name(inodeno_t ino, frag_t fg, std::string_view suffix);
+
+  /* Full serialization for use in ".inode" root inode objects */
+  void encode(ceph::buffer::list &bl, uint64_t features, const ceph::buffer::list *snap_blob=NULL) const;
+  void decode(ceph::buffer::list::const_iterator &bl, ceph::buffer::list& snap_blob);
+
+  /* Serialization without ENCODE_START/FINISH blocks for use embedded in dentry */
+  void encode_bare(ceph::buffer::list &bl, uint64_t features, const ceph::buffer::list *snap_blob=NULL) const;
+  void decode_bare(ceph::buffer::list::const_iterator &bl, ceph::buffer::list &snap_blob, __u8 struct_v=5);
+
+  /* For test/debug output */
+  void dump(ceph::Formatter *f) const;
+
+  // JSON import helpers (ceph-dencoder / offline tools).
+  void decode_json(JSONObj *obj);
+  static void xattrs_cb(InodeStoreBase::mempool_xattr_map& c, JSONObj *obj);
+  // NOTE(review): "indoes" is a typo for "inodes", kept because
+  // out-of-view callers reference this exact name.
+  static void old_indoes_cb(InodeStoreBase::mempool_old_inode_map& c, JSONObj *obj);
+
+  /* For use by offline tools */
+  __u32 hash_dentry_name(std::string_view dn);
+  frag_t pick_dirfrag(std::string_view dn);
+
+  mempool::mds_co::string symlink; // symlink dest, if symlink
+  fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
+  snapid_t oldest_snap = CEPH_NOSNAP;
+  damage_flags_t damage_flags = 0;
+
+protected:
+  static inode_const_ptr empty_inode;
+
+  // Following members are pointers to constant data, the constant data can
+  // be shared by CInode and log events. To update these members in CInode,
+  // read-copy-update should be used.
+  inode_const_ptr inode = empty_inode;
+  xattr_map_const_ptr xattrs;
+  old_inode_map_const_ptr old_inodes; // key = last, value.first = first
+};
+
+// Adapter: decode xattrs into the mds_co mempool map by delegating to
+// the templated decode_noshare overload ("noshare" presumably means
+// the decoded values do not share buffer memory with the source
+// bufferlist — confirm against the overload's definition).
+inline void decode_noshare(InodeStoreBase::mempool_xattr_map& xattrs,
+                           ceph::buffer::list::const_iterator &p)
+{
+  decode_noshare<mempool::mds_co::pool_allocator>(xattrs, p);
+}
+
+// Concrete, encodable inode store: InodeStoreBase plus the raw
+// snap_blob and mutable accessors.
+class InodeStore : public InodeStoreBase {
+public:
+  // Mutable access to the inode; lazily replaces the shared
+  // empty_inode sentinel with a freshly allocated inode so the
+  // sentinel itself is never written through the const_cast.
+  mempool_inode* get_inode() {
+    if (inode == empty_inode)
+      reset_inode(allocate_inode());
+    return const_cast<mempool_inode*>(inode.get());
+  }
+  // NOTE(review): unlike get_inode(), may return nullptr when no
+  // xattr map has been allocated, and const_casts a possibly shared
+  // map — callers must hold the only reference before mutating.
+  mempool_xattr_map* get_xattrs() { return const_cast<mempool_xattr_map*>(xattrs.get()); }
+
+  // encode/decode bind the base-class serialization to this object's
+  // snap_blob member.
+  void encode(ceph::buffer::list &bl, uint64_t features) const {
+    InodeStoreBase::encode(bl, features, &snap_blob);
+  }
+  void decode(ceph::buffer::list::const_iterator &bl) {
+    InodeStoreBase::decode(bl, snap_blob);
+  }
+  void encode_bare(ceph::buffer::list &bl, uint64_t features) const {
+    InodeStoreBase::encode_bare(bl, features, &snap_blob);
+  }
+  void decode_bare(ceph::buffer::list::const_iterator &bl) {
+    InodeStoreBase::decode_bare(bl, snap_blob);
+  }
+
+  static void generate_test_instances(std::list<InodeStore*>& ls);
+
+  // Re-export the protected base members for direct access.
+  using InodeStoreBase::inode;
+  using InodeStoreBase::xattrs;
+  using InodeStoreBase::old_inodes;
+
+  // FIXME bufferlist not part of mempool
+  ceph::buffer::list snap_blob; // Encoded copy of SnapRealm, because we can't
+                                // rehydrate it without full MDCache
+};
+WRITE_CLASS_ENCODER_FEATURES(InodeStore)
+
+// just for ceph-dencoder
+// ceph-dencoder shim: re-routes encode/decode to the "bare" variants
+// (no ENCODE_START/FINISH framing) so the embedded-in-dentry format
+// can be exercised by the dencoder tool.
+class InodeStoreBare : public InodeStore {
+public:
+  void encode(ceph::buffer::list &bl, uint64_t features) const {
+    InodeStore::encode_bare(bl, features);
+  }
+  void decode(ceph::buffer::list::const_iterator &bl) {
+    InodeStore::decode_bare(bl);
+  }
+  static void generate_test_instances(std::list<InodeStoreBare*>& ls);
+};
+WRITE_CLASS_ENCODER_FEATURES(InodeStoreBare)
+
+// cached inode wrapper
+class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> {
+ public:
+ MEMPOOL_CLASS_HELPERS();
+
+ using mempool_cap_map = mempool::mds_co::map<client_t, Capability>;
+ /**
+ * @defgroup Scrubbing and fsck
+ */
+
+ /**
+ * Report the results of validation against a particular inode.
+ * Each member is a pair of bools.
+ * <member>.first represents if validation was performed against the member.
+ * <member>.second represents if the member passed validation.
+ * performed_validation is set to true if the validation was actually
+ * run. It might not be run if, for instance, the inode is marked as dirty.
+ * passed_validation is set to true if everything that was checked
+ * passed its validation.
+ */
+  struct validated_data {
+    // Per-member validation record: whether the check ran, whether it
+    // passed, whether damage was repaired, plus the on-disk and
+    // in-memory values that were compared.
+    template<typename T>struct member_status {
+      bool checked = false;
+      bool passed = false;
+      bool repaired = false;
+      int ondisk_read_retval = 0;  // rc of the backing-store read
+      T ondisk_value;
+      T memory_value;
+      std::stringstream error_str;
+    };
+
+    // Recursively accumulated dir/nest stats used to cross-check the
+    // inode's accounted stats.
+    struct raw_stats_t {
+      frag_info_t dirstat;
+      nest_info_t rstat;
+    };
+
+    validated_data() {}
+
+    void dump(ceph::Formatter *f) const;
+
+    bool all_damage_repaired() const;
+
+    bool performed_validation = false;
+    bool passed_validation = false;
+
+    member_status<inode_backtrace_t> backtrace;
+    member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr
+    member_status<raw_stats_t> raw_stats;
+  };
+
+ // friends
+ friend class Server;
+ friend class Locker;
+ friend class Migrator;
+ friend class MDCache;
+ friend class StrayManager;
+ friend class CDir;
+ friend std::ostream& operator<<(std::ostream&, const CInode&);
+
+  // Per-inode scrub bookkeeping; allocated lazily by scrub_info().
+  class scrub_info_t {
+  public:
+    scrub_info_t() {}
+
+    version_t last_scrub_version = 0;
+    utime_t last_scrub_stamp;
+
+    bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
+    bool scrub_in_progress = false; /// are we currently scrubbing?
+
+    fragset_t queued_frags;  // dirfrags queued for scrub
+
+    ScrubHeaderRef header;   // parameters of the scrub operation
+  };
+
+ // -- pins --
+ static const int PIN_DIRFRAG = -1;
+ static const int PIN_CAPS = 2; // client caps
+ static const int PIN_IMPORTING = -4; // importing
+ static const int PIN_OPENINGDIR = 7;
+ static const int PIN_REMOTEPARENT = 8;
+ static const int PIN_BATCHOPENJOURNAL = 9;
+ static const int PIN_SCATTERED = 10;
+ static const int PIN_STICKYDIRS = 11;
+ //static const int PIN_PURGING = -12;
+ static const int PIN_FREEZING = 13;
+ static const int PIN_FROZEN = 14;
+ static const int PIN_IMPORTINGCAPS = -15;
+ static const int PIN_PASTSNAPPARENT = -16;
+ static const int PIN_OPENINGSNAPPARENTS = 17;
+ static const int PIN_TRUNCATING = 18;
+ static const int PIN_STRAY = 19; // we pin our stray inode while active
+ static const int PIN_NEEDSNAPFLUSH = 20;
+ static const int PIN_DIRTYRSTAT = 21;
+ static const int PIN_EXPORTINGCAPS = 22;
+ static const int PIN_DIRTYPARENT = 23;
+ static const int PIN_DIRWAITER = 24;
+
+ // -- dump flags --
+ static const int DUMP_INODE_STORE_BASE = (1 << 0);
+ static const int DUMP_MDS_CACHE_OBJECT = (1 << 1);
+ static const int DUMP_LOCKS = (1 << 2);
+ static const int DUMP_STATE = (1 << 3);
+ static const int DUMP_CAPS = (1 << 4);
+ static const int DUMP_PATH = (1 << 5);
+ static const int DUMP_DIRFRAGS = (1 << 6);
+ static const int DUMP_ALL = (-1);
+ static const int DUMP_DEFAULT = DUMP_ALL & (~DUMP_PATH) & (~DUMP_DIRFRAGS);
+
+ // -- state --
+ static const int STATE_EXPORTING = (1<<0); // on nonauth bystander.
+ static const int STATE_OPENINGDIR = (1<<1);
+ static const int STATE_FREEZING = (1<<2);
+ static const int STATE_FROZEN = (1<<3);
+ static const int STATE_AMBIGUOUSAUTH = (1<<4);
+ static const int STATE_EXPORTINGCAPS = (1<<5);
+ static const int STATE_NEEDSRECOVER = (1<<6);
+ static const int STATE_RECOVERING = (1<<7);
+ static const int STATE_PURGING = (1<<8);
+ static const int STATE_DIRTYPARENT = (1<<9);
+ static const int STATE_DIRTYRSTAT = (1<<10);
+ static const int STATE_STRAYPINNED = (1<<11);
+ static const int STATE_FROZENAUTHPIN = (1<<12);
+ static const int STATE_DIRTYPOOL = (1<<13);
+ static const int STATE_REPAIRSTATS = (1<<14);
+ static const int STATE_MISSINGOBJS = (1<<15);
+ static const int STATE_EVALSTALECAPS = (1<<16);
+ static const int STATE_QUEUEDEXPORTPIN = (1<<17);
+ static const int STATE_TRACKEDBYOFT = (1<<18); // tracked by open file table
+ static const int STATE_DELAYEDEXPORTPIN = (1<<19);
+ static const int STATE_DISTEPHEMERALPIN = (1<<20);
+ static const int STATE_RANDEPHEMERALPIN = (1<<21);
+ static const int STATE_CLIENTWRITEABLE = (1<<22);
+
+ // orphan inode needs notification of releasing reference
+ static const int STATE_ORPHAN = STATE_NOTIFYREF;
+
+ static const int MASK_STATE_EXPORTED =
+ (STATE_DIRTY|STATE_NEEDSRECOVER|STATE_DIRTYPARENT|STATE_DIRTYPOOL|
+ STATE_DISTEPHEMERALPIN|STATE_RANDEPHEMERALPIN);
+ static const int MASK_STATE_EXPORT_KEPT =
+ (STATE_FROZEN|STATE_AMBIGUOUSAUTH|STATE_EXPORTINGCAPS|
+ STATE_QUEUEDEXPORTPIN|STATE_TRACKEDBYOFT|STATE_DELAYEDEXPORTPIN|
+ STATE_DISTEPHEMERALPIN|STATE_RANDEPHEMERALPIN);
+
+ /* These are for "permanent" state markers that are passed around between
+ * MDS. Nothing protects/updates it like a typical MDS lock.
+ *
+ * Currently, we just use this for REPLICATED inodes. The reason we need to
+ * replicate the random epin state is because the directory inode is still
+ * under the authority of the parent subtree. So it's not exported normally
+ * and we can't pass around the state that way. The importer of the dirfrags
+ * still needs to know that the inode is random pinned though otherwise it
+ * doesn't know that the dirfrags are pinned.
+ */
+ static const int MASK_STATE_REPLICATED = STATE_RANDEPHEMERALPIN;
+
+ // -- waiters --
+ static const uint64_t WAIT_DIR = (1<<0);
+ static const uint64_t WAIT_FROZEN = (1<<1);
+ static const uint64_t WAIT_TRUNC = (1<<2);
+ static const uint64_t WAIT_FLOCK = (1<<3);
+
+ static const uint64_t WAIT_ANY_MASK = (uint64_t)(-1);
+
+ // misc
+ static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export
+
+ // ---------------------------
+ CInode() = delete;
+ CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP);
+ ~CInode() override {
+ close_dirfrags();
+ close_snaprealm();
+ clear_file_locks();
+ ceph_assert(num_projected_srnodes == 0);
+ ceph_assert(num_caps_notable == 0);
+ ceph_assert(num_subtree_roots == 0);
+ ceph_assert(num_exporting_dirs == 0);
+ ceph_assert(batch_ops.empty());
+ }
+
+ std::map<int, std::unique_ptr<BatchOp>> batch_ops;
+
+ std::string_view pin_name(int p) const override;
+
+ std::ostream& print_db_line_prefix(std::ostream& out) const override;
+
+ const scrub_info_t *scrub_info() const {
+ if (!scrub_infop)
+ scrub_info_create();
+ return scrub_infop.get();
+ }
+
+ const ScrubHeaderRef& get_scrub_header() {
+ static const ScrubHeaderRef nullref;
+ return scrub_infop ? scrub_infop->header : nullref;
+ }
+
+ bool scrub_is_in_progress() const {
+ return (scrub_infop && scrub_infop->scrub_in_progress);
+ }
+ /**
+ * Start scrubbing on this inode. That could be very short if it's
+ * a file, or take a long time if we're recursively scrubbing a directory.
+ * @pre It is not currently scrubbing
+ * @post it has set up internal scrubbing state
+ * @param scrub_version What version are we scrubbing at (usually, parent
+ * directory's get_projected_version())
+ */
+ void scrub_initialize(ScrubHeaderRef& header);
+ /**
+ * Call this once the scrub has been completed, whether it's a full
+ * recursive scrub on a directory or simply the data on a file (or
+ * anything in between).
+ * @param c An out param which is filled in with a Context* that must
+ * be complete()ed.
+ */
+ void scrub_finished();
+
+ void scrub_aborted();
+
+ fragset_t& scrub_queued_frags() {
+ ceph_assert(scrub_infop);
+ return scrub_infop->queued_frags;
+ }
+
+ bool is_multiversion() const {
+ return snaprealm || // other snaprealms will link to me
+ get_inode()->is_dir() || // links to me in other snaps
+ get_inode()->nlink > 1 || // there are remote links, possibly snapped, that will need to find me
+ is_any_old_inodes(); // once multiversion, always multiversion. until old_inodes gets cleaned out.
+ }
+ snapid_t get_oldest_snap();
+
+ bool is_dirty_rstat() {
+ return state_test(STATE_DIRTYRSTAT);
+ }
+ void mark_dirty_rstat();
+ void clear_dirty_rstat();
+
+ //bool hack_accessed = false;
+ //utime_t hack_load_stamp;
+
+ /**
+ * Projection methods, used to store inode changes until they have been journaled,
+ * at which point they are popped.
+ * Usage:
+ * project_inode as needed. If you're changing xattrs or sr_t, then pass true
+ * as needed then change the xattrs/snapnode member as needed. (Dirty
+ * exception: project_past_snaprealm_parent allows you to project the
+ * snapnode after doing project_inode (i.e. you don't need to pass
+ * snap=true).
+ *
+ * Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
+ * This function will take care of the inode itself, the xattrs, and the snaprealm.
+ */
+
+ struct projected_inode {
+ static sr_t* const UNDEF_SRNODE;
+
+ inode_ptr const inode;
+ xattr_map_ptr const xattrs;
+ sr_t* const snapnode;
+
+ projected_inode() = delete;
+ explicit projected_inode(inode_ptr&& i, xattr_map_ptr&& x, sr_t *s=nullptr) :
+ inode(std::move(i)), xattrs(std::move(x)), snapnode(s) {}
+ };
+ projected_inode project_inode(const MutationRef& mut,
+ bool xattr = false, bool snap = false);
+
+ void pop_and_dirty_projected_inode(LogSegment *ls, const MutationRef& mut);
+
+ version_t get_projected_version() const {
+ if (projected_nodes.empty())
+ return get_inode()->version;
+ else
+ return projected_nodes.back().inode->version;
+ }
+ bool is_projected() const {
+ return !projected_nodes.empty();
+ }
+
+ const inode_const_ptr& get_projected_inode() const {
+ if (projected_nodes.empty())
+ return get_inode();
+ else
+ return projected_nodes.back().inode;
+ }
+ // inode should have already been projected in caller's context
+ mempool_inode* _get_projected_inode() {
+ ceph_assert(!projected_nodes.empty());
+ return const_cast<mempool_inode*>(projected_nodes.back().inode.get());
+ }
+ const inode_const_ptr& get_previous_projected_inode() const {
+ ceph_assert(!projected_nodes.empty());
+ auto it = projected_nodes.rbegin();
+ ++it;
+ if (it != projected_nodes.rend())
+ return it->inode;
+ else
+ return get_inode();
+ }
+
+ const xattr_map_const_ptr& get_projected_xattrs() {
+ if (projected_nodes.empty())
+ return xattrs;
+ else
+ return projected_nodes.back().xattrs;
+ }
+ const xattr_map_const_ptr& get_previous_projected_xattrs() {
+ ceph_assert(!projected_nodes.empty());
+ auto it = projected_nodes.rbegin();
+ ++it;
+ if (it != projected_nodes.rend())
+ return it->xattrs;
+ else
+ return xattrs;
+ }
+
+ sr_t *prepare_new_srnode(snapid_t snapid);
+ void project_snaprealm(sr_t *new_srnode);
+ sr_t *project_snaprealm(snapid_t snapid=0) {
+ sr_t* new_srnode = prepare_new_srnode(snapid);
+ project_snaprealm(new_srnode);
+ return new_srnode;
+ }
+ const sr_t *get_projected_srnode() const;
+
+ void mark_snaprealm_global(sr_t *new_srnode);
+ void clear_snaprealm_global(sr_t *new_srnode);
+ bool is_projected_snaprealm_global() const;
+
+ void record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent);
+ void record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *newparent,
+ CDentry *dn, bool primary_dn);
+ void project_snaprealm_past_parent(SnapRealm *newparent);
+ void early_pop_projected_snaprealm();
+
+ const mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
+ void split_old_inode(snapid_t snap);
+ snapid_t pick_old_inode(snapid_t last) const;
+ void pre_cow_old_inode();
+ bool has_snap_data(snapid_t s);
+ void purge_stale_snap_data(const std::set<snapid_t>& snaps);
+
+ size_t get_num_dirfrags() const { return dirfrags.size(); }
+ CDir* get_dirfrag(frag_t fg) {
+ auto pi = dirfrags.find(fg);
+ if (pi != dirfrags.end()) {
+ //assert(g_conf()->debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME
+ return pi->second;
+ }
+ return NULL;
+ }
+ std::pair<bool, std::vector<CDir*>> get_dirfrags_under(frag_t fg);
+ CDir* get_approx_dirfrag(frag_t fg);
+
+ template<typename Container>
+ void get_dirfrags(Container& ls) const {
+ // all dirfrags
+ if constexpr (std::is_same_v<Container, std::vector<CDir*>>)
+ ls.reserve(ls.size() + dirfrags.size());
+ for (const auto &p : dirfrags)
+ ls.push_back(p.second);
+ }
+
+ auto get_dirfrags() const {
+ std::vector<CDir*> result;
+ get_dirfrags(result);
+ return result;
+ }
+
+ void get_nested_dirfrags(std::vector<CDir*>&) const;
+ std::vector<CDir*> get_nested_dirfrags() const {
+ std::vector<CDir*> v;
+ get_nested_dirfrags(v);
+ return v;
+ }
+ void get_subtree_dirfrags(std::vector<CDir*>&) const;
+ std::vector<CDir*> get_subtree_dirfrags() const {
+ std::vector<CDir*> v;
+ get_subtree_dirfrags(v);
+ return v;
+ }
+ int get_num_subtree_roots() const {
+ return num_subtree_roots;
+ }
+
+ CDir *get_or_open_dirfrag(MDCache *mdcache, frag_t fg);
+ CDir *add_dirfrag(CDir *dir);
+ void close_dirfrag(frag_t fg);
+ void close_dirfrags();
+ bool has_subtree_root_dirfrag(int auth=-1);
+ bool has_subtree_or_exporting_dirfrag();
+
+ void force_dirfrags();
+ void verify_dirfrags();
+
+ void get_stickydirs();
+ void put_stickydirs();
+
+ void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
+ void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
+ std::pair<bool,bool> split_need_snapflush(CInode *cowin, CInode *in);
+
+ // -- accessors --
+
+ inodeno_t ino() const { return get_inode()->ino; }
+ vinodeno_t vino() const { return vinodeno_t(ino(), last); }
+ int d_type() const { return IFTODT(get_inode()->mode); }
+ bool is_root() const { return ino() == CEPH_INO_ROOT; }
+ bool is_stray() const { return MDS_INO_IS_STRAY(ino()); }
+ mds_rank_t get_stray_owner() const {
+ return (mds_rank_t)MDS_INO_STRAY_OWNER(ino());
+ }
+ bool is_mdsdir() const { return MDS_INO_IS_MDSDIR(ino()); }
+ bool is_base() const { return MDS_INO_IS_BASE(ino()); }
+ bool is_system() const { return ino() < MDS_INO_SYSTEM_BASE; }
+ bool is_lost_and_found() const { return ino() == CEPH_INO_LOST_AND_FOUND; }
+ bool is_normal() const { return !(is_base() || is_system() || is_stray()); }
+ bool is_file() const { return get_inode()->is_file(); }
+ bool is_symlink() const { return get_inode()->is_symlink(); }
+ bool is_dir() const { return get_inode()->is_dir(); }
+
+ bool is_head() const { return last == CEPH_NOSNAP; }
+
+ // note: this overloads MDSCacheObject
+ bool is_ambiguous_auth() const {
+ return state_test(STATE_AMBIGUOUSAUTH) ||
+ MDSCacheObject::is_ambiguous_auth();
+ }
+ void set_ambiguous_auth() {
+ state_set(STATE_AMBIGUOUSAUTH);
+ }
+ void clear_ambiguous_auth(MDSContext::vec& finished);
+ void clear_ambiguous_auth();
+
+ const inode_const_ptr& get_inode() const {
+ return inode;
+ }
+
+ // only used for updating newly allocated CInode
+ mempool_inode* _get_inode() {
+ if (inode == empty_inode)
+ reset_inode(allocate_inode());
+ return const_cast<mempool_inode*>(inode.get());
+ }
+
+ const xattr_map_const_ptr& get_xattrs() const { return xattrs; }
+
+ bool is_any_old_inodes() const { return old_inodes && !old_inodes->empty(); }
+ const old_inode_map_const_ptr& get_old_inodes() const { return old_inodes; }
+
+ CDentry* get_parent_dn() { return parent; }
+ const CDentry* get_parent_dn() const { return parent; }
+ CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; }
+ const CDentry* get_projected_parent_dn() const { return !projected_parent.empty() ? projected_parent.back() : parent; }
+ const CDentry* get_oldest_parent_dn() const {
+ if (parent)
+ return parent;
+ return !projected_parent.empty() ? projected_parent.front(): NULL;
+ }
+ CDir *get_parent_dir();
+ const CDir *get_projected_parent_dir() const;
+ CDir *get_projected_parent_dir();
+ CInode *get_parent_inode();
+
+ bool is_lt(const MDSCacheObject *r) const override {
+ const CInode *o = static_cast<const CInode*>(r);
+ return ino() < o->ino() ||
+ (ino() == o->ino() && last < o->last);
+ }
+
+ // -- misc --
+ bool is_ancestor_of(const CInode *other) const;
+ bool is_projected_ancestor_of(const CInode *other) const;
+
+ void make_path_string(std::string& s, bool projected=false, const CDentry *use_parent=NULL) const;
+ void make_path(filepath& s, bool projected=false) const;
+ void name_stray_dentry(std::string& dname);
+
+ // -- dirtyness --
+ version_t get_version() const { return get_inode()->version; }
+
+ version_t pre_dirty();
+ void _mark_dirty(LogSegment *ls);
+ void mark_dirty(LogSegment *ls);
+ void mark_clean();
+
+ void store(MDSContext *fin);
+ void _stored(int r, version_t cv, Context *fin);
+ /**
+ * Flush a CInode to disk. This includes the backtrace, the parent
+ * directory's link, and the Inode object itself (if a base directory).
+ * @pre is_auth() on both the inode and its containing directory
+ * @pre can_auth_pin()
+ * @param fin The Context to call when the flush is completed.
+ */
+ void flush(MDSContext *fin);
+ void fetch(MDSContext *fin);
+ void _fetched(ceph::buffer::list& bl, ceph::buffer::list& bl2, Context *fin);
+
+ void _commit_ops(int r, C_GatherBuilder &gather_bld,
+ std::vector<CInodeCommitOperation> &ops_vec,
+ inode_backtrace_t &bt);
+ void build_backtrace(int64_t pool, inode_backtrace_t& bt);
+ void _store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
+ inode_backtrace_t &bt, int op_prio);
+ void store_backtrace(CInodeCommitOperations &op, int op_prio);
+ void store_backtrace(MDSContext *fin, int op_prio=-1);
+ void _stored_backtrace(int r, version_t v, Context *fin);
+ void fetch_backtrace(Context *fin, ceph::buffer::list *backtrace);
+
+ void mark_dirty_parent(LogSegment *ls, bool dirty_pool=false);
+ void clear_dirty_parent();
+ void verify_diri_backtrace(ceph::buffer::list &bl, int err);
+ bool is_dirty_parent() { return state_test(STATE_DIRTYPARENT); }
+ bool is_dirty_pool() { return state_test(STATE_DIRTYPOOL); }
+
+ void encode_snap_blob(ceph::buffer::list &bl);
+ void decode_snap_blob(const ceph::buffer::list &bl);
+ void encode_store(ceph::buffer::list& bl, uint64_t features);
+ void decode_store(ceph::buffer::list::const_iterator& bl);
+
+ void add_dir_waiter(frag_t fg, MDSContext *c);
+ void take_dir_waiting(frag_t fg, MDSContext::vec& ls);
+ bool is_waiting_for_dir(frag_t fg) {
+ return waiting_on_dir.count(fg);
+ }
+ void add_waiter(uint64_t tag, MDSContext *c) override;
+ void take_waiting(uint64_t tag, MDSContext::vec& ls) override;
+
+ // -- encode/decode helpers --
+ void _encode_base(ceph::buffer::list& bl, uint64_t features);
+ void _decode_base(ceph::buffer::list::const_iterator& p);
+ void _encode_locks_full(ceph::buffer::list& bl);
+ void _decode_locks_full(ceph::buffer::list::const_iterator& p);
+ void _encode_locks_state_for_replica(ceph::buffer::list& bl, bool need_recover);
+ void _encode_locks_state_for_rejoin(ceph::buffer::list& bl, int rep);
+ void _decode_locks_state_for_replica(ceph::buffer::list::const_iterator& p, bool is_new);
+ void _decode_locks_rejoin(ceph::buffer::list::const_iterator& p, MDSContext::vec& waiters,
+ std::list<SimpleLock*>& eval_locks, bool survivor);
+
+ // -- import/export --
+ void encode_export(ceph::buffer::list& bl);
+ void finish_export();
+ void abort_export() {
+ put(PIN_TEMPEXPORTING);
+ ceph_assert(state_test(STATE_EXPORTINGCAPS));
+ state_clear(STATE_EXPORTINGCAPS);
+ put(PIN_EXPORTINGCAPS);
+ }
+ void decode_import(ceph::buffer::list::const_iterator& p, LogSegment *ls);
+
+ // for giving to clients
+ int encode_inodestat(ceph::buffer::list& bl, Session *session, SnapRealm *realm,
+ snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0,
+ int getattr_wants=0);
+ void encode_cap_message(const ceph::ref_t<MClientCaps> &m, Capability *cap);
+
+ SimpleLock* get_lock(int type) override;
+
+ void set_object_info(MDSCacheObjectInfo &info) override;
+
+ void encode_lock_state(int type, ceph::buffer::list& bl) override;
+ void decode_lock_state(int type, const ceph::buffer::list& bl) override;
+ void encode_lock_iauth(ceph::buffer::list& bl);
+ void decode_lock_iauth(ceph::buffer::list::const_iterator& p);
+ void encode_lock_ilink(ceph::buffer::list& bl);
+ void decode_lock_ilink(ceph::buffer::list::const_iterator& p);
+ void encode_lock_idft(ceph::buffer::list& bl);
+ void decode_lock_idft(ceph::buffer::list::const_iterator& p);
+ void encode_lock_ifile(ceph::buffer::list& bl);
+ void decode_lock_ifile(ceph::buffer::list::const_iterator& p);
+ void encode_lock_inest(ceph::buffer::list& bl);
+ void decode_lock_inest(ceph::buffer::list::const_iterator& p);
+ void encode_lock_ixattr(ceph::buffer::list& bl);
+ void decode_lock_ixattr(ceph::buffer::list::const_iterator& p);
+ void encode_lock_isnap(ceph::buffer::list& bl);
+ void decode_lock_isnap(ceph::buffer::list::const_iterator& p);
+ void encode_lock_iflock(ceph::buffer::list& bl);
+ void decode_lock_iflock(ceph::buffer::list::const_iterator& p);
+ void encode_lock_ipolicy(ceph::buffer::list& bl);
+ void decode_lock_ipolicy(ceph::buffer::list::const_iterator& p);
+
+ void _finish_frag_update(CDir *dir, MutationRef& mut);
+
+ void clear_dirty_scattered(int type) override;
+ bool is_dirty_scattered();
+ void clear_scatter_dirty(); // on rejoin ack
+
+ void start_scatter(ScatterLock *lock);
+ void finish_scatter_update(ScatterLock *lock, CDir *dir,
+ version_t inode_version, version_t dir_accounted_version);
+ void finish_scatter_gather_update(int type, MutationRef& mut);
+ void finish_scatter_gather_update_accounted(int type, EMetaBlob *metablob);
+
+ // -- snap --
+ void open_snaprealm(bool no_split=false);
+ void close_snaprealm(bool no_join=false);
+ SnapRealm *find_snaprealm() const;
+ void encode_snap(ceph::buffer::list& bl);
+ void decode_snap(ceph::buffer::list::const_iterator& p);
+
+ client_t get_loner() const { return loner_cap; }
+ client_t get_wanted_loner() const { return want_loner_cap; }
+
+ // this is the loner state our locks should aim for
+ client_t get_target_loner() const {
+ if (loner_cap == want_loner_cap)
+ return loner_cap;
+ else
+ return -1;
+ }
+
+ client_t calc_ideal_loner();
+ void set_loner_cap(client_t l);
+ bool choose_ideal_loner();
+ bool try_set_loner();
+ bool try_drop_loner();
+
+ // choose new lock state during recovery, based on issued caps
+ void choose_lock_state(SimpleLock *lock, int allissued);
+ void choose_lock_states(int dirty_caps);
+
+ int count_nonstale_caps();
+ bool multiple_nonstale_caps();
+
+ bool is_any_caps() { return !client_caps.empty(); }
+ bool is_any_nonstale_caps() { return count_nonstale_caps(); }
+
+ const mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; }
+ void set_mds_caps_wanted(mempool::mds_co::compact_map<int32_t,int32_t>& m);
+ void set_mds_caps_wanted(mds_rank_t mds, int32_t wanted);
+
+ const mempool_cap_map& get_client_caps() const { return client_caps; }
+ Capability *get_client_cap(client_t client) {
+ auto client_caps_entry = client_caps.find(client);
+ if (client_caps_entry != client_caps.end())
+ return &client_caps_entry->second;
+ return 0;
+ }
+ // Pending cap bits for `client` (Capability::pending() — presumably the
+ // caps most recently issued/queued to the client; confirm in Capability.h),
+ // or 0 when the client holds no cap at all.
+ int get_client_cap_pending(client_t client) const {
+ auto client_caps_entry = client_caps.find(client);
+ if (client_caps_entry != client_caps.end()) {
+ return client_caps_entry->second.pending();
+ } else {
+ return 0;
+ }
+ }
+
+ int get_num_caps_notable() const { return num_caps_notable; }
+ void adjust_num_caps_notable(int d);
+
+ Capability *add_client_cap(client_t client, Session *session,
+ SnapRealm *conrealm=nullptr, bool new_inode=false);
+ void remove_client_cap(client_t client);
+ void move_to_realm(SnapRealm *realm);
+
+ Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session);
+ void clear_client_caps_after_export();
+ void export_client_caps(std::map<client_t,Capability::Export>& cl);
+
+ // caps allowed
+ int get_caps_liked() const;
+ int get_caps_allowed_ever() const;
+ int get_caps_allowed_by_type(int type) const;
+ int get_caps_careful() const;
+ int get_xlocker_mask(client_t client) const;
+ int get_caps_allowed_for_client(Session *s, Capability *cap,
+ const mempool_inode *file_i) const;
+
+ // caps issued, wanted
+ int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0,
+ int shift = 0, int mask = -1);
+ bool is_any_caps_wanted() const;
+ int get_caps_wanted(int *ploner = 0, int *pother = 0, int shift = 0, int mask = -1) const;
+ bool issued_caps_need_gather(SimpleLock *lock);
+
+ // client writeable
+ bool is_clientwriteable() const { return state & STATE_CLIENTWRITEABLE; }
+ void mark_clientwriteable();
+ void clear_clientwriteable();
+
+ // -- authority --
+ mds_authority_t authority() const override;
+
+ // -- auth pins --
+ bool can_auth_pin(int *err_ret=nullptr) const override;
+ void auth_pin(void *by) override;
+ void auth_unpin(void *by) override;
+
+ // -- freeze --
+ bool is_freezing_inode() const { return state_test(STATE_FREEZING); }
+ bool is_frozen_inode() const { return state_test(STATE_FROZEN); }
+ bool is_frozen_auth_pin() const { return state_test(STATE_FROZENAUTHPIN); }
+ bool is_frozen() const override;
+ bool is_frozen_dir() const;
+ bool is_freezing() const override;
+
+ /* Freeze the inode. auth_pin_allowance lets the caller account for any
+ * auth_pins it is itself holding/responsible for. */
+ bool freeze_inode(int auth_pin_allowance=0);
+ void unfreeze_inode(MDSContext::vec& finished);
+ void unfreeze_inode();
+
+ void freeze_auth_pin();
+ void unfreeze_auth_pin();
+
+ // -- reference counting --
+ // Diagnostic hook invoked on an invalid unpin: logs the inode, the pin
+ // reason `by`, and the current refcount (plus the per-reason ref_map when
+ // MDS_REF_SET builds track it), then asserts the invariants that the
+ // faulty put() would have violated.
+ void bad_put(int by) override {
+ generic_dout(0) << " bad put " << *this << " by " << by << " " << pin_name(by) << " was " << ref
+#ifdef MDS_REF_SET
+ << " (" << ref_map << ")"
+#endif
+ << dendl;
+#ifdef MDS_REF_SET
+ // the reason being dropped must have at least one outstanding pin
+ ceph_assert(ref_map[by] > 0);
+#endif
+ ceph_assert(ref > 0);
+ }
+ // Diagnostic hook invoked on an invalid pin: logs the same state as
+ // bad_put(). Note ref_map[by] may default-insert a zero entry here,
+ // which the assert (>= 0) tolerates by design.
+ void bad_get(int by) override {
+ generic_dout(0) << " bad get " << *this << " by " << by << " " << pin_name(by) << " was " << ref
+#ifdef MDS_REF_SET
+ << " (" << ref_map << ")"
+#endif
+ << dendl;
+#ifdef MDS_REF_SET
+ ceph_assert(ref_map[by] >= 0);
+#endif
+ }
+ void first_get() override;
+ void last_put() override;
+ void _put() override;
+
+ // -- hierarchy stuff --
+ // Install `p` as the primary (hard-link) parent dentry. Normally the
+ // inode must not already have one; the config escape hatch permits
+ // re-linking when loading known-corrupt metadata.
+ void set_primary_parent(CDentry *p) {
+ ceph_assert(parent == 0 ||
+ g_conf().get_val<bool>("mds_hack_allow_loading_invalid_metadata"));
+ parent = p;
+ }
+ // Detach the primary parent dentry; `dn` must be the current parent.
+ void remove_primary_parent(CDentry *dn) {
+ ceph_assert(dn == parent);
+ parent = 0;
+ }
+ void add_remote_parent(CDentry *p);
+ void remove_remote_parent(CDentry *p);
+ // Number of remote (hard-link) parent dentries currently in cache.
+ int num_remote_parents() {
+ return remote_parents.size();
+ }
+
+ // Queue a projected (not-yet-committed) parent dentry, e.g. for an
+ // in-progress rename/link; committed later via pop_projected_parent().
+ void push_projected_parent(CDentry *dn) {
+ projected_parent.push_back(dn);
+ }
+ // Commit the oldest projected parent: it becomes the live `parent`.
+ // Requires at least one projection to be queued.
+ void pop_projected_parent() {
+ ceph_assert(projected_parent.size());
+ parent = projected_parent.front();
+ projected_parent.pop_front();
+ }
+ // True while an uncommitted parent projection is outstanding.
+ bool is_parent_projected() const {
+ return !projected_parent.empty();
+ }
+
+ mds_rank_t get_export_pin(bool inherit=true) const;
+ void check_pin_policy(mds_rank_t target);
+ void set_export_pin(mds_rank_t rank);
+ void queue_export_pin(mds_rank_t target);
+ void maybe_export_pin(bool update=false);
+
+ void set_ephemeral_pin(bool dist, bool rand);
+ void clear_ephemeral_pin(bool dist, bool rand);
+
+ void setxattr_ephemeral_dist(bool val=false);
+ // True if this inode is currently distributed-ephemerally pinned.
+ bool is_ephemeral_dist() const {
+ return state_test(STATE_DISTEPHEMERALPIN);
+ }
+
+ double get_ephemeral_rand() const;
+ void maybe_ephemeral_rand(double threshold=-1.0);
+ void setxattr_ephemeral_rand(double prob=0.0);
+ // True if this inode is currently random-ephemerally pinned.
+ bool is_ephemeral_rand() const {
+ return state_test(STATE_RANDEPHEMERALPIN);
+ }
+
+ // True if the on-inode policy enables either ephemeral-pin mode
+ // (a positive random-pin probability, or the distributed-pin flag).
+ bool has_ephemeral_policy() const {
+ return get_inode()->export_ephemeral_random_pin > 0.0 ||
+ get_inode()->export_ephemeral_distributed_pin;
+ }
+ // True if either ephemeral pin (distributed or random) is active now.
+ bool is_ephemerally_pinned() const {
+ return state_test(STATE_DISTEPHEMERALPIN) ||
+ state_test(STATE_RANDEPHEMERALPIN);
+ }
+
+ void print(std::ostream& out) const override;
+ void dump(ceph::Formatter *f, int flags = DUMP_DEFAULT) const;
+
+ /**
+ * Validate that the on-disk state of an inode matches what
+ * we expect from our memory state. Currently this checks that:
+ * 1) The backtrace associated with the file data exists and is correct
+ * 2) For directories, the actual inode metadata matches our memory state,
+ * 3) For directories, the rstats match
+ *
+ * @param results A freshly-created validated_data struct, with values set
+ * as described in the struct documentation.
+ * @param mdr The request to be responeded upon the completion of the
+ * validation (or NULL)
+ * @param fin Context to call back on completion (or NULL)
+ */
+ void validate_disk_state(validated_data *results,
+ MDSContext *fin);
+ static void dump_validation_results(const validated_data& results,
+ ceph::Formatter *f);
+
+ //bool hack_accessed = false;
+ //utime_t hack_load_stamp;
+
+ MDCache *mdcache;
+
+ SnapRealm *snaprealm = nullptr;
+ SnapRealm *containing_realm = nullptr;
+ snapid_t first, last;
+ mempool::mds_co::compact_set<snapid_t> dirty_old_rstats;
+
+ uint64_t last_journaled = 0; // log offset for the last time i was journaled
+ //loff_t last_open_journaled; // log offset for the last journaled EOpen
+ utime_t last_dirstat_prop;
+
+ // list item node for when we have unpropagated rstat data
+ elist<CInode*>::item dirty_rstat_item;
+
+ mempool::mds_co::set<client_t> client_snap_caps;
+ mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush;
+
+ // LogSegment lists i (may) belong to
+ elist<CInode*>::item item_dirty;
+ elist<CInode*>::item item_caps;
+ elist<CInode*>::item item_open_file;
+ elist<CInode*>::item item_dirty_parent;
+ elist<CInode*>::item item_dirty_dirfrag_dir;
+ elist<CInode*>::item item_dirty_dirfrag_nest;
+ elist<CInode*>::item item_dirty_dirfrag_dirfragtree;
+
+ // also update RecoveryQueue::RecoveryQueue() if you change this
+ elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
+ elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
+
+ inode_load_vec_t pop;
+ elist<CInode*>::item item_pop_lru;
+
+ // -- locks --
+ static LockType versionlock_type;
+ static LockType authlock_type;
+ static LockType linklock_type;
+ static LockType dirfragtreelock_type;
+ static LockType filelock_type;
+ static LockType xattrlock_type;
+ static LockType snaplock_type;
+ static LockType nestlock_type;
+ static LockType flocklock_type;
+ static LockType policylock_type;
+
+ // FIXME not part of mempool
+ LocalLockC versionlock;
+ SimpleLock authlock;
+ SimpleLock linklock;
+ ScatterLock dirfragtreelock;
+ ScatterLock filelock;
+ SimpleLock xattrlock;
+ SimpleLock snaplock;
+ ScatterLock nestlock;
+ SimpleLock flocklock;
+ SimpleLock policylock;
+
+ // -- caps -- (new)
+ // client caps
+ client_t loner_cap = -1, want_loner_cap = -1;
+
+protected:
+ // Lazily allocate and return the fcntl (POSIX) file-lock state.
+ // Never returns null; pair with clear_fcntl_lock_state() to free.
+ ceph_lock_state_t *get_fcntl_lock_state() {
+ if (!fcntl_locks)
+ fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
+ return fcntl_locks;
+ }
+ // Free the fcntl lock state (safe when already null) and reset it.
+ void clear_fcntl_lock_state() {
+ delete fcntl_locks;
+ fcntl_locks = NULL;
+ }
+ // Lazily allocate and return the BSD flock lock state.
+ // Never returns null; pair with clear_flock_lock_state() to free.
+ ceph_lock_state_t *get_flock_lock_state() {
+ if (!flock_locks)
+ flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
+ return flock_locks;
+ }
+ // Free the flock lock state (safe when already null) and reset it.
+ void clear_flock_lock_state() {
+ delete flock_locks;
+ flock_locks = NULL;
+ }
+ // Drop all file-lock state (both fcntl and flock) for this inode.
+ void clear_file_locks() {
+ clear_fcntl_lock_state();
+ clear_flock_lock_state();
+ }
+ // Serialize file-lock state: a has-fcntl flag, the fcntl state if set,
+ // then a has-flock flag and the flock state if set. An allocated-but-
+ // empty state is encoded as absent. Order must mirror
+ // _decode_file_locks() exactly.
+ void _encode_file_locks(ceph::buffer::list& bl) const {
+ using ceph::encode;
+ bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty();
+ encode(has_fcntl_locks, bl);
+ if (has_fcntl_locks)
+ encode(*fcntl_locks, bl);
+ bool has_flock_locks = flock_locks && !flock_locks->empty();
+ encode(has_flock_locks, bl);
+ if (has_flock_locks)
+ encode(*flock_locks, bl);
+ }
+ // Deserialize file-lock state written by _encode_file_locks(): for each
+ // of fcntl/flock, read the presence flag, then either decode into a
+ // (lazily allocated) state object or clear any stale local state so the
+ // in-memory view matches the encoded one.
+ void _decode_file_locks(ceph::buffer::list::const_iterator& p) {
+ using ceph::decode;
+ bool has_fcntl_locks;
+ decode(has_fcntl_locks, p);
+ if (has_fcntl_locks)
+ decode(*get_fcntl_lock_state(), p);
+ else
+ clear_fcntl_lock_state();
+ bool has_flock_locks;
+ decode(has_flock_locks, p);
+ if (has_flock_locks)
+ decode(*get_flock_lock_state(), p);
+ else
+ clear_flock_lock_state();
+ }
+
+ /**
+ * Return the pool ID where we currently write backtraces for
+ * this inode (in addition to inode.old_pools)
+ *
+ * @returns a pool ID >=0
+ */
+ int64_t get_backtrace_pool() const;
+
+ // parent dentries in cache
+ CDentry *parent = nullptr; // primary link
+ mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
+
+ mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
+
+ mds_authority_t inode_auth = CDIR_AUTH_DEFAULT;
+
+ // -- distributed state --
+ // file capabilities
+ mempool_cap_map client_caps; // client -> caps
+ mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
+ int replica_caps_wanted = 0; // [replica] what i've requested from auth
+ int num_caps_notable = 0;
+
+ ceph_lock_state_t *fcntl_locks = nullptr;
+ ceph_lock_state_t *flock_locks = nullptr;
+
+ // -- waiting --
+ mempool::mds_co::compact_map<frag_t, MDSContext::vec > waiting_on_dir;
+
+
+ // -- freezing inode --
+ int auth_pin_freeze_allowance = 0;
+ elist<CInode*>::item item_freezing_inode;
+ void maybe_finish_freeze_inode();
+private:
+
+ friend class ValidationContinuation;
+
+ /**
+ * Create a scrub_info_t struct for the scrub_infop pointer.
+ */
+ void scrub_info_create() const;
+ /**
+ * Delete the scrub_info_t struct if it's not got any useful data
+ */
+ void scrub_maybe_delete_info();
+
+ void pop_projected_snaprealm(sr_t *next_snaprealm, bool early);
+
+ bool _validate_disk_state(class ValidationContinuation *c,
+ int rval, int stage);
+
+ // One entry of the projection list: an immutable snapshot of the inode
+ // metadata, xattrs, and snap node for an in-flight (uncommitted) update.
+ // Move-only by construction; default construction is disallowed so an
+ // entry always carries a concrete projected state.
+ struct projected_const_node {
+ inode_const_ptr inode;
+ xattr_map_const_ptr xattrs;
+ sr_t *snapnode; // raw pointer — ownership not visible here; TODO confirm lifetime
+
+ projected_const_node() = delete;
+ projected_const_node(projected_const_node&&) = default;
+ explicit projected_const_node(const inode_const_ptr& i, const xattr_map_const_ptr& x, sr_t *s) :
+ inode(i), xattrs(x), snapnode(s) {}
+ };
+
+ mempool::mds_co::list<projected_const_node> projected_nodes; // projected values (only defined while dirty)
+ size_t num_projected_srnodes = 0;
+
+ // -- cache infrastructure --
+ mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
+
+ //for the purpose of quickly determining whether there's a subtree root or exporting dir
+ int num_subtree_roots = 0;
+ int num_exporting_dirs = 0;
+
+ int stickydir_ref = 0;
+ std::unique_ptr<scrub_info_t> scrub_infop;
+ /** @} Scrubbing and fsck */
+};
+
+std::ostream& operator<<(std::ostream& out, const CInode& in);
+
+extern cinode_lock_info_t cinode_lock_info[];
+extern int num_cinode_locks;
+#undef dout_context
+#endif