summaryrefslogtreecommitdiffstats
path: root/src/mds/Capability.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mds/Capability.h')
-rw-r--r--src/mds/Capability.h406
1 files changed, 406 insertions, 0 deletions
diff --git a/src/mds/Capability.h b/src/mds/Capability.h
new file mode 100644
index 00000000..a54f013c
--- /dev/null
+++ b/src/mds/Capability.h
@@ -0,0 +1,406 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+
+#ifndef CEPH_CAPABILITY_H
+#define CEPH_CAPABILITY_H
+
+#include "include/buffer_fwd.h"
+#include "include/counter.h"
+#include "include/mempool.h"
+#include "include/xlist.h"
+
+#include "common/config.h"
+
+#include "mdstypes.h"
+
+
+/*
+
+ Capability protocol notes.
+
+- two types of cap events from mds -> client:
+ - cap "issue" in a MClientReply, or an MClientCaps IMPORT op.
+ - cap "update" (revocation or grant) .. an MClientCaps message.
+- if client has cap, the mds should have it too.
+
+- if client has no dirty data, it can release it without waiting for an mds ack.
+ - client may thus get a cap _update_ and not have the cap. ignore it.
+
+- mds should track seq of last issue. any release
+ attempt will only succeed if the client has seen the latest.
+
+- an UPDATE updates the client's issued caps, wanted, etc. it may also flush dirty metadata.
+ - 'caps' are which caps the client retains.
+ - if 0, client wishes to release the cap
+ - 'wanted' is which caps the client wants.
+ - 'dirty' is which metadata is to be written.
+ - client gets a FLUSH_ACK with matching dirty flags indicating which caps were written.
+
+- a FLUSH_ACK acks a FLUSH.
+ - 'dirty' is the _original_ FLUSH's dirty (i.e., which metadata was written back)
+ - 'seq' is the _original_ FLUSH's seq.
+ - 'caps' is the _original_ FLUSH's caps (not actually important)
+ - client can conclude that (dirty & ~caps) bits were successfully cleaned.
+
+- a FLUSHSNAP flushes snapshot metadata.
+ - 'dirty' indicates which caps were dirty, if any.
+ - mds writes metadata. if dirty!=0, replies with FLUSHSNAP_ACK.
+
+ */
+
+class CInode; // forward declaration: this header only handles CInode through pointers (ctor argument, get_inode())
+class Session; // forward declaration: this header only handles Session through pointers (ctor argument, get_session())
+
+namespace ceph {
+ class Formatter; // forward declaration: only passed by pointer to the dump() methods below
+}
+
+class Capability : public Counter<Capability> { // tracks wanted/pending/issued cap bits and in-flight revocations for one (inode, session) pair
+public:
+ MEMPOOL_CLASS_HELPERS();
+ // Export: value snapshot of a cap, produced by make_export() and consumed by merge(const Export&, bool).
+ struct Export {
+ int64_t cap_id = 0;
+ int32_t wanted = 0;
+ int32_t issued = 0;
+ int32_t pending = 0;
+ snapid_t client_follows;
+ ceph_seq_t seq = 0; // make_export() stores get_last_seq() here
+ ceph_seq_t mseq = 0; // make_export() stores the exporter's mseq+1 here
+ utime_t last_issue_stamp;
+ uint32_t state = 0; // make_export() stores the full state word; merge() only honors MASK_STATE_EXPORTED bits
+ Export() {}
+ Export(int64_t id, int w, int i, int p, snapid_t cf,
+ ceph_seq_t s, ceph_seq_t m, utime_t lis, unsigned st) :
+ cap_id(id), wanted(w), issued(i), pending(p), client_follows(cf),
+ seq(s), mseq(m), last_issue_stamp(lis), state(st) {}
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::const_iterator &p);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<Export*>& ls);
+ };
+ struct Import { // (cap_id, issue_seq, mseq) triple; every field defaults to 0
+ int64_t cap_id;
+ ceph_seq_t issue_seq;
+ ceph_seq_t mseq;
+ Import() : cap_id(0), issue_seq(0), mseq(0) {}
+ Import(int64_t i, ceph_seq_t s, ceph_seq_t m) : cap_id(i), issue_seq(s), mseq(m) {}
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::const_iterator &p);
+ void dump(Formatter *f) const;
+ };
+ struct revoke_info { // one entry of _revokes, recorded by issue() just before it removes bits from _pending
+ __u32 before; // value of _pending prior to the revocation
+ ceph_seq_t seq, last_issue; // values of last_sent and last_issue when the entry was recorded
+ revoke_info() : before(0), seq(0), last_issue(0) {}
+ revoke_info(__u32 b, ceph_seq_t s, ceph_seq_t li) : before(b), seq(s), last_issue(li) {}
+ void encode(bufferlist& bl) const;
+ void decode(bufferlist::const_iterator& bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<revoke_info*>& ls);
+ };
+ // bit flags kept in the private 'state' member
+ const static unsigned STATE_NOTABLE = (1<<0);
+ const static unsigned STATE_NEW = (1<<1);
+ const static unsigned STATE_IMPORTING = (1<<2);
+ const static unsigned STATE_NEEDSNAPFLUSH = (1<<3);
+ const static unsigned STATE_CLIENTWRITEABLE = (1<<4);
+ const static unsigned STATE_NOINLINE = (1<<5);
+ const static unsigned STATE_NOPOOLNS = (1<<6);
+ const static unsigned STATE_NOQUOTA = (1<<7);
+ // subset of state bits that merge(const Export&, bool) copies from the other side
+ const static unsigned MASK_STATE_EXPORTED =
+ (STATE_CLIENTWRITEABLE | STATE_NOINLINE | STATE_NOPOOLNS | STATE_NOQUOTA);
+
+ Capability(CInode *i=nullptr, Session *s=nullptr, uint64_t id=0); // defined out of line
+ Capability(const Capability& other) = delete; // non-copyable
+
+ const Capability& operator=(const Capability& other) = delete; // non-assignable
+ // _pending equals the mask last passed to issue(); calc_issued() rebuilds _issued as _pending OR'd with every outstanding revoke's 'before' mask
+ int pending() const {
+ return _pending;
+ }
+ int issued() const {
+ return _issued;
+ }
+ int revoking() const { // bits still in _issued that are no longer in _pending
+ return _issued & ~_pending;
+ }
+ ceph_seq_t issue(unsigned c, bool reval=false) { // make c the pending mask (every branch ends with _pending == c); always bumps and returns last_sent
+ if (reval)
+ revalidate();
+
+ if (_pending & ~c) {
+ // revoking (and maybe adding) bits. note caps prior to this revocation
+ _revokes.emplace_back(_pending, last_sent, last_issue);
+ _pending = c;
+ _issued |= c;
+ if (!is_notable())
+ mark_notable();
+ } else if (~_pending & c) {
+ // adding bits only. remove obsolete revocations?
+ _pending |= c;
+ _issued |= c;
+ // drop trailing _revokes whose 'before' mask is now fully covered by _pending
+ while (!_revokes.empty() &&
+ (_revokes.back().before & ~_pending) == 0)
+ _revokes.pop_back();
+ } else {
+ // no change.
+ ceph_assert(_pending == c);
+ }
+ //last_issue =
+ inc_last_seq();
+ return last_sent;
+ }
+ ceph_seq_t issue_norevoke(unsigned c, bool reval=false) { // OR c into _pending/_issued without recording a revoke; clears STATE_NEW; bumps and returns last_sent
+ if (reval)
+ revalidate();
+
+ _pending |= c;
+ _issued |= c;
+ clear_new();
+
+ inc_last_seq();
+ return last_sent;
+ }
+ void confirm_receipt(ceph_seq_t seq, unsigned caps) { // reconcile _issued/_pending/_revokes with the caps reported for sequence number seq
+ bool was_revoking = (_issued & ~_pending);
+ if (seq == last_sent) { // matches the newest sequence: all revoke history is dropped
+ _revokes.clear();
+ _issued = caps;
+ // don't add bits
+ _pending &= caps;
+ } else {
+ // can i forget any revocations?
+ while (!_revokes.empty() && _revokes.front().seq < seq)
+ _revokes.pop_front();
+ if (!_revokes.empty()) {
+ if (_revokes.front().seq == seq) // exact match: overwrite the recorded 'before' mask with the reported caps
+ _revokes.begin()->before = caps;
+ calc_issued();
+ } else {
+ // seq < last_sent
+ _issued = caps | _pending;
+ }
+ }
+
+ if (was_revoking && _issued == _pending) { // revocation just completed: unlink from both revoking lists
+ item_revoking_caps.remove_myself();
+ item_client_revoking_caps.remove_myself();
+ maybe_clear_notable();
+ }
+ //check_rdcaps_list();
+ }
+ // we may get a release racing with revocations, which means our revokes will be ignored
+ // by the client. clean them out of our _revokes history so we don't wait on them.
+ void clean_revoke_from(ceph_seq_t li) { // pops front entries with last_issue <= li; recomputes _issued only if something was removed
+ bool changed = false;
+ while (!_revokes.empty() && _revokes.front().last_issue <= li) {
+ _revokes.pop_front();
+ changed = true;
+ }
+ if (changed) {
+ bool was_revoking = (_issued & ~_pending);
+ calc_issued();
+ if (was_revoking && _issued == _pending) { // same completion handling as in confirm_receipt()
+ item_revoking_caps.remove_myself();
+ item_client_revoking_caps.remove_myself();
+ maybe_clear_notable();
+ }
+ }
+ }
+ ceph_seq_t get_mseq() const { return mseq; }
+ void inc_mseq() { mseq++; }
+
+ utime_t get_last_issue_stamp() const { return last_issue_stamp; }
+ utime_t get_last_revoke_stamp() const { return last_revoke_stamp; }
+
+ void set_last_issue() { last_issue = last_sent; } // snapshot the current last_sent into last_issue
+ void set_last_issue_stamp(utime_t t) { last_issue_stamp = t; }
+ void set_last_revoke_stamp(utime_t t) { last_revoke_stamp = t; }
+ void reset_num_revoke_warnings() { num_revoke_warnings = 0; }
+ void inc_num_revoke_warnings() { ++num_revoke_warnings; }
+ unsigned get_num_revoke_warnings() const { return num_revoke_warnings; }
+
+ void set_cap_id(uint64_t i) { cap_id = i; }
+ uint64_t get_cap_id() const { return cap_id; }
+
+ //ceph_seq_t get_last_issue() { return last_issue; }
+ // 'suppress' is a plain counter; is_suppress() is true while it is positive
+ bool is_suppress() const { return suppress > 0; }
+ void inc_suppress() { suppress++; }
+ void dec_suppress() { suppress--; }
+
+ static bool is_wanted_notable(int wanted) { // true when wanted intersects CEPH_CAP_ANY_WR, CEPH_CAP_FILE_WR or CEPH_CAP_FILE_RD
+ return wanted & (CEPH_CAP_ANY_WR|CEPH_CAP_FILE_WR|CEPH_CAP_FILE_RD);
+ }
+ bool is_notable() const { return state & STATE_NOTABLE; }
+ // simple test/set/clear helpers for individual STATE_* bits
+ bool is_stale() const;
+ bool is_valid() const;
+ bool is_new() const { return state & STATE_NEW; }
+ void mark_new() { state |= STATE_NEW; }
+ void clear_new() { state &= ~STATE_NEW; }
+ bool is_importing() const { return state & STATE_IMPORTING; }
+ void mark_importing() { state |= STATE_IMPORTING; }
+ void clear_importing() { state &= ~STATE_IMPORTING; }
+ bool need_snapflush() const { return state & STATE_NEEDSNAPFLUSH; }
+ void mark_needsnapflush() { state |= STATE_NEEDSNAPFLUSH; }
+ void clear_needsnapflush() { state &= ~STATE_NEEDSNAPFLUSH; }
+
+ bool is_clientwriteable() const { return state & STATE_CLIENTWRITEABLE; }
+ void mark_clientwriteable() { // sets the bit once and, if not already notable, calls mark_notable()
+ if (!is_clientwriteable()) {
+ state |= STATE_CLIENTWRITEABLE;
+ if (!is_notable())
+ mark_notable();
+ }
+ }
+ void clear_clientwriteable() { // clears the bit once and lets maybe_clear_notable() decide about STATE_NOTABLE
+ if (is_clientwriteable()) {
+ state &= ~STATE_CLIENTWRITEABLE;
+ maybe_clear_notable();
+ }
+ }
+
+ bool is_noinline() const { return state & STATE_NOINLINE; }
+ bool is_nopoolns() const { return state & STATE_NOPOOLNS; }
+ bool is_noquota() const { return state & STATE_NOQUOTA; }
+
+ CInode *get_inode() const { return inode; }
+ Session *get_session() const { return session; }
+ client_t get_client() const;
+
+ // caps this client wants to hold
+ int wanted() const { return _wanted; }
+ void set_wanted(int w); // defined out of line
+
+ void inc_last_seq() { last_sent++; }
+ ceph_seq_t get_last_seq() const {
+ return last_sent;
+ }
+ ceph_seq_t get_last_issue() const { return last_issue; }
+
+ void reset_seq() { // zero both last_sent and last_issue
+ last_sent = 0;
+ last_issue = 0;
+ }
+
+ // -- exports --
+ Export make_export() const { // snapshot of this cap; note the exported mseq is mseq+1 and seq is last_sent
+ return Export(cap_id, wanted(), issued(), pending(), client_follows, get_last_seq(), mseq+1, last_issue_stamp, state);
+ }
+ void merge(const Export& other, bool auth_cap) { // fold an exported snapshot into this cap via issue() and set_wanted()
+ // issued + pending
+ int newpending = other.pending | pending();
+ if (other.issued & ~newpending)
+ issue(other.issued | newpending);
+ else
+ issue(newpending);
+ last_issue_stamp = other.last_issue_stamp;
+
+ client_follows = other.client_follows;
+
+ state |= other.state & MASK_STATE_EXPORTED; // only the exported subset of state bits is adopted
+ if ((other.state & STATE_CLIENTWRITEABLE) && !is_notable())
+ mark_notable();
+
+ // wanted
+ set_wanted(wanted() | other.wanted);
+ if (auth_cap)
+ mseq = other.mseq; // mseq is taken from the export only when auth_cap is set
+ }
+ void merge(int otherwanted, int otherissued) { // raw-mask variant: leaves stamps, client_follows, state and mseq untouched
+ // issued + pending
+ int newpending = pending();
+ if (otherissued & ~newpending)
+ issue(otherissued | newpending);
+ else
+ issue(newpending);
+
+ // wanted
+ set_wanted(wanted() | otherwanted);
+ }
+
+ void revoke() { // if bits are being revoked, treat the revocation as confirmed at last_sent with only pending() held
+ if (revoking())
+ confirm_receipt(last_sent, pending());
+ }
+
+ // serializers
+ void encode(bufferlist &bl) const;
+ void decode(bufferlist::const_iterator &bl);
+ void dump(Formatter *f) const;
+ static void generate_test_instances(list<Capability*>& ls);
+ // public fields: client_follows is copied by make_export()/merge(); the others are not touched by the inline code in this header
+ snapid_t client_follows;
+ version_t client_xattr_version;
+ version_t client_inline_version;
+ int64_t last_rbytes;
+ int64_t last_rsize;
+ // xlist membership hooks; the two *_revoking_caps hooks are unlinked inline once _issued == _pending after a revocation
+ xlist<Capability*>::item item_session_caps;
+ xlist<Capability*>::item item_snaprealm_caps;
+ xlist<Capability*>::item item_revoking_caps;
+ xlist<Capability*>::item item_client_revoking_caps;
+
+private:
+ CInode *inode;
+ Session *session;
+
+ uint64_t cap_id;
+ uint32_t cap_gen;
+
+ __u32 _wanted; // what the client wants (ideally)
+
+ utime_t last_issue_stamp;
+ utime_t last_revoke_stamp;
+ unsigned num_revoke_warnings;
+
+ // track in-flight caps --------------
+ // - add new caps to _pending
+ // - track revocations in _revokes list
+ __u32 _pending, _issued;
+ mempool::mds_co::list<revoke_info> _revokes;
+
+ ceph_seq_t last_sent; // bumped by inc_last_seq(), i.e. on every issue()/issue_norevoke()
+ ceph_seq_t last_issue; // copy of last_sent taken by set_last_issue()
+ ceph_seq_t mseq; // incremented by inc_mseq(); overwritten by merge() when auth_cap is set
+
+ int suppress; // counter behind is_suppress()/inc_suppress()/dec_suppress()
+ unsigned state; // STATE_* bit flags
+
+ void calc_issued() { // rebuild _issued as _pending OR'd with the 'before' mask of every outstanding revoke
+ _issued = _pending;
+ for (const auto &r : _revokes) {
+ _issued |= r.before;
+ }
+ }
+
+ void revalidate(); // defined out of line; invoked by issue()/issue_norevoke() when reval is true
+
+ void mark_notable(); // defined out of line
+ void maybe_clear_notable(); // defined out of line
+};
+
+WRITE_CLASS_ENCODER(Capability::Export) // NOTE(review): macro is defined outside this file; presumably hooks the encode()/decode() members declared above into the generic encoding helpers - confirm
+WRITE_CLASS_ENCODER(Capability::Import)
+WRITE_CLASS_ENCODER(Capability::revoke_info)
+WRITE_CLASS_ENCODER(Capability)
+
+
+
+#endif