summaryrefslogtreecommitdiffstats
path: root/src/client/RWRef.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/client/RWRef.h244
1 files changed, 244 insertions, 0 deletions
diff --git a/src/client/RWRef.h b/src/client/RWRef.h
new file mode 100644
index 000000000..9035a0937
--- /dev/null
+++ b/src/client/RWRef.h
@@ -0,0 +1,244 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ * ============
+ *
+ * This is a common read/write reference framework, which will work
+ * simliarly to a RW lock, the difference here is that for the "readers"
+ * they won't hold any lock but will increase a reference instead when
+ * the "require" state is matched, or set a flag to tell the callers
+ * that the "require" state is not matched and also there is no any
+ * wait mechanism for "readers" to wait the state until it matches. It
+ * will let the callers determine what to do next.
+ *
+ * The usage, such as in libcephfs's client/Client.cc case:
+ *
+ * The Readers:
+ *
+ * For the ll_read()/ll_write(), etc fucntions, they will work as
+ * "readers", in the beginning they just need to define a RWRef
+ * object and in RWRef constructor it will check if the state is
+ * MOUNTED or MOUTING, if not it will fail and return directly with
+ * doing nothing, or it will increase the reference and continue.
+ * And when destructing the RWRef object, in the RWRef destructor
+ * it will decrease the reference and notify the "writers" who maybe
+ * waiting.
+ *
+ * The Writers:
+ *
+ * And for the _unmount() function , as a "writer", in the beginning
+ * it will also just need to define a RWRef object and in RWRef
+ * constructor it will update the state to next stage first, which then
+ * will fail all the new comming "readers", and then wait for all the
+ * "readers" to finish.
+ *
+ * With this we can get rid of the locks for all the "readers" and they
+ * can run in parallel. And we won't have any potential deadlock issue
+ * with RWRef, such as:
+ *
+ * With RWLock:
+ *
+ * ThreadA: ThreadB:
+ *
+ * write_lock<RWLock1>.lock(); another_lock.lock();
+ * state = NEXT_STATE; ...
+ * another_lock.lock(); read_lock<RWLock1>.lock();
+ * ... if (state == STATE) {
+ * ...
+ * }
+ * ...
+ *
+ * With RWRef:
+ *
+ * ThreadA: ThreadB:
+ *
+ * w = RWRef(myS, NEXT_STATE, false); another_lock.lock();
+ * another_lock.lock(); r = RWRef(myS, STATE);
+ * ... if (r.is_state_satisfied()) {
+ * ...
+ * }
+ * ...
+ *
+ * And also in ThreadA, if it needs to do the cond.wait(&another_lock),
+ * it will goto sleep by holding the write_lock<RWLock1> for the RWLock
+ * case, if the ThreadBs are for some IOs, they may stuck for a very long
+ * time that may get timedout in the uplayer which may keep retrying.
+ * With the RWRef, the ThreadB will fail or continue directly without any
+ * stuck, and the uplayer will knew what next to do quickly.
+ */
+
+#ifndef CEPH_RWRef_Posix__H
+#define CEPH_RWRef_Posix__H
+
+#include <string>
+#include "include/ceph_assert.h"
+#include "common/ceph_mutex.h"
+
+/* The status mechanism info */
+template<typename T>
+struct RWRefState {
+ public:
+ template <typename T1> friend class RWRef;
+
+ /*
+ * This will be status mechanism. Currently you need to define
+ * it by yourself.
+ */
+ T state;
+
+ /*
+ * User defined method to check whether the "require" state
+ * is in the proper range we need.
+ *
+ * For example for the client/Client.cc:
+ * In some reader operation cases we need to make sure the
+ * client state is in mounting or mounted states, then it
+ * will set the "require = mounting" in class RWRef's constructor.
+ * Then the check_reader_state() should return truth if the
+ * state is already in mouting or mounted state.
+ */
+ virtual int check_reader_state(T require) const = 0;
+
+ /*
+ * User defined method to check whether the "require" state
+ * is in the proper range we need.
+ *
+ * This will usually be the state migration check.
+ */
+ virtual int check_writer_state(T require) const = 0;
+
+ /*
+ * User defined method to check whether the "require"
+ * state is valid or not.
+ */
+ virtual bool is_valid_state(T require) const = 0;
+
+ int64_t get_state() const {
+ std::scoped_lock l{lock};
+ return state;
+ }
+
+ bool check_current_state(T require) const {
+ ceph_assert(is_valid_state(require));
+
+ std::scoped_lock l{lock};
+ return state == require;
+ }
+
+ RWRefState(T init_state, const char *lockname, uint64_t _reader_cnt=0)
+ : state(init_state), lock(ceph::make_mutex(lockname)), reader_cnt(_reader_cnt) {}
+ virtual ~RWRefState() {}
+
+ private:
+ mutable ceph::mutex lock;
+ ceph::condition_variable cond;
+ uint64_t reader_cnt = 0;
+};
+
+template<typename T>
+class RWRef {
+public:
+ RWRef(const RWRef& other) = delete;
+ const RWRef& operator=(const RWRef& other) = delete;
+
+ RWRef(RWRefState<T> &s, T require, bool ir=true)
+ :S(s), is_reader(ir) {
+ ceph_assert(S.is_valid_state(require));
+
+ std::scoped_lock l{S.lock};
+ if (likely(is_reader)) { // Readers will update the reader_cnt
+ if (S.check_reader_state(require)) {
+ S.reader_cnt++;
+ satisfied = true;
+ }
+ } else { // Writers will update the state
+ is_reader = false;
+
+ /*
+ * If the current state is not the same as "require"
+ * then update the state and we are the first writer.
+ *
+ * Or if there already has one writer running or
+ * finished, it will let user to choose to continue
+ * or just break.
+ */
+ if (S.check_writer_state(require)) {
+ first_writer = true;
+ S.state = require;
+ }
+ satisfied = true;
+ }
+ }
+
+ /*
+ * Whether the "require" state is in the proper range of
+ * the states.
+ */
+ bool is_state_satisfied() const {
+ return satisfied;
+ }
+
+ /*
+ * Update the state, and only the writer could do the update.
+ */
+ void update_state(T new_state) {
+ ceph_assert(!is_reader);
+ ceph_assert(S.is_valid_state(new_state));
+
+ std::scoped_lock l{S.lock};
+ S.state = new_state;
+ }
+
+ /*
+ * For current state whether we are the first writer or not
+ */
+ bool is_first_writer() const {
+ return first_writer;
+ }
+
+ /*
+ * Will wait for all the in-flight "readers" to finish
+ */
+ void wait_readers_done() {
+ // Only writers can wait
+ ceph_assert(!is_reader);
+
+ std::unique_lock l{S.lock};
+
+ S.cond.wait(l, [this] {
+ return !S.reader_cnt;
+ });
+ }
+
+ ~RWRef() {
+ std::scoped_lock l{S.lock};
+ if (!is_reader)
+ return;
+
+ if (!satisfied)
+ return;
+
+ /*
+ * Decrease the refcnt and notify the waiters
+ */
+ if (--S.reader_cnt == 0)
+ S.cond.notify_all();
+ }
+
+private:
+ RWRefState<T> &S;
+ bool satisfied = false;
+ bool first_writer = false;
+ bool is_reader = true;
+};
+
+#endif // !CEPH_RWRef_Posix__H