Diffstat (limited to 'src/client/RWRef.h')
-rw-r--r-- | src/client/RWRef.h | 244 |
1 files changed, 244 insertions, 0 deletions
diff --git a/src/client/RWRef.h b/src/client/RWRef.h
new file mode 100644
index 000000000..9035a0937
--- /dev/null
+++ b/src/client/RWRef.h
@@ -0,0 +1,244 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ * ============
+ *
+ * This is a common read/write reference framework, which works
+ * similarly to a RW lock. The difference is that the "readers" do not
+ * hold any lock: when the "require" state is matched they increase a
+ * reference count instead, and when it is not matched they set a flag
+ * to tell the callers. There is no wait mechanism for "readers" to
+ * wait for the state to match; the callers decide what to do next.
+ *
+ * Usage, taking the libcephfs client/Client.cc case as an example:
+ *
+ * The Readers:
+ *
+ * The ll_read()/ll_write(), etc. functions work as "readers". They
+ * only need to define a RWRef object; the RWRef constructor checks
+ * whether the state is MOUNTED or MOUNTING. If it is not, the call
+ * fails and returns immediately without doing anything; otherwise it
+ * increases the reference and continues. When the RWRef object is
+ * destructed, the destructor decreases the reference and notifies
+ * the "writers" that may be waiting.
+ *
+ * The Writers:
+ *
+ * The _unmount() function works as a "writer". It also only needs to
+ * define a RWRef object; the RWRef constructor first advances the
+ * state to the next stage, which makes all newly arriving "readers"
+ * fail, and then it waits for all in-flight "readers" to finish.
+ *
+ * With this the "readers" need no locks and can run in parallel, and
+ * RWRef avoids potential deadlocks such as:
+ *
+ * With RWLock:
+ *
+ * ThreadA:                        ThreadB:
+ *
+ * write_lock<RWLock1>.lock();     another_lock.lock();
+ * state = NEXT_STATE;             ...
+ * another_lock.lock();            read_lock<RWLock1>.lock();
+ * ...                             if (state == STATE) {
+ *                                   ...
+ *                                 }
+ *                                 ...
+ *
+ * With RWRef:
+ *
+ * ThreadA:                            ThreadB:
+ *
+ * w = RWRef(myS, NEXT_STATE, false);  another_lock.lock();
+ * another_lock.lock();                r = RWRef(myS, STATE);
+ * ...                                 if (r.is_state_satisfied()) {
+ *                                       ...
+ *                                     }
+ *                                     ...
+ *
+ * Also, in the RWLock case, if ThreadA needs to do
+ * cond.wait(&another_lock) it will go to sleep while still holding
+ * write_lock<RWLock1>. If the ThreadBs are serving IOs, they may then
+ * be stuck for a very long time and time out in the upper layer,
+ * which may keep retrying. With RWRef, ThreadB either fails or
+ * continues immediately without getting stuck, and the upper layer
+ * quickly knows what to do next.
+ */
+
+#ifndef CEPH_RWRef_Posix__H
+#define CEPH_RWRef_Posix__H
+
+#include <string>
+#include "include/ceph_assert.h"
+#include "common/ceph_mutex.h"
+
+/* The state machine info shared by the readers and writers */
+template<typename T>
+struct RWRefState {
+public:
+  template <typename T1> friend class RWRef;
+
+  /*
+   * The current state. Currently you need to define the
+   * states by yourself.
+   */
+  T state;
+
+  /*
+   * User defined method to check whether the "require" state
+   * is in the proper range we need.
+   *
+   * For example, in client/Client.cc: some reader operations need
+   * to make sure the client state is mounting or mounted, so they
+   * pass "require = mounting" to the RWRef constructor. Then
+   * check_reader_state() should return true if the current state
+   * is already mounting or mounted.
+   */
+  virtual int check_reader_state(T require) const = 0;
+
+  /*
+   * User defined method to check whether the "require" state
+   * is in the proper range we need.
+   *
+   * This will usually be the state transition check.
+   */
+  virtual int check_writer_state(T require) const = 0;
+
+  /*
+   * User defined method to check whether the "require"
+   * state is valid or not.
+   */
+  virtual bool is_valid_state(T require) const = 0;
+
+  int64_t get_state() const {
+    std::scoped_lock l{lock};
+    return state;
+  }
+
+  bool check_current_state(T require) const {
+    ceph_assert(is_valid_state(require));
+
+    std::scoped_lock l{lock};
+    return state == require;
+  }
+
+  RWRefState(T init_state, const char *lockname, uint64_t _reader_cnt=0)
+    : state(init_state), lock(ceph::make_mutex(lockname)), reader_cnt(_reader_cnt) {}
+  virtual ~RWRefState() {}
+
+private:
+  mutable ceph::mutex lock;
+  ceph::condition_variable cond;
+  uint64_t reader_cnt = 0;
+};
+
+template<typename T>
+class RWRef {
+public:
+  RWRef(const RWRef& other) = delete;
+  const RWRef& operator=(const RWRef& other) = delete;
+
+  RWRef(RWRefState<T> &s, T require, bool ir=true)
+    : S(s), is_reader(ir) {
+    ceph_assert(S.is_valid_state(require));
+
+    std::scoped_lock l{S.lock};
+    if (likely(is_reader)) { // Readers will update the reader_cnt
+      if (S.check_reader_state(require)) {
+        S.reader_cnt++;
+        satisfied = true;
+      }
+    } else { // Writers will update the state
+      is_reader = false;
+
+      /*
+       * If the current state is not the same as "require",
+       * update the state and record that we are the first
+       * writer.
+       *
+       * If one writer is already running or has finished,
+       * the user can choose whether to continue or just
+       * bail out.
+       */
+      if (S.check_writer_state(require)) {
+        first_writer = true;
+        S.state = require;
+      }
+      satisfied = true;
+    }
+  }
+
+  /*
+   * Whether the "require" state is in the proper range of
+   * the states.
+   */
+  bool is_state_satisfied() const {
+    return satisfied;
+  }
+
+  /*
+   * Update the state; only the writer is allowed to do this.
+   */
+  void update_state(T new_state) {
+    ceph_assert(!is_reader);
+    ceph_assert(S.is_valid_state(new_state));
+
+    std::scoped_lock l{S.lock};
+    S.state = new_state;
+  }
+
+  /*
+   * Whether we are the first writer for the current state.
+   */
+  bool is_first_writer() const {
+    return first_writer;
+  }
+
+  /*
+   * Wait for all the in-flight "readers" to finish.
+   */
+  void wait_readers_done() {
+    // Only writers can wait
+    ceph_assert(!is_reader);
+
+    std::unique_lock l{S.lock};
+
+    S.cond.wait(l, [this] {
+      return !S.reader_cnt;
+    });
+  }
+
+  ~RWRef() {
+    std::scoped_lock l{S.lock};
+    if (!is_reader)
+      return;
+
+    if (!satisfied)
+      return;
+
+    /*
+     * Decrease the refcnt and notify the waiters
+     */
+    if (--S.reader_cnt == 0)
+      S.cond.notify_all();
+  }
+
+private:
+  RWRefState<T> &S;
+  bool satisfied = false;
+  bool first_writer = false;
+  bool is_reader = true;
+};
+
+#endif // !CEPH_RWRef_Posix__H
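As a concrete illustration of the call pattern the header comment describes, here is a minimal usage sketch. It assumes the header above and the Ceph build environment it depends on (ceph_assert, ceph::mutex); the MountState enum, the MountStateMachine subclass and the do_read()/do_unmount() functions are hypothetical names invented for this example, not part of the patch.

#include <cerrno>
#include "client/RWRef.h"

// Hypothetical state values for the example; real callers define their own.
enum MountState : int64_t {
  UNMOUNTED,
  MOUNTING,
  MOUNTED,
  UNMOUNTING,
};

struct MountStateMachine : RWRefState<MountState> {
  MountStateMachine()
    : RWRefState<MountState>(UNMOUNTED, "MountStateMachine::lock") {}

  // Readers asking for MOUNTING are satisfied while mounting or mounted.
  int check_reader_state(MountState require) const override {
    if (require == MOUNTING)
      return state == MOUNTING || state == MOUNTED;
    return state == require;
  }

  // Writers may move to any state we are not already in.
  int check_writer_state(MountState require) const override {
    return state != require;
  }

  bool is_valid_state(MountState require) const override {
    switch (require) {
    case UNMOUNTED:
    case MOUNTING:
    case MOUNTED:
    case UNMOUNTING:
      return true;
    default:
      return false;
    }
  }
};

static MountStateMachine mount_state;

// Reader path: take a reference if the client is (being) mounted,
// otherwise bail out immediately without blocking.
int do_read() {
  RWRef<MountState> r(mount_state, MOUNTING);
  if (!r.is_state_satisfied())
    return -ENOTCONN;
  // ... perform the read while the reference is held ...
  return 0;
}   // the RWRef destructor drops the reference and wakes any waiting writer

// Writer path: advance the state so that new readers fail, then wait
// for the in-flight readers to drain before tearing things down.
void do_unmount() {
  RWRef<MountState> w(mount_state, UNMOUNTING, false);
  if (!w.is_first_writer())
    return;                  // another unmount is already in progress
  w.wait_readers_done();
  // ... safe to release resources here ...
  w.update_state(UNMOUNTED);
}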