Diffstat (limited to 'src/common/WorkQueue.h')
-rw-r--r-- | src/common/WorkQueue.h | 751 |
1 file changed, 751 insertions, 0 deletions
diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h
new file mode 100644
index 00000000..5259f92d
--- /dev/null
+++ b/src/common/WorkQueue.h
@@ -0,0 +1,751 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_WORKQUEUE_H
+#define CEPH_WORKQUEUE_H
+
+#ifdef WITH_SEASTAR
+// for ObjectStore.h
+struct ThreadPool {
+  struct TPHandle {
+  };
+};
+
+#else
+
+#include <atomic>
+#include <list>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "common/ceph_mutex.h"
+#include "include/unordered_map.h"
+#include "common/config_obs.h"
+#include "common/HeartbeatMap.h"
+#include "common/Thread.h"
+#include "include/Context.h"
+
+class CephContext;
+
+/// Pool of threads that share work submitted to multiple work queues.
+class ThreadPool : public md_config_obs_t {
+protected:
+  CephContext *cct;
+  std::string name;
+  std::string thread_name;
+  std::string lockname;
+  ceph::mutex _lock;
+  ceph::condition_variable _cond;
+  bool _stop;
+  int _pause;
+  int _draining;
+  ceph::condition_variable _wait_cond;
+
+public:
+  class TPHandle {
+    friend class ThreadPool;
+    CephContext *cct;
+    heartbeat_handle_d *hb;
+    ceph::coarse_mono_clock::rep grace;
+    ceph::coarse_mono_clock::rep suicide_grace;
+  public:
+    TPHandle(
+      CephContext *cct,
+      heartbeat_handle_d *hb,
+      time_t grace,
+      time_t suicide_grace)
+      : cct(cct), hb(hb), grace(grace), suicide_grace(suicide_grace) {}
+    void reset_tp_timeout();
+    void suspend_tp_timeout();
+  };
+protected:
+
+  /// Basic interface to a work queue used by the worker threads.
+  struct WorkQueue_ {
+    std::string name;
+    time_t timeout_interval, suicide_interval;
+    WorkQueue_(std::string n, time_t ti, time_t sti)
+      : name(std::move(n)), timeout_interval(ti), suicide_interval(sti)
+    { }
+    virtual ~WorkQueue_() {}
+    /// Remove all work items from the queue.
+    virtual void _clear() = 0;
+    /// Check whether there is anything to do.
+    virtual bool _empty() = 0;
+    /// Get the next work item to process.
+    virtual void *_void_dequeue() = 0;
+    /** @brief Process the work item.
+     * This function will be called several times in parallel
+     * and must therefore be thread-safe. */
+    virtual void _void_process(void *item, TPHandle &handle) = 0;
+    /** @brief Synchronously finish processing a work item.
+     * This function is called after _void_process with the global thread pool lock held,
+     * so at most one copy will execute simultaneously for a given thread pool.
+     * It can be used for non-thread-safe finalization. */
+    virtual void _void_process_finish(void *) = 0;
+  };
+
+  // track thread pool size changes
+  unsigned _num_threads;
+  std::string _thread_num_option;
+  const char **_conf_keys;
+
+  const char **get_tracked_conf_keys() const override {
+    return _conf_keys;
+  }
+  void handle_conf_change(const ConfigProxy& conf,
+                          const std::set <std::string> &changed) override;
+
+public:
+  /** @brief Work queue that processes several submitted items at once.
+   * The queue will automatically add itself to the thread pool on construction
+   * and remove itself on destruction. */
+  template<class T>
+  class BatchWorkQueue : public WorkQueue_ {
+    ThreadPool *pool;
+
+    virtual bool _enqueue(T *) = 0;
+    virtual void _dequeue(T *) = 0;
+    virtual void _dequeue(std::list<T*> *) = 0;
+    virtual void _process_finish(const std::list<T*> &) {}
+
+    // virtual methods from WorkQueue_ below
+    void *_void_dequeue() override {
+      std::list<T*> *out(new std::list<T*>);
+      _dequeue(out);
+      if (!out->empty()) {
+        return (void *)out;
+      } else {
+        delete out;
+        return 0;
+      }
+    }
+    void _void_process(void *p, TPHandle &handle) override {
+      _process(*((std::list<T*>*)p), handle);
+    }
+    void _void_process_finish(void *p) override {
+      _process_finish(*(std::list<T*>*)p);
+      delete (std::list<T*> *)p;
+    }
+
+  protected:
+    virtual void _process(const std::list<T*> &items, TPHandle &handle) = 0;
+
+  public:
+    BatchWorkQueue(std::string n, time_t ti, time_t sti, ThreadPool* p)
+      : WorkQueue_(std::move(n), ti, sti), pool(p) {
+      pool->add_work_queue(this);
+    }
+    ~BatchWorkQueue() override {
+      pool->remove_work_queue(this);
+    }
+
+    bool queue(T *item) {
+      pool->_lock.lock();
+      bool r = _enqueue(item);
+      pool->_cond.notify_one();
+      pool->_lock.unlock();
+      return r;
+    }
+    void dequeue(T *item) {
+      pool->_lock.lock();
+      _dequeue(item);
+      pool->_lock.unlock();
+    }
+    void clear() {
+      pool->_lock.lock();
+      _clear();
+      pool->_lock.unlock();
+    }
+
+    void lock() {
+      pool->lock();
+    }
+    void unlock() {
+      pool->unlock();
+    }
+    void wake() {
+      pool->wake();
+    }
+    void _wake() {
+      pool->_wake();
+    }
+    void drain() {
+      pool->drain(this);
+    }
+
+  };
+
+  /** @brief Templated by-value work queue.
+   * Skeleton implementation of a queue that processes items submitted by value.
+   * This is useful if the items are single primitive values or very small objects
+   * (a few bytes). The queue will automatically add itself to the thread pool on
+   * construction and remove itself on destruction. */
+  template<typename T, typename U = T>
+  class WorkQueueVal : public WorkQueue_ {
+    ceph::mutex _lock = ceph::make_mutex("WorkQueueVal::_lock");
+    ThreadPool *pool;
+    std::list<U> to_process;
+    std::list<U> to_finish;
+    virtual void _enqueue(T) = 0;
+    virtual void _enqueue_front(T) = 0;
+    bool _empty() override = 0;
+    virtual U _dequeue() = 0;
+    virtual void _process_finish(U) {}
+
+    void *_void_dequeue() override {
+      {
+        std::lock_guard l(_lock);
+        if (_empty())
+          return 0;
+        U u = _dequeue();
+        to_process.push_back(u);
+      }
+      return ((void*)1); // Not used
+    }
+    void _void_process(void *, TPHandle &handle) override {
+      _lock.lock();
+      ceph_assert(!to_process.empty());
+      U u = to_process.front();
+      to_process.pop_front();
+      _lock.unlock();
+
+      _process(u, handle);
+
+      _lock.lock();
+      to_finish.push_back(u);
+      _lock.unlock();
+    }
+
+    void _void_process_finish(void *) override {
+      _lock.lock();
+      ceph_assert(!to_finish.empty());
+      U u = to_finish.front();
+      to_finish.pop_front();
+      _lock.unlock();
+
+      _process_finish(u);
+    }
+
+    void _clear() override {}
+
+  public:
+    WorkQueueVal(std::string n, time_t ti, time_t sti, ThreadPool *p)
+      : WorkQueue_(std::move(n), ti, sti), pool(p) {
+      pool->add_work_queue(this);
+    }
+    ~WorkQueueVal() override {
+      pool->remove_work_queue(this);
+    }
+    void queue(T item) {
+      std::lock_guard l(pool->_lock);
+      _enqueue(item);
+      pool->_cond.notify_one();
+    }
+    void queue_front(T item) {
+      std::lock_guard l(pool->_lock);
+      _enqueue_front(item);
+      pool->_cond.notify_one();
+    }
+    void drain() {
+      pool->drain(this);
+    }
+  protected:
+    void lock() {
+      pool->lock();
+    }
+    void unlock() {
+      pool->unlock();
+    }
+    virtual void _process(U u, TPHandle &) = 0;
+  };
+
+  /** @brief Templated by-pointer work queue.
+   * Skeleton implementation of a queue that processes items of a given type submitted as pointers.
+   * This is useful when the work items are large or include dynamically allocated memory. The queue
+   * will automatically add itself to the thread pool on construction and remove itself on
+   * destruction. */
+  template<class T>
+  class WorkQueue : public WorkQueue_ {
+    ThreadPool *pool;
+
+    /// Add a work item to the queue.
+    virtual bool _enqueue(T *) = 0;
+    /// Dequeue a previously submitted work item.
+    virtual void _dequeue(T *) = 0;
+    /// Dequeue a work item and return the original submitted pointer.
+    virtual T *_dequeue() = 0;
+    virtual void _process_finish(T *) {}
+
+    // implementation of virtual methods from WorkQueue_
+    void *_void_dequeue() override {
+      return (void *)_dequeue();
+    }
+    void _void_process(void *p, TPHandle &handle) override {
+      _process(static_cast<T *>(p), handle);
+    }
+    void _void_process_finish(void *p) override {
+      _process_finish(static_cast<T *>(p));
+    }
+
+  protected:
+    /// Process a work item. Called from the worker threads.
+    virtual void _process(T *t, TPHandle &) = 0;
+
+  public:
+    WorkQueue(std::string n, time_t ti, time_t sti, ThreadPool* p)
+      : WorkQueue_(std::move(n), ti, sti), pool(p) {
+      pool->add_work_queue(this);
+    }
+    ~WorkQueue() override {
+      pool->remove_work_queue(this);
+    }
+
+    bool queue(T *item) {
+      pool->_lock.lock();
+      bool r = _enqueue(item);
+      pool->_cond.notify_one();
+      pool->_lock.unlock();
+      return r;
+    }
+    void dequeue(T *item) {
+      pool->_lock.lock();
+      _dequeue(item);
+      pool->_lock.unlock();
+    }
+    void clear() {
+      pool->_lock.lock();
+      _clear();
+      pool->_lock.unlock();
+    }
+
+    void lock() {
+      pool->lock();
+    }
+    void unlock() {
+      pool->unlock();
+    }
+    /// wake up the thread pool (without lock held)
+    void wake() {
+      pool->wake();
+    }
+    /// wake up the thread pool (with lock already held)
+    void _wake() {
+      pool->_wake();
+    }
+    void _wait() {
+      pool->_wait();
+    }
+    void drain() {
+      pool->drain(this);
+    }
+
+  };
+
+  template<typename T>
+  class PointerWQ : public WorkQueue_ {
+  public:
+    ~PointerWQ() override {
+      m_pool->remove_work_queue(this);
+      ceph_assert(m_processing == 0);
+    }
+    void drain() {
+      {
+        // if this queue is empty and not processing, don't wait for other
+        // queues to finish processing
+        std::lock_guard l(m_pool->_lock);
+        if (m_processing == 0 && m_items.empty()) {
+          return;
+        }
+      }
+      m_pool->drain(this);
+    }
+    void queue(T *item) {
+      std::lock_guard l(m_pool->_lock);
+      m_items.push_back(item);
+      m_pool->_cond.notify_one();
+    }
+    bool empty() {
+      std::lock_guard l(m_pool->_lock);
+      return _empty();
+    }
+  protected:
+    PointerWQ(std::string n, time_t ti, time_t sti, ThreadPool* p)
+      : WorkQueue_(std::move(n), ti, sti), m_pool(p), m_processing(0) {
+    }
+    void register_work_queue() {
+      m_pool->add_work_queue(this);
+    }
+    void _clear() override {
+      ceph_assert(ceph_mutex_is_locked(m_pool->_lock));
+      m_items.clear();
+    }
+    bool _empty() override {
+      ceph_assert(ceph_mutex_is_locked(m_pool->_lock));
+      return m_items.empty();
+    }
+    void *_void_dequeue() override {
+      ceph_assert(ceph_mutex_is_locked(m_pool->_lock));
+      if (m_items.empty()) {
+        return NULL;
+      }
+
+      ++m_processing;
+      T *item = m_items.front();
+      m_items.pop_front();
+      return item;
+    }
+    void _void_process(void *item, ThreadPool::TPHandle &handle) override {
+      process(reinterpret_cast<T *>(item));
+    }
+    void _void_process_finish(void *item) override {
+      ceph_assert(ceph_mutex_is_locked(m_pool->_lock));
+      ceph_assert(m_processing > 0);
+      --m_processing;
+    }
+
+    virtual void process(T *item) = 0;
+    void process_finish() {
+      std::lock_guard locker(m_pool->_lock);
+      _void_process_finish(nullptr);
+    }
+
+    T *front() {
+      ceph_assert(ceph_mutex_is_locked(m_pool->_lock));
+      if (m_items.empty()) {
+        return NULL;
+      }
+      return m_items.front();
+    }
+    void requeue_front(T *item) {
+      std::lock_guard pool_locker(m_pool->_lock);
+      requeue_front(pool_locker, item);
+    }
+    void requeue_front(const std::lock_guard<ceph::mutex>&, T *item) {
+      _void_process_finish(nullptr);
+      m_items.push_front(item);
+    }
+    void requeue_back(T *item) {
+      std::lock_guard pool_locker(m_pool->_lock);
+      requeue_back(pool_locker, item);
+    }
+    void requeue_back(const std::lock_guard<ceph::mutex>&, T *item) {
+      _void_process_finish(nullptr);
+      m_items.push_back(item);
+    }
+    void signal() {
+      std::lock_guard pool_locker(m_pool->_lock);
+      signal(pool_locker);
+    }
+    void signal(const std::lock_guard<ceph::mutex>&) {
+      m_pool->_cond.notify_one();
+    }
+    ceph::mutex &get_pool_lock() {
+      return m_pool->_lock;
+    }
+  private:
+    ThreadPool *m_pool;
+    std::list<T *> m_items;
+    uint32_t m_processing;
+  };
+protected:
+  std::vector<WorkQueue_*> work_queues;
+  int next_work_queue = 0;
+
+
+  // threads
+  struct WorkThread : public Thread {
+    ThreadPool *pool;
+    // cppcheck-suppress noExplicitConstructor
+    WorkThread(ThreadPool *p) : pool(p) {}
+    void *entry() override {
+      pool->worker(this);
+      return 0;
+    }
+  };
+
+  std::set<WorkThread*> _threads;
+  std::list<WorkThread*> _old_threads;  ///< need to be joined
+  int processing;
+
+  void start_threads();
+  void join_old_threads();
+  virtual void worker(WorkThread *wt);
+
+public:
+  ThreadPool(CephContext *cct_, std::string nm, std::string tn, int n, const char *option = NULL);
+  ~ThreadPool() override;
+
+  /// return number of threads currently running
+  int get_num_threads() {
+    std::lock_guard l(_lock);
+    return _num_threads;
+  }
+
+  /// assign a work queue to this thread pool
+  void add_work_queue(WorkQueue_* wq) {
+    std::lock_guard l(_lock);
+    work_queues.push_back(wq);
+  }
+  /// remove a work queue from this thread pool
+  void remove_work_queue(WorkQueue_* wq) {
+    std::lock_guard l(_lock);
+    unsigned i = 0;
+    while (work_queues[i] != wq)
+      i++;
+    for (i++; i < work_queues.size(); i++)
+      work_queues[i-1] = work_queues[i];
+    ceph_assert(i == work_queues.size());
+    work_queues.resize(i-1);
+  }
+
+  /// take thread pool lock
+  void lock() {
+    _lock.lock();
+  }
+  /// release thread pool lock
+  void unlock() {
+    _lock.unlock();
+  }
+
+  /// wait for a kick on this thread pool
+  void wait(ceph::condition_variable &c) {
+    std::unique_lock l(_lock, std::adopt_lock);
+    c.wait(l);
+  }
+
+  /// wake up a waiter (with lock already held)
+  void _wake() {
+    _cond.notify_all();
+  }
+  /// wake up a waiter (without lock held)
+  void wake() {
+    std::lock_guard l(_lock);
+    _cond.notify_all();
+  }
+  void _wait() {
+    std::unique_lock l(_lock, std::adopt_lock);
+    _cond.wait(l);
+  }
+
+  /// start thread pool threads
+  void start();
+  /// stop thread pool threads
+  void stop(bool clear_after=true);
+  /// pause thread pool (if it is not already paused)
+  void pause();
+  /// pause initiation of new work
+  void pause_new();
+  /// resume work in thread pool. must match each pause() call 1:1 to resume.
+  void unpause();
+  /** @brief Wait until work completes.
+   * If the parameter is NULL, blocks until all threads are idle.
+   * If it is not NULL, blocks until the given work queue does not have
+   * any items left to process. */
+  void drain(WorkQueue_* wq = 0);
+};
+
+class GenContextWQ :
+  public ThreadPool::WorkQueueVal<GenContext<ThreadPool::TPHandle&>*> {
+  std::list<GenContext<ThreadPool::TPHandle&>*> _queue;
+public:
+  GenContextWQ(const std::string &name, time_t ti, ThreadPool *tp)
+    : ThreadPool::WorkQueueVal<
+        GenContext<ThreadPool::TPHandle&>*>(name, ti, ti*10, tp) {}
+
+  void _enqueue(GenContext<ThreadPool::TPHandle&> *c) override {
+    _queue.push_back(c);
+  }
+  void _enqueue_front(GenContext<ThreadPool::TPHandle&> *c) override {
+    _queue.push_front(c);
+  }
+  bool _empty() override {
+    return _queue.empty();
+  }
+  GenContext<ThreadPool::TPHandle&> *_dequeue() override {
+    ceph_assert(!_queue.empty());
+    GenContext<ThreadPool::TPHandle&> *c = _queue.front();
+    _queue.pop_front();
+    return c;
+  }
+  void _process(GenContext<ThreadPool::TPHandle&> *c,
+                ThreadPool::TPHandle &tp) override {
+    c->complete(tp);
+  }
+};
+
+class C_QueueInWQ : public Context {
+  GenContextWQ *wq;
+  GenContext<ThreadPool::TPHandle&> *c;
+public:
+  C_QueueInWQ(GenContextWQ *wq, GenContext<ThreadPool::TPHandle &> *c)
+    : wq(wq), c(c) {}
+  void finish(int) override {
+    wq->queue(c);
+  }
+};
+
+/// Work queue that asynchronously completes contexts (executes callbacks).
+/// @see Finisher
+class ContextWQ : public ThreadPool::PointerWQ<Context> {
+public:
+  ContextWQ(const std::string &name, time_t ti, ThreadPool *tp)
+    : ThreadPool::PointerWQ<Context>(name, ti, 0, tp) {
+    this->register_work_queue();
+  }
+
+  void queue(Context *ctx, int result = 0) {
+    if (result != 0) {
+      std::lock_guard locker(m_lock);
+      m_context_results[ctx] = result;
+    }
+    ThreadPool::PointerWQ<Context>::queue(ctx);
+  }
+protected:
+  void _clear() override {
+    ThreadPool::PointerWQ<Context>::_clear();
+
+    std::lock_guard locker(m_lock);
+    m_context_results.clear();
+  }
+
+  void process(Context *ctx) override {
+    int result = 0;
+    {
+      std::lock_guard locker(m_lock);
+      ceph::unordered_map<Context *, int>::iterator it =
+        m_context_results.find(ctx);
+      if (it != m_context_results.end()) {
+        result = it->second;
+        m_context_results.erase(it);
+      }
+    }
+    ctx->complete(result);
+  }
+private:
+  ceph::mutex m_lock = ceph::make_mutex("ContextWQ::m_lock");
+  ceph::unordered_map<Context*, int> m_context_results;
+};
+
+class ShardedThreadPool {
+
+  CephContext *cct;
+  std::string name;
+  std::string thread_name;
+  std::string lockname;
+  ceph::mutex shardedpool_lock;
+  ceph::condition_variable shardedpool_cond;
+  ceph::condition_variable wait_cond;
+  uint32_t num_threads;
+
+  std::atomic<bool> stop_threads = { false };
+  std::atomic<bool> pause_threads = { false };
+  std::atomic<bool> drain_threads = { false };
+
+  uint32_t num_paused;
+  uint32_t num_drained;
+
+public:
+
+  class BaseShardedWQ {
+
+  public:
+    time_t timeout_interval, suicide_interval;
+    BaseShardedWQ(time_t ti, time_t sti):timeout_interval(ti), suicide_interval(sti) {}
+    virtual ~BaseShardedWQ() {}
+
+    virtual void _process(uint32_t thread_index, heartbeat_handle_d *hb ) = 0;
+    virtual void return_waiting_threads() = 0;
+    virtual void stop_return_waiting_threads() = 0;
+    virtual bool is_shard_empty(uint32_t thread_index) = 0;
+  };
+
+  template <typename T>
+  class ShardedWQ: public BaseShardedWQ {
+
+    ShardedThreadPool* sharded_pool;
+
+  protected:
+    virtual void _enqueue(T&&) = 0;
+    virtual void _enqueue_front(T&&) = 0;
+
+
+  public:
+    ShardedWQ(time_t ti, time_t sti, ShardedThreadPool* tp): BaseShardedWQ(ti, sti),
+      sharded_pool(tp) {
+      tp->set_wq(this);
+    }
+    ~ShardedWQ() override {}
+
+    void queue(T&& item) {
+      _enqueue(std::move(item));
+    }
+    void queue_front(T&& item) {
+      _enqueue_front(std::move(item));
+    }
+    void drain() {
+      sharded_pool->drain();
+    }
+
+  };
+
+private:
+
+  BaseShardedWQ* wq;
+  // threads
+  struct WorkThreadSharded : public Thread {
+    ShardedThreadPool *pool;
+    uint32_t thread_index;
+    WorkThreadSharded(ShardedThreadPool *p, uint32_t pthread_index): pool(p),
+      thread_index(pthread_index) {}
+    void *entry() override {
+      pool->shardedthreadpool_worker(thread_index);
+      return 0;
+    }
+  };
+
+  std::vector<WorkThreadSharded*> threads_shardedpool;
+  void start_threads();
+  void shardedthreadpool_worker(uint32_t thread_index);
+  void set_wq(BaseShardedWQ* swq) {
+    wq = swq;
+  }
+
+
+public:
+
+  ShardedThreadPool(CephContext *cct_, std::string nm, std::string tn, uint32_t pnum_threads);
+
+  ~ShardedThreadPool(){};
+
+  /// start thread pool threads
+  void start();
+  /// stop thread pool threads
+  void stop();
+  /// pause thread pool (if it is not already paused)
+  void pause();
+  /// pause initiation of new work
+  void pause_new();
+  /// resume work in thread pool. must match each pause() call 1:1 to resume.
+  void unpause();
+  /// wait for all work to complete
+  void drain();
+
+};
+
+#endif
+
+#endif
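Usage sketch (illustrative only, not part of the diff above): the header is consumed by defining queue subclasses elsewhere in the tree. The snippet below shows how the by-pointer ThreadPool::WorkQueue<T> variant is typically wired up; the Ping/PingWQ names, the 4-thread pool, and the 60-second timeout are invented for illustration, while the overridden hooks and the ThreadPool constructor are the ones declared in the header.

// Illustrative only: Ping, PingWQ and the constants are invented names.
#include <iostream>
#include <list>
#include <string>

#include "common/WorkQueue.h"

struct Ping {
  std::string payload;
};

class PingWQ : public ThreadPool::WorkQueue<Ping> {
public:
  PingWQ(ThreadPool *tp)
    : ThreadPool::WorkQueue<Ping>("PingWQ", 60, 0, tp) {}  // 60s heartbeat timeout, no suicide timeout

protected:
  // Queue hooks; the pool lock is held while these run.
  bool _enqueue(Ping *p) override {
    q.push_back(p);
    return true;
  }
  void _dequeue(Ping *p) override {
    q.remove(p);        // cancel a previously queued item
  }
  Ping *_dequeue() override {
    if (q.empty())
      return nullptr;   // nothing to do; the worker goes back to waiting
    Ping *p = q.front();
    q.pop_front();
    return p;
  }
  bool _empty() override { return q.empty(); }
  void _clear() override { q.clear(); }

  // Runs on a worker thread without the pool lock; long-running items may
  // call handle.reset_tp_timeout() to keep the heartbeat from firing.
  void _process(Ping *p, ThreadPool::TPHandle &handle) override {
    std::cout << p->payload << std::endl;
    delete p;
  }

private:
  std::list<Ping*> q;
};

// Typical lifecycle, assuming a valid CephContext* `cct`:
//
//   ThreadPool tp(cct, "ping_tp", "tp_ping", 4);  // 4 worker threads
//   PingWQ wq(&tp);        // constructor registers the queue with the pool
//   tp.start();
//   wq.queue(new Ping{"hello"});
//   wq.drain();            // block until queued items have been processed
//   tp.stop();

ContextWQ in the header follows the same pattern specialized for Context callbacks, stashing an optional nonzero result per context in m_context_results until process() completes it.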