summaryrefslogtreecommitdiffstats
path: root/src/common/HeartbeatMap.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/common/HeartbeatMap.h99
1 files changed, 99 insertions, 0 deletions
diff --git a/src/common/HeartbeatMap.h b/src/common/HeartbeatMap.h
new file mode 100644
index 000000000..6f486b21c
--- /dev/null
+++ b/src/common/HeartbeatMap.h
@@ -0,0 +1,99 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_HEARTBEATMAP_H
+#define CEPH_HEARTBEATMAP_H
+
+#include <list>
+#include <atomic>
+#include <string>
+#include <pthread.h>
+
+#include "common/ceph_time.h"
+#include "common/ceph_mutex.h"
+#include "include/common_fwd.h"
+
+namespace ceph {
+
+/*
+ * HeartbeatMap -
+ *
+ * Maintain a set of handles for internal subsystems to periodically
+ * check in with a health check and timeout. Each user can register
+ * and get a handle they can use to set or reset a timeout.
+ *
+ * A simple is_healthy() method checks for any users who are not within
+ * their grace period for a heartbeat.
+ */
+
+struct heartbeat_handle_d { // per-worker handle; HeartbeatMap::add_worker returns a pointer to one
+ const std::string name; // fixed at construction from the constructor argument
+ pthread_t thread_id = 0; // defaults to 0; presumably filled from add_worker's thread_id argument -- confirm in the .cc
+ using clock = ceph::coarse_mono_clock;
+ using time = ceph::coarse_mono_time;
+ std::atomic<time> timeout = clock::zero(); // atomic time value; starts at clock::zero()
+ std::atomic<time> suicide_timeout = clock::zero(); // atomic time value; starts at clock::zero()
+ ceph::timespan grace = ceph::timespan::zero(); // starts at zero; presumably the grace last passed to reset_timeout -- confirm
+ ceph::timespan suicide_grace = ceph::timespan::zero(); // starts at zero; presumably the suicide_grace last passed to reset_timeout -- confirm
+ std::list<heartbeat_handle_d*>::iterator list_item; // iterator into a list of handle pointers (same element type as HeartbeatMap::m_workers); no initializer here
+
+ explicit heartbeat_handle_d(const std::string& n) // sets only the name; every other member keeps its in-class default
+ : name(n)
+ { }
+};
+
+class HeartbeatMap { // keeps a list of heartbeat_handle_d pointers plus healthy/total counters (see private members)
+ public:
+ // register/unregister a worker; add_worker returns the handle that the timeout calls below take
+ heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id);
+ void remove_worker(const heartbeat_handle_d *h); // NOTE(review): handle ownership is not visible in this header -- confirm whether this frees h
+
+ // reset the timeout so that another touch is expected within 'grace'; suicide_grace is a separate, second grace value
+ void reset_timeout(heartbeat_handle_d *h,
+ ceph::timespan grace,
+ ceph::timespan suicide_grace);
+ // clear the timeout so that this handle is no longer checked
+ void clear_timeout(heartbeat_handle_d *h);
+
+ // return false if any of the timeouts are currently expired.
+ bool is_healthy();
+
+ // touch cct->_conf->heartbeat_file if is_healthy()
+ void check_touch_file();
+
+ // get the number of unhealthy workers
+ int get_unhealthy_workers() const; // returns int; presumably reads m_unhealthy_workers (std::atomic<unsigned>) -- confirm
+
+ // get the number of total workers
+ int get_total_workers() const; // returns int; presumably reads m_total_workers (std::atomic<unsigned>) -- confirm
+
+ explicit HeartbeatMap(CephContext *cct);
+ ~HeartbeatMap();
+
+ private:
+ using clock = ceph::coarse_mono_clock;
+ CephContext *m_cct; // raw pointer, no initializer here; presumably the constructor's cct argument -- confirm
+ ceph::shared_mutex m_rwlock =
+ ceph::make_shared_mutex("HeartbeatMap::m_rwlock"); // NOTE(review): what this lock guards is not shown here; presumably m_workers
+ clock::time_point m_inject_unhealthy_until; // NOTE(review): name suggests a deadline for a forced-unhealthy state -- confirm in the .cc
+ std::list<heartbeat_handle_d*> m_workers; // list of handle pointers; same container type that heartbeat_handle_d::list_item iterates
+ std::atomic<unsigned> m_unhealthy_workers = { 0 }; // atomic counter, starts at 0
+ std::atomic<unsigned> m_total_workers = { 0 }; // atomic counter, starts at 0
+
+ bool _check(const heartbeat_handle_d *h, const char *who,
+ ceph::coarse_mono_time now); // private helper taking one handle, a caller label and a time; presumably tests h's timeouts against now -- confirm
+};
+
+}
+#endif