summaryrefslogtreecommitdiffstats
path: root/src/log
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/log
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/log')
-rw-r--r--src/log/Entry.h115
-rw-r--r--src/log/Log.cc465
-rw-r--r--src/log/Log.h128
-rw-r--r--src/log/LogClock.h168
-rw-r--r--src/log/SubsystemMap.h113
-rw-r--r--src/log/test.cc381
6 files changed, 1370 insertions, 0 deletions
diff --git a/src/log/Entry.h b/src/log/Entry.h
new file mode 100644
index 000000000..536f1a9dc
--- /dev/null
+++ b/src/log/Entry.h
@@ -0,0 +1,115 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef __CEPH_LOG_ENTRY_H
+#define __CEPH_LOG_ENTRY_H
+
+#include "log/LogClock.h"
+
+#include "common/StackStringStream.h"
+
+#include "boost/container/small_vector.hpp"
+
+#include <pthread.h>
+
+#include <string_view>
+
+namespace ceph {
+namespace logging {
+
+class Entry {
+public:
+ using time = log_time;
+
+ Entry() = delete;
+ Entry(short pr, short sub) :
+ m_stamp(clock().now()),
+ m_thread(pthread_self()),
+ m_prio(pr),
+ m_subsys(sub)
+ {}
+ Entry(const Entry &) = default;
+ Entry& operator=(const Entry &) = default;
+ Entry(Entry &&e) = default;
+ Entry& operator=(Entry &&e) = default;
+ virtual ~Entry() = default;
+
+ virtual std::string_view strv() const = 0;
+ virtual std::size_t size() const = 0;
+
+ time m_stamp;
+ pthread_t m_thread;
+ short m_prio, m_subsys;
+
+ static log_clock& clock() {
+ static log_clock clock;
+ return clock;
+ }
+};
+
+/* This should never be moved to the heap! Only allocate this on the stack. See
+ * CachedStackStringStream for rationale.
+ */
+class MutableEntry : public Entry {
+public:
+ MutableEntry() = delete;
+ MutableEntry(short pr, short sub) : Entry(pr, sub) {}
+ MutableEntry(const MutableEntry&) = delete;
+ MutableEntry& operator=(const MutableEntry&) = delete;
+ MutableEntry(MutableEntry&&) = delete;
+ MutableEntry& operator=(MutableEntry&&) = delete;
+ ~MutableEntry() override = default;
+
+ std::ostream& get_ostream() {
+ return *cos;
+ }
+
+ std::string_view strv() const override {
+ return cos->strv();
+ }
+ std::size_t size() const override {
+ return cos->strv().size();
+ }
+
+private:
+ CachedStackStringStream cos;
+};
+
+class ConcreteEntry : public Entry {
+public:
+ ConcreteEntry() = delete;
+ ConcreteEntry(const Entry& e) : Entry(e) {
+ auto strv = e.strv();
+ str.reserve(strv.size());
+ str.insert(str.end(), strv.begin(), strv.end());
+ }
+ ConcreteEntry& operator=(const Entry& e) {
+ Entry::operator=(e);
+ auto strv = e.strv();
+ str.reserve(strv.size());
+ str.assign(strv.begin(), strv.end());
+ return *this;
+ }
+ ConcreteEntry(ConcreteEntry&& e) : Entry(e), str(std::move(e.str)) {}
+ ConcreteEntry& operator=(ConcreteEntry&& e) {
+ Entry::operator=(e);
+ str = std::move(e.str);
+ return *this;
+ }
+ ~ConcreteEntry() override = default;
+
+ std::string_view strv() const override {
+ return std::string_view(str.data(), str.size());
+ }
+ std::size_t size() const override {
+ return str.size();
+ }
+
+private:
+ boost::container::small_vector<char, 1024> str;
+};
+
+}
+}
+
+#endif
diff --git a/src/log/Log.cc b/src/log/Log.cc
new file mode 100644
index 000000000..28f1dbf21
--- /dev/null
+++ b/src/log/Log.cc
@@ -0,0 +1,465 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "Log.h"
+
+#include "common/errno.h"
+#include "common/safe_io.h"
+#include "common/Graylog.h"
+#include "common/valgrind.h"
+
+#include "include/ceph_assert.h"
+#include "include/compat.h"
+#include "include/on_exit.h"
+
+#include "Entry.h"
+#include "LogClock.h"
+#include "SubsystemMap.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <syslog.h>
+
+#include <iostream>
+#include <set>
+
+#include <fmt/format.h>
+
+#define MAX_LOG_BUF 65536
+
+namespace ceph {
+namespace logging {
+
+static OnExitManager exit_callbacks;
+
+static void log_on_exit(void *p)
+{
+ Log *l = *(Log **)p;
+ if (l)
+ l->flush();
+ delete (Log **)p;// Delete allocated pointer (not Log object, the pointer only!)
+}
+
+Log::Log(const SubsystemMap *s)
+ : m_indirect_this(nullptr),
+ m_subs(s),
+ m_recent(DEFAULT_MAX_RECENT)
+{
+ m_log_buf.reserve(MAX_LOG_BUF);
+}
+
+Log::~Log()
+{
+ if (m_indirect_this) {
+ *m_indirect_this = nullptr;
+ }
+
+ ceph_assert(!is_started());
+ if (m_fd >= 0)
+ VOID_TEMP_FAILURE_RETRY(::close(m_fd));
+}
+
+
+///
+void Log::set_coarse_timestamps(bool coarse) {
+ std::scoped_lock lock(m_flush_mutex);
+ if (coarse)
+ Entry::clock().coarsen();
+ else
+ Entry::clock().refine();
+}
+
+void Log::set_flush_on_exit()
+{
+ std::scoped_lock lock(m_flush_mutex);
+ // Make sure we flush on shutdown. We do this by deliberately
+ // leaking an indirect pointer to ourselves (on_exit() can't
+ // unregister a callback). This is not racy only becuase we
+ // assume that exit() won't race with ~Log().
+ if (m_indirect_this == NULL) {
+ m_indirect_this = new (Log*)(this);
+ exit_callbacks.add_callback(log_on_exit, m_indirect_this);
+ }
+}
+
+void Log::set_max_new(std::size_t n)
+{
+ std::scoped_lock lock(m_queue_mutex);
+ m_max_new = n;
+}
+
+void Log::set_max_recent(std::size_t n)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_max_recent = n;
+}
+
+void Log::set_log_file(std::string_view fn)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_log_file = fn;
+}
+
+void Log::set_log_stderr_prefix(std::string_view p)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_log_stderr_prefix = p;
+}
+
+void Log::reopen_log_file()
+{
+ std::scoped_lock lock(m_flush_mutex);
+ if (!is_started()) {
+ return;
+ }
+ m_flush_mutex_holder = pthread_self();
+ if (m_fd >= 0)
+ VOID_TEMP_FAILURE_RETRY(::close(m_fd));
+ if (m_log_file.length()) {
+ m_fd = ::open(m_log_file.c_str(), O_CREAT|O_WRONLY|O_APPEND|O_CLOEXEC, 0644);
+ if (m_fd >= 0 && (m_uid || m_gid)) {
+ if (::fchown(m_fd, m_uid, m_gid) < 0) {
+ int e = errno;
+ std::cerr << "failed to chown " << m_log_file << ": " << cpp_strerror(e)
+ << std::endl;
+ }
+ }
+ } else {
+ m_fd = -1;
+ }
+ m_flush_mutex_holder = 0;
+}
+
+void Log::chown_log_file(uid_t uid, gid_t gid)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ if (m_fd >= 0) {
+ int r = ::fchown(m_fd, uid, gid);
+ if (r < 0) {
+ r = -errno;
+ std::cerr << "failed to chown " << m_log_file << ": " << cpp_strerror(r)
+ << std::endl;
+ }
+ }
+}
+
+void Log::set_syslog_level(int log, int crash)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_syslog_log = log;
+ m_syslog_crash = crash;
+}
+
+void Log::set_stderr_level(int log, int crash)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_stderr_log = log;
+ m_stderr_crash = crash;
+}
+
+void Log::set_graylog_level(int log, int crash)
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_graylog_log = log;
+ m_graylog_crash = crash;
+}
+
+void Log::start_graylog()
+{
+ std::scoped_lock lock(m_flush_mutex);
+ if (! m_graylog.get())
+ m_graylog = std::make_shared<Graylog>(m_subs, "dlog");
+}
+
+
+void Log::stop_graylog()
+{
+ std::scoped_lock lock(m_flush_mutex);
+ m_graylog.reset();
+}
+
+void Log::submit_entry(Entry&& e)
+{
+ std::unique_lock lock(m_queue_mutex);
+ m_queue_mutex_holder = pthread_self();
+
+ if (unlikely(m_inject_segv))
+ *(volatile int *)(0) = 0xdead;
+
+ // wait for flush to catch up
+ while (is_started() &&
+ m_new.size() > m_max_new) {
+ if (m_stop) break; // force addition
+ m_cond_loggers.wait(lock);
+ }
+
+ m_new.emplace_back(std::move(e));
+ m_cond_flusher.notify_all();
+ m_queue_mutex_holder = 0;
+}
+
+void Log::flush()
+{
+ std::scoped_lock lock1(m_flush_mutex);
+ m_flush_mutex_holder = pthread_self();
+
+ {
+ std::scoped_lock lock2(m_queue_mutex);
+ m_queue_mutex_holder = pthread_self();
+ assert(m_flush.empty());
+ m_flush.swap(m_new);
+ m_cond_loggers.notify_all();
+ m_queue_mutex_holder = 0;
+ }
+
+ _flush(m_flush, false);
+ m_flush_mutex_holder = 0;
+}
+
+void Log::_log_safe_write(std::string_view sv)
+{
+ if (m_fd < 0)
+ return;
+ int r = safe_write(m_fd, sv.data(), sv.size());
+ if (r != m_fd_last_error) {
+ if (r < 0)
+ std::cerr << "problem writing to " << m_log_file
+ << ": " << cpp_strerror(r)
+ << std::endl;
+ m_fd_last_error = r;
+ }
+}
+
+void Log::_flush_logbuf()
+{
+ if (m_log_buf.size()) {
+ _log_safe_write(std::string_view(m_log_buf.data(), m_log_buf.size()));
+ m_log_buf.resize(0);
+ }
+}
+
+void Log::_flush(EntryVector& t, bool crash)
+{
+ long len = 0;
+ if (t.empty()) {
+ assert(m_log_buf.empty());
+ return;
+ }
+ if (crash) {
+ len = t.size();
+ }
+ for (auto& e : t) {
+ auto prio = e.m_prio;
+ auto stamp = e.m_stamp;
+ auto sub = e.m_subsys;
+ auto thread = e.m_thread;
+ auto str = e.strv();
+
+ bool should_log = crash || m_subs->get_log_level(sub) >= prio;
+ bool do_fd = m_fd >= 0 && should_log;
+ bool do_syslog = m_syslog_crash >= prio && should_log;
+ bool do_stderr = m_stderr_crash >= prio && should_log;
+ bool do_graylog2 = m_graylog_crash >= prio && should_log;
+
+ if (do_fd || do_syslog || do_stderr) {
+ const std::size_t cur = m_log_buf.size();
+ std::size_t used = 0;
+ const std::size_t allocated = e.size() + 80;
+ m_log_buf.resize(cur + allocated);
+
+ char* const start = m_log_buf.data();
+ char* pos = start + cur;
+
+ if (crash) {
+ used += (std::size_t)snprintf(pos + used, allocated - used, "%6ld> ", -(--len));
+ }
+ used += (std::size_t)append_time(stamp, pos + used, allocated - used);
+ used += (std::size_t)snprintf(pos + used, allocated - used, " %lx %2d ", (unsigned long)thread, prio);
+ memcpy(pos + used, str.data(), str.size());
+ used += str.size();
+ pos[used] = '\0';
+ ceph_assert((used + 1 /* '\n' */) < allocated);
+
+ if (do_syslog) {
+ syslog(LOG_USER|LOG_INFO, "%s", pos);
+ }
+
+ if (do_stderr) {
+ std::cerr << m_log_stderr_prefix << std::string_view(pos, used) << std::endl;
+ }
+
+ /* now add newline */
+ pos[used++] = '\n';
+
+ if (do_fd) {
+ m_log_buf.resize(cur + used);
+ } else {
+ m_log_buf.resize(0);
+ }
+
+ if (m_log_buf.size() > MAX_LOG_BUF) {
+ _flush_logbuf();
+ }
+ }
+
+ if (do_graylog2 && m_graylog) {
+ m_graylog->log_entry(e);
+ }
+
+ m_recent.push_back(std::move(e));
+ }
+ t.clear();
+
+ _flush_logbuf();
+}
+
+void Log::_log_message(std::string_view s, bool crash)
+{
+ if (m_fd >= 0) {
+ std::string b = fmt::format("{}\n", s);
+ int r = safe_write(m_fd, b.data(), b.size());
+ if (r < 0)
+ std::cerr << "problem writing to " << m_log_file << ": " << cpp_strerror(r) << std::endl;
+ }
+ if ((crash ? m_syslog_crash : m_syslog_log) >= 0) {
+ syslog(LOG_USER|LOG_INFO, "%.*s", static_cast<int>(s.size()), s.data());
+ }
+
+ if ((crash ? m_stderr_crash : m_stderr_log) >= 0) {
+ std::cerr << s << std::endl;
+ }
+}
+
+template<typename T>
+static uint64_t tid_to_int(T tid)
+{
+ if constexpr (std::is_pointer_v<T>) {
+ return reinterpret_cast<std::uintptr_t>(tid);
+ } else {
+ return tid;
+ }
+}
+
+void Log::dump_recent()
+{
+ std::scoped_lock lock1(m_flush_mutex);
+ m_flush_mutex_holder = pthread_self();
+
+ {
+ std::scoped_lock lock2(m_queue_mutex);
+ m_queue_mutex_holder = pthread_self();
+ assert(m_flush.empty());
+ m_flush.swap(m_new);
+ m_queue_mutex_holder = 0;
+ }
+
+ _flush(m_flush, false);
+
+ _log_message("--- begin dump of recent events ---", true);
+ std::set<pthread_t> recent_pthread_ids;
+ {
+ EntryVector t;
+ t.insert(t.end(), std::make_move_iterator(m_recent.begin()), std::make_move_iterator(m_recent.end()));
+ m_recent.clear();
+ for (const auto& e : t) {
+ recent_pthread_ids.emplace(e.m_thread);
+ }
+ _flush(t, true);
+ }
+
+ _log_message("--- logging levels ---", true);
+ for (const auto& p : m_subs->m_subsys) {
+ _log_message(fmt::format(" {:2d}/{:2d} {}",
+ p.log_level, p.gather_level, p.name), true);
+ }
+ _log_message(fmt::format(" {:2d}/{:2d} (syslog threshold)",
+ m_syslog_log, m_syslog_crash), true);
+ _log_message(fmt::format(" {:2d}/{:2d} (stderr threshold)",
+ m_stderr_log, m_stderr_crash), true);
+
+ _log_message("--- pthread ID / name mapping for recent threads ---", true);
+ for (const auto pthread_id : recent_pthread_ids)
+ {
+ char pthread_name[16] = {0}; //limited by 16B include terminating null byte.
+ ceph_pthread_getname(pthread_id, pthread_name, sizeof(pthread_name));
+ _log_message(fmt::format(" {} / {}",
+ tid_to_int(pthread_id), pthread_name), true);
+ }
+
+ _log_message(fmt::format(" max_recent {:9}", m_max_recent), true);
+ _log_message(fmt::format(" max_new {:9}", m_max_recent), true);
+ _log_message(fmt::format(" log_file {}", m_log_file), true);
+
+ _log_message("--- end dump of recent events ---", true);
+
+ assert(m_log_buf.empty());
+
+ m_flush_mutex_holder = 0;
+}
+
+void Log::start()
+{
+ ceph_assert(!is_started());
+ {
+ std::scoped_lock lock(m_queue_mutex);
+ m_stop = false;
+ }
+ create("log");
+}
+
+void Log::stop()
+{
+ if (is_started()) {
+ {
+ std::scoped_lock lock(m_queue_mutex);
+ m_stop = true;
+ m_cond_flusher.notify_one();
+ m_cond_loggers.notify_all();
+ }
+ join();
+ }
+}
+
+void *Log::entry()
+{
+ reopen_log_file();
+ {
+ std::unique_lock lock(m_queue_mutex);
+ m_queue_mutex_holder = pthread_self();
+ while (!m_stop) {
+ if (!m_new.empty()) {
+ m_queue_mutex_holder = 0;
+ lock.unlock();
+ flush();
+ lock.lock();
+ m_queue_mutex_holder = pthread_self();
+ continue;
+ }
+
+ m_cond_flusher.wait(lock);
+ }
+ m_queue_mutex_holder = 0;
+ }
+ flush();
+ return NULL;
+}
+
+bool Log::is_inside_log_lock()
+{
+ return
+ pthread_self() == m_queue_mutex_holder ||
+ pthread_self() == m_flush_mutex_holder;
+}
+
+void Log::inject_segv()
+{
+ m_inject_segv = true;
+}
+
+void Log::reset_segv()
+{
+ m_inject_segv = false;
+}
+
+} // ceph::logging::
+} // ceph::
diff --git a/src/log/Log.h b/src/log/Log.h
new file mode 100644
index 000000000..5eeed4101
--- /dev/null
+++ b/src/log/Log.h
@@ -0,0 +1,128 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef __CEPH_LOG_LOG_H
+#define __CEPH_LOG_LOG_H
+
+#include <boost/circular_buffer.hpp>
+
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <string>
+#include <string_view>
+
+#include "common/Thread.h"
+#include "common/likely.h"
+
+#include "log/Entry.h"
+
+namespace ceph {
+namespace logging {
+
+class Graylog;
+class SubsystemMap;
+
+class Log : private Thread
+{
+ using EntryRing = boost::circular_buffer<ConcreteEntry>;
+ using EntryVector = std::vector<ConcreteEntry>;
+
+ static const std::size_t DEFAULT_MAX_NEW = 100;
+ static const std::size_t DEFAULT_MAX_RECENT = 10000;
+
+ Log **m_indirect_this;
+
+ const SubsystemMap *m_subs;
+
+ std::mutex m_queue_mutex;
+ std::mutex m_flush_mutex;
+ std::condition_variable m_cond_loggers;
+ std::condition_variable m_cond_flusher;
+
+ pthread_t m_queue_mutex_holder;
+ pthread_t m_flush_mutex_holder;
+
+ EntryVector m_new; ///< new entries
+ EntryRing m_recent; ///< recent (less new) entries we've already written at low detail
+ EntryVector m_flush; ///< entries to be flushed (here to optimize heap allocations)
+
+ std::string m_log_file;
+ int m_fd = -1;
+ uid_t m_uid = 0;
+ gid_t m_gid = 0;
+
+ int m_fd_last_error = 0; ///< last error we say writing to fd (if any)
+
+ int m_syslog_log = -2, m_syslog_crash = -2;
+ int m_stderr_log = -1, m_stderr_crash = -1;
+ int m_graylog_log = -3, m_graylog_crash = -3;
+
+ std::string m_log_stderr_prefix;
+
+ std::shared_ptr<Graylog> m_graylog;
+
+ std::vector<char> m_log_buf;
+
+ bool m_stop = false;
+
+ std::size_t m_max_new = DEFAULT_MAX_NEW;
+ std::size_t m_max_recent = DEFAULT_MAX_RECENT;
+
+ bool m_inject_segv = false;
+
+ void *entry() override;
+
+ void _log_safe_write(std::string_view sv);
+ void _flush_logbuf();
+ void _flush(EntryVector& q, bool crash);
+
+ void _log_message(std::string_view s, bool crash);
+
+public:
+ using Thread::is_started;
+
+ Log(const SubsystemMap *s);
+ ~Log() override;
+
+ void set_flush_on_exit();
+
+ void set_coarse_timestamps(bool coarse);
+ void set_max_new(std::size_t n);
+ void set_max_recent(std::size_t n);
+ void set_log_file(std::string_view fn);
+ void reopen_log_file();
+ void chown_log_file(uid_t uid, gid_t gid);
+ void set_log_stderr_prefix(std::string_view p);
+
+ void flush();
+
+ void dump_recent();
+
+ void set_syslog_level(int log, int crash);
+ void set_stderr_level(int log, int crash);
+ void set_graylog_level(int log, int crash);
+
+ void start_graylog();
+ void stop_graylog();
+
+ std::shared_ptr<Graylog> graylog() { return m_graylog; }
+
+ void submit_entry(Entry&& e);
+
+ void start();
+ void stop();
+
+ /// true if the log lock is held by our thread
+ bool is_inside_log_lock();
+
+ /// induce a segv on the next log event
+ void inject_segv();
+ void reset_segv();
+};
+
+}
+}
+
+#endif
diff --git a/src/log/LogClock.h b/src/log/LogClock.h
new file mode 100644
index 000000000..07531c8e5
--- /dev/null
+++ b/src/log/LogClock.h
@@ -0,0 +1,168 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LOG_CLOCK_H
+#define CEPH_LOG_CLOCK_H
+
+#include <cstdio>
+#include <chrono>
+#include <ctime>
+#include <sys/time.h>
+
+#include "include/ceph_assert.h"
+#include "common/ceph_time.h"
+
+#ifndef suseconds_t
+typedef long suseconds_t;
+#endif
+
+namespace ceph {
+namespace logging {
+namespace _logclock {
+// Because the underlying representations of a duration can be any
+// arithmetic type we wish, slipping a coarseness tag there is the
+// least hacky way to tag them. I'd also considered doing bit-stealing
+// and just setting the low bit of the representation unconditionally
+// to mark it as fine, BUT that would cut our nanosecond precision in
+// half which sort of obviates the point of 'fine'…admittedly real
+// computers probably don't care. More to the point it wouldn't be
+// durable under arithmetic unless we wrote a whole class to support
+// it /anyway/, and if I'm going to do that I may as well add a bool.
+
+// (Yes I know we don't do arithmetic on log timestamps, but I don't
+// want everything to suddenly break because someone did something
+// that the std::chrono::timepoint contract actually supports.)
+struct taggedrep {
+ uint64_t count;
+ bool coarse;
+
+ explicit taggedrep(uint64_t count) : count(count), coarse(true) {}
+ taggedrep(uint64_t count, bool coarse) : count(count), coarse(coarse) {}
+
+ explicit operator uint64_t() {
+ return count;
+ }
+};
+
+// Proper significant figure support would be a bit excessive. Also
+// we'd have to know the precision of the clocks on Linux and FreeBSD
+// and whatever else we want to support.
+inline taggedrep operator +(const taggedrep& l, const taggedrep& r) {
+ return { l.count + r.count, l.coarse || r.coarse };
+}
+inline taggedrep operator -(const taggedrep& l, const taggedrep& r) {
+ return { l.count - r.count, l.coarse || r.coarse };
+}
+inline taggedrep operator *(const taggedrep& l, const taggedrep& r) {
+ return { l.count * r.count, l.coarse || r.coarse };
+}
+inline taggedrep operator /(const taggedrep& l, const taggedrep& r) {
+ return { l.count / r.count, l.coarse || r.coarse };
+}
+inline taggedrep operator %(const taggedrep& l, const taggedrep& r) {
+ return { l.count % r.count, l.coarse || r.coarse };
+}
+
+// You can compare coarse and fine time. You shouldn't do so in any
+// case where ordering actually MATTERS but in practice people won't
+// actually ping-pong their logs back and forth between them.
+inline bool operator ==(const taggedrep& l, const taggedrep& r) {
+ return l.count == r.count;
+}
+inline bool operator !=(const taggedrep& l, const taggedrep& r) {
+ return l.count != r.count;
+}
+inline bool operator <(const taggedrep& l, const taggedrep& r) {
+ return l.count < r.count;
+}
+inline bool operator <=(const taggedrep& l, const taggedrep& r) {
+ return l.count <= r.count;
+}
+inline bool operator >=(const taggedrep& l, const taggedrep& r) {
+ return l.count >= r.count;
+}
+inline bool operator >(const taggedrep& l, const taggedrep& r) {
+ return l.count > r.count;
+}
+}
+class log_clock {
+public:
+ using rep = _logclock::taggedrep;
+ using period = std::nano;
+ using duration = std::chrono::duration<rep, period>;
+ // The second template parameter defaults to the clock's duration
+ // type.
+ using time_point = std::chrono::time_point<log_clock>;
+ static constexpr const bool is_steady = false;
+
+ time_point now() noexcept {
+ return appropriate_now();
+ }
+
+ void coarsen() {
+ appropriate_now = coarse_now;
+ }
+
+ void refine() {
+ appropriate_now = fine_now;
+ }
+
+ // Since our formatting is done in microseconds and we're using it
+ // anyway, we may as well keep this one
+ static timeval to_timeval(time_point t) {
+ auto rep = t.time_since_epoch().count();
+ timespan ts(rep.count);
+ #ifndef _WIN32
+ return { static_cast<time_t>(std::chrono::duration_cast<std::chrono::seconds>(ts).count()),
+ static_cast<suseconds_t>(std::chrono::duration_cast<std::chrono::microseconds>(
+ ts % std::chrono::seconds(1)).count()) };
+ #else
+ return { static_cast<long>(std::chrono::duration_cast<std::chrono::seconds>(ts).count()),
+ static_cast<long>(std::chrono::duration_cast<std::chrono::microseconds>(
+ ts % std::chrono::seconds(1)).count()) };
+ #endif
+ }
+private:
+ static time_point coarse_now() {
+ return time_point(
+ duration(_logclock::taggedrep(coarse_real_clock::now()
+ .time_since_epoch().count(), true)));
+ }
+ static time_point fine_now() {
+ return time_point(
+ duration(_logclock::taggedrep(real_clock::now()
+ .time_since_epoch().count(), false)));
+ }
+ time_point(*appropriate_now)() = coarse_now;
+};
+using log_time = log_clock::time_point;
+inline int append_time(const log_time& t, char *out, int outlen) {
+ bool coarse = t.time_since_epoch().count().coarse;
+ auto tv = log_clock::to_timeval(t);
+ std::tm bdt;
+ time_t t_sec = tv.tv_sec;
+ localtime_r(&t_sec, &bdt);
+ char tz[32] = { 0 };
+ strftime(tz, sizeof(tz), "%z", &bdt);
+
+ int r;
+ if (coarse) {
+ r = std::snprintf(out, outlen, "%04d-%02d-%02dT%02d:%02d:%02d.%03ld%s",
+ bdt.tm_year + 1900, bdt.tm_mon + 1, bdt.tm_mday,
+ bdt.tm_hour, bdt.tm_min, bdt.tm_sec,
+ static_cast<long>(tv.tv_usec / 1000), tz);
+ } else {
+ r = std::snprintf(out, outlen, "%04d-%02d-%02dT%02d:%02d:%02d.%06ld%s",
+ bdt.tm_year + 1900, bdt.tm_mon + 1, bdt.tm_mday,
+ bdt.tm_hour, bdt.tm_min, bdt.tm_sec,
+ static_cast<long>(tv.tv_usec), tz);
+ }
+ // Since our caller just adds the return value to something without
+ // checking it…
+ ceph_assert(r >= 0);
+ return r;
+}
+}
+}
+
+#endif
diff --git a/src/log/SubsystemMap.h b/src/log/SubsystemMap.h
new file mode 100644
index 000000000..4d33ab3b1
--- /dev/null
+++ b/src/log/SubsystemMap.h
@@ -0,0 +1,113 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LOG_SUBSYSTEMS
+#define CEPH_LOG_SUBSYSTEMS
+
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include "common/likely.h"
+#include "common/subsys_types.h"
+
+#include "include/ceph_assert.h"
+
+namespace ceph {
+namespace logging {
+
+class SubsystemMap {
+ // Access to the current gathering levels must be *FAST* as they are
+ // read over and over from all places in the code (via should_gather()
+ // by i.e. dout).
+ std::array<uint8_t, ceph_subsys_get_num()> m_gather_levels;
+
+ // The rest. Should be as small as possible to not unnecessarily
+ // enlarge md_config_t and spread it other elements across cache
+ // lines. Access can be slow.
+ std::vector<ceph_subsys_item_t> m_subsys;
+
+ friend class Log;
+
+public:
+ SubsystemMap() {
+ constexpr auto s = ceph_subsys_get_as_array();
+ m_subsys.reserve(s.size());
+
+ std::size_t i = 0;
+ for (const ceph_subsys_item_t& item : s) {
+ m_subsys.emplace_back(item);
+ m_gather_levels[i++] = std::max(item.log_level, item.gather_level);
+ }
+ }
+
+ constexpr static std::size_t get_num() {
+ return ceph_subsys_get_num();
+ }
+
+ constexpr static std::size_t get_max_subsys_len() {
+ return ceph_subsys_max_name_length();
+ }
+
+ int get_log_level(unsigned subsys) const {
+ if (subsys >= get_num())
+ subsys = 0;
+ return m_subsys[subsys].log_level;
+ }
+
+ int get_gather_level(unsigned subsys) const {
+ if (subsys >= get_num())
+ subsys = 0;
+ return m_subsys[subsys].gather_level;
+ }
+
+ // TODO(rzarzynski): move to string_view?
+ constexpr const char* get_name(unsigned subsys) const {
+ if (subsys >= get_num())
+ subsys = 0;
+ return ceph_subsys_get_as_array()[subsys].name;
+ }
+
+ template <unsigned SubV, int LvlV>
+ bool should_gather() const {
+ static_assert(SubV < get_num(), "wrong subsystem ID");
+ static_assert(LvlV >= -1 && LvlV <= 200);
+
+ if constexpr (LvlV <= 0) {
+ // handle the -1 and 0 levels entirely at compile-time.
+ // Such debugs are intended to be gathered regardless even
+ // of the user configuration.
+ return true;
+ } else {
+ // we expect that setting level different than the default
+ // is rather unusual.
+ return expect(LvlV <= static_cast<int>(m_gather_levels[SubV]),
+ LvlV <= ceph_subsys_get_max_default_level(SubV));
+ }
+ }
+ bool should_gather(const unsigned sub, int level) const {
+ ceph_assert(sub < m_subsys.size());
+ return level <= static_cast<int>(m_gather_levels[sub]);
+ }
+
+ void set_log_level(unsigned subsys, uint8_t log)
+ {
+ ceph_assert(subsys < m_subsys.size());
+ m_subsys[subsys].log_level = log;
+ m_gather_levels[subsys] = \
+ std::max(log, m_subsys[subsys].gather_level);
+ }
+
+ void set_gather_level(unsigned subsys, uint8_t gather)
+ {
+ ceph_assert(subsys < m_subsys.size());
+ m_subsys[subsys].gather_level = gather;
+ m_gather_levels[subsys] = \
+ std::max(m_subsys[subsys].log_level, gather);
+ }
+};
+
+}
+}
+
+#endif
diff --git a/src/log/test.cc b/src/log/test.cc
new file mode 100644
index 000000000..a2169543b
--- /dev/null
+++ b/src/log/test.cc
@@ -0,0 +1,381 @@
+#include <gtest/gtest.h>
+
+#include "log/Log.h"
+#include "common/Clock.h"
+#include "include/coredumpctl.h"
+#include "SubsystemMap.h"
+
+#include "global/global_init.h"
+#include "common/ceph_argparse.h"
+#include "global/global_context.h"
+#include "common/dout.h"
+
+using namespace ceph::logging;
+
+TEST(Log, Simple)
+{
+ SubsystemMap subs;
+ subs.set_log_level(0, 10);
+ subs.set_gather_level(0, 10);
+
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 1);
+
+ subs.set_log_level(2, 20);
+ subs.set_gather_level(2, 2);
+
+ subs.set_log_level(3, 10);
+ subs.set_gather_level(3, 3);
+
+ Log log(&subs);
+ log.start();
+
+ log.set_log_file("foo");
+ log.reopen_log_file();
+
+ log.set_stderr_level(5, -1);
+
+
+ for (int i=0; i<100; i++) {
+ int sys = i % 4;
+ int l = 5 + (i%4);
+ if (subs.should_gather(sys, l)) {
+ MutableEntry e(l, sys);
+ log.submit_entry(std::move(e));
+ }
+ }
+
+ log.flush();
+
+ log.dump_recent();
+
+ log.stop();
+}
+
+TEST(Log, ReuseBad)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 1);
+ subs.set_gather_level(1, 1);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("foo");
+ log.reopen_log_file();
+
+ const int l = 0;
+ {
+ MutableEntry e(l, 1);
+ auto& out = e.get_ostream();
+ out << (std::streambuf*)nullptr;
+ EXPECT_TRUE(out.bad()); // writing nullptr to a stream sets its badbit
+ log.submit_entry(std::move(e));
+ }
+ {
+ MutableEntry e(l, 1);
+ auto& out = e.get_ostream();
+ EXPECT_FALSE(out.bad()); // should not see failures from previous log entry
+ out << "hello world";
+ log.submit_entry(std::move(e));
+ }
+
+ log.flush();
+ log.stop();
+}
+
+int many = 10000;
+
+TEST(Log, ManyNoGather)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 1);
+ subs.set_gather_level(1, 1);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+ for (int i=0; i<many; i++) {
+ int l = 10;
+ if (subs.should_gather(1, l))
+ log.submit_entry(MutableEntry(1, 0));
+ }
+ log.flush();
+ log.stop();
+}
+
+
+TEST(Log, ManyGatherLog)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 10);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+ for (int i=0; i<many; i++) {
+ int l = 10;
+ if (subs.should_gather(1, l)) {
+ MutableEntry e(l, 1);
+ e.get_ostream() << "this is a long string asdf asdf asdf asdf asdf asdf asd fasd fasdf ";
+ log.submit_entry(std::move(e));
+ }
+ }
+ log.flush();
+ log.stop();
+}
+
+TEST(Log, ManyGatherLogStackSpillover)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 10);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+ for (int i=0; i<many; i++) {
+ int l = 10;
+ if (subs.should_gather(1, l)) {
+ MutableEntry e(l, 1);
+ auto& s = e.get_ostream();
+ s << "foo";
+ s << std::string(sizeof(e) * 2, '-');
+ log.submit_entry(std::move(e));
+ }
+ }
+ log.flush();
+ log.stop();
+}
+
+TEST(Log, ManyGather)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 1);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+ for (int i=0; i<many; i++) {
+ int l = 10;
+ if (subs.should_gather(1, l))
+ log.submit_entry(MutableEntry(l, 1));
+ }
+ log.flush();
+ log.stop();
+}
+
+void do_segv()
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 1);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+
+ log.inject_segv();
+ MutableEntry e(10, 1);
+ {
+ PrCtl unset_dumpable;
+ log.submit_entry(std::move(e)); // this should segv
+ }
+
+ log.flush();
+ log.stop();
+}
+
+TEST(Log, InternalSegv)
+{
+ ASSERT_DEATH(do_segv(), ".*");
+}
+
+TEST(Log, LargeLog)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 10);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+ int l = 10;
+ {
+ MutableEntry e(l, 1);
+ std::string msg(10000000, 'a');
+ e.get_ostream() << msg;
+ log.submit_entry(std::move(e));
+ }
+ log.flush();
+ log.stop();
+}
+
+TEST(Log, LargeFromSmallLog)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 10);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("big");
+ log.reopen_log_file();
+ int l = 10;
+ {
+ MutableEntry e(l, 1);
+ for (int i = 0; i < 1000000; i++) {
+ std::string msg(10, 'a');
+ e.get_ostream() << msg;
+ }
+ log.submit_entry(std::move(e));
+ }
+ log.flush();
+ log.stop();
+}
+
+// Make sure nothing bad happens when we switch
+
+TEST(Log, TimeSwitch)
+{
+ SubsystemMap subs;
+ subs.set_log_level(1, 20);
+ subs.set_gather_level(1, 10);
+ Log log(&subs);
+ log.start();
+ log.set_log_file("time_switch_log");
+ log.reopen_log_file();
+ int l = 10;
+ bool coarse = true;
+ for (auto i = 0U; i < 300; ++i) {
+ MutableEntry e(l, 1);
+ e.get_ostream() << "SQUID THEFT! PUNISHABLE BY DEATH!";
+ log.submit_entry(std::move(e));
+ if (i % 50)
+ log.set_coarse_timestamps(coarse = !coarse);
+ }
+ log.flush();
+ log.stop();
+}
+
+TEST(Log, TimeFormat)
+{
+ static constexpr auto buflen = 128u;
+ char buf[buflen];
+ ceph::logging::log_clock clock;
+ {
+ clock.coarsen();
+ auto t = clock.now();
+ ceph::logging::append_time(t, buf, buflen);
+ auto c = std::strrchr(buf, '.');
+ ASSERT_NE(c, nullptr);
+ ASSERT_EQ(8u, strlen(c + 1));
+ }
+ {
+ clock.refine();
+ auto t = clock.now();
+ ceph::logging::append_time(t, buf, buflen);
+ auto c = std::strrchr(buf, '.');
+ ASSERT_NE(c, nullptr);
+ ASSERT_EQ(11u, std::strlen(c + 1));
+ }
+}
+
+#define dout_subsys ceph_subsys_context
+
+template <int depth, int x> struct do_log
+{
+ void log(CephContext* cct);
+};
+
+template <int x> struct do_log<12, x>
+{
+ void log(CephContext* cct);
+};
+
+template<int depth, int x> void do_log<depth,x>::log(CephContext* cct)
+{
+ ldout(cct, 20) << "Log depth=" << depth << " x=" << x << dendl;
+ if (rand() % 2) {
+ do_log<depth+1, x*2> log;
+ log.log(cct);
+ } else {
+ do_log<depth+1, x*2+1> log;
+ log.log(cct);
+ }
+}
+
+std::string recursion(CephContext* cct)
+{
+ ldout(cct, 20) << "Preparing recursion string" << dendl;
+ return "here-recursion";
+}
+
+template<int x> void do_log<12, x>::log(CephContext* cct)
+{
+ if ((rand() % 16) == 0) {
+ ldout(cct, 20) << "End " << recursion(cct) << "x=" << x << dendl;
+ } else {
+ ldout(cct, 20) << "End x=" << x << dendl;
+ }
+}
+
+TEST(Log, Speed_gather)
+{
+ do_log<0,0> start;
+ g_ceph_context->_conf->subsys.set_gather_level(ceph_subsys_context, 30);
+ g_ceph_context->_conf->subsys.set_log_level(ceph_subsys_context, 0);
+ for (int i=0; i<100000;i++) {
+ ldout(g_ceph_context, 20) << "Iteration " << i << dendl;
+ start.log(g_ceph_context);
+ }
+}
+
+TEST(Log, Speed_nogather)
+{
+ do_log<0,0> start;
+ g_ceph_context->_conf->subsys.set_gather_level(ceph_subsys_context, 0);
+ g_ceph_context->_conf->subsys.set_log_level(ceph_subsys_context, 0);
+ for (int i=0; i<100000;i++) {
+ ldout(g_ceph_context, 20) << "Iteration " << i << dendl;
+ start.log(g_ceph_context);
+ }
+}
+
+TEST(Log, GarbleRecovery)
+{
+ static const char* test_file="log_for_moment";
+
+ Log* saved = g_ceph_context->_log;
+ Log log(&g_ceph_context->_conf->subsys);
+ log.start();
+ unlink(test_file);
+ log.set_log_file(test_file);
+ log.reopen_log_file();
+ g_ceph_context->_log = &log;
+
+ std::string long_message(1000,'c');
+ ldout(g_ceph_context, 0) << long_message << dendl;
+ ldout(g_ceph_context, 0) << "Prologue" << (std::streambuf*)nullptr << long_message << dendl;
+ ldout(g_ceph_context, 0) << "Epitaph" << long_message << dendl;
+
+ g_ceph_context->_log = saved;
+ log.flush();
+ log.stop();
+ struct stat file_status;
+ ASSERT_EQ(stat(test_file, &file_status), 0);
+ ASSERT_GT(file_status.st_size, 2000);
+}
+
+int main(int argc, char **argv)
+{
+ vector<const char*> args;
+ argv_to_vec(argc, (const char **)argv, args);
+
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+ common_init_finish(g_ceph_context);
+
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}