diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/log | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/log')
-rw-r--r-- | src/log/Entry.h | 115 | ||||
-rw-r--r-- | src/log/Log.cc | 604 | ||||
-rw-r--r-- | src/log/Log.h | 152 | ||||
-rw-r--r-- | src/log/LogClock.h | 168 | ||||
-rw-r--r-- | src/log/SubsystemMap.h | 113 | ||||
-rw-r--r-- | src/log/test.cc | 504 |
6 files changed, 1656 insertions, 0 deletions
diff --git a/src/log/Entry.h b/src/log/Entry.h new file mode 100644 index 000000000..3677c8eb9 --- /dev/null +++ b/src/log/Entry.h @@ -0,0 +1,115 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef __CEPH_LOG_ENTRY_H +#define __CEPH_LOG_ENTRY_H + +#include "log/LogClock.h" + +#include "common/StackStringStream.h" + +#include "boost/container/small_vector.hpp" + +#include <pthread.h> + +#include <string_view> + +namespace ceph { +namespace logging { + +class Entry { +public: + using time = log_time; + + Entry() = delete; + Entry(short pr, short sub) : + m_stamp(clock().now()), + m_thread(pthread_self()), + m_prio(pr), + m_subsys(sub) + {} + Entry(const Entry &) = default; + Entry& operator=(const Entry &) = default; + Entry(Entry &&e) = default; + Entry& operator=(Entry &&e) = default; + virtual ~Entry() = default; + + virtual std::string_view strv() const = 0; + virtual std::size_t size() const = 0; + + time m_stamp; + pthread_t m_thread; + short m_prio, m_subsys; + + static log_clock& clock() { + static log_clock clock; + return clock; + } +}; + +/* This should never be moved to the heap! Only allocate this on the stack. See + * CachedStackStringStream for rationale. + */ +class MutableEntry : public Entry { +public: + MutableEntry() = delete; + MutableEntry(short pr, short sub) : Entry(pr, sub) {} + MutableEntry(const MutableEntry&) = delete; + MutableEntry& operator=(const MutableEntry&) = delete; + MutableEntry(MutableEntry&&) = delete; + MutableEntry& operator=(MutableEntry&&) = delete; + ~MutableEntry() override = default; + + std::ostream& get_ostream() { + return *cos; + } + + std::string_view strv() const override { + return cos->strv(); + } + std::size_t size() const override { + return cos->strv().size(); + } + +private: + CachedStackStringStream cos; +}; + +class ConcreteEntry : public Entry { +public: + ConcreteEntry() = delete; + ConcreteEntry(const Entry& e) : Entry(e) { + auto strv = e.strv(); + str.reserve(strv.size()); + str.insert(str.end(), strv.begin(), strv.end()); + } + ConcreteEntry& operator=(const Entry& e) { + Entry::operator=(e); + auto strv = e.strv(); + str.reserve(strv.size()); + str.assign(strv.begin(), strv.end()); + return *this; + } + ConcreteEntry(ConcreteEntry&& e) noexcept : Entry(e), str(std::move(e.str)) {} + ConcreteEntry& operator=(ConcreteEntry&& e) { + Entry::operator=(e); + str = std::move(e.str); + return *this; + } + ~ConcreteEntry() override = default; + + std::string_view strv() const override { + return std::string_view(str.data(), str.size()); + } + std::size_t size() const override { + return str.size(); + } + +private: + boost::container::small_vector<char, 1024> str; +}; + +} +} + +#endif diff --git a/src/log/Log.cc b/src/log/Log.cc new file mode 100644 index 000000000..69f6df82e --- /dev/null +++ b/src/log/Log.cc @@ -0,0 +1,604 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "Log.h" + +#include "common/errno.h" +#include "common/safe_io.h" +#include "common/Graylog.h" +#include "common/Journald.h" +#include "common/valgrind.h" + +#include "include/ceph_assert.h" +#include "include/compat.h" +#include "include/on_exit.h" +#include "include/uuid.h" + +#include "Entry.h" +#include "LogClock.h" +#include "SubsystemMap.h" + +#include <boost/container/vector.hpp> + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <syslog.h> + +#include <algorithm> +#include <iostream> +#include <set> + +#include <fmt/format.h> +#include <fmt/ostream.h> + +#define MAX_LOG_BUF 65536 + +namespace ceph { +namespace logging { + +static OnExitManager exit_callbacks; + +static void log_on_exit(void *p) +{ + Log *l = *(Log **)p; + if (l) + l->flush(); + delete (Log **)p;// Delete allocated pointer (not Log object, the pointer only!) +} + +Log::Log(const SubsystemMap *s) + : m_indirect_this(nullptr), + m_subs(s), + m_recent(DEFAULT_MAX_RECENT) +{ + m_log_buf.reserve(MAX_LOG_BUF); + _configure_stderr(); +} + +Log::~Log() +{ + if (m_indirect_this) { + *m_indirect_this = nullptr; + } + + ceph_assert(!is_started()); + if (m_fd >= 0) { + VOID_TEMP_FAILURE_RETRY(::close(m_fd)); + m_fd = -1; + } +} + +void Log::_configure_stderr() +{ +#ifndef _WIN32 + struct stat info; + if (int rc = fstat(m_fd_stderr, &info); rc == -1) { + std::cerr << "failed to stat stderr: " << cpp_strerror(errno) << std::endl; + return; + } + + if (S_ISFIFO(info.st_mode)) { + /* Set O_NONBLOCK on FIFO stderr file. We want to ensure atomic debug log + * writes so they do not get partially read by e.g. buggy container + * runtimes. See also IEEE Std 1003.1-2017 and Log::_log_stderr below. + * + * This isn't required on Windows. + */ + int flags = fcntl(m_fd_stderr, F_GETFL); + if (flags == -1) { + std::cerr << "failed to get fcntl flags for stderr: " << cpp_strerror(errno) << std::endl; + return; + } + if (!(flags & O_NONBLOCK)) { + flags |= O_NONBLOCK; + flags = fcntl(m_fd_stderr, F_SETFL, flags); + if (flags == -1) { + std::cerr << "failed to set fcntl flags for stderr: " << cpp_strerror(errno) << std::endl; + return; + } + } + do_stderr_poll = true; + } +#endif // !_WIN32 +} + + +/// +void Log::set_coarse_timestamps(bool coarse) { + std::scoped_lock lock(m_flush_mutex); + if (coarse) + Entry::clock().coarsen(); + else + Entry::clock().refine(); +} + +void Log::set_flush_on_exit() +{ + std::scoped_lock lock(m_flush_mutex); + // Make sure we flush on shutdown. We do this by deliberately + // leaking an indirect pointer to ourselves (on_exit() can't + // unregister a callback). This is not racy only becuase we + // assume that exit() won't race with ~Log(). + if (m_indirect_this == NULL) { + m_indirect_this = new (Log*)(this); + exit_callbacks.add_callback(log_on_exit, m_indirect_this); + } +} + +void Log::set_max_new(std::size_t n) +{ + std::scoped_lock lock(m_queue_mutex); + m_max_new = n; +} + +void Log::set_max_recent(std::size_t n) +{ + std::scoped_lock lock(m_flush_mutex); + m_recent.set_capacity(n); +} + +void Log::set_log_file(std::string_view fn) +{ + std::scoped_lock lock(m_flush_mutex); + m_log_file = fn; +} + +void Log::set_log_stderr_prefix(std::string_view p) +{ + std::scoped_lock lock(m_flush_mutex); + m_log_stderr_prefix = p; +} + +void Log::reopen_log_file() +{ + std::scoped_lock lock(m_flush_mutex); + if (!is_started()) { + return; + } + m_flush_mutex_holder = pthread_self(); + if (m_fd >= 0) { + VOID_TEMP_FAILURE_RETRY(::close(m_fd)); + m_fd = -1; + } + if (m_log_file.length()) { + m_fd = ::open(m_log_file.c_str(), O_CREAT|O_WRONLY|O_APPEND|O_CLOEXEC, 0644); + if (m_fd >= 0 && (m_uid || m_gid)) { + if (::fchown(m_fd, m_uid, m_gid) < 0) { + int e = errno; + std::cerr << "failed to chown " << m_log_file << ": " << cpp_strerror(e) + << std::endl; + } + } + } + m_flush_mutex_holder = 0; +} + +void Log::chown_log_file(uid_t uid, gid_t gid) +{ + std::scoped_lock lock(m_flush_mutex); + if (m_fd >= 0) { + int r = ::fchown(m_fd, uid, gid); + if (r < 0) { + r = -errno; + std::cerr << "failed to chown " << m_log_file << ": " << cpp_strerror(r) + << std::endl; + } + } +} + +void Log::set_syslog_level(int log, int crash) +{ + std::scoped_lock lock(m_flush_mutex); + m_syslog_log = log; + m_syslog_crash = crash; +} + +void Log::set_stderr_level(int log, int crash) +{ + std::scoped_lock lock(m_flush_mutex); + m_stderr_log = log; + m_stderr_crash = crash; +} + +void Log::set_graylog_level(int log, int crash) +{ + std::scoped_lock lock(m_flush_mutex); + m_graylog_log = log; + m_graylog_crash = crash; +} + +void Log::start_graylog(const std::string& host, + const uuid_d& fsid) +{ + std::scoped_lock lock(m_flush_mutex); + if (! m_graylog.get()) { + m_graylog = std::make_shared<Graylog>(m_subs, "dlog"); + m_graylog->set_hostname(host); + m_graylog->set_fsid(fsid); + } +} + + +void Log::stop_graylog() +{ + std::scoped_lock lock(m_flush_mutex); + m_graylog.reset(); +} + +void Log::set_journald_level(int log, int crash) +{ + std::scoped_lock lock(m_flush_mutex); + m_journald_log = log; + m_journald_crash = crash; +} + +void Log::start_journald_logger() +{ + std::scoped_lock lock(m_flush_mutex); + if (!m_journald) { + m_journald = std::make_unique<JournaldLogger>(m_subs); + } +} + +void Log::stop_journald_logger() +{ + std::scoped_lock lock(m_flush_mutex); + m_journald.reset(); +} + +void Log::submit_entry(Entry&& e) +{ + std::unique_lock lock(m_queue_mutex); + m_queue_mutex_holder = pthread_self(); + + if (unlikely(m_inject_segv)) + *(volatile int *)(0) = 0xdead; + + // wait for flush to catch up + while (is_started() && + m_new.size() > m_max_new) { + if (m_stop) break; // force addition + m_cond_loggers.wait(lock); + } + + m_new.emplace_back(std::move(e)); + m_cond_flusher.notify_all(); + m_queue_mutex_holder = 0; +} + +void Log::flush() +{ + std::scoped_lock lock1(m_flush_mutex); + m_flush_mutex_holder = pthread_self(); + + { + std::scoped_lock lock2(m_queue_mutex); + m_queue_mutex_holder = pthread_self(); + assert(m_flush.empty()); + m_flush.swap(m_new); + m_cond_loggers.notify_all(); + m_queue_mutex_holder = 0; + } + + _flush(m_flush, false); + m_flush_mutex_holder = 0; +} + +void Log::_log_safe_write(std::string_view sv) +{ + if (m_fd < 0) + return; + int r = safe_write(m_fd, sv.data(), sv.size()); + if (r != m_fd_last_error) { + if (r < 0) + std::cerr << "problem writing to " << m_log_file + << ": " << cpp_strerror(r) + << std::endl; + m_fd_last_error = r; + } +} + +void Log::set_stderr_fd(int fd) +{ + m_fd_stderr = fd; + _configure_stderr(); +} + +void Log::_log_stderr(std::string_view strv) +{ + if (do_stderr_poll) { + auto& prefix = m_log_stderr_prefix; + size_t const len = prefix.size() + strv.size(); + boost::container::small_vector<char, PIPE_BUF> buf; + buf.resize(len+1, '\0'); + memcpy(buf.data(), prefix.c_str(), prefix.size()); + memcpy(buf.data()+prefix.size(), strv.data(), strv.size()); + + char const* const start = buf.data(); + char const* current = start; + while ((size_t)(current-start) < len) { + auto chunk = std::min<ssize_t>(PIPE_BUF, len-(ssize_t)(current-start)); + while (1) { + ssize_t rc = write(m_fd_stderr, current, chunk); + if (rc == chunk) { + current += chunk; + break; + } else if (rc > 0) { + /* According to IEEE Std 1003.1-2017, this cannot happen: + * + * Write requests to a pipe or FIFO shall be handled in the same way as a regular file with the following exceptions: + * ... + * If the O_NONBLOCK flag is set ... + * ... + * A write request for {PIPE_BUF} or fewer bytes shall have the + * following effect: if there is sufficient space available in + * the pipe, write() shall transfer all the data and return the + * number of bytes requested. Otherwise, write() shall transfer + * no data and return -1 with errno set to [EAGAIN]. + * + * In any case, handle misbehavior gracefully by incrementing current. + */ + current += rc; + break; + } else if (rc == -1) { + if (errno == EAGAIN) { + struct pollfd pfd[1]; + pfd[0].fd = m_fd_stderr; + pfd[0].events = POLLOUT; + poll(pfd, 1, -1); + /* ignore errors / success, just retry the write */ + } else if (errno == EINTR) { + continue; + } else { + /* some other kind of error, no point logging if stderr writes fail */ + return; + } + } + } + } + } else { + fmt::print(std::cerr, "{}{}", m_log_stderr_prefix, strv); + } +} + +void Log::_flush_logbuf() +{ + if (m_log_buf.size()) { + _log_safe_write(std::string_view(m_log_buf.data(), m_log_buf.size())); + m_log_buf.resize(0); + } +} + +void Log::_flush(EntryVector& t, bool crash) +{ + long len = 0; + if (t.empty()) { + assert(m_log_buf.empty()); + return; + } + if (crash) { + len = t.size(); + } + for (auto& e : t) { + auto prio = e.m_prio; + auto stamp = e.m_stamp; + auto sub = e.m_subsys; + auto thread = e.m_thread; + auto str = e.strv(); + + bool should_log = crash || m_subs->get_log_level(sub) >= prio; + bool do_fd = m_fd >= 0 && should_log; + bool do_syslog = m_syslog_crash >= prio && should_log; + bool do_stderr = m_stderr_crash >= prio && should_log; + bool do_graylog2 = m_graylog_crash >= prio && should_log; + bool do_journald = m_journald_crash >= prio && should_log; + + if (do_fd || do_syslog || do_stderr) { + const std::size_t cur = m_log_buf.size(); + std::size_t used = 0; + const std::size_t allocated = e.size() + 80; + m_log_buf.resize(cur + allocated); + + char* const start = m_log_buf.data(); + char* pos = start + cur; + + if (crash) { + used += (std::size_t)snprintf(pos + used, allocated - used, "%6ld> ", -(--len)); + } + used += (std::size_t)append_time(stamp, pos + used, allocated - used); + used += (std::size_t)snprintf(pos + used, allocated - used, " %lx %2d ", (unsigned long)thread, prio); + memcpy(pos + used, str.data(), str.size()); + used += str.size(); + pos[used] = '\0'; + ceph_assert((used + 1 /* '\n' */) < allocated); + + if (do_syslog) { + syslog(LOG_USER|LOG_INFO, "%s", pos); + } + + /* now add newline */ + pos[used++] = '\n'; + + if (do_stderr) { + _log_stderr(std::string_view(pos, used)); + } + + if (do_fd) { + m_log_buf.resize(cur + used); + } else { + m_log_buf.resize(0); + } + + if (m_log_buf.size() > MAX_LOG_BUF) { + _flush_logbuf(); + } + } + + if (do_graylog2 && m_graylog) { + m_graylog->log_entry(e); + } + + if (do_journald && m_journald) { + m_journald->log_entry(e); + } + + m_recent.push_back(std::move(e)); + } + t.clear(); + + _flush_logbuf(); +} + +void Log::_log_message(std::string_view s, bool crash) +{ + if (m_fd >= 0) { + std::string b = fmt::format("{}\n", s); + int r = safe_write(m_fd, b.data(), b.size()); + if (r < 0) + std::cerr << "problem writing to " << m_log_file << ": " << cpp_strerror(r) << std::endl; + } + if ((crash ? m_syslog_crash : m_syslog_log) >= 0) { + syslog(LOG_USER|LOG_INFO, "%.*s", static_cast<int>(s.size()), s.data()); + } + + if ((crash ? m_stderr_crash : m_stderr_log) >= 0) { + std::cerr << s << std::endl; + } +} + +template<typename T> +static uint64_t tid_to_int(T tid) +{ + if constexpr (std::is_pointer_v<T>) { + return reinterpret_cast<std::uintptr_t>(tid); + } else { + return tid; + } +} + +void Log::dump_recent() +{ + std::scoped_lock lock1(m_flush_mutex); + m_flush_mutex_holder = pthread_self(); + + { + std::scoped_lock lock2(m_queue_mutex); + m_queue_mutex_holder = pthread_self(); + assert(m_flush.empty()); + m_flush.swap(m_new); + m_queue_mutex_holder = 0; + } + + _flush(m_flush, false); + + _log_message("--- begin dump of recent events ---", true); + std::set<pthread_t> recent_pthread_ids; + { + EntryVector t; + t.insert(t.end(), std::make_move_iterator(m_recent.begin()), std::make_move_iterator(m_recent.end())); + m_recent.clear(); + for (const auto& e : t) { + recent_pthread_ids.emplace(e.m_thread); + } + _flush(t, true); + } + + _log_message("--- logging levels ---", true); + for (const auto& p : m_subs->m_subsys) { + _log_message(fmt::format(" {:2d}/{:2d} {}", + p.log_level, p.gather_level, p.name), true); + } + _log_message(fmt::format(" {:2d}/{:2d} (syslog threshold)", + m_syslog_log, m_syslog_crash), true); + _log_message(fmt::format(" {:2d}/{:2d} (stderr threshold)", + m_stderr_log, m_stderr_crash), true); + + _log_message("--- pthread ID / name mapping for recent threads ---", true); + for (const auto pthread_id : recent_pthread_ids) + { + char pthread_name[16] = {0}; //limited by 16B include terminating null byte. + ceph_pthread_getname(pthread_id, pthread_name, sizeof(pthread_name)); + // we want the ID to be printed in the same format as we use for a log entry. + // The reason is easier grepping. + _log_message(fmt::format(" {:x} / {}", + tid_to_int(pthread_id), pthread_name), true); + } + + _log_message(fmt::format(" max_recent {:9}", m_recent.capacity()), true); + _log_message(fmt::format(" max_new {:9}", m_max_new), true); + _log_message(fmt::format(" log_file {}", m_log_file), true); + + _log_message("--- end dump of recent events ---", true); + + assert(m_log_buf.empty()); + + m_flush_mutex_holder = 0; +} + +void Log::start() +{ + ceph_assert(!is_started()); + { + std::scoped_lock lock(m_queue_mutex); + m_stop = false; + } + create("log"); +} + +void Log::stop() +{ + if (is_started()) { + { + std::scoped_lock lock(m_queue_mutex); + m_stop = true; + m_cond_flusher.notify_one(); + m_cond_loggers.notify_all(); + } + join(); + } +} + +void *Log::entry() +{ + reopen_log_file(); + { + std::unique_lock lock(m_queue_mutex); + m_queue_mutex_holder = pthread_self(); + while (!m_stop) { + if (!m_new.empty()) { + m_queue_mutex_holder = 0; + lock.unlock(); + flush(); + lock.lock(); + m_queue_mutex_holder = pthread_self(); + continue; + } + + m_cond_flusher.wait(lock); + } + m_queue_mutex_holder = 0; + } + flush(); + return NULL; +} + +bool Log::is_inside_log_lock() +{ + return + pthread_self() == m_queue_mutex_holder || + pthread_self() == m_flush_mutex_holder; +} + +void Log::inject_segv() +{ + m_inject_segv = true; +} + +void Log::reset_segv() +{ + m_inject_segv = false; +} + +} // ceph::logging:: +} // ceph:: diff --git a/src/log/Log.h b/src/log/Log.h new file mode 100644 index 000000000..3a60937af --- /dev/null +++ b/src/log/Log.h @@ -0,0 +1,152 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef __CEPH_LOG_LOG_H +#define __CEPH_LOG_LOG_H + +#include <boost/circular_buffer.hpp> + +#include <condition_variable> +#include <memory> +#include <mutex> +#include <queue> +#include <string> +#include <string_view> + +#include "common/Thread.h" +#include "common/likely.h" + +#include "log/Entry.h" + +#include <unistd.h> + +struct uuid_d; + +namespace ceph { +namespace logging { + +class Graylog; +class JournaldLogger; +class SubsystemMap; + +class Log : private Thread +{ +public: + using Thread::is_started; + + Log(const SubsystemMap *s); + ~Log() override; + + void set_flush_on_exit(); + + void set_coarse_timestamps(bool coarse); + void set_max_new(std::size_t n); + void set_max_recent(std::size_t n); + void set_log_file(std::string_view fn); + void reopen_log_file(); + void chown_log_file(uid_t uid, gid_t gid); + void set_log_stderr_prefix(std::string_view p); + void set_stderr_fd(int fd); + + void flush(); + + void dump_recent(); + + void set_syslog_level(int log, int crash); + void set_stderr_level(int log, int crash); + void set_graylog_level(int log, int crash); + + void start_graylog(const std::string& host, + const uuid_d& fsid); + void stop_graylog(); + + void set_journald_level(int log, int crash); + + void start_journald_logger(); + void stop_journald_logger(); + + std::shared_ptr<Graylog> graylog() { return m_graylog; } + + void submit_entry(Entry&& e); + + void start(); + void stop(); + + /// true if the log lock is held by our thread + bool is_inside_log_lock(); + + /// induce a segv on the next log event + void inject_segv(); + void reset_segv(); + +protected: + using EntryVector = std::vector<ConcreteEntry>; + + virtual void _flush(EntryVector& q, bool crash); + +private: + using EntryRing = boost::circular_buffer<ConcreteEntry>; + + static const std::size_t DEFAULT_MAX_NEW = 100; + static const std::size_t DEFAULT_MAX_RECENT = 10000; + + Log **m_indirect_this; + + const SubsystemMap *m_subs; + + std::mutex m_queue_mutex; + std::mutex m_flush_mutex; + std::condition_variable m_cond_loggers; + std::condition_variable m_cond_flusher; + + pthread_t m_queue_mutex_holder; + pthread_t m_flush_mutex_holder; + + EntryVector m_new; ///< new entries + EntryRing m_recent; ///< recent (less new) entries we've already written at low detail + EntryVector m_flush; ///< entries to be flushed (here to optimize heap allocations) + + std::string m_log_file; + int m_fd = -1; + uid_t m_uid = 0; + gid_t m_gid = 0; + + int m_fd_stderr = STDERR_FILENO; + + int m_fd_last_error = 0; ///< last error we say writing to fd (if any) + + int m_syslog_log = -2, m_syslog_crash = -2; + int m_stderr_log = -1, m_stderr_crash = -1; + int m_graylog_log = -3, m_graylog_crash = -3; + int m_journald_log = -3, m_journald_crash = -3; + + std::string m_log_stderr_prefix; + bool do_stderr_poll = false; + + std::shared_ptr<Graylog> m_graylog; + std::unique_ptr<JournaldLogger> m_journald; + + std::vector<char> m_log_buf; + + bool m_stop = false; + + std::size_t m_max_new = DEFAULT_MAX_NEW; + + bool m_inject_segv = false; + + void *entry() override; + + void _log_safe_write(std::string_view sv); + void _flush_logbuf(); + void _log_message(std::string_view s, bool crash); + void _configure_stderr(); + void _log_stderr(std::string_view strv); + + + +}; + +} +} + +#endif diff --git a/src/log/LogClock.h b/src/log/LogClock.h new file mode 100644 index 000000000..c7a4f33d6 --- /dev/null +++ b/src/log/LogClock.h @@ -0,0 +1,168 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LOG_CLOCK_H +#define CEPH_LOG_CLOCK_H + +#include <cstdio> +#include <chrono> +#include <ctime> +#include <sys/time.h> + +#include "include/ceph_assert.h" +#include "common/ceph_time.h" + +#ifndef HAVE_SUSECONDS_T +typedef long suseconds_t; +#endif + +namespace ceph { +namespace logging { +namespace _logclock { +// Because the underlying representations of a duration can be any +// arithmetic type we wish, slipping a coarseness tag there is the +// least hacky way to tag them. I'd also considered doing bit-stealing +// and just setting the low bit of the representation unconditionally +// to mark it as fine, BUT that would cut our nanosecond precision in +// half which sort of obviates the point of 'fine'…admittedly real +// computers probably don't care. More to the point it wouldn't be +// durable under arithmetic unless we wrote a whole class to support +// it /anyway/, and if I'm going to do that I may as well add a bool. + +// (Yes I know we don't do arithmetic on log timestamps, but I don't +// want everything to suddenly break because someone did something +// that the std::chrono::timepoint contract actually supports.) +struct taggedrep { + uint64_t count; + bool coarse; + + explicit taggedrep(uint64_t count) : count(count), coarse(true) {} + taggedrep(uint64_t count, bool coarse) : count(count), coarse(coarse) {} + + explicit operator uint64_t() { + return count; + } +}; + +// Proper significant figure support would be a bit excessive. Also +// we'd have to know the precision of the clocks on Linux and FreeBSD +// and whatever else we want to support. +inline taggedrep operator +(const taggedrep& l, const taggedrep& r) { + return { l.count + r.count, l.coarse || r.coarse }; +} +inline taggedrep operator -(const taggedrep& l, const taggedrep& r) { + return { l.count - r.count, l.coarse || r.coarse }; +} +inline taggedrep operator *(const taggedrep& l, const taggedrep& r) { + return { l.count * r.count, l.coarse || r.coarse }; +} +inline taggedrep operator /(const taggedrep& l, const taggedrep& r) { + return { l.count / r.count, l.coarse || r.coarse }; +} +inline taggedrep operator %(const taggedrep& l, const taggedrep& r) { + return { l.count % r.count, l.coarse || r.coarse }; +} + +// You can compare coarse and fine time. You shouldn't do so in any +// case where ordering actually MATTERS but in practice people won't +// actually ping-pong their logs back and forth between them. +inline bool operator ==(const taggedrep& l, const taggedrep& r) { + return l.count == r.count; +} +inline bool operator !=(const taggedrep& l, const taggedrep& r) { + return l.count != r.count; +} +inline bool operator <(const taggedrep& l, const taggedrep& r) { + return l.count < r.count; +} +inline bool operator <=(const taggedrep& l, const taggedrep& r) { + return l.count <= r.count; +} +inline bool operator >=(const taggedrep& l, const taggedrep& r) { + return l.count >= r.count; +} +inline bool operator >(const taggedrep& l, const taggedrep& r) { + return l.count > r.count; +} +} +class log_clock { +public: + using rep = _logclock::taggedrep; + using period = std::nano; + using duration = std::chrono::duration<rep, period>; + // The second template parameter defaults to the clock's duration + // type. + using time_point = std::chrono::time_point<log_clock>; + static constexpr const bool is_steady = false; + + time_point now() noexcept { + return appropriate_now(); + } + + void coarsen() { + appropriate_now = coarse_now; + } + + void refine() { + appropriate_now = fine_now; + } + + // Since our formatting is done in microseconds and we're using it + // anyway, we may as well keep this one + static timeval to_timeval(time_point t) { + auto rep = t.time_since_epoch().count(); + timespan ts(rep.count); + #ifndef _WIN32 + return { static_cast<time_t>(std::chrono::duration_cast<std::chrono::seconds>(ts).count()), + static_cast<suseconds_t>(std::chrono::duration_cast<std::chrono::microseconds>( + ts % std::chrono::seconds(1)).count()) }; + #else + return { static_cast<long>(std::chrono::duration_cast<std::chrono::seconds>(ts).count()), + static_cast<long>(std::chrono::duration_cast<std::chrono::microseconds>( + ts % std::chrono::seconds(1)).count()) }; + #endif + } +private: + static time_point coarse_now() { + return time_point( + duration(_logclock::taggedrep(coarse_real_clock::now() + .time_since_epoch().count(), true))); + } + static time_point fine_now() { + return time_point( + duration(_logclock::taggedrep(real_clock::now() + .time_since_epoch().count(), false))); + } + time_point(*appropriate_now)() = coarse_now; +}; +using log_time = log_clock::time_point; +inline int append_time(const log_time& t, char *out, int outlen) { + bool coarse = t.time_since_epoch().count().coarse; + auto tv = log_clock::to_timeval(t); + std::tm bdt; + time_t t_sec = tv.tv_sec; + localtime_r(&t_sec, &bdt); + char tz[32] = { 0 }; + strftime(tz, sizeof(tz), "%z", &bdt); + + int r; + if (coarse) { + r = std::snprintf(out, outlen, "%04d-%02d-%02dT%02d:%02d:%02d.%03ld%s", + bdt.tm_year + 1900, bdt.tm_mon + 1, bdt.tm_mday, + bdt.tm_hour, bdt.tm_min, bdt.tm_sec, + static_cast<long>(tv.tv_usec / 1000), tz); + } else { + r = std::snprintf(out, outlen, "%04d-%02d-%02dT%02d:%02d:%02d.%06ld%s", + bdt.tm_year + 1900, bdt.tm_mon + 1, bdt.tm_mday, + bdt.tm_hour, bdt.tm_min, bdt.tm_sec, + static_cast<long>(tv.tv_usec), tz); + } + // Since our caller just adds the return value to something without + // checking it… + ceph_assert(r >= 0); + return r; +} +} +} + +#endif diff --git a/src/log/SubsystemMap.h b/src/log/SubsystemMap.h new file mode 100644 index 000000000..4d33ab3b1 --- /dev/null +++ b/src/log/SubsystemMap.h @@ -0,0 +1,113 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LOG_SUBSYSTEMS +#define CEPH_LOG_SUBSYSTEMS + +#include <string> +#include <vector> +#include <algorithm> + +#include "common/likely.h" +#include "common/subsys_types.h" + +#include "include/ceph_assert.h" + +namespace ceph { +namespace logging { + +class SubsystemMap { + // Access to the current gathering levels must be *FAST* as they are + // read over and over from all places in the code (via should_gather() + // by i.e. dout). + std::array<uint8_t, ceph_subsys_get_num()> m_gather_levels; + + // The rest. Should be as small as possible to not unnecessarily + // enlarge md_config_t and spread it other elements across cache + // lines. Access can be slow. + std::vector<ceph_subsys_item_t> m_subsys; + + friend class Log; + +public: + SubsystemMap() { + constexpr auto s = ceph_subsys_get_as_array(); + m_subsys.reserve(s.size()); + + std::size_t i = 0; + for (const ceph_subsys_item_t& item : s) { + m_subsys.emplace_back(item); + m_gather_levels[i++] = std::max(item.log_level, item.gather_level); + } + } + + constexpr static std::size_t get_num() { + return ceph_subsys_get_num(); + } + + constexpr static std::size_t get_max_subsys_len() { + return ceph_subsys_max_name_length(); + } + + int get_log_level(unsigned subsys) const { + if (subsys >= get_num()) + subsys = 0; + return m_subsys[subsys].log_level; + } + + int get_gather_level(unsigned subsys) const { + if (subsys >= get_num()) + subsys = 0; + return m_subsys[subsys].gather_level; + } + + // TODO(rzarzynski): move to string_view? + constexpr const char* get_name(unsigned subsys) const { + if (subsys >= get_num()) + subsys = 0; + return ceph_subsys_get_as_array()[subsys].name; + } + + template <unsigned SubV, int LvlV> + bool should_gather() const { + static_assert(SubV < get_num(), "wrong subsystem ID"); + static_assert(LvlV >= -1 && LvlV <= 200); + + if constexpr (LvlV <= 0) { + // handle the -1 and 0 levels entirely at compile-time. + // Such debugs are intended to be gathered regardless even + // of the user configuration. + return true; + } else { + // we expect that setting level different than the default + // is rather unusual. + return expect(LvlV <= static_cast<int>(m_gather_levels[SubV]), + LvlV <= ceph_subsys_get_max_default_level(SubV)); + } + } + bool should_gather(const unsigned sub, int level) const { + ceph_assert(sub < m_subsys.size()); + return level <= static_cast<int>(m_gather_levels[sub]); + } + + void set_log_level(unsigned subsys, uint8_t log) + { + ceph_assert(subsys < m_subsys.size()); + m_subsys[subsys].log_level = log; + m_gather_levels[subsys] = \ + std::max(log, m_subsys[subsys].gather_level); + } + + void set_gather_level(unsigned subsys, uint8_t gather) + { + ceph_assert(subsys < m_subsys.size()); + m_subsys[subsys].gather_level = gather; + m_gather_levels[subsys] = \ + std::max(m_subsys[subsys].log_level, gather); + } +}; + +} +} + +#endif diff --git a/src/log/test.cc b/src/log/test.cc new file mode 100644 index 000000000..08ba5bff6 --- /dev/null +++ b/src/log/test.cc @@ -0,0 +1,504 @@ +#include <gtest/gtest.h> + +#include "log/Log.h" +#include "common/Clock.h" +#include "include/coredumpctl.h" +#include "SubsystemMap.h" + +#include "global/global_init.h" +#include "common/ceph_argparse.h" +#include "global/global_context.h" +#include "common/dout.h" + +#include <unistd.h> + +#include <limits.h> + +using namespace std; +using namespace ceph::logging; + +TEST(Log, Simple) +{ + SubsystemMap subs; + subs.set_log_level(0, 10); + subs.set_gather_level(0, 10); + + subs.set_log_level(1, 20); + subs.set_gather_level(1, 1); + + subs.set_log_level(2, 20); + subs.set_gather_level(2, 2); + + subs.set_log_level(3, 10); + subs.set_gather_level(3, 3); + + Log log(&subs); + log.start(); + + log.set_log_file("foo"); + log.reopen_log_file(); + + log.set_stderr_level(5, -1); + + + for (int i=0; i<100; i++) { + int sys = i % 4; + int l = 5 + (i%4); + if (subs.should_gather(sys, l)) { + MutableEntry e(l, sys); + log.submit_entry(std::move(e)); + } + } + + log.flush(); + + log.dump_recent(); + + log.stop(); +} + +TEST(Log, ReuseBad) +{ + SubsystemMap subs; + subs.set_log_level(1, 1); + subs.set_gather_level(1, 1); + Log log(&subs); + log.start(); + log.set_log_file("foo"); + log.reopen_log_file(); + + const int l = 0; + { + MutableEntry e(l, 1); + auto& out = e.get_ostream(); + out << (std::streambuf*)nullptr; + EXPECT_TRUE(out.bad()); // writing nullptr to a stream sets its badbit + log.submit_entry(std::move(e)); + } + { + MutableEntry e(l, 1); + auto& out = e.get_ostream(); + EXPECT_FALSE(out.bad()); // should not see failures from previous log entry + out << "hello world"; + log.submit_entry(std::move(e)); + } + + log.flush(); + log.stop(); +} + +int many = 10000; + +TEST(Log, ManyNoGather) +{ + SubsystemMap subs; + subs.set_log_level(1, 1); + subs.set_gather_level(1, 1); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + for (int i=0; i<many; i++) { + int l = 10; + if (subs.should_gather(1, l)) + log.submit_entry(MutableEntry(1, 0)); + } + log.flush(); + log.stop(); +} + + +TEST(Log, ManyGatherLog) +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + for (int i=0; i<many; i++) { + int l = 10; + if (subs.should_gather(1, l)) { + MutableEntry e(l, 1); + e.get_ostream() << "this is a long string asdf asdf asdf asdf asdf asdf asd fasd fasdf "; + log.submit_entry(std::move(e)); + } + } + log.flush(); + log.stop(); +} + +TEST(Log, ManyGatherLogStackSpillover) +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + for (int i=0; i<many; i++) { + int l = 10; + if (subs.should_gather(1, l)) { + MutableEntry e(l, 1); + auto& s = e.get_ostream(); + s << "foo"; + s << std::string(sizeof(e) * 2, '-'); + log.submit_entry(std::move(e)); + } + } + log.flush(); + log.stop(); +} + +TEST(Log, ManyGather) +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 1); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + for (int i=0; i<many; i++) { + int l = 10; + if (subs.should_gather(1, l)) + log.submit_entry(MutableEntry(l, 1)); + } + log.flush(); + log.stop(); +} + +static void readpipe(int fd, int verify) +{ + while (1) { + /* Use larger buffer on receiver as Linux will allow pipes buffers to + * exceed PIPE_BUF. We can't avoid tearing due to small read buffers from + * the Ceph side. + */ + + char buf[65536] = ""; + int rc = read(fd, buf, (sizeof buf) - 1); + if (rc == 0) { + _exit(0); + } else if (rc == -1) { + _exit(1); + } else if (rc > 0) { + if (verify) { + char* p = strrchr(buf, '\n'); + /* verify no torn writes */ + if (p == NULL) { + _exit(2); + } else if (p[1] != '\0') { + write(2, buf, strlen(buf)); + _exit(3); + } + } + } else _exit(100); + usleep(500); + } +} + +TEST(Log, StderrPipeAtomic) +{ + int pfd[2] = {-1, -1}; + int rc = pipe(pfd); + ASSERT_EQ(rc, 0); + pid_t pid = fork(); + if (pid == 0) { + close(pfd[1]); + readpipe(pfd[0], 1); + } else if (pid == (pid_t)-1) { + ASSERT_EQ(0, 1); + } + close(pfd[0]); + + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file(""); + log.reopen_log_file(); + log.set_stderr_fd(pfd[1]); + log.set_stderr_level(1, 20); + /* -128 for prefix space */ + for (int i = 0; i < PIPE_BUF-128; i++) { + MutableEntry e(1, 1); + auto& s = e.get_ostream(); + for (int j = 0; j < i; j++) { + char c = 'a'; + c += (j % 26); + s << c; + } + log.submit_entry(std::move(e)); + } + log.flush(); + log.stop(); + close(pfd[1]); + int status; + pid_t waited = waitpid(pid, &status, 0); + ASSERT_EQ(pid, waited); + ASSERT_NE(WIFEXITED(status), 0); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + +TEST(Log, StderrPipeBig) +{ + int pfd[2] = {-1, -1}; + int rc = pipe(pfd); + ASSERT_EQ(rc, 0); + pid_t pid = fork(); + if (pid == 0) { + /* no verification as some reads will be torn due to size > PIPE_BUF */ + close(pfd[1]); + readpipe(pfd[0], 0); + } else if (pid == (pid_t)-1) { + ASSERT_EQ(0, 1); + } + close(pfd[0]); + + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file(""); + log.reopen_log_file(); + log.set_stderr_fd(pfd[1]); + log.set_stderr_level(1, 20); + /* -128 for prefix space */ + for (int i = 0; i < PIPE_BUF*2; i++) { + MutableEntry e(1, 1); + auto& s = e.get_ostream(); + for (int j = 0; j < i; j++) { + char c = 'a'; + c += (j % 26); + s << c; + } + log.submit_entry(std::move(e)); + } + log.flush(); + log.stop(); + close(pfd[1]); + int status; + pid_t waited = waitpid(pid, &status, 0); + ASSERT_EQ(pid, waited); + ASSERT_NE(WIFEXITED(status), 0); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + +void do_segv() +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 1); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + + log.inject_segv(); + MutableEntry e(10, 1); + { + PrCtl unset_dumpable; + log.submit_entry(std::move(e)); // this should segv + } + + log.flush(); + log.stop(); +} + +TEST(Log, InternalSegv) +{ + ASSERT_DEATH(do_segv(), ".*"); +} + +TEST(Log, LargeLog) +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + int l = 10; + { + MutableEntry e(l, 1); + std::string msg(10000000, 'a'); + e.get_ostream() << msg; + log.submit_entry(std::move(e)); + } + log.flush(); + log.stop(); +} + +TEST(Log, LargeFromSmallLog) +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file("big"); + log.reopen_log_file(); + int l = 10; + { + MutableEntry e(l, 1); + for (int i = 0; i < 1000000; i++) { + std::string msg(10, 'a'); + e.get_ostream() << msg; + } + log.submit_entry(std::move(e)); + } + log.flush(); + log.stop(); +} + +// Make sure nothing bad happens when we switch + +TEST(Log, TimeSwitch) +{ + SubsystemMap subs; + subs.set_log_level(1, 20); + subs.set_gather_level(1, 10); + Log log(&subs); + log.start(); + log.set_log_file("time_switch_log"); + log.reopen_log_file(); + int l = 10; + bool coarse = true; + for (auto i = 0U; i < 300; ++i) { + MutableEntry e(l, 1); + e.get_ostream() << "SQUID THEFT! PUNISHABLE BY DEATH!"; + log.submit_entry(std::move(e)); + if (i % 50) + log.set_coarse_timestamps(coarse = !coarse); + } + log.flush(); + log.stop(); +} + +TEST(Log, TimeFormat) +{ + static constexpr auto buflen = 128u; + char buf[buflen]; + ceph::logging::log_clock clock; + { + clock.coarsen(); + auto t = clock.now(); + ceph::logging::append_time(t, buf, buflen); + auto c = std::strrchr(buf, '.'); + ASSERT_NE(c, nullptr); + ASSERT_EQ(8u, strlen(c + 1)); + } + { + clock.refine(); + auto t = clock.now(); + ceph::logging::append_time(t, buf, buflen); + auto c = std::strrchr(buf, '.'); + ASSERT_NE(c, nullptr); + ASSERT_EQ(11u, std::strlen(c + 1)); + } +} + +#define dout_subsys ceph_subsys_context + +template <int depth, int x> struct do_log +{ + void log(CephContext* cct); +}; + +template <int x> struct do_log<12, x> +{ + void log(CephContext* cct); +}; + +template<int depth, int x> void do_log<depth,x>::log(CephContext* cct) +{ + ldout(cct, 20) << "Log depth=" << depth << " x=" << x << dendl; + if (rand() % 2) { + do_log<depth+1, x*2> log; + log.log(cct); + } else { + do_log<depth+1, x*2+1> log; + log.log(cct); + } +} + +std::string recursion(CephContext* cct) +{ + ldout(cct, 20) << "Preparing recursion string" << dendl; + return "here-recursion"; +} + +template<int x> void do_log<12, x>::log(CephContext* cct) +{ + if ((rand() % 16) == 0) { + ldout(cct, 20) << "End " << recursion(cct) << "x=" << x << dendl; + } else { + ldout(cct, 20) << "End x=" << x << dendl; + } +} + +TEST(Log, Speed_gather) +{ + do_log<0,0> start; + g_ceph_context->_conf->subsys.set_gather_level(ceph_subsys_context, 30); + g_ceph_context->_conf->subsys.set_log_level(ceph_subsys_context, 0); + for (int i=0; i<100000;i++) { + ldout(g_ceph_context, 20) << "Iteration " << i << dendl; + start.log(g_ceph_context); + } +} + +TEST(Log, Speed_nogather) +{ + do_log<0,0> start; + g_ceph_context->_conf->subsys.set_gather_level(ceph_subsys_context, 0); + g_ceph_context->_conf->subsys.set_log_level(ceph_subsys_context, 0); + for (int i=0; i<100000;i++) { + ldout(g_ceph_context, 20) << "Iteration " << i << dendl; + start.log(g_ceph_context); + } +} + +TEST(Log, GarbleRecovery) +{ + static const char* test_file="log_for_moment"; + + Log* saved = g_ceph_context->_log; + Log log(&g_ceph_context->_conf->subsys); + log.start(); + unlink(test_file); + log.set_log_file(test_file); + log.reopen_log_file(); + g_ceph_context->_log = &log; + + std::string long_message(1000,'c'); + ldout(g_ceph_context, 0) << long_message << dendl; + ldout(g_ceph_context, 0) << "Prologue" << (std::streambuf*)nullptr << long_message << dendl; + ldout(g_ceph_context, 0) << "Epitaph" << long_message << dendl; + + g_ceph_context->_log = saved; + log.flush(); + log.stop(); + struct stat file_status; + ASSERT_EQ(stat(test_file, &file_status), 0); + ASSERT_GT(file_status.st_size, 2000); +} + +int main(int argc, char **argv) +{ + auto args = argv_to_vec(argc, argv); + + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} |