diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:35:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:35:18 +0000 |
commit | b750101eb236130cf056c675997decbac904cc49 (patch) | |
tree | a5df1a06754bdd014cb975c051c83b01c9a97532 /src/journal | |
parent | Initial commit. (diff) | |
download | systemd-b750101eb236130cf056c675997decbac904cc49.tar.xz systemd-b750101eb236130cf056c675997decbac904cc49.zip |
Adding upstream version 252.22. [refs: upstream/252.22, upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/journal')
44 files changed, 12659 insertions, 0 deletions
diff --git a/src/journal/cat.c b/src/journal/cat.c new file mode 100644 index 0000000..350b805 --- /dev/null +++ b/src/journal/cat.c @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "main-func.h" +#include "parse-argument.h" +#include "parse-util.h" +#include "pretty-print.h" +#include "string-util.h" +#include "syslog-util.h" +#include "terminal-util.h" +#include "util.h" + +static const char *arg_identifier = NULL; +static int arg_priority = LOG_INFO; +static int arg_stderr_priority = -1; +static bool arg_level_prefix = true; + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd-cat", "1", &link); + if (r < 0) + return log_oom(); + + printf("%s [OPTIONS...] COMMAND ...\n" + "\n%sExecute process with stdout/stderr connected to the journal.%s\n\n" + " -h --help Show this help\n" + " --version Show package version\n" + " -t --identifier=STRING Set syslog identifier\n" + " -p --priority=PRIORITY Set priority value (0..7)\n" + " --stderr-priority=PRIORITY Set priority value (0..7) used for stderr\n" + " --level-prefix=BOOL Control whether level prefix shall be parsed\n" + "\nSee the %s for details.\n", + program_invocation_short_name, + ansi_highlight(), + ansi_normal(), + link); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_STDERR_PRIORITY, + ARG_LEVEL_PREFIX + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "identifier", required_argument, NULL, 't' }, + { "priority", required_argument, NULL, 'p' }, + { "stderr-priority", required_argument, NULL, ARG_STDERR_PRIORITY }, + { "level-prefix", required_argument, NULL, ARG_LEVEL_PREFIX }, + 
{} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + /* Resetting to 0 forces the invocation of an internal initialization routine of getopt_long() + * that checks for GNU extensions in optstring ('-' or '+' at the beginning). */ + optind = 0; + while ((c = getopt_long(argc, argv, "+ht:p:", options, NULL)) >= 0) + + switch (c) { + + case 'h': + help(); + return 0; + + case ARG_VERSION: + return version(); + + case 't': + if (isempty(optarg)) + arg_identifier = NULL; + else + arg_identifier = optarg; + break; + + case 'p': + arg_priority = log_level_from_string(optarg); + if (arg_priority < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse priority value."); + break; + + case ARG_STDERR_PRIORITY: + arg_stderr_priority = log_level_from_string(optarg); + if (arg_stderr_priority < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse stderr priority value."); + break; + + case ARG_LEVEL_PREFIX: + r = parse_boolean_argument("--level-prefix=", optarg, &arg_level_prefix); + if (r < 0) + return r; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + + return 1; +} + +static int run(int argc, char *argv[]) { + _cleanup_close_ int outfd = -1, errfd = -1, saved_stderr = -1; + int r; + + log_setup(); + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + outfd = sd_journal_stream_fd(arg_identifier, arg_priority, arg_level_prefix); + if (outfd < 0) + return log_error_errno(outfd, "Failed to create stream fd: %m"); + + if (arg_stderr_priority >= 0 && arg_stderr_priority != arg_priority) { + errfd = sd_journal_stream_fd(arg_identifier, arg_stderr_priority, arg_level_prefix); + if (errfd < 0) + return log_error_errno(errfd, "Failed to create stream fd: %m"); + } + + saved_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3); + + r = rearrange_stdio(STDIN_FILENO, outfd, errfd < 0 ? outfd : errfd); /* Invalidates fd on success + error! 
*/ + TAKE_FD(outfd); + TAKE_FD(errfd); + if (r < 0) + return log_error_errno(r, "Failed to rearrange stdout/stderr: %m"); + + if (argc <= optind) + (void) execl("/bin/cat", "/bin/cat", NULL); + else + (void) execvp(argv[optind], argv + optind); + r = -errno; + + /* Let's try to restore a working stderr, so we can print the error message */ + if (saved_stderr >= 0) + (void) dup3(saved_stderr, STDERR_FILENO, 0); + + return log_error_errno(r, "Failed to execute process: %m"); +} + +DEFINE_MAIN_FUNCTION(run); diff --git a/src/journal/fuzz-journald-audit.c b/src/journal/fuzz-journald-audit.c new file mode 100644 index 0000000..6e8e180 --- /dev/null +++ b/src/journal/fuzz-journald-audit.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fuzz.h" +#include "fuzz-journald.h" +#include "journald-audit.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + Server s; + + dummy_server_init(&s, data, size); + process_audit_string(&s, 0, s.buffer, size); + server_done(&s); + + return 0; +} diff --git a/src/journal/fuzz-journald-kmsg.c b/src/journal/fuzz-journald-kmsg.c new file mode 100644 index 0000000..ec22f14 --- /dev/null +++ b/src/journal/fuzz-journald-kmsg.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fuzz.h" +#include "fuzz-journald.h" +#include "journald-kmsg.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + Server s; + + if (size == 0) + return 0; + + /* We don't want to fill the logs with assert warnings. 
+ * Disable most logging if not running standalone */ + if (!getenv("SYSTEMD_LOG_LEVEL")) + log_set_max_level(LOG_CRIT); + + dummy_server_init(&s, data, size); + dev_kmsg_record(&s, s.buffer, size); + server_done(&s); + + return 0; +} diff --git a/src/journal/fuzz-journald-native-fd.c b/src/journal/fuzz-journald-native-fd.c new file mode 100644 index 0000000..fcfc5df --- /dev/null +++ b/src/journal/fuzz-journald-native-fd.c @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fd-util.h" +#include "fs-util.h" +#include "fuzz-journald.h" +#include "fuzz.h" +#include "journald-native.h" +#include "memfd-util.h" +#include "process-util.h" +#include "tmpfile-util.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + Server s; + _cleanup_close_ int sealed_fd = -1, unsealed_fd = -1; + _cleanup_(unlink_tempfilep) char name[] = "/tmp/fuzz-journald-native-fd.XXXXXX"; + char *label = NULL; + size_t label_len = 0; + struct ucred ucred; + struct timeval *tv = NULL; + + if (!getenv("SYSTEMD_LOG_LEVEL")) + log_set_max_level(LOG_CRIT); + + dummy_server_init(&s, NULL, 0); + + sealed_fd = memfd_new(NULL); + assert_se(sealed_fd >= 0); + assert_se(write(sealed_fd, data, size) == (ssize_t) size); + assert_se(memfd_set_sealed(sealed_fd) >= 0); + assert_se(lseek(sealed_fd, 0, SEEK_SET) == 0); + ucred = (struct ucred) { + .pid = getpid_cached(), + .uid = geteuid(), + .gid = getegid(), + }; + server_process_native_file(&s, sealed_fd, &ucred, tv, label, label_len); + + unsealed_fd = mkostemp_safe(name); + assert_se(unsealed_fd >= 0); + assert_se(write(unsealed_fd, data, size) == (ssize_t) size); + assert_se(lseek(unsealed_fd, 0, SEEK_SET) == 0); + server_process_native_file(&s, unsealed_fd, &ucred, tv, label, label_len); + + server_done(&s); + + return 0; +} diff --git a/src/journal/fuzz-journald-native.c b/src/journal/fuzz-journald-native.c new file mode 100644 index 0000000..6531c4f --- /dev/null +++ b/src/journal/fuzz-journald-native.c @@ -0,0 
+1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "fuzz.h" +#include "fuzz-journald.h" +#include "journald-native.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + fuzz_journald_processing_function(data, size, server_process_native_message); + return 0; +} diff --git a/src/journal/fuzz-journald-stream.c b/src/journal/fuzz-journald-stream.c new file mode 100644 index 0000000..67e990a --- /dev/null +++ b/src/journal/fuzz-journald-stream.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "fd-util.h" +#include "fuzz.h" +#include "fuzz-journald.h" +#include "journald-stream.h" + +static int stream_fds[2] = { -1, -1 }; + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + Server s; + StdoutStream *stream; + int v; + + if (outside_size_range(size, 1, 65536)) + return 0; + + if (!getenv("SYSTEMD_LOG_LEVEL")) + log_set_max_level(LOG_CRIT); + + assert_se(socketpair(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0, stream_fds) >= 0); + dummy_server_init(&s, NULL, 0); + assert_se(stdout_stream_install(&s, stream_fds[0], &stream) >= 0); + assert_se(write(stream_fds[1], data, size) == (ssize_t) size); + while (ioctl(stream_fds[0], SIOCINQ, &v) == 0 && v) + sd_event_run(s.event, UINT64_MAX); + if (s.n_stdout_streams) + stdout_stream_destroy(stream); + server_done(&s); + stream_fds[1] = safe_close(stream_fds[1]); + + return 0; +} diff --git a/src/journal/fuzz-journald-stream.options b/src/journal/fuzz-journald-stream.options new file mode 100644 index 0000000..678d526 --- /dev/null +++ b/src/journal/fuzz-journald-stream.options @@ -0,0 +1,2 @@ +[libfuzzer] +max_len = 65536 diff --git a/src/journal/fuzz-journald-syslog.c b/src/journal/fuzz-journald-syslog.c new file mode 100644 index 0000000..72ec610 --- /dev/null +++ b/src/journal/fuzz-journald-syslog.c @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + 
+#include "fuzz.h" +#include "fuzz-journald.h" +#include "journald-syslog.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + fuzz_journald_processing_function(data, size, server_process_syslog_message); + return 0; +} diff --git a/src/journal/fuzz-journald.c b/src/journal/fuzz-journald.c new file mode 100644 index 0000000..ff1746e --- /dev/null +++ b/src/journal/fuzz-journald.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "fuzz-journald.h" +#include "journald-server.h" +#include "sd-event.h" + +void dummy_server_init(Server *s, const uint8_t *buffer, size_t size) { + *s = (Server) { + .syslog_fd = -1, + .native_fd = -1, + .stdout_fd = -1, + .dev_kmsg_fd = -1, + .audit_fd = -1, + .hostname_fd = -1, + .notify_fd = -1, + .storage = STORAGE_NONE, + .line_max = 64, + }; + assert_se(sd_event_default(&s->event) >= 0); + + if (buffer) { + s->buffer = memdup_suffix0(buffer, size); + assert_se(s->buffer); + } +} + +void fuzz_journald_processing_function( + const uint8_t *data, + size_t size, + void (*f)(Server *s, const char *buf, size_t raw_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len) + ) { + Server s; + char *label = NULL; + size_t label_len = 0; + struct ucred *ucred = NULL; + struct timeval *tv = NULL; + + if (size == 0) + return; + + dummy_server_init(&s, data, size); + (*f)(&s, s.buffer, size, ucred, tv, label, label_len); + server_done(&s); +} diff --git a/src/journal/fuzz-journald.h b/src/journal/fuzz-journald.h new file mode 100644 index 0000000..4abb100 --- /dev/null +++ b/src/journal/fuzz-journald.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journald-server.h" + +void dummy_server_init(Server *s, const uint8_t *buffer, size_t size); + +void fuzz_journald_processing_function( + const uint8_t *data, + size_t size, + void (*f)(Server *s, const char *buf, size_t raw_len, const struct ucred 
*ucred, const struct timeval *tv, const char *label, size_t label_len) +); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c new file mode 100644 index 0000000..d81d522 --- /dev/null +++ b/src/journal/journalctl.c @@ -0,0 +1,2761 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <getopt.h> +#include <linux/fs.h> +#include <signal.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/inotify.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "sd-bus.h" +#include "sd-device.h" +#include "sd-journal.h" + +#include "acl-util.h" +#include "alloc-util.h" +#include "bus-error.h" +#include "bus-util.h" +#include "catalog.h" +#include "chase-symlinks.h" +#include "chattr-util.h" +#include "def.h" +#include "dissect-image.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-table.h" +#include "format-util.h" +#include "fs-util.h" +#include "fsprg.h" +#include "glob-util.h" +#include "hostname-util.h" +#include "id128-print.h" +#include "io-util.h" +#include "journal-def.h" +#include "journal-internal.h" +#include "journal-util.h" +#include "journal-vacuum.h" +#include "journal-verify.h" +#include "locale-util.h" +#include "log.h" +#include "logs-show.h" +#include "memory-util.h" +#include "mkdir.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "nulstr-util.h" +#include "pager.h" +#include "parse-argument.h" +#include "parse-util.h" +#include "path-util.h" +#include "pcre2-util.h" +#include "pretty-print.h" +#include "qrcode-util.h" +#include "random-util.h" +#include "rlimit-util.h" +#include "set.h" +#include "sigbus.h" +#include "static-destruct.h" +#include "stdio-util.h" +#include "string-table.h" +#include "strv.h" +#include "syslog-util.h" +#include "terminal-util.h" +#include "tmpfile-util.h" +#include "unit-name.h" +#include "user-util.h" +#include "varlink.h" + +#define DEFAULT_FSS_INTERVAL_USEC 
(15*USEC_PER_MINUTE) +#define PROCESS_INOTIFY_INTERVAL 1024 /* Every 1,024 messages processed */ + +enum { + /* Special values for arg_lines */ + ARG_LINES_DEFAULT = -2, + ARG_LINES_ALL = -1, +}; + +static OutputMode arg_output = OUTPUT_SHORT; +static JsonFormatFlags arg_json_format_flags = JSON_FORMAT_OFF; +static bool arg_utc = false; +static bool arg_follow = false; +static bool arg_full = true; +static bool arg_all = false; +static PagerFlags arg_pager_flags = 0; +static int arg_lines = ARG_LINES_DEFAULT; +static bool arg_no_tail = false; +static bool arg_quiet = false; +static bool arg_merge = false; +static bool arg_boot = false; +static sd_id128_t arg_boot_id = {}; +static int arg_boot_offset = 0; +static bool arg_dmesg = false; +static bool arg_no_hostname = false; +static const char *arg_cursor = NULL; +static const char *arg_cursor_file = NULL; +static const char *arg_after_cursor = NULL; +static bool arg_show_cursor = false; +static const char *arg_directory = NULL; +static char **arg_file = NULL; +static bool arg_file_stdin = false; +static int arg_priorities = 0xFF; +static Set *arg_facilities = NULL; +static char *arg_verify_key = NULL; +#if HAVE_GCRYPT +static usec_t arg_interval = DEFAULT_FSS_INTERVAL_USEC; +static bool arg_force = false; +#endif +static usec_t arg_since = 0, arg_until = 0; +static bool arg_since_set = false, arg_until_set = false; +static char **arg_syslog_identifier = NULL; +static char **arg_system_units = NULL; +static char **arg_user_units = NULL; +static const char *arg_field = NULL; +static bool arg_catalog = false; +static bool arg_reverse = false; +static int arg_journal_type = 0; +static int arg_namespace_flags = 0; +static char *arg_root = NULL; +static char *arg_image = NULL; +static const char *arg_machine = NULL; +static const char *arg_namespace = NULL; +static uint64_t arg_vacuum_size = 0; +static uint64_t arg_vacuum_n_files = 0; +static usec_t arg_vacuum_time = 0; +static char **arg_output_fields = NULL; +static 
const char *arg_pattern = NULL; +static pcre2_code *arg_compiled_pattern = NULL; +static PatternCompileCase arg_case = PATTERN_COMPILE_CASE_AUTO; + +STATIC_DESTRUCTOR_REGISTER(arg_file, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_facilities, set_freep); +STATIC_DESTRUCTOR_REGISTER(arg_verify_key, freep); +STATIC_DESTRUCTOR_REGISTER(arg_syslog_identifier, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_system_units, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_user_units, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_root, freep); +STATIC_DESTRUCTOR_REGISTER(arg_image, freep); +STATIC_DESTRUCTOR_REGISTER(arg_output_fields, strv_freep); +STATIC_DESTRUCTOR_REGISTER(arg_compiled_pattern, pattern_freep); + +static enum { + ACTION_SHOW, + ACTION_NEW_ID128, + ACTION_PRINT_HEADER, + ACTION_SETUP_KEYS, + ACTION_VERIFY, + ACTION_DISK_USAGE, + ACTION_LIST_CATALOG, + ACTION_DUMP_CATALOG, + ACTION_UPDATE_CATALOG, + ACTION_LIST_BOOTS, + ACTION_FLUSH, + ACTION_RELINQUISH_VAR, + ACTION_SYNC, + ACTION_ROTATE, + ACTION_VACUUM, + ACTION_ROTATE_AND_VACUUM, + ACTION_LIST_FIELDS, + ACTION_LIST_FIELD_NAMES, +} arg_action = ACTION_SHOW; + +typedef struct BootId { + sd_id128_t id; + uint64_t first; + uint64_t last; + LIST_FIELDS(struct BootId, boot_list); +} BootId; + +static int add_matches_for_device(sd_journal *j, const char *devpath) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + sd_device *d = NULL; + struct stat st; + int r; + + assert(j); + assert(devpath); + + if (!path_startswith(devpath, "/dev/")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Devpath does not start with /dev/"); + + if (stat(devpath, &st) < 0) + return log_error_errno(errno, "Couldn't stat file: %m"); + + r = sd_device_new_from_stat_rdev(&device, &st); + if (r < 0) + return log_error_errno(r, "Failed to get device from devnum %u:%u: %m", major(st.st_rdev), minor(st.st_rdev)); + + for (d = device; d; ) { + _cleanup_free_ char *match = NULL; + const char *subsys, *sysname, *devnode; + sd_device 
*parent; + + r = sd_device_get_subsystem(d, &subsys); + if (r < 0) + goto get_parent; + + r = sd_device_get_sysname(d, &sysname); + if (r < 0) + goto get_parent; + + match = strjoin("_KERNEL_DEVICE=+", subsys, ":", sysname); + if (!match) + return log_oom(); + + r = sd_journal_add_match(j, match, 0); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + + if (sd_device_get_devname(d, &devnode) >= 0) { + _cleanup_free_ char *match1 = NULL; + + r = stat(devnode, &st); + if (r < 0) + return log_error_errno(r, "Failed to stat() device node \"%s\": %m", devnode); + + r = asprintf(&match1, "_KERNEL_DEVICE=%c%u:%u", S_ISBLK(st.st_mode) ? 'b' : 'c', major(st.st_rdev), minor(st.st_rdev)); + if (r < 0) + return log_oom(); + + r = sd_journal_add_match(j, match1, 0); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + } + +get_parent: + if (sd_device_get_parent(d, &parent) < 0) + break; + + d = parent; + } + + r = add_match_this_boot(j, arg_machine); + if (r < 0) + return log_error_errno(r, "Failed to add match for the current boot: %m"); + + return 0; +} + +static char *format_timestamp_maybe_utc(char *buf, size_t l, usec_t t) { + + if (arg_utc) + return format_timestamp_style(buf, l, t, TIMESTAMP_UTC); + + return format_timestamp(buf, l, t); +} + +static int parse_boot_descriptor(const char *x, sd_id128_t *boot_id, int *offset) { + sd_id128_t id = SD_ID128_NULL; + int off = 0, r; + + if (streq(x, "all")) { + *boot_id = SD_ID128_NULL; + *offset = 0; + return 0; + } else if (strlen(x) >= SD_ID128_STRING_MAX - 1) { + char *t; + + t = strndupa_safe(x, SD_ID128_STRING_MAX - 1); + r = sd_id128_from_string(t, &id); + if (r >= 0) + x += SD_ID128_STRING_MAX - 1; + + if (!IN_SET(*x, 0, '-', '+')) + return -EINVAL; + + if (*x != 0) { + r = safe_atoi(x, &off); + if (r < 0) + return r; + } + } else { + r = safe_atoi(x, &off); + if (r < 0) + return r; + } + + if (boot_id) + *boot_id = id; + + if (offset) + *offset = off; + + return 1; +} + +static int 
help_facilities(void) { + if (!arg_quiet) + puts("Available facilities:"); + + for (int i = 0; i < LOG_NFACILITIES; i++) { + _cleanup_free_ char *t = NULL; + + if (log_facility_unshifted_to_string_alloc(i, &t)) + return log_oom(); + puts(t); + } + + return 0; +} + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + pager_open(arg_pager_flags); + + r = terminal_urlify_man("journalctl", "1", &link); + if (r < 0) + return log_oom(); + + printf("%1$s [OPTIONS...] [MATCHES...]\n\n" + "%5$sQuery the journal.%6$s\n\n" + "%3$sSource Options:%4$s\n" + " --system Show the system journal\n" + " --user Show the user journal for the current user\n" + " -M --machine=CONTAINER Operate on local container\n" + " -m --merge Show entries from all available journals\n" + " -D --directory=PATH Show journal files from directory\n" + " --file=PATH Show journal file\n" + " --root=ROOT Operate on files below a root directory\n" + " --image=IMAGE Operate on files in filesystem image\n" + " --namespace=NAMESPACE Show journal data from specified journal namespace\n" + "\n%3$sFiltering Options:%4$s\n" + " -S --since=DATE Show entries not older than the specified date\n" + " -U --until=DATE Show entries not newer than the specified date\n" + " -c --cursor=CURSOR Show entries starting at the specified cursor\n" + " --after-cursor=CURSOR Show entries after the specified cursor\n" + " --cursor-file=FILE Show entries after cursor in FILE and update FILE\n" + " -b --boot[=ID] Show current boot or the specified boot\n" + " -u --unit=UNIT Show logs from the specified unit\n" + " --user-unit=UNIT Show logs from the specified user unit\n" + " -t --identifier=STRING Show entries with the specified syslog identifier\n" + " -p --priority=RANGE Show entries with the specified priority\n" + " --facility=FACILITY... 
Show entries with the specified facilities\n" + " -g --grep=PATTERN Show entries with MESSAGE matching PATTERN\n" + " --case-sensitive[=BOOL] Force case sensitive or insensitive matching\n" + " -k --dmesg Show kernel message log from the current boot\n" + "\n%3$sOutput Control Options:%4$s\n" + " -o --output=STRING Change journal output mode (short, short-precise,\n" + " short-iso, short-iso-precise, short-full,\n" + " short-monotonic, short-unix, verbose, export,\n" + " json, json-pretty, json-sse, json-seq, cat,\n" + " with-unit)\n" + " --output-fields=LIST Select fields to print in verbose/export/json modes\n" + " -n --lines[=INTEGER] Number of journal entries to show\n" + " -r --reverse Show the newest entries first\n" + " --show-cursor Print the cursor after all the entries\n" + " --utc Express time in Coordinated Universal Time (UTC)\n" + " -x --catalog Add message explanations where available\n" + " --no-hostname Suppress output of hostname field\n" + " --no-full Ellipsize fields\n" + " -a --all Show all fields, including long and unprintable\n" + " -f --follow Follow the journal\n" + " --no-tail Show all lines, even in follow mode\n" + " -q --quiet Do not show info messages and privilege warning\n" + "\n%3$sPager Control Options:%4$s\n" + " --no-pager Do not pipe output into a pager\n" + " -e --pager-end Immediately jump to the end in the pager\n" + "\n%3$sForward Secure Sealing (FSS) Options:%4$s\n" + " --interval=TIME Time interval for changing the FSS sealing key\n" + " --verify-key=KEY Specify FSS verification key\n" + " --force Override of the FSS key pair with --setup-keys\n" + "\n%3$sCommands:%4$s\n" + " -h --help Show this help text\n" + " --version Show package version\n" + " -N --fields List all field names currently used\n" + " -F --field=FIELD List all values that a specified field takes\n" + " --list-boots Show terse information about recorded boots\n" + " --disk-usage Show total disk usage of all journal files\n" + " --vacuum-size=BYTES Reduce 
disk usage below specified size\n" + " --vacuum-files=INT Leave only the specified number of journal files\n" + " --vacuum-time=TIME Remove journal files older than specified time\n" + " --verify Verify journal file consistency\n" + " --sync Synchronize unwritten journal messages to disk\n" + " --relinquish-var Stop logging to disk, log to temporary file system\n" + " --smart-relinquish-var Similar, but NOP if log directory is on root mount\n" + " --flush Flush all journal data from /run into /var\n" + " --rotate Request immediate rotation of the journal files\n" + " --header Show journal header information\n" + " --list-catalog Show all message IDs in the catalog\n" + " --dump-catalog Show entries in the message catalog\n" + " --update-catalog Update the message catalog database\n" + " --setup-keys Generate a new FSS key pair\n" + "\nSee the %2$s for details.\n", + program_invocation_short_name, + link, + ansi_underline(), + ansi_normal(), + ansi_highlight(), + ansi_normal()); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_NO_PAGER, + ARG_NO_FULL, + ARG_NO_TAIL, + ARG_NEW_ID128, + ARG_THIS_BOOT, + ARG_LIST_BOOTS, + ARG_USER, + ARG_SYSTEM, + ARG_ROOT, + ARG_IMAGE, + ARG_HEADER, + ARG_FACILITY, + ARG_SETUP_KEYS, + ARG_FILE, + ARG_INTERVAL, + ARG_VERIFY, + ARG_VERIFY_KEY, + ARG_DISK_USAGE, + ARG_AFTER_CURSOR, + ARG_CURSOR_FILE, + ARG_SHOW_CURSOR, + ARG_USER_UNIT, + ARG_LIST_CATALOG, + ARG_DUMP_CATALOG, + ARG_UPDATE_CATALOG, + ARG_FORCE, + ARG_CASE_SENSITIVE, + ARG_UTC, + ARG_SYNC, + ARG_FLUSH, + ARG_RELINQUISH_VAR, + ARG_SMART_RELINQUISH_VAR, + ARG_ROTATE, + ARG_VACUUM_SIZE, + ARG_VACUUM_FILES, + ARG_VACUUM_TIME, + ARG_NO_HOSTNAME, + ARG_OUTPUT_FIELDS, + ARG_NAMESPACE, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version" , no_argument, NULL, ARG_VERSION }, + { "no-pager", no_argument, NULL, ARG_NO_PAGER }, + { "pager-end", no_argument, NULL, 'e' }, + { 
"follow", no_argument, NULL, 'f' }, + { "force", no_argument, NULL, ARG_FORCE }, + { "output", required_argument, NULL, 'o' }, + { "all", no_argument, NULL, 'a' }, + { "full", no_argument, NULL, 'l' }, + { "no-full", no_argument, NULL, ARG_NO_FULL }, + { "lines", optional_argument, NULL, 'n' }, + { "no-tail", no_argument, NULL, ARG_NO_TAIL }, + { "new-id128", no_argument, NULL, ARG_NEW_ID128 }, /* deprecated */ + { "quiet", no_argument, NULL, 'q' }, + { "merge", no_argument, NULL, 'm' }, + { "this-boot", no_argument, NULL, ARG_THIS_BOOT }, /* deprecated */ + { "boot", optional_argument, NULL, 'b' }, + { "list-boots", no_argument, NULL, ARG_LIST_BOOTS }, + { "dmesg", no_argument, NULL, 'k' }, + { "system", no_argument, NULL, ARG_SYSTEM }, + { "user", no_argument, NULL, ARG_USER }, + { "directory", required_argument, NULL, 'D' }, + { "file", required_argument, NULL, ARG_FILE }, + { "root", required_argument, NULL, ARG_ROOT }, + { "image", required_argument, NULL, ARG_IMAGE }, + { "header", no_argument, NULL, ARG_HEADER }, + { "identifier", required_argument, NULL, 't' }, + { "priority", required_argument, NULL, 'p' }, + { "facility", required_argument, NULL, ARG_FACILITY }, + { "grep", required_argument, NULL, 'g' }, + { "case-sensitive", optional_argument, NULL, ARG_CASE_SENSITIVE }, + { "setup-keys", no_argument, NULL, ARG_SETUP_KEYS }, + { "interval", required_argument, NULL, ARG_INTERVAL }, + { "verify", no_argument, NULL, ARG_VERIFY }, + { "verify-key", required_argument, NULL, ARG_VERIFY_KEY }, + { "disk-usage", no_argument, NULL, ARG_DISK_USAGE }, + { "cursor", required_argument, NULL, 'c' }, + { "cursor-file", required_argument, NULL, ARG_CURSOR_FILE }, + { "after-cursor", required_argument, NULL, ARG_AFTER_CURSOR }, + { "show-cursor", no_argument, NULL, ARG_SHOW_CURSOR }, + { "since", required_argument, NULL, 'S' }, + { "until", required_argument, NULL, 'U' }, + { "unit", required_argument, NULL, 'u' }, + { "user-unit", required_argument, NULL, ARG_USER_UNIT 
}, + { "field", required_argument, NULL, 'F' }, + { "fields", no_argument, NULL, 'N' }, + { "catalog", no_argument, NULL, 'x' }, + { "list-catalog", no_argument, NULL, ARG_LIST_CATALOG }, + { "dump-catalog", no_argument, NULL, ARG_DUMP_CATALOG }, + { "update-catalog", no_argument, NULL, ARG_UPDATE_CATALOG }, + { "reverse", no_argument, NULL, 'r' }, + { "machine", required_argument, NULL, 'M' }, + { "utc", no_argument, NULL, ARG_UTC }, + { "flush", no_argument, NULL, ARG_FLUSH }, + { "relinquish-var", no_argument, NULL, ARG_RELINQUISH_VAR }, + { "smart-relinquish-var", no_argument, NULL, ARG_SMART_RELINQUISH_VAR }, + { "sync", no_argument, NULL, ARG_SYNC }, + { "rotate", no_argument, NULL, ARG_ROTATE }, + { "vacuum-size", required_argument, NULL, ARG_VACUUM_SIZE }, + { "vacuum-files", required_argument, NULL, ARG_VACUUM_FILES }, + { "vacuum-time", required_argument, NULL, ARG_VACUUM_TIME }, + { "no-hostname", no_argument, NULL, ARG_NO_HOSTNAME }, + { "output-fields", required_argument, NULL, ARG_OUTPUT_FIELDS }, + { "namespace", required_argument, NULL, ARG_NAMESPACE }, + {} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "hefo:aln::qmb::kD:p:g:c:S:U:t:u:NF:xrM:", options, NULL)) >= 0) + + switch (c) { + + case 'h': + return help(); + + case ARG_VERSION: + return version(); + + case ARG_NO_PAGER: + arg_pager_flags |= PAGER_DISABLE; + break; + + case 'e': + arg_pager_flags |= PAGER_JUMP_TO_END; + + if (arg_lines == ARG_LINES_DEFAULT) + arg_lines = 1000; + + arg_boot = true; + + break; + + case 'f': + arg_follow = true; + break; + + case 'o': + if (streq(optarg, "help")) { + DUMP_STRING_TABLE(output_mode, OutputMode, _OUTPUT_MODE_MAX); + return 0; + } + + arg_output = output_mode_from_string(optarg); + if (arg_output < 0) + return log_error_errno(arg_output, "Unknown output format '%s'.", optarg); + + if (IN_SET(arg_output, OUTPUT_EXPORT, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ, OUTPUT_CAT)) + 
arg_quiet = true; + + if (OUTPUT_MODE_IS_JSON(arg_output)) + arg_json_format_flags = output_mode_to_json_format_flags(arg_output) | JSON_FORMAT_COLOR_AUTO; + else + arg_json_format_flags = JSON_FORMAT_OFF; + + break; + + case 'l': + arg_full = true; + break; + + case ARG_NO_FULL: + arg_full = false; + break; + + case 'a': + arg_all = true; + break; + + case 'n': + if (optarg) { + if (streq(optarg, "all")) + arg_lines = ARG_LINES_ALL; + else { + r = safe_atoi(optarg, &arg_lines); + if (r < 0 || arg_lines < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse lines '%s'", optarg); + } + } else { + arg_lines = 10; + + /* Hmm, no argument? Maybe the next + * word on the command line is + * supposed to be the argument? Let's + * see if there is one, and is + * parsable. */ + if (optind < argc) { + int n; + if (streq(argv[optind], "all")) { + arg_lines = ARG_LINES_ALL; + optind++; + } else if (safe_atoi(argv[optind], &n) >= 0 && n >= 0) { + arg_lines = n; + optind++; + } + } + } + + break; + + case ARG_NO_TAIL: + arg_no_tail = true; + break; + + case ARG_NEW_ID128: + arg_action = ACTION_NEW_ID128; + break; + + case 'q': + arg_quiet = true; + break; + + case 'm': + arg_merge = true; + break; + + case ARG_THIS_BOOT: + arg_boot = true; + arg_boot_id = SD_ID128_NULL; + arg_boot_offset = 0; + break; + + case 'b': + arg_boot = true; + arg_boot_id = SD_ID128_NULL; + arg_boot_offset = 0; + + if (optarg) { + r = parse_boot_descriptor(optarg, &arg_boot_id, &arg_boot_offset); + if (r < 0) + return log_error_errno(r, "Failed to parse boot descriptor '%s'", optarg); + + arg_boot = r; + + /* Hmm, no argument? Maybe the next + * word on the command line is + * supposed to be the argument? Let's + * see if there is one and is parsable + * as a boot descriptor... 
*/ + } else if (optind < argc) { + r = parse_boot_descriptor(argv[optind], &arg_boot_id, &arg_boot_offset); + if (r >= 0) { + arg_boot = r; + optind++; + } + } + break; + + case ARG_LIST_BOOTS: + arg_action = ACTION_LIST_BOOTS; + break; + + case 'k': + arg_boot = arg_dmesg = true; + break; + + case ARG_SYSTEM: + arg_journal_type |= SD_JOURNAL_SYSTEM; + break; + + case ARG_USER: + arg_journal_type |= SD_JOURNAL_CURRENT_USER; + break; + + case 'M': + arg_machine = optarg; + break; + + case ARG_NAMESPACE: + if (streq(optarg, "*")) { + arg_namespace_flags = SD_JOURNAL_ALL_NAMESPACES; + arg_namespace = NULL; + } else if (startswith(optarg, "+")) { + arg_namespace_flags = SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE; + arg_namespace = optarg + 1; + } else if (isempty(optarg)) { + arg_namespace_flags = 0; + arg_namespace = NULL; + } else { + arg_namespace_flags = 0; + arg_namespace = optarg; + } + + break; + + case 'D': + arg_directory = optarg; + break; + + case ARG_FILE: + if (streq(optarg, "-")) + /* An undocumented feature: we can read journal files from STDIN. We don't document + * this though, since after all we only support this for mmap-able, seekable files, and + * not for example pipes which are probably the primary usecase for reading things from + * STDIN. To avoid confusion we hence don't document this feature. 
*/ + arg_file_stdin = true; + else { + r = glob_extend(&arg_file, optarg, GLOB_NOCHECK); + if (r < 0) + return log_error_errno(r, "Failed to add paths: %m"); + } + break; + + case ARG_ROOT: + r = parse_path_argument(optarg, /* suppress_root= */ true, &arg_root); + if (r < 0) + return r; + break; + + case ARG_IMAGE: + r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image); + if (r < 0) + return r; + break; + + case 'c': + arg_cursor = optarg; + break; + + case ARG_CURSOR_FILE: + arg_cursor_file = optarg; + break; + + case ARG_AFTER_CURSOR: + arg_after_cursor = optarg; + break; + + case ARG_SHOW_CURSOR: + arg_show_cursor = true; + break; + + case ARG_HEADER: + arg_action = ACTION_PRINT_HEADER; + break; + + case ARG_VERIFY: + arg_action = ACTION_VERIFY; + break; + + case ARG_DISK_USAGE: + arg_action = ACTION_DISK_USAGE; + break; + + case ARG_VACUUM_SIZE: + r = parse_size(optarg, 1024, &arg_vacuum_size); + if (r < 0) + return log_error_errno(r, "Failed to parse vacuum size: %s", optarg); + + arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM; + break; + + case ARG_VACUUM_FILES: + r = safe_atou64(optarg, &arg_vacuum_n_files); + if (r < 0) + return log_error_errno(r, "Failed to parse vacuum files: %s", optarg); + + arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM; + break; + + case ARG_VACUUM_TIME: + r = parse_sec(optarg, &arg_vacuum_time); + if (r < 0) + return log_error_errno(r, "Failed to parse vacuum time: %s", optarg); + + arg_action = arg_action == ACTION_ROTATE ? ACTION_ROTATE_AND_VACUUM : ACTION_VACUUM; + break; + +#if HAVE_GCRYPT + case ARG_FORCE: + arg_force = true; + break; + + case ARG_SETUP_KEYS: + arg_action = ACTION_SETUP_KEYS; + break; + + case ARG_VERIFY_KEY: + r = free_and_strdup(&arg_verify_key, optarg); + if (r < 0) + return r; + /* Use memset not explicit_bzero() or similar so this doesn't look confusing + * in ps or htop output. 
*/ + memset(optarg, 'x', strlen(optarg)); + + arg_action = ACTION_VERIFY; + arg_merge = false; + break; + + case ARG_INTERVAL: + r = parse_sec(optarg, &arg_interval); + if (r < 0 || arg_interval <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse sealing key change interval: %s", optarg); + break; +#else + case ARG_SETUP_KEYS: + case ARG_VERIFY_KEY: + case ARG_INTERVAL: + case ARG_FORCE: + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Compiled without forward-secure sealing support."); +#endif + + case 'p': { + const char *dots; + + dots = strstr(optarg, ".."); + if (dots) { + _cleanup_free_ char *a = NULL; + int from, to, i; + + /* a range */ + a = strndup(optarg, dots - optarg); + if (!a) + return log_oom(); + + from = log_level_from_string(a); + to = log_level_from_string(dots + 2); + + if (from < 0 || to < 0) + return log_error_errno(from < 0 ? from : to, + "Failed to parse log level range %s", optarg); + + arg_priorities = 0; + + if (from < to) { + for (i = from; i <= to; i++) + arg_priorities |= 1 << i; + } else { + for (i = to; i <= from; i++) + arg_priorities |= 1 << i; + } + + } else { + int p, i; + + p = log_level_from_string(optarg); + if (p < 0) + return log_error_errno(p, "Unknown log level %s", optarg); + + arg_priorities = 0; + + for (i = 0; i <= p; i++) + arg_priorities |= 1 << i; + } + + break; + } + + case ARG_FACILITY: { + const char *p; + + for (p = optarg;;) { + _cleanup_free_ char *fac = NULL; + int num; + + r = extract_first_word(&p, &fac, ",", 0); + if (r < 0) + return log_error_errno(r, "Failed to parse facilities: %s", optarg); + if (r == 0) + break; + + if (streq(fac, "help")) { + help_facilities(); + return 0; + } + + num = log_facility_unshifted_from_string(fac); + if (num < 0) + return log_error_errno(num, "Bad --facility= argument \"%s\".", fac); + + if (set_ensure_put(&arg_facilities, NULL, INT_TO_PTR(num)) < 0) + return log_oom(); + } + + break; + } + + case 'g': + arg_pattern = optarg; + break; + + 
case ARG_CASE_SENSITIVE: + if (optarg) { + r = parse_boolean(optarg); + if (r < 0) + return log_error_errno(r, "Bad --case-sensitive= argument \"%s\": %m", optarg); + arg_case = r ? PATTERN_COMPILE_CASE_SENSITIVE : PATTERN_COMPILE_CASE_INSENSITIVE; + } else + arg_case = PATTERN_COMPILE_CASE_SENSITIVE; + + break; + + case 'S': + r = parse_timestamp(optarg, &arg_since); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse timestamp: %s", optarg); + arg_since_set = true; + break; + + case 'U': + r = parse_timestamp(optarg, &arg_until); + if (r < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse timestamp: %s", optarg); + arg_until_set = true; + break; + + case 't': + r = strv_extend(&arg_syslog_identifier, optarg); + if (r < 0) + return log_oom(); + break; + + case 'u': + r = strv_extend(&arg_system_units, optarg); + if (r < 0) + return log_oom(); + break; + + case ARG_USER_UNIT: + r = strv_extend(&arg_user_units, optarg); + if (r < 0) + return log_oom(); + break; + + case 'F': + arg_action = ACTION_LIST_FIELDS; + arg_field = optarg; + break; + + case 'N': + arg_action = ACTION_LIST_FIELD_NAMES; + break; + + case ARG_NO_HOSTNAME: + arg_no_hostname = true; + break; + + case 'x': + arg_catalog = true; + break; + + case ARG_LIST_CATALOG: + arg_action = ACTION_LIST_CATALOG; + break; + + case ARG_DUMP_CATALOG: + arg_action = ACTION_DUMP_CATALOG; + break; + + case ARG_UPDATE_CATALOG: + arg_action = ACTION_UPDATE_CATALOG; + break; + + case 'r': + arg_reverse = true; + break; + + case ARG_UTC: + arg_utc = true; + break; + + case ARG_FLUSH: + arg_action = ACTION_FLUSH; + break; + + case ARG_SMART_RELINQUISH_VAR: { + int root_mnt_id, log_mnt_id; + + /* Try to be smart about relinquishing access to /var/log/journal/ during shutdown: + * if it's on the same mount as the root file system there's no point in + * relinquishing access and we can leave journald write to it until the very last + * moment. 
*/ + + r = path_get_mnt_id("/", &root_mnt_id); + if (r < 0) + log_debug_errno(r, "Failed to get root mount ID, ignoring: %m"); + else { + r = path_get_mnt_id("/var/log/journal/", &log_mnt_id); + if (r < 0) + log_debug_errno(r, "Failed to get journal directory mount ID, ignoring: %m"); + else if (root_mnt_id == log_mnt_id) { + log_debug("/var/log/journal/ is on root file system, not relinquishing access to /var."); + return 0; + } else + log_debug("/var/log/journal/ is not on the root file system, relinquishing access to it."); + } + + _fallthrough_; + } + + case ARG_RELINQUISH_VAR: + arg_action = ACTION_RELINQUISH_VAR; + break; + + case ARG_ROTATE: + arg_action = arg_action == ACTION_VACUUM ? ACTION_ROTATE_AND_VACUUM : ACTION_ROTATE; + break; + + case ARG_SYNC: + arg_action = ACTION_SYNC; + break; + + case ARG_OUTPUT_FIELDS: { + _cleanup_strv_free_ char **v = NULL; + + v = strv_split(optarg, ","); + if (!v) + return log_oom(); + + if (!arg_output_fields) + arg_output_fields = TAKE_PTR(v); + else { + r = strv_extend_strv(&arg_output_fields, v, true); + if (r < 0) + return log_oom(); + } + break; + } + + case '?': + return -EINVAL; + + default: + assert_not_reached(); + } + + if (arg_no_tail) + arg_lines = ARG_LINES_ALL; + + if (arg_follow && !arg_since_set && arg_lines == ARG_LINES_DEFAULT) + arg_lines = 10; + + if (arg_follow && !arg_merge && !arg_boot) { + arg_boot = true; + arg_boot_id = SD_ID128_NULL; + arg_boot_offset = 0; + } + + if (!!arg_directory + !!arg_file + !!arg_machine + !!arg_root + !!arg_image > 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Please specify at most one of -D/--directory=, --file=, -M/--machine=, --root=, --image=."); + + if (arg_since_set && arg_until_set && arg_since > arg_until) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "--since= must be before --until=."); + + if (!!arg_cursor + !!arg_after_cursor + !!arg_since_set > 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Please specify only one of --since=, 
--cursor=, and --after-cursor=."); + + if (arg_follow && arg_reverse) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Please specify either --reverse= or --follow=, not both."); + + if (!IN_SET(arg_action, ACTION_SHOW, ACTION_DUMP_CATALOG, ACTION_LIST_CATALOG) && optind < argc) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Extraneous arguments starting with '%s'", + argv[optind]); + + if ((arg_boot || arg_action == ACTION_LIST_BOOTS) && arg_merge) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Using --boot or --list-boots with --merge is not supported."); + + if (!strv_isempty(arg_system_units) && arg_journal_type == SD_JOURNAL_CURRENT_USER) { + /* Specifying --user and --unit= at the same time makes no sense (as the former excludes the user + * journal, but the latter excludes the system journal, thus resulting in empty output). Let's be nice + * to users, and automatically turn --unit= into --user-unit= if combined with --user. */ + r = strv_extend_strv(&arg_user_units, arg_system_units, true); + if (r < 0) + return r; + + arg_system_units = strv_free(arg_system_units); + } + + if (arg_pattern) { + r = pattern_compile_and_log(arg_pattern, arg_case, &arg_compiled_pattern); + if (r < 0) + return r; + + /* When --grep is used along with --lines, we don't know how many lines we can print. + * So we search backwards and count until enough lines have been printed or we hit the head. + * An exception is that --follow might set arg_lines, so let's not imply --reverse + * if that is specified. 
*/ + if (arg_lines >= 0 && !arg_follow) + arg_reverse = true; + } + + return 1; +} + +static int add_matches(sd_journal *j, char **args) { + bool have_term = false; + + assert(j); + + STRV_FOREACH(i, args) { + int r; + + if (streq(*i, "+")) { + if (!have_term) + break; + r = sd_journal_add_disjunction(j); + have_term = false; + + } else if (path_is_absolute(*i)) { + _cleanup_free_ char *p = NULL, *t = NULL, *t2 = NULL, *interpreter = NULL; + struct stat st; + + r = chase_symlinks(*i, NULL, CHASE_TRAIL_SLASH, &p, NULL); + if (r < 0) + return log_error_errno(r, "Couldn't canonicalize path: %m"); + + if (lstat(p, &st) < 0) + return log_error_errno(errno, "Couldn't stat file: %m"); + + if (S_ISREG(st.st_mode) && (0111 & st.st_mode)) { + if (executable_is_script(p, &interpreter) > 0) { + _cleanup_free_ char *comm = NULL; + + comm = strndup(basename(p), 15); + if (!comm) + return log_oom(); + + t = strjoin("_COMM=", comm); + if (!t) + return log_oom(); + + /* Append _EXE only if the interpreter is not a link. + Otherwise, it might be outdated often. 
*/ + if (lstat(interpreter, &st) == 0 && !S_ISLNK(st.st_mode)) { + t2 = strjoin("_EXE=", interpreter); + if (!t2) + return log_oom(); + } + } else { + t = strjoin("_EXE=", p); + if (!t) + return log_oom(); + } + + r = sd_journal_add_match(j, t, 0); + + if (r >=0 && t2) + r = sd_journal_add_match(j, t2, 0); + + } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { + r = add_matches_for_device(j, p); + if (r < 0) + return r; + } else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "File is neither a device node, nor regular file, nor executable: %s", + *i); + + have_term = true; + } else { + r = sd_journal_add_match(j, *i, 0); + have_term = true; + } + + if (r < 0) + return log_error_errno(r, "Failed to add match '%s': %m", *i); + } + + if (!strv_isempty(args) && !have_term) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "\"+\" can only be used between terms"); + + return 0; +} + +static void boot_id_free_all(BootId *l) { + + while (l) { + BootId *i = l; + LIST_REMOVE(boot_list, l, i); + free(i); + } +} + +static int discover_next_boot(sd_journal *j, + sd_id128_t previous_boot_id, + bool advance_older, + BootId **ret) { + + _cleanup_free_ BootId *next_boot = NULL; + char match[STRLEN("_BOOT_ID=") + SD_ID128_STRING_MAX] = "_BOOT_ID="; + sd_id128_t boot_id; + int r; + + assert(j); + assert(ret); + + /* We expect the journal to be on the last position of a boot + * (in relation to the direction we are going), so that the next + * invocation of sd_journal_next/previous will be from a different + * boot. We then collect any information we desire and then jump + * to the last location of the new boot by using a _BOOT_ID match + * coming from the other journal direction. */ + + /* Make sure we aren't restricted by any _BOOT_ID matches, so that + * we can actually advance to a *different* boot. 
*/ + sd_journal_flush_matches(j); + + do { + if (advance_older) + r = sd_journal_previous(j); + else + r = sd_journal_next(j); + if (r < 0) + return r; + else if (r == 0) + return 0; /* End of journal, yay. */ + + r = sd_journal_get_monotonic_usec(j, NULL, &boot_id); + if (r < 0) + return r; + + /* We iterate through this in a loop, until the boot ID differs from the previous one. Note that + * normally, this will only require a single iteration, as we seeked to the last entry of the previous + * boot entry already. However, it might happen that the per-journal-field entry arrays are less + * complete than the main entry array, and hence might reference an entry that's not actually the last + * one of the boot ID as last one. Let's hence use the per-field array is initial seek position to + * speed things up, but let's not trust that it is complete, and hence, manually advance as + * necessary. */ + + } while (sd_id128_equal(boot_id, previous_boot_id)); + + next_boot = new0(BootId, 1); + if (!next_boot) + return -ENOMEM; + + next_boot->id = boot_id; + + r = sd_journal_get_realtime_usec(j, &next_boot->first); + if (r < 0) + return r; + + /* Now seek to the last occurrence of this boot ID. */ + sd_id128_to_string(next_boot->id, match + STRLEN("_BOOT_ID=")); + r = sd_journal_add_match(j, match, sizeof(match) - 1); + if (r < 0) + return r; + + if (advance_older) + r = sd_journal_seek_head(j); + else + r = sd_journal_seek_tail(j); + if (r < 0) + return r; + + if (advance_older) + r = sd_journal_next(j); + else + r = sd_journal_previous(j); + if (r < 0) + return r; + else if (r == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), + "Whoopsie! We found a boot ID but can't read its last entry."); /* This shouldn't happen. We just came from this very boot ID. 
*/ + + r = sd_journal_get_realtime_usec(j, &next_boot->last); + if (r < 0) + return r; + + *ret = TAKE_PTR(next_boot); + + return 0; +} + +static int get_boots( + sd_journal *j, + BootId **boots, + sd_id128_t *boot_id, + int offset) { + + bool skip_once; + int r, count = 0; + BootId *head = NULL, *tail = NULL; + const bool advance_older = boot_id && offset <= 0; + sd_id128_t previous_boot_id; + + assert(j); + + /* Adjust for the asymmetry that offset 0 is + * the last (and current) boot, while 1 is considered the + * (chronological) first boot in the journal. */ + skip_once = boot_id && sd_id128_is_null(*boot_id) && offset <= 0; + + /* Advance to the earliest/latest occurrence of our reference + * boot ID (taking our lookup direction into account), so that + * discover_next_boot() can do its job. + * If no reference is given, the journal head/tail will do, + * they're "virtual" boots after all. */ + if (boot_id && !sd_id128_is_null(*boot_id)) { + char match[STRLEN("_BOOT_ID=") + SD_ID128_STRING_MAX] = "_BOOT_ID="; + + sd_journal_flush_matches(j); + + sd_id128_to_string(*boot_id, match + STRLEN("_BOOT_ID=")); + r = sd_journal_add_match(j, match, sizeof(match) - 1); + if (r < 0) + return r; + + if (advance_older) + r = sd_journal_seek_head(j); /* seek to oldest */ + else + r = sd_journal_seek_tail(j); /* seek to newest */ + if (r < 0) + return r; + + if (advance_older) + r = sd_journal_next(j); /* read the oldest entry */ + else + r = sd_journal_previous(j); /* read the most recently added entry */ + if (r < 0) + return r; + else if (r == 0) + goto finish; + else if (offset == 0) { + count = 1; + goto finish; + } + + /* At this point the read pointer is positioned at the oldest/newest occurrence of the reference boot + * ID. 
After flushing the matches, one more invocation of _previous()/_next() will hence place us at + * the following entry, which must then have an older/newer boot ID */ + } else { + + if (advance_older) + r = sd_journal_seek_tail(j); /* seek to newest */ + else + r = sd_journal_seek_head(j); /* seek to oldest */ + if (r < 0) + return r; + + /* No sd_journal_next()/_previous() here. + * + * At this point the read pointer is positioned after the newest/before the oldest entry in the whole + * journal. The next invocation of _previous()/_next() will hence position us at the newest/oldest + * entry we have. */ + } + + previous_boot_id = SD_ID128_NULL; + for (;;) { + _cleanup_free_ BootId *current = NULL; + + r = discover_next_boot(j, previous_boot_id, advance_older, ¤t); + if (r < 0) { + boot_id_free_all(head); + return r; + } + + if (!current) + break; + + previous_boot_id = current->id; + + if (boot_id) { + if (!skip_once) + offset += advance_older ? 1 : -1; + skip_once = false; + + if (offset == 0) { + count = 1; + *boot_id = current->id; + break; + } + } else { + LIST_FOREACH(boot_list, id, head) { + if (sd_id128_equal(id->id, current->id)) { + /* boot id already stored, something wrong with the journal files */ + /* exiting as otherwise this problem would cause forever loop */ + goto finish; + } + } + LIST_INSERT_AFTER(boot_list, head, tail, current); + tail = TAKE_PTR(current); + count++; + } + } + +finish: + if (boots) + *boots = head; + + sd_journal_flush_matches(j); + + return count; +} + +static int list_boots(sd_journal *j) { + _cleanup_(table_unrefp) Table *table = NULL; + BootId *all_ids; + int count, i, r; + + assert(j); + + count = get_boots(j, &all_ids, NULL, 0); + if (count < 0) + return log_error_errno(count, "Failed to determine boots: %m"); + if (count == 0) + return count; + + table = table_new("idx", "boot id", "first entry", "last entry"); + if (!table) + return log_oom(); + + if (arg_full) + table_set_width(table, 0); + + r = 
table_set_json_field_name(table, 0, "index"); + if (r < 0) + return log_error_errno(r, "Failed to set JSON field name of column 0: %m"); + + (void) table_set_sort(table, (size_t) 0); + (void) table_set_reverse(table, 0, arg_reverse); + + i = 0; + LIST_FOREACH(boot_list, id, all_ids) { + r = table_add_many(table, + TABLE_INT, i - count + 1, + TABLE_SET_ALIGN_PERCENT, 100, + TABLE_ID128, id->id, + TABLE_TIMESTAMP, id->first, + TABLE_TIMESTAMP, id->last); + if (r < 0) + return table_log_add_error(r); + i++; + } + + r = table_print_with_pager(table, arg_json_format_flags, arg_pager_flags, !arg_quiet); + if (r < 0) + return table_log_print_error(r); + + boot_id_free_all(all_ids); + + return 0; +} + +static int add_boot(sd_journal *j) { + char match[STRLEN("_BOOT_ID=") + SD_ID128_STRING_MAX] = "_BOOT_ID="; + sd_id128_t boot_id; + int r; + + assert(j); + + if (!arg_boot) + return 0; + + /* Take a shortcut and use the current boot_id, which we can do very quickly. + * We can do this only when we logs are coming from the current machine, + * so take the slow path if log location is specified. */ + if (arg_boot_offset == 0 && sd_id128_is_null(arg_boot_id) && + !arg_directory && !arg_file && !arg_root) + return add_match_this_boot(j, arg_machine); + + boot_id = arg_boot_id; + r = get_boots(j, NULL, &boot_id, arg_boot_offset); + assert(r <= 1); + if (r <= 0) { + const char *reason = (r == 0) ? "No such boot ID in journal" : STRERROR(r); + + if (sd_id128_is_null(arg_boot_id)) + log_error("Data from the specified boot (%+i) is not available: %s", + arg_boot_offset, reason); + else + log_error("Data from the specified boot ("SD_ID128_FORMAT_STR") is not available: %s", + SD_ID128_FORMAT_VAL(arg_boot_id), reason); + + return r == 0 ? 
-ENODATA : r; + } + + sd_id128_to_string(boot_id, match + STRLEN("_BOOT_ID=")); + + r = sd_journal_add_match(j, match, sizeof(match) - 1); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + + r = sd_journal_add_conjunction(j); + if (r < 0) + return log_error_errno(r, "Failed to add conjunction: %m"); + + return 0; +} + +static int add_dmesg(sd_journal *j) { + int r; + assert(j); + + if (!arg_dmesg) + return 0; + + r = sd_journal_add_match(j, "_TRANSPORT=kernel", + STRLEN("_TRANSPORT=kernel")); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + + r = sd_journal_add_conjunction(j); + if (r < 0) + return log_error_errno(r, "Failed to add conjunction: %m"); + + return 0; +} + +static int get_possible_units( + sd_journal *j, + const char *fields, + char **patterns, + Set **units) { + + _cleanup_set_free_free_ Set *found = NULL; + const char *field; + int r; + + found = set_new(&string_hash_ops); + if (!found) + return -ENOMEM; + + NULSTR_FOREACH(field, fields) { + const void *data; + size_t size; + + r = sd_journal_query_unique(j, field); + if (r < 0) + return r; + + SD_JOURNAL_FOREACH_UNIQUE(j, data, size) { + char *eq; + size_t prefix; + _cleanup_free_ char *u = NULL; + + eq = memchr(data, '=', size); + if (eq) + prefix = eq - (char*) data + 1; + else + prefix = 0; + + u = strndup((char*) data + prefix, size - prefix); + if (!u) + return -ENOMEM; + + STRV_FOREACH(pattern, patterns) + if (fnmatch(*pattern, u, FNM_NOESCAPE) == 0) { + log_debug("Matched %s with pattern %s=%s", u, field, *pattern); + + r = set_consume(found, u); + u = NULL; + if (r < 0 && r != -EEXIST) + return r; + + break; + } + } + } + + *units = TAKE_PTR(found); + + return 0; +} + +/* This list is supposed to return the superset of unit names + * possibly matched by rules added with add_matches_for_unit... */ +#define SYSTEM_UNITS \ + "_SYSTEMD_UNIT\0" \ + "COREDUMP_UNIT\0" \ + "UNIT\0" \ + "OBJECT_SYSTEMD_UNIT\0" \ + "_SYSTEMD_SLICE\0" + +/* ... 
and add_matches_for_user_unit */ +#define USER_UNITS \ + "_SYSTEMD_USER_UNIT\0" \ + "USER_UNIT\0" \ + "COREDUMP_USER_UNIT\0" \ + "OBJECT_SYSTEMD_USER_UNIT\0" \ + "_SYSTEMD_USER_SLICE\0" + +static int add_units(sd_journal *j) { + _cleanup_strv_free_ char **patterns = NULL; + int r, count = 0; + + assert(j); + + STRV_FOREACH(i, arg_system_units) { + _cleanup_free_ char *u = NULL; + + r = unit_name_mangle(*i, UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 0 : UNIT_NAME_MANGLE_WARN), &u); + if (r < 0) + return r; + + if (string_is_glob(u)) { + r = strv_push(&patterns, u); + if (r < 0) + return r; + u = NULL; + } else { + r = add_matches_for_unit(j, u); + if (r < 0) + return r; + r = sd_journal_add_disjunction(j); + if (r < 0) + return r; + count++; + } + } + + if (!strv_isempty(patterns)) { + _cleanup_set_free_free_ Set *units = NULL; + char *u; + + r = get_possible_units(j, SYSTEM_UNITS, patterns, &units); + if (r < 0) + return r; + + SET_FOREACH(u, units) { + r = add_matches_for_unit(j, u); + if (r < 0) + return r; + r = sd_journal_add_disjunction(j); + if (r < 0) + return r; + count++; + } + } + + patterns = strv_free(patterns); + + STRV_FOREACH(i, arg_user_units) { + _cleanup_free_ char *u = NULL; + + r = unit_name_mangle(*i, UNIT_NAME_MANGLE_GLOB | (arg_quiet ? 
0 : UNIT_NAME_MANGLE_WARN), &u); + if (r < 0) + return r; + + if (string_is_glob(u)) { + r = strv_push(&patterns, u); + if (r < 0) + return r; + u = NULL; + } else { + r = add_matches_for_user_unit(j, u, getuid()); + if (r < 0) + return r; + r = sd_journal_add_disjunction(j); + if (r < 0) + return r; + count++; + } + } + + if (!strv_isempty(patterns)) { + _cleanup_set_free_free_ Set *units = NULL; + char *u; + + r = get_possible_units(j, USER_UNITS, patterns, &units); + if (r < 0) + return r; + + SET_FOREACH(u, units) { + r = add_matches_for_user_unit(j, u, getuid()); + if (r < 0) + return r; + r = sd_journal_add_disjunction(j); + if (r < 0) + return r; + count++; + } + } + + /* Complain if the user request matches but nothing whatsoever was + * found, since otherwise everything would be matched. */ + if (!(strv_isempty(arg_system_units) && strv_isempty(arg_user_units)) && count == 0) + return -ENODATA; + + r = sd_journal_add_conjunction(j); + if (r < 0) + return r; + + return 0; +} + +static int add_priorities(sd_journal *j) { + char match[] = "PRIORITY=0"; + int i, r; + assert(j); + + if (arg_priorities == 0xFF) + return 0; + + for (i = LOG_EMERG; i <= LOG_DEBUG; i++) + if (arg_priorities & (1 << i)) { + match[sizeof(match)-2] = '0' + i; + + r = sd_journal_add_match(j, match, strlen(match)); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + } + + r = sd_journal_add_conjunction(j); + if (r < 0) + return log_error_errno(r, "Failed to add conjunction: %m"); + + return 0; +} + +static int add_facilities(sd_journal *j) { + void *p; + int r; + + SET_FOREACH(p, arg_facilities) { + char match[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)]; + + xsprintf(match, "SYSLOG_FACILITY=%d", PTR_TO_INT(p)); + + r = sd_journal_add_match(j, match, strlen(match)); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + } + + return 0; +} + +static int add_syslog_identifier(sd_journal *j) { + int r; + + assert(j); + + STRV_FOREACH(i, 
arg_syslog_identifier) { + _cleanup_free_ char *u = NULL; + + u = strjoin("SYSLOG_IDENTIFIER=", *i); + if (!u) + return -ENOMEM; + r = sd_journal_add_match(j, u, 0); + if (r < 0) + return r; + r = sd_journal_add_disjunction(j); + if (r < 0) + return r; + } + + r = sd_journal_add_conjunction(j); + if (r < 0) + return r; + + return 0; +} + +#if HAVE_GCRYPT +static int format_journal_url( + const void *seed, + size_t seed_size, + uint64_t start, + uint64_t interval, + const char *hn, + sd_id128_t machine, + bool full, + char **ret_url) { + _cleanup_free_ char *url = NULL; + _cleanup_fclose_ FILE *f = NULL; + size_t url_size = 0; + int r; + + assert(seed); + assert(seed_size > 0); + + f = open_memstream_unlocked(&url, &url_size); + if (!f) + return -ENOMEM; + + if (full) + fputs("fss://", f); + + for (size_t i = 0; i < seed_size; i++) { + if (i > 0 && i % 3 == 0) + fputc('-', f); + fprintf(f, "%02x", ((uint8_t*) seed)[i]); + } + + fprintf(f, "/%"PRIx64"-%"PRIx64, start, interval); + + if (full) { + fprintf(f, "?machine=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(machine)); + if (hn) + fprintf(f, ";hostname=%s", hn); + } + + r = fflush_and_check(f); + if (r < 0) + return r; + + f = safe_fclose(f); + *ret_url = TAKE_PTR(url); + return 0; +} +#endif + +static int setup_keys(void) { +#if HAVE_GCRYPT + size_t mpk_size, seed_size, state_size; + _cleanup_(unlink_and_freep) char *k = NULL; + _cleanup_free_ char *p = NULL; + uint8_t *mpk, *seed, *state; + _cleanup_close_ int fd = -1; + sd_id128_t machine, boot; + struct stat st; + uint64_t n; + int r; + + r = stat("/var/log/journal", &st); + if (r < 0 && !IN_SET(errno, ENOENT, ENOTDIR)) + return log_error_errno(errno, "stat(\"%s\") failed: %m", "/var/log/journal"); + + if (r < 0 || !S_ISDIR(st.st_mode)) { + log_error("%s is not a directory, must be using persistent logging for FSS.", + "/var/log/journal"); + return r < 0 ? 
-errno : -ENOTDIR; + } + + r = sd_id128_get_machine(&machine); + if (r < 0) + return log_error_errno(r, "Failed to get machine ID: %m"); + + r = sd_id128_get_boot(&boot); + if (r < 0) + return log_error_errno(r, "Failed to get boot ID: %m"); + + if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss", + SD_ID128_FORMAT_VAL(machine)) < 0) + return log_oom(); + + if (arg_force) { + r = unlink(p); + if (r < 0 && errno != ENOENT) + return log_error_errno(errno, "unlink(\"%s\") failed: %m", p); + } else if (access(p, F_OK) >= 0) + return log_error_errno(SYNTHETIC_ERRNO(EEXIST), + "Sealing key file %s exists already. Use --force to recreate.", p); + + if (asprintf(&k, "/var/log/journal/" SD_ID128_FORMAT_STR "/fss.tmp.XXXXXX", + SD_ID128_FORMAT_VAL(machine)) < 0) + return log_oom(); + + mpk_size = FSPRG_mskinbytes(FSPRG_RECOMMENDED_SECPAR); + mpk = alloca_safe(mpk_size); + + seed_size = FSPRG_RECOMMENDED_SEEDLEN; + seed = alloca_safe(seed_size); + + state_size = FSPRG_stateinbytes(FSPRG_RECOMMENDED_SECPAR); + state = alloca_safe(state_size); + + log_info("Generating seed..."); + r = crypto_random_bytes(seed, seed_size); + if (r < 0) + return log_error_errno(r, "Failed to acquire random seed: %m"); + + log_info("Generating key pair..."); + FSPRG_GenMK(NULL, mpk, seed, seed_size, FSPRG_RECOMMENDED_SECPAR); + + log_info("Generating sealing key..."); + FSPRG_GenState0(state, mpk, seed, seed_size); + + assert(arg_interval > 0); + + n = now(CLOCK_REALTIME); + n /= arg_interval; + + safe_close(fd); + fd = mkostemp_safe(k); + if (fd < 0) + return log_error_errno(fd, "Failed to open %s: %m", k); + + r = chattr_secret(fd, CHATTR_WARN_UNSUPPORTED_FLAGS); + if (r < 0) + log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? 
LOG_DEBUG : LOG_WARNING, + r, "Failed to set file attributes on '%s', ignoring: %m", k); + + struct FSSHeader h = { + .signature = { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' }, + .machine_id = machine, + .boot_id = boot, + .header_size = htole64(sizeof(h)), + .start_usec = htole64(n * arg_interval), + .interval_usec = htole64(arg_interval), + .fsprg_secpar = htole16(FSPRG_RECOMMENDED_SECPAR), + .fsprg_state_size = htole64(state_size), + }; + + r = loop_write(fd, &h, sizeof(h), false); + if (r < 0) + return log_error_errno(r, "Failed to write header: %m"); + + r = loop_write(fd, state, state_size, false); + if (r < 0) + return log_error_errno(r, "Failed to write state: %m"); + + if (rename(k, p) < 0) + return log_error_errno(errno, "Failed to link file: %m"); + + k = mfree(k); + + _cleanup_free_ char *hn = NULL, *key = NULL; + + r = format_journal_url(seed, seed_size, n, arg_interval, hn, machine, false, &key); + if (r < 0) + return r; + + if (on_tty()) { + hn = gethostname_malloc(); + if (hn) + hostname_cleanup(hn); + + fprintf(stderr, + "\nNew keys have been generated for host %s%s" SD_ID128_FORMAT_STR ".\n" + "\n" + "The %ssecret sealing key%s has been written to the following local file.\n" + "This key file is automatically updated when the sealing key is advanced.\n" + "It should not be used on multiple hosts.\n" + "\n" + "\t%s\n" + "\n" + "The sealing key is automatically changed every %s.\n" + "\n" + "Please write down the following %ssecret verification key%s. It should be stored\n" + "in a safe location and should not be saved locally on disk.\n" + "\n\t%s", + strempty(hn), hn ? 
"/" : "", + SD_ID128_FORMAT_VAL(machine), + ansi_highlight(), ansi_normal(), + p, + FORMAT_TIMESPAN(arg_interval, 0), + ansi_highlight(), ansi_normal(), + ansi_highlight_red()); + fflush(stderr); + } + + puts(key); + + if (on_tty()) { + fprintf(stderr, "%s", ansi_normal()); +#if HAVE_QRENCODE + _cleanup_free_ char *url = NULL; + r = format_journal_url(seed, seed_size, n, arg_interval, hn, machine, true, &url); + if (r < 0) + return r; + + (void) print_qrcode(stderr, + "To transfer the verification key to your phone scan the QR code below", + url); +#endif + } + + return 0; +#else + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Forward-secure sealing not available."); +#endif +} + +static int verify(sd_journal *j, bool verbose) { + int r = 0; + JournalFile *f; + + assert(j); + + log_show_color(true); + + ORDERED_HASHMAP_FOREACH(f, j->files) { + int k; + usec_t first = 0, validated = 0, last = 0; + +#if HAVE_GCRYPT + if (!arg_verify_key && JOURNAL_HEADER_SEALED(f->header)) + log_notice("Journal file %s has sealing enabled but verification key has not been passed using --verify-key=.", f->path); +#endif + + k = journal_file_verify(f, arg_verify_key, &first, &validated, &last, verbose); + if (k == -EINVAL) + /* If the key was invalid give up right-away. */ + return k; + else if (k < 0) + r = log_warning_errno(k, "FAIL: %s (%m)", f->path); + else { + char a[FORMAT_TIMESTAMP_MAX], b[FORMAT_TIMESTAMP_MAX]; + log_full(verbose ? LOG_INFO : LOG_DEBUG, "PASS: %s", f->path); + + if (arg_verify_key && JOURNAL_HEADER_SEALED(f->header)) { + if (validated > 0) { + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "=> Validated from %s to %s, final %s entries not sealed.", + format_timestamp_maybe_utc(a, sizeof(a), first), + format_timestamp_maybe_utc(b, sizeof(b), validated), + FORMAT_TIMESPAN(last > validated ? last - validated : 0, 0)); + } else if (last > 0) + log_full(verbose ? 
LOG_INFO : LOG_DEBUG, + "=> No sealing yet, %s of entries not sealed.", + FORMAT_TIMESPAN(last - first, 0)); + else + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "=> No sealing yet, no entries in file."); + } + } + } + + return r; +} + +static int simple_varlink_call(const char *option, const char *method) { + _cleanup_(varlink_flush_close_unrefp) Varlink *link = NULL; + const char *error, *fn; + int r; + + if (arg_machine) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "%s is not supported in conjunction with --machine=.", option); + + fn = arg_namespace ? + strjoina("/run/systemd/journal.", arg_namespace, "/io.systemd.journal") : + "/run/systemd/journal/io.systemd.journal"; + + r = varlink_connect_address(&link, fn); + if (r < 0) + return log_error_errno(r, "Failed to connect to %s: %m", fn); + + (void) varlink_set_description(link, "journal"); + (void) varlink_set_relative_timeout(link, USEC_INFINITY); + + r = varlink_call(link, method, NULL, NULL, &error, NULL); + if (r < 0) + return log_error_errno(r, "Failed to execute varlink call: %m"); + if (error) + return log_error_errno(SYNTHETIC_ERRNO(ENOANO), + "Failed to execute varlink call: %s", error); + + return 0; +} + +static int flush_to_var(void) { + if (access("/run/systemd/journal/flushed", F_OK) >= 0) + return 0; /* Already flushed, no need to contact journald */ + if (errno != ENOENT) + return log_error_errno(errno, "Unable to check for existence of /run/systemd/journal/flushed: %m"); + + return simple_varlink_call("--flush", "io.systemd.Journal.FlushToVar"); +} + +static int relinquish_var(void) { + return simple_varlink_call("--relinquish-var/--smart-relinquish-var", "io.systemd.Journal.RelinquishVar"); +} + +static int rotate(void) { + return simple_varlink_call("--rotate", "io.systemd.Journal.Rotate"); +} + +static int sync_journal(void) { + return simple_varlink_call("--sync", "io.systemd.Journal.Synchronize"); +} + +static int wait_for_change(sd_journal *j, int poll_fd) { + struct pollfd 
pollfds[] = { + { .fd = poll_fd, .events = POLLIN }, + { .fd = STDOUT_FILENO }, + }; + usec_t timeout; + int r; + + assert(j); + assert(poll_fd >= 0); + + /* Much like sd_journal_wait() but also keeps an eye on STDOUT, and exits as soon as we see a POLLHUP on that, + * i.e. when it is closed. */ + + r = sd_journal_get_timeout(j, &timeout); + if (r < 0) + return log_error_errno(r, "Failed to determine journal waiting time: %m"); + + r = ppoll_usec(pollfds, ELEMENTSOF(pollfds), timeout); + if (r == -EINTR) + return 0; + if (r < 0) + return log_error_errno(r, "Couldn't wait for journal event: %m"); + + if (pollfds[1].revents & (POLLHUP|POLLERR)) /* STDOUT has been closed? */ + return log_debug_errno(SYNTHETIC_ERRNO(ECANCELED), + "Standard output has been closed."); + + r = sd_journal_process(j); + if (r < 0) + return log_error_errno(r, "Failed to process journal events: %m"); + + return 0; +} + +int main(int argc, char *argv[]) { + _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; + _cleanup_(umount_and_rmdir_and_freep) char *unlink_dir = NULL; + bool previous_boot_id_valid = false, first_line = true, ellipsized = false, need_seek = false; + bool use_cursor = false, after_cursor = false; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + sd_id128_t previous_boot_id = SD_ID128_NULL, previous_boot_id_output = SD_ID128_NULL; + dual_timestamp previous_ts_output = DUAL_TIMESTAMP_NULL; + int n_shown = 0, r, poll_fd = -1; + + setlocale(LC_ALL, ""); + log_setup(); + + /* Increase max number of open files if we can, we might needs this when browsing journal files, which might be + * split up into many files. 
*/ + (void) rlimit_nofile_bump(HIGH_RLIMIT_NOFILE); + + r = parse_argv(argc, argv); + if (r <= 0) + goto finish; + + if (arg_image) { + assert(!arg_root); + + r = mount_image_privately_interactively( + arg_image, + DISSECT_IMAGE_GENERIC_ROOT | + DISSECT_IMAGE_REQUIRE_ROOT | + DISSECT_IMAGE_VALIDATE_OS | + DISSECT_IMAGE_RELAX_VAR_CHECK | + (arg_action == ACTION_UPDATE_CATALOG ? DISSECT_IMAGE_FSCK|DISSECT_IMAGE_GROWFS : DISSECT_IMAGE_READ_ONLY), + &unlink_dir, + &loop_device); + if (r < 0) + return r; + + arg_root = strdup(unlink_dir); + if (!arg_root) + return log_oom(); + } + + signal(SIGWINCH, columns_lines_cache_reset); + sigbus_install(); + + switch (arg_action) { + + case ACTION_NEW_ID128: + r = id128_print_new(ID128_PRINT_PRETTY); + goto finish; + + case ACTION_SETUP_KEYS: + r = setup_keys(); + goto finish; + + case ACTION_LIST_CATALOG: + case ACTION_DUMP_CATALOG: + case ACTION_UPDATE_CATALOG: { + _cleanup_free_ char *database = NULL; + + database = path_join(arg_root, CATALOG_DATABASE); + if (!database) { + r = log_oom(); + goto finish; + } + + if (arg_action == ACTION_UPDATE_CATALOG) { + r = catalog_update(database, arg_root, catalog_file_dirs); + if (r < 0) + log_error_errno(r, "Failed to list catalog: %m"); + } else { + bool oneline = arg_action == ACTION_LIST_CATALOG; + + pager_open(arg_pager_flags); + + if (optind < argc) + r = catalog_list_items(stdout, database, oneline, argv + optind); + else + r = catalog_list(stdout, database, oneline); + if (r < 0) + log_error_errno(r, "Failed to list catalog: %m"); + } + + goto finish; + } + + case ACTION_FLUSH: + r = flush_to_var(); + goto finish; + + case ACTION_RELINQUISH_VAR: + r = relinquish_var(); + goto finish; + + case ACTION_SYNC: + r = sync_journal(); + goto finish; + + case ACTION_ROTATE: + r = rotate(); + goto finish; + + case ACTION_SHOW: + case ACTION_PRINT_HEADER: + case ACTION_VERIFY: + case ACTION_DISK_USAGE: + case ACTION_LIST_BOOTS: + case ACTION_VACUUM: + case ACTION_ROTATE_AND_VACUUM: + case 
ACTION_LIST_FIELDS: + case ACTION_LIST_FIELD_NAMES: + /* These ones require access to the journal files, continue below. */ + break; + + default: + assert_not_reached(); + } + + if (arg_directory) + r = sd_journal_open_directory(&j, arg_directory, arg_journal_type); + else if (arg_root) + r = sd_journal_open_directory(&j, arg_root, arg_journal_type | SD_JOURNAL_OS_ROOT); + else if (arg_file_stdin) + r = sd_journal_open_files_fd(&j, (int[]) { STDIN_FILENO }, 1, 0); + else if (arg_file) + r = sd_journal_open_files(&j, (const char**) arg_file, 0); + else if (arg_machine) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + int fd; + + if (geteuid() != 0) { + /* The file descriptor returned by OpenMachineRootDirectory() will be owned by users/groups of + * the container, thus we need root privileges to override them. */ + r = log_error_errno(SYNTHETIC_ERRNO(EPERM), "Using the --machine= switch requires root privileges."); + goto finish; + } + + r = sd_bus_open_system(&bus); + if (r < 0) { + log_error_errno(r, "Failed to open system bus: %m"); + goto finish; + } + + r = sd_bus_call_method( + bus, + "org.freedesktop.machine1", + "/org/freedesktop/machine1", + "org.freedesktop.machine1.Manager", + "OpenMachineRootDirectory", + &error, + &reply, + "s", arg_machine); + if (r < 0) { + log_error_errno(r, "Failed to open root directory: %s", bus_error_message(&error, r)); + goto finish; + } + + r = sd_bus_message_read(reply, "h", &fd); + if (r < 0) { + bus_log_parse_error(r); + goto finish; + } + + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (fd < 0) { + r = log_error_errno(errno, "Failed to duplicate file descriptor: %m"); + goto finish; + } + + r = sd_journal_open_directory_fd(&j, fd, SD_JOURNAL_OS_ROOT); + if (r < 0) + safe_close(fd); + } else + r = sd_journal_open_namespace( + &j, + arg_namespace, + (arg_merge ? 
0 : SD_JOURNAL_LOCAL_ONLY) | + arg_namespace_flags | arg_journal_type); + if (r < 0) { + log_error_errno(r, "Failed to open %s: %m", arg_directory ?: arg_file ? "files" : "journal"); + goto finish; + } + + r = journal_access_check_and_warn(j, arg_quiet, + !(arg_journal_type == SD_JOURNAL_CURRENT_USER || arg_user_units)); + if (r < 0) + goto finish; + + switch (arg_action) { + + case ACTION_NEW_ID128: + case ACTION_SETUP_KEYS: + case ACTION_LIST_CATALOG: + case ACTION_DUMP_CATALOG: + case ACTION_UPDATE_CATALOG: + case ACTION_FLUSH: + case ACTION_SYNC: + case ACTION_ROTATE: + assert_not_reached(); + + case ACTION_PRINT_HEADER: + journal_print_header(j); + r = 0; + goto finish; + + case ACTION_VERIFY: + r = verify(j, !arg_quiet); + goto finish; + + case ACTION_DISK_USAGE: { + uint64_t bytes = 0; + + r = sd_journal_get_usage(j, &bytes); + if (r < 0) + goto finish; + + printf("Archived and active journals take up %s in the file system.\n", + FORMAT_BYTES(bytes)); + goto finish; + } + + case ACTION_LIST_BOOTS: + r = list_boots(j); + goto finish; + + case ACTION_ROTATE_AND_VACUUM: + + r = rotate(); + if (r < 0) + goto finish; + + _fallthrough_; + + case ACTION_VACUUM: { + Directory *d; + + HASHMAP_FOREACH(d, j->directories_by_path) { + int q; + + q = journal_directory_vacuum(d->path, arg_vacuum_size, arg_vacuum_n_files, arg_vacuum_time, NULL, !arg_quiet); + if (q < 0) + r = log_error_errno(q, "Failed to vacuum %s: %m", d->path); + } + + goto finish; + } + + case ACTION_LIST_FIELD_NAMES: { + const char *field; + + SD_JOURNAL_FOREACH_FIELD(j, field) { + printf("%s\n", field); + n_shown++; + } + + r = 0; + goto finish; + } + + case ACTION_SHOW: + case ACTION_LIST_FIELDS: + break; + + default: + assert_not_reached(); + } + + if (arg_boot_offset != 0 && + sd_journal_has_runtime_files(j) > 0 && + sd_journal_has_persistent_files(j) == 0) { + log_info("Specifying boot ID or boot offset has no effect, no persistent journal was found."); + r = 0; + goto finish; + } + /* add_boot() 
must be called first! + * It may need to seek the journal to find parent boot IDs. */ + r = add_boot(j); + if (r < 0) + goto finish; + + r = add_dmesg(j); + if (r < 0) + goto finish; + + r = add_units(j); + if (r < 0) { + log_error_errno(r, "Failed to add filter for units: %m"); + goto finish; + } + + r = add_syslog_identifier(j); + if (r < 0) { + log_error_errno(r, "Failed to add filter for syslog identifiers: %m"); + goto finish; + } + + r = add_priorities(j); + if (r < 0) + goto finish; + + r = add_facilities(j); + if (r < 0) + goto finish; + + r = add_matches(j, argv + optind); + if (r < 0) + goto finish; + + if (DEBUG_LOGGING) { + _cleanup_free_ char *filter = NULL; + + filter = journal_make_match_string(j); + if (!filter) + return log_oom(); + + log_debug("Journal filter: %s", filter); + } + + if (arg_action == ACTION_LIST_FIELDS) { + const void *data; + size_t size; + + assert(arg_field); + + r = sd_journal_set_data_threshold(j, 0); + if (r < 0) { + log_error_errno(r, "Failed to unset data size threshold: %m"); + goto finish; + } + + r = sd_journal_query_unique(j, arg_field); + if (r < 0) { + log_error_errno(r, "Failed to query unique data objects: %m"); + goto finish; + } + + SD_JOURNAL_FOREACH_UNIQUE(j, data, size) { + const void *eq; + + if (arg_lines >= 0 && n_shown >= arg_lines) + break; + + eq = memchr(data, '=', size); + if (eq) + printf("%.*s\n", (int) (size - ((const uint8_t*) eq - (const uint8_t*) data + 1)), (const char*) eq + 1); + else + printf("%.*s\n", (int) size, (const char*) data); + + n_shown++; + } + + r = 0; + goto finish; + } + + /* Opening the fd now means the first sd_journal_wait() will actually wait */ + if (arg_follow) { + poll_fd = sd_journal_get_fd(j); + if (poll_fd == -EMFILE) { + log_warning_errno(poll_fd, "Insufficient watch descriptors available. 
Reverting to -n."); + arg_follow = false; + } else if (poll_fd == -EMEDIUMTYPE) { + log_error_errno(poll_fd, "The --follow switch is not supported in conjunction with reading from STDIN."); + goto finish; + } else if (poll_fd < 0) { + log_error_errno(poll_fd, "Failed to get journal fd: %m"); + goto finish; + } + } + + if (arg_cursor || arg_after_cursor || arg_cursor_file) { + _cleanup_free_ char *cursor_from_file = NULL; + const char *cursor = arg_cursor ?: arg_after_cursor; + + if (arg_cursor_file) { + r = read_one_line_file(arg_cursor_file, &cursor_from_file); + if (r < 0 && r != -ENOENT) { + log_error_errno(r, "Failed to read cursor file %s: %m", arg_cursor_file); + goto finish; + } + + if (r > 0) { + cursor = cursor_from_file; + after_cursor = true; + } + } else + after_cursor = arg_after_cursor; + + if (cursor) { + r = sd_journal_seek_cursor(j, cursor); + if (r < 0) { + log_error_errno(r, "Failed to seek to cursor: %m"); + goto finish; + } + use_cursor = true; + } + } + + if (use_cursor) { + if (!arg_reverse) + r = sd_journal_next_skip(j, 1 + after_cursor); + else + r = sd_journal_previous_skip(j, 1 + after_cursor); + + if (after_cursor && r < 2) { + /* We couldn't find the next entry after the cursor. 
*/ + if (arg_follow) + need_seek = true; + else + arg_lines = 0; + } + + } else if (arg_since_set && !arg_reverse) { + r = sd_journal_seek_realtime_usec(j, arg_since); + if (r < 0) { + log_error_errno(r, "Failed to seek to date: %m"); + goto finish; + } + r = sd_journal_next(j); + + } else if (arg_until_set && arg_reverse) { + r = sd_journal_seek_realtime_usec(j, arg_until); + if (r < 0) { + log_error_errno(r, "Failed to seek to date: %m"); + goto finish; + } + r = sd_journal_previous(j); + + } else if (arg_reverse) { + r = sd_journal_seek_tail(j); + if (r < 0) { + log_error_errno(r, "Failed to seek to tail: %m"); + goto finish; + } + + r = sd_journal_previous(j); + + } else if (arg_lines >= 0) { + r = sd_journal_seek_tail(j); + if (r < 0) { + log_error_errno(r, "Failed to seek to tail: %m"); + goto finish; + } + + r = sd_journal_previous_skip(j, arg_lines); + + } else { + r = sd_journal_seek_head(j); + if (r < 0) { + log_error_errno(r, "Failed to seek to head: %m"); + goto finish; + } + + r = sd_journal_next(j); + } + + if (r < 0) { + log_error_errno(r, "Failed to iterate through journal: %m"); + goto finish; + } + if (r == 0) + need_seek = true; + + if (!arg_follow) + pager_open(arg_pager_flags); + + if (!arg_quiet && (arg_lines != 0 || arg_follow) && DEBUG_LOGGING) { + usec_t start, end; + char start_buf[FORMAT_TIMESTAMP_MAX], end_buf[FORMAT_TIMESTAMP_MAX]; + + r = sd_journal_get_cutoff_realtime_usec(j, &start, &end); + if (r < 0) { + log_error_errno(r, "Failed to get cutoff: %m"); + goto finish; + } + + if (r > 0) { + if (arg_follow) + printf("-- Journal begins at %s. --\n", + format_timestamp_maybe_utc(start_buf, sizeof(start_buf), start)); + else + printf("-- Journal begins at %s, ends at %s. 
--\n", + format_timestamp_maybe_utc(start_buf, sizeof(start_buf), start), + format_timestamp_maybe_utc(end_buf, sizeof(end_buf), end)); + } + } + + for (;;) { + while (arg_lines < 0 || n_shown < arg_lines || (arg_follow && !first_line)) { + int flags; + size_t highlight[2] = {}; + + if (need_seek) { + if (!arg_reverse) + r = sd_journal_next(j); + else + r = sd_journal_previous(j); + if (r < 0) { + log_error_errno(r, "Failed to iterate through journal: %m"); + goto finish; + } + if (r == 0) + break; + } + + if (arg_until_set && !arg_reverse) { + usec_t usec; + + r = sd_journal_get_realtime_usec(j, &usec); + if (r < 0) { + log_error_errno(r, "Failed to determine timestamp: %m"); + goto finish; + } + if (usec > arg_until) + break; + } + + if (arg_since_set && arg_reverse) { + usec_t usec; + + r = sd_journal_get_realtime_usec(j, &usec); + if (r < 0) { + log_error_errno(r, "Failed to determine timestamp: %m"); + goto finish; + } + if (usec < arg_since) + break; + } + + if (!arg_merge && !arg_quiet) { + sd_id128_t boot_id; + + r = sd_journal_get_monotonic_usec(j, NULL, &boot_id); + if (r >= 0) { + if (previous_boot_id_valid && + !sd_id128_equal(boot_id, previous_boot_id)) + printf("%s-- Boot "SD_ID128_FORMAT_STR" --%s\n", + ansi_highlight(), SD_ID128_FORMAT_VAL(boot_id), ansi_normal()); + + previous_boot_id = boot_id; + previous_boot_id_valid = true; + } + } + + if (arg_compiled_pattern) { + const void *message; + size_t len; + + r = sd_journal_get_data(j, "MESSAGE", &message, &len); + if (r < 0) { + if (r == -ENOENT) { + need_seek = true; + continue; + } + + log_error_errno(r, "Failed to get MESSAGE field: %m"); + goto finish; + } + + assert_se(message = startswith(message, "MESSAGE=")); + + r = pattern_matches_and_log(arg_compiled_pattern, message, + len - strlen("MESSAGE="), highlight); + if (r < 0) + goto finish; + if (r == 0) { + need_seek = true; + continue; + } + } + + flags = + arg_all * OUTPUT_SHOW_ALL | + arg_full * OUTPUT_FULL_WIDTH | + colors_enabled() * 
OUTPUT_COLOR | + arg_catalog * OUTPUT_CATALOG | + arg_utc * OUTPUT_UTC | + arg_no_hostname * OUTPUT_NO_HOSTNAME; + + r = show_journal_entry(stdout, j, arg_output, 0, flags, + arg_output_fields, highlight, &ellipsized, + &previous_ts_output, &previous_boot_id_output); + need_seek = true; + if (r == -EADDRNOTAVAIL) + break; + else if (r < 0) + goto finish; + + n_shown++; + + /* If journalctl take a long time to process messages, and during that time journal file + * rotation occurs, a journalctl client will keep those rotated files open until it calls + * sd_journal_process(), which typically happens as a result of calling sd_journal_wait() below + * in the "following" case. By periodically calling sd_journal_process() during the processing + * loop we shrink the window of time a client instance has open file descriptors for rotated + * (deleted) journal files. */ + if ((n_shown % PROCESS_INOTIFY_INTERVAL) == 0) { + r = sd_journal_process(j); + if (r < 0) { + log_error_errno(r, "Failed to process inotify events: %m"); + goto finish; + } + } + } + + if (!arg_follow) { + if (n_shown == 0 && !arg_quiet) + printf("-- No entries --\n"); + break; + } + + fflush(stdout); + + r = wait_for_change(j, poll_fd); + if (r < 0) + goto finish; + + first_line = false; + } + + if (arg_show_cursor || arg_cursor_file) { + _cleanup_free_ char *cursor = NULL; + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0 && r != -EADDRNOTAVAIL) + log_error_errno(r, "Failed to get cursor: %m"); + else if (r >= 0) { + if (arg_show_cursor) + printf("-- cursor: %s\n", cursor); + + if (arg_cursor_file) { + r = write_string_file(arg_cursor_file, cursor, + WRITE_STRING_FILE_CREATE | + WRITE_STRING_FILE_ATOMIC); + if (r < 0) + log_error_errno(r, + "Failed to write new cursor to %s: %m", + arg_cursor_file); + } + } + } + +finish: + pager_close(); + + if (arg_compiled_pattern && r == 0 && n_shown == 0) + /* --grep was used, no error was thrown, but the pattern didn't + * match anything. 
Let's mimic grep's behavior here and return + * a non-zero exit code, so journalctl --grep can be used + * in scripts and such */ + r = -ENOENT; + + return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/journal/journald-audit.c b/src/journal/journald-audit.c new file mode 100644 index 0000000..3e87a93 --- /dev/null +++ b/src/journal/journald-audit.c @@ -0,0 +1,555 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <malloc.h> + +#include "alloc-util.h" +#include "audit-type.h" +#include "errno-util.h" +#include "fd-util.h" +#include "hexdecoct.h" +#include "io-util.h" +#include "journald-audit.h" +#include "missing_audit.h" +#include "string-util.h" + +typedef struct MapField { + const char *audit_field; + const char *journal_field; + int (*map)(const char *field, const char **p, struct iovec *iovec, size_t *n); +} MapField; + +static int map_simple_field( + const char *field, + const char **p, + struct iovec *iovec, + size_t *n) { + + _cleanup_free_ char *c = NULL; + size_t l = 0; + const char *e; + + assert(field); + assert(p); + assert(iovec); + assert(n); + + l = strlen(field); + c = malloc(l + 1); + if (!c) + return -ENOMEM; + + memcpy(c, field, l); + for (e = *p; !IN_SET(*e, 0, ' '); e++) { + if (!GREEDY_REALLOC(c, l+2)) + return -ENOMEM; + + c[l++] = *e; + } + + c[l] = 0; + + iovec[(*n)++] = IOVEC_MAKE(c, l); + + *p = e; + c = NULL; + + return 1; +} + +static int map_string_field_internal( + const char *field, + const char **p, + struct iovec *iovec, + size_t *n, + bool filter_printable) { + + _cleanup_free_ char *c = NULL; + const char *s, *e; + size_t l; + + assert(field); + assert(p); + assert(iovec); + assert(n); + + /* The kernel formats string fields in one of two formats. 
*/ + + if (**p == '"') { + /* Normal quoted syntax */ + s = *p + 1; + e = strchr(s, '"'); + if (!e) + return 0; + + l = strlen(field) + (e - s); + c = malloc(l+1); + if (!c) + return -ENOMEM; + + *((char*) mempcpy(stpcpy(c, field), s, e - s)) = 0; + + e += 1; + + } else if (unhexchar(**p) >= 0) { + /* Hexadecimal escaping */ + l = strlen(field); + c = malloc(l + 2); + if (!c) + return -ENOMEM; + + memcpy(c, field, l); + for (e = *p; !IN_SET(*e, 0, ' '); e += 2) { + int a, b; + uint8_t x; + + a = unhexchar(e[0]); + if (a < 0) + return 0; + + b = unhexchar(e[1]); + if (b < 0) + return 0; + + x = ((uint8_t) a << 4 | (uint8_t) b); + + if (filter_printable && x < (uint8_t) ' ') + x = (uint8_t) ' '; + + if (!GREEDY_REALLOC(c, l+2)) + return -ENOMEM; + + c[l++] = (char) x; + } + + c[l] = 0; + } else + return 0; + + iovec[(*n)++] = IOVEC_MAKE(c, l); + + *p = e; + c = NULL; + + return 1; +} + +static int map_string_field(const char *field, const char **p, struct iovec *iovec, size_t *n) { + return map_string_field_internal(field, p, iovec, n, false); +} + +static int map_string_field_printable(const char *field, const char **p, struct iovec *iovec, size_t *n) { + return map_string_field_internal(field, p, iovec, n, true); +} + +static int map_generic_field( + const char *prefix, + const char **p, + struct iovec *iovec, + size_t *n) { + + const char *e, *f; + char *c, *t; + int r; + + /* Implements fallback mappings for all fields we don't know */ + + for (e = *p; e < *p + 16; e++) { + + if (IN_SET(*e, 0, ' ')) + return 0; + + if (*e == '=') + break; + + if (!(ascii_isalpha(*e) || + ascii_isdigit(*e) || + IN_SET(*e, '_', '-'))) + return 0; + } + + if (e <= *p || e >= *p + 16) + return 0; + + c = newa(char, strlen(prefix) + (e - *p) + 2); + + t = stpcpy(c, prefix); + for (f = *p; f < e; f++) { + char x; + + if (*f >= 'a' && *f <= 'z') + x = (*f - 'a') + 'A'; /* uppercase */ + else if (*f == '-') + x = '_'; /* dashes → underscores */ + else + x = *f; + + *(t++) = x; + } + 
strcpy(t, "="); + + e++; + + r = map_simple_field(c, &e, iovec, n); + if (r < 0) + return r; + + *p = e; + return r; +} + +/* Kernel fields are those occurring in the audit string before + * msg='. All of these fields are trusted, hence carry the "_" prefix. + * We try to translate the fields we know into our native names. The + * other's are generically mapped to _AUDIT_FIELD_XYZ= */ +static const MapField map_fields_kernel[] = { + + /* First, we map certain well-known audit fields into native + * well-known fields */ + { "pid=", "_PID=", map_simple_field }, + { "ppid=", "_PPID=", map_simple_field }, + { "uid=", "_UID=", map_simple_field }, + { "euid=", "_EUID=", map_simple_field }, + { "fsuid=", "_FSUID=", map_simple_field }, + { "gid=", "_GID=", map_simple_field }, + { "egid=", "_EGID=", map_simple_field }, + { "fsgid=", "_FSGID=", map_simple_field }, + { "tty=", "_TTY=", map_simple_field }, + { "ses=", "_AUDIT_SESSION=", map_simple_field }, + { "auid=", "_AUDIT_LOGINUID=", map_simple_field }, + { "subj=", "_SELINUX_CONTEXT=", map_simple_field }, + { "comm=", "_COMM=", map_string_field }, + { "exe=", "_EXE=", map_string_field }, + { "proctitle=", "_CMDLINE=", map_string_field_printable }, + + /* Some fields don't map to native well-known fields. However, + * we know that they are string fields, hence let's undo + * string field escaping for them, though we stick to the + * generic field names. */ + { "path=", "_AUDIT_FIELD_PATH=", map_string_field }, + { "dev=", "_AUDIT_FIELD_DEV=", map_string_field }, + { "name=", "_AUDIT_FIELD_NAME=", map_string_field }, + {} +}; + +/* Userspace fields are those occurring in the audit string after + * msg='. All of these fields are untrusted, hence carry no "_" + * prefix. 
We map the fields we don't know to AUDIT_FIELD_XYZ= */ +static const MapField map_fields_userspace[] = { + { "cwd=", "AUDIT_FIELD_CWD=", map_string_field }, + { "cmd=", "AUDIT_FIELD_CMD=", map_string_field }, + { "acct=", "AUDIT_FIELD_ACCT=", map_string_field }, + { "exe=", "AUDIT_FIELD_EXE=", map_string_field }, + { "comm=", "AUDIT_FIELD_COMM=", map_string_field }, + {} +}; + +static int map_all_fields( + const char *p, + const MapField map_fields[], + const char *prefix, + bool handle_msg, + struct iovec *iovec, + size_t *n, + size_t m) { + + int r; + + assert(p); + assert(iovec); + assert(n); + + for (;;) { + bool mapped = false; + const MapField *mf; + const char *v; + + if (*n >= m) { + log_debug( + "More fields in audit message than audit field limit (%i), skipping remaining fields", + N_IOVEC_AUDIT_FIELDS); + return 0; + } + + p += strspn(p, WHITESPACE); + + if (*p == 0) + return 0; + + if (handle_msg) { + v = startswith(p, "msg='"); + if (v) { + _cleanup_free_ char *c = NULL; + const char *e; + + /* Userspace message. It's enclosed in + simple quotation marks, is not + escaped, but the last field in the + line, hence let's remove the + quotation mark, and apply the + userspace mapping instead of the + kernel mapping. 
*/ + + e = endswith(v, "'"); + if (!e) + return 0; /* don't continue splitting up if the final quotation mark is missing */ + + c = strndup(v, e - v); + if (!c) + return -ENOMEM; + + return map_all_fields(c, map_fields_userspace, "AUDIT_FIELD_", false, iovec, n, m); + } + } + + /* Try to map the kernel fields to our own names */ + for (mf = map_fields; mf->audit_field; mf++) { + v = startswith(p, mf->audit_field); + if (!v) + continue; + + r = mf->map(mf->journal_field, &v, iovec, n); + if (r < 0) + return log_debug_errno(r, "Failed to parse audit array: %m"); + + if (r > 0) { + mapped = true; + p = v; + break; + } + } + + if (!mapped) { + r = map_generic_field(prefix, &p, iovec, n); + if (r < 0) + return log_debug_errno(r, "Failed to parse audit array: %m"); + + if (r == 0) + /* Couldn't process as generic field, let's just skip over it */ + p += strcspn(p, WHITESPACE); + } + } +} + +void process_audit_string(Server *s, int type, const char *data, size_t size) { + size_t n = 0, z; + uint64_t seconds, msec, id; + const char *p, *type_name; + char id_field[sizeof("_AUDIT_ID=") + DECIMAL_STR_MAX(uint64_t)], + type_field[sizeof("_AUDIT_TYPE=") + DECIMAL_STR_MAX(int)], + source_time_field[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)]; + struct iovec iovec[N_IOVEC_META_FIELDS + 8 + N_IOVEC_AUDIT_FIELDS]; + char *m, *type_field_name; + int k; + + assert(s); + + if (size <= 0) + return; + + if (!data) + return; + + /* Note that the input buffer is NUL terminated, but let's + * check whether there is a spurious NUL byte */ + if (memchr(data, 0, size)) + return; + + p = startswith(data, "audit"); + if (!p) + return; + + k = 0; + if (sscanf(p, "(%" PRIu64 ".%" PRIu64 ":%" PRIu64 "):%n", + &seconds, + &msec, + &id, + &k) != 3 || k == 0) + return; + + p += k; + p += strspn(p, WHITESPACE); + + if (isempty(p)) + return; + + iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=audit"); + + sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64, + (usec_t) 
seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC); + iovec[n++] = IOVEC_MAKE_STRING(source_time_field); + + sprintf(type_field, "_AUDIT_TYPE=%i", type); + iovec[n++] = IOVEC_MAKE_STRING(type_field); + + sprintf(id_field, "_AUDIT_ID=%" PRIu64, id); + iovec[n++] = IOVEC_MAKE_STRING(id_field); + + assert_cc(4 == LOG_FAC(LOG_AUTH)); + iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4"); + iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit"); + + type_name = audit_type_name_alloca(type); + + type_field_name = strjoina("_AUDIT_TYPE_NAME=", type_name); + iovec[n++] = IOVEC_MAKE_STRING(type_field_name); + + m = strjoina("MESSAGE=", type_name, " ", p); + iovec[n++] = IOVEC_MAKE_STRING(m); + + z = n; + + map_all_fields(p, map_fields_kernel, "_AUDIT_FIELD_", true, iovec, &n, n + N_IOVEC_AUDIT_FIELDS); + + server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, LOG_NOTICE, 0); + + /* free() all entries that map_all_fields() added. All others + * are allocated on the stack or are constant. 
*/ + + for (; z < n; z++) + free(iovec[z].iov_base); +} + +void server_process_audit_message( + Server *s, + const void *buffer, + size_t buffer_size, + const struct ucred *ucred, + const union sockaddr_union *sa, + socklen_t salen) { + + const struct nlmsghdr *nl = buffer; + + assert(s); + + if (buffer_size < ALIGN(sizeof(struct nlmsghdr))) + return; + + assert(buffer); + + /* Filter out fake data */ + if (!sa || + salen != sizeof(struct sockaddr_nl) || + sa->nl.nl_family != AF_NETLINK || + sa->nl.nl_pid != 0) { + log_debug("Audit netlink message from invalid sender."); + return; + } + + if (!ucred || ucred->pid != 0) { + log_debug("Audit netlink message with invalid credentials."); + return; + } + + if (!NLMSG_OK(nl, buffer_size)) { + log_error("Audit netlink message truncated."); + return; + } + + /* Ignore special Netlink messages */ + if (IN_SET(nl->nlmsg_type, NLMSG_NOOP, NLMSG_ERROR)) + return; + + /* Except AUDIT_USER, all messages below AUDIT_FIRST_USER_MSG are control messages, let's ignore those */ + if (nl->nlmsg_type < AUDIT_FIRST_USER_MSG && nl->nlmsg_type != AUDIT_USER) + return; + + process_audit_string(s, nl->nlmsg_type, NLMSG_DATA(nl), nl->nlmsg_len - ALIGN(sizeof(struct nlmsghdr))); +} + +static int enable_audit(int fd, bool b) { + struct { + union { + struct nlmsghdr header; + uint8_t header_space[NLMSG_HDRLEN]; + }; + struct audit_status body; + } _packed_ request = { + .header.nlmsg_len = NLMSG_LENGTH(sizeof(struct audit_status)), + .header.nlmsg_type = AUDIT_SET, + .header.nlmsg_flags = NLM_F_REQUEST, + .header.nlmsg_seq = 1, + .header.nlmsg_pid = 0, + .body.mask = AUDIT_STATUS_ENABLED, + .body.enabled = b, + }; + union sockaddr_union sa = { + .nl.nl_family = AF_NETLINK, + .nl.nl_pid = 0, + }; + struct iovec iovec = { + .iov_base = &request, + .iov_len = NLMSG_LENGTH(sizeof(struct audit_status)), + }; + struct msghdr mh = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_name = &sa.sa, + .msg_namelen = sizeof(sa.nl), + }; + + ssize_t n; + + n 
= sendmsg(fd, &mh, MSG_NOSIGNAL); + if (n < 0) + return -errno; + if (n != NLMSG_LENGTH(sizeof(struct audit_status))) + return -EIO; + + /* We don't wait for the result here, we can't do anything + * about it anyway */ + + return 0; +} + +int server_open_audit(Server *s) { + int r; + + if (s->audit_fd < 0) { + static const union sockaddr_union sa = { + .nl.nl_family = AF_NETLINK, + .nl.nl_pid = 0, + .nl.nl_groups = AUDIT_NLGRP_READLOG, + }; + + s->audit_fd = socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_AUDIT); + if (s->audit_fd < 0) { + if (ERRNO_IS_NOT_SUPPORTED(errno)) + log_debug("Audit not supported in the kernel."); + else + log_warning_errno(errno, "Failed to create audit socket, ignoring: %m"); + + return 0; + } + + if (bind(s->audit_fd, &sa.sa, sizeof(sa.nl)) < 0) { + log_warning_errno(errno, + "Failed to join audit multicast group. " + "The kernel is probably too old or multicast reading is not supported. " + "Ignoring: %m"); + s->audit_fd = safe_close(s->audit_fd); + return 0; + } + } else + (void) fd_nonblock(s->audit_fd, true); + + r = setsockopt_int(s->audit_fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_error_errno(r, "Failed to set SO_PASSCRED on audit socket: %m"); + + r = sd_event_add_io(s->event, &s->audit_event_source, s->audit_fd, EPOLLIN, server_process_datagram, s); + if (r < 0) + return log_error_errno(r, "Failed to add audit fd to event loop: %m"); + + if (s->set_audit >= 0) { + /* We are listening now, try to enable audit if configured so */ + r = enable_audit(s->audit_fd, s->set_audit); + if (r < 0) + log_warning_errno(r, "Failed to issue audit enable call: %m"); + else if (s->set_audit > 0) + log_debug("Auditing in kernel turned on."); + else + log_debug("Auditing in kernel turned off."); + } + + return 0; +} diff --git a/src/journal/journald-audit.h b/src/journal/journald-audit.h new file mode 100644 index 0000000..79f3da9 --- /dev/null +++ b/src/journal/journald-audit.h @@ -0,0 +1,11 @@ +/* 
SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journald-server.h" +#include "socket-util.h" + +void server_process_audit_message(Server *s, const void *buffer, size_t buffer_size, const struct ucred *ucred, const union sockaddr_union *sa, socklen_t salen); + +void process_audit_string(Server *s, int type, const char *data, size_t size); + +int server_open_audit(Server *s); diff --git a/src/journal/journald-console.c b/src/journal/journald-console.c new file mode 100644 index 0000000..2035e2d --- /dev/null +++ b/src/journal/journald-console.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <sys/socket.h> +#include <time.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "io-util.h" +#include "journald-console.h" +#include "journald-server.h" +#include "parse-util.h" +#include "process-util.h" +#include "stdio-util.h" +#include "terminal-util.h" + +static bool prefix_timestamp(void) { + + static int cached_printk_time = -1; + + if (_unlikely_(cached_printk_time < 0)) { + _cleanup_free_ char *p = NULL; + + cached_printk_time = + read_one_line_file("/sys/module/printk/parameters/time", &p) >= 0 + && parse_boolean(p) > 0; + } + + return cached_printk_time; +} + +void server_forward_console( + Server *s, + int priority, + const char *identifier, + const char *message, + const struct ucred *ucred) { + + struct iovec iovec[5]; + struct timespec ts; + char tbuf[STRLEN("[] ") + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1]; + char header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t)]; + _cleanup_free_ char *ident_buf = NULL; + _cleanup_close_ int fd = -1; + const char *tty; + int n = 0; + + assert(s); + assert(message); + + if (LOG_PRI(priority) > s->max_level_console) + return; + + /* First: timestamp */ + if (prefix_timestamp()) { + assert_se(clock_gettime(CLOCK_MONOTONIC, &ts) == 0); + xsprintf(tbuf, 
"[%5"PRI_TIME".%06"PRI_NSEC"] ", + ts.tv_sec, + (nsec_t)ts.tv_nsec / 1000); + + iovec[n++] = IOVEC_MAKE_STRING(tbuf); + } + + /* Second: identifier and PID */ + if (ucred) { + if (!identifier) { + (void) get_process_comm(ucred->pid, &ident_buf); + identifier = ident_buf; + } + + xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid); + + if (identifier) + iovec[n++] = IOVEC_MAKE_STRING(identifier); + + iovec[n++] = IOVEC_MAKE_STRING(header_pid); + } else if (identifier) { + iovec[n++] = IOVEC_MAKE_STRING(identifier); + iovec[n++] = IOVEC_MAKE_STRING(": "); + } + + /* Fourth: message */ + iovec[n++] = IOVEC_MAKE_STRING(message); + iovec[n++] = IOVEC_MAKE_STRING("\n"); + + tty = s->tty_path ?: "/dev/console"; + + /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a + * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely, + * but minimizes the time window the kernel might end up killing journald due to SAK). It also makes things + * easier for us so that we don't have to recover from hangups and suchlike triggered on the console. 
*/ + + fd = open_terminal(tty, O_WRONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) { + log_debug_errno(fd, "Failed to open %s for logging: %m", tty); + return; + } + + if (writev(fd, iovec, n) < 0) + log_debug_errno(errno, "Failed to write to %s for logging: %m", tty); +} diff --git a/src/journal/journald-console.h b/src/journal/journald-console.h new file mode 100644 index 0000000..0a26f9c --- /dev/null +++ b/src/journal/journald-console.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journald-server.h" + +void server_forward_console(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred); diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c new file mode 100644 index 0000000..0953fb2 --- /dev/null +++ b/src/journal/journald-context.c @@ -0,0 +1,786 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_SELINUX +#include <selinux/selinux.h> +#endif + +#include "alloc-util.h" +#include "audit-util.h" +#include "cgroup-util.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "journal-util.h" +#include "journald-context.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "procfs-util.h" +#include "string-util.h" +#include "syslog-util.h" +#include "unaligned.h" +#include "user-util.h" + +/* This implements a metadata cache for clients, which are identified by their PID. Requesting metadata through /proc + * is expensive, hence let's cache the data if we can. Note that this means the metadata might be out-of-date when we + * store it, but it might already be anyway, as we request the data asynchronously from /proc at a different time the + * log entry was originally created. We hence just increase the "window of inaccuracy" a bit. + * + * The cache is indexed by the PID. 
Entries may be "pinned" in the cache, in which case the entries are not removed + * until they are unpinned. Unpinned entries are kept around until cache pressure is seen. Cache entries older than 5s + * are never used (a sad attempt to deal with the UNIX weakness of PIDs reuse), cache entries older than 1s are + * refreshed in an incremental way (meaning: data is reread from /proc, but any old data we can't refresh is not + * flushed out). Data newer than 1s is used immediately without refresh. + * + * Log stream clients (i.e. all clients using the AF_UNIX/SOCK_STREAM stdout/stderr transport) will pin a cache entry + * as long as their socket is connected. Note that cache entries are shared between different transports. That means a + * cache entry pinned for the stream connection logic may be reused for the syslog or native protocols. + * + * Caching metadata like this has two major benefits: + * + * 1. Reading metadata is expensive, and we can thus substantially speed up log processing under flood. + * + * 2. Because metadata caching is shared between stream and datagram transports and stream connections pin a cache + * entry there's a good chance we can properly map a substantial set of datagram log messages to their originating + * service, as all services (unless explicitly configured otherwise) will have their stdout/stderr connected to a + * stream connection. This should improve cases where a service process logs immediately before exiting and we + * previously had trouble associating the log message with the service. + * + * NB: With and without the metadata cache: the implicitly added entry metadata in the journal (with the exception of + * UID/PID/GID and SELinux label) must be understood as possibly slightly out of sync (i.e. sometimes slightly older + * and sometimes slightly newer than what was current at the log event). 
+ */ + +/* We refresh every 1s */ +#define REFRESH_USEC (1*USEC_PER_SEC) + +/* Data older than 5s we flush out */ +#define MAX_USEC (5*USEC_PER_SEC) + +/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in + * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream + * clients itself is limited.) */ +#define CACHE_MAX_FALLBACK 128U +#define CACHE_MAX_MAX (16*1024U) +#define CACHE_MAX_MIN 64U + +static size_t cache_max(void) { + static size_t cached = -1; + + if (cached == SIZE_MAX) { + uint64_t mem_total; + int r; + + r = procfs_memory_get(&mem_total, NULL); + if (r < 0) { + log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m"); + cached = CACHE_MAX_FALLBACK; + } else + /* Cache entries are usually a few kB, but the process cmdline is controlled by the + * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to + * 1/8th of memory may be used by the cache. + * + * In the common case, this formula gives 64 cache entries for each GB of RAM. 
+ */ + cached = CLAMP(mem_total / 8 / sc_arg_max(), CACHE_MAX_MIN, CACHE_MAX_MAX); + } + + return cached; +} + +static int client_context_compare(const void *a, const void *b) { + const ClientContext *x = a, *y = b; + int r; + + r = CMP(x->timestamp, y->timestamp); + if (r != 0) + return r; + + return CMP(x->pid, y->pid); +} + +static int client_context_new(Server *s, pid_t pid, ClientContext **ret) { + _cleanup_free_ ClientContext *c = NULL; + int r; + + assert(s); + assert(pid_is_valid(pid)); + assert(ret); + + r = prioq_ensure_allocated(&s->client_contexts_lru, client_context_compare); + if (r < 0) + return r; + + c = new(ClientContext, 1); + if (!c) + return -ENOMEM; + + *c = (ClientContext) { + .pid = pid, + .uid = UID_INVALID, + .gid = GID_INVALID, + .auditid = AUDIT_SESSION_INVALID, + .loginuid = UID_INVALID, + .owner_uid = UID_INVALID, + .lru_index = PRIOQ_IDX_NULL, + .timestamp = USEC_INFINITY, + .extra_fields_mtime = NSEC_INFINITY, + .log_level_max = -1, + .log_ratelimit_interval = s->ratelimit_interval, + .log_ratelimit_burst = s->ratelimit_burst, + }; + + r = hashmap_ensure_put(&s->client_contexts, NULL, PID_TO_PTR(pid), c); + if (r < 0) + return r; + + *ret = TAKE_PTR(c); + return 0; +} + +static void client_context_reset(Server *s, ClientContext *c) { + assert(s); + assert(c); + + c->timestamp = USEC_INFINITY; + + c->uid = UID_INVALID; + c->gid = GID_INVALID; + + c->comm = mfree(c->comm); + c->exe = mfree(c->exe); + c->cmdline = mfree(c->cmdline); + c->capeff = mfree(c->capeff); + + c->auditid = AUDIT_SESSION_INVALID; + c->loginuid = UID_INVALID; + + c->cgroup = mfree(c->cgroup); + c->session = mfree(c->session); + c->owner_uid = UID_INVALID; + c->unit = mfree(c->unit); + c->user_unit = mfree(c->user_unit); + c->slice = mfree(c->slice); + c->user_slice = mfree(c->user_slice); + + c->invocation_id = SD_ID128_NULL; + + c->label = mfree(c->label); + c->label_size = 0; + + c->extra_fields_iovec = mfree(c->extra_fields_iovec); + c->extra_fields_n_iovec = 
0; + c->extra_fields_data = mfree(c->extra_fields_data); + c->extra_fields_mtime = NSEC_INFINITY; + + c->log_level_max = -1; + + c->log_ratelimit_interval = s->ratelimit_interval; + c->log_ratelimit_burst = s->ratelimit_burst; +} + +static ClientContext* client_context_free(Server *s, ClientContext *c) { + assert(s); + + if (!c) + return NULL; + + assert_se(hashmap_remove(s->client_contexts, PID_TO_PTR(c->pid)) == c); + + if (c->in_lru) + assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0); + + client_context_reset(s, c); + + return mfree(c); +} + +static void client_context_read_uid_gid(ClientContext *c, const struct ucred *ucred) { + assert(c); + assert(pid_is_valid(c->pid)); + + /* The ucred data passed in is always the most current and accurate, if we have any. Use it. */ + if (ucred && uid_is_valid(ucred->uid)) + c->uid = ucred->uid; + else + (void) get_process_uid(c->pid, &c->uid); + + if (ucred && gid_is_valid(ucred->gid)) + c->gid = ucred->gid; + else + (void) get_process_gid(c->pid, &c->gid); +} + +static void client_context_read_basic(ClientContext *c) { + char *t; + + assert(c); + assert(pid_is_valid(c->pid)); + + if (get_process_comm(c->pid, &t) >= 0) + free_and_replace(c->comm, t); + + if (get_process_exe(c->pid, &t) >= 0) + free_and_replace(c->exe, t); + + if (get_process_cmdline(c->pid, SIZE_MAX, PROCESS_CMDLINE_QUOTE, &t) >= 0) + free_and_replace(c->cmdline, t); + + if (get_process_capeff(c->pid, &t) >= 0) + free_and_replace(c->capeff, t); +} + +static int client_context_read_label( + ClientContext *c, + const char *label, size_t label_size) { + + assert(c); + assert(pid_is_valid(c->pid)); + assert(label_size == 0 || label); + + if (label_size > 0) { + char *l; + + /* If we got an SELinux label passed in it counts. 
*/ + + l = newdup_suffix0(char, label, label_size); + if (!l) + return -ENOMEM; + + free_and_replace(c->label, l); + c->label_size = label_size; + } +#if HAVE_SELINUX + else { + char *con; + + /* If we got no SELinux label passed in, let's try to acquire one */ + + if (getpidcon(c->pid, &con) >= 0 && con) { + free_and_replace(c->label, con); + c->label_size = strlen(c->label); + } + } +#endif + + return 0; +} + +static int client_context_read_cgroup(Server *s, ClientContext *c, const char *unit_id) { + _cleanup_free_ char *t = NULL; + int r; + + assert(c); + + /* Try to acquire the current cgroup path */ + r = cg_pid_get_path_shifted(c->pid, s->cgroup_root, &t); + if (r < 0 || empty_or_root(t)) { + /* We use the unit ID passed in as fallback if we have nothing cached yet and cg_pid_get_path_shifted() + * failed or process is running in a root cgroup. Zombie processes are automatically migrated to root cgroup + * on cgroup v1 and we want to be able to map log messages from them too. */ + if (unit_id && !c->unit) { + c->unit = strdup(unit_id); + if (c->unit) + return 0; + } + + return r; + } + + /* Let's shortcut this if the cgroup path didn't change */ + if (streq_ptr(c->cgroup, t)) + return 0; + + free_and_replace(c->cgroup, t); + + (void) cg_path_get_session(c->cgroup, &t); + free_and_replace(c->session, t); + + if (cg_path_get_owner_uid(c->cgroup, &c->owner_uid) < 0) + c->owner_uid = UID_INVALID; + + (void) cg_path_get_unit(c->cgroup, &t); + free_and_replace(c->unit, t); + + (void) cg_path_get_user_unit(c->cgroup, &t); + free_and_replace(c->user_unit, t); + + (void) cg_path_get_slice(c->cgroup, &t); + free_and_replace(c->slice, t); + + (void) cg_path_get_user_slice(c->cgroup, &t); + free_and_replace(c->user_slice, t); + + return 0; +} + +static int client_context_read_invocation_id( + Server *s, + ClientContext *c) { + + _cleanup_free_ char *p = NULL, *value = NULL; + int r; + + assert(s); + assert(c); + + /* Read the invocation ID of a unit off a unit. 
+ * PID 1 stores it in a per-unit symlink in /run/systemd/units/ + * User managers store it in a per-unit symlink under /run/user/<uid>/systemd/units/ */ + + if (!c->unit) + return 0; + + if (c->user_unit) { + r = asprintf(&p, "/run/user/" UID_FMT "/systemd/units/invocation:%s", c->owner_uid, c->user_unit); + if (r < 0) + return r; + } else { + p = strjoin("/run/systemd/units/invocation:", c->unit); + if (!p) + return -ENOMEM; + } + + r = readlink_malloc(p, &value); + if (r < 0) + return r; + + return sd_id128_from_string(value, &c->invocation_id); +} + +static int client_context_read_log_level_max( + Server *s, + ClientContext *c) { + + _cleanup_free_ char *value = NULL; + const char *p; + int r, ll; + + if (!c->unit) + return 0; + + p = strjoina("/run/systemd/units/log-level-max:", c->unit); + r = readlink_malloc(p, &value); + if (r < 0) + return r; + + ll = log_level_from_string(value); + if (ll < 0) + return ll; + + c->log_level_max = ll; + return 0; +} + +static int client_context_read_extra_fields( + Server *s, + ClientContext *c) { + + _cleanup_free_ struct iovec *iovec = NULL; + size_t size = 0, n_iovec = 0, left; + _cleanup_free_ void *data = NULL; + _cleanup_fclose_ FILE *f = NULL; + struct stat st; + const char *p; + uint8_t *q; + int r; + + if (!c->unit) + return 0; + + p = strjoina("/run/systemd/units/log-extra-fields:", c->unit); + + if (c->extra_fields_mtime != NSEC_INFINITY) { + if (stat(p, &st) < 0) { + if (errno == ENOENT) + return 0; + + return -errno; + } + + if (timespec_load_nsec(&st.st_mtim) == c->extra_fields_mtime) + return 0; + } + + f = fopen(p, "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return -errno; + } + + if (fstat(fileno(f), &st) < 0) /* The file might have been replaced since the stat() above, let's get a new + * one, that matches the stuff we are reading */ + return -errno; + + r = read_full_stream(f, (char**) &data, &size); + if (r < 0) + return r; + + q = data, left = size; + while (left > 0) { + uint8_t *field, 
*eq; + uint64_t v, n; + + if (left < sizeof(uint64_t)) + return -EBADMSG; + + v = unaligned_read_le64(q); + if (v < 2) + return -EBADMSG; + + n = sizeof(uint64_t) + v; + if (left < n) + return -EBADMSG; + + field = q + sizeof(uint64_t); + + eq = memchr(field, '=', v); + if (!eq) + return -EBADMSG; + + if (!journal_field_valid((const char *) field, eq - field, false)) + return -EBADMSG; + + if (!GREEDY_REALLOC(iovec, n_iovec+1)) + return -ENOMEM; + + iovec[n_iovec++] = IOVEC_MAKE(field, v); + + left -= n, q += n; + } + + free(c->extra_fields_iovec); + free(c->extra_fields_data); + + c->extra_fields_iovec = TAKE_PTR(iovec); + c->extra_fields_n_iovec = n_iovec; + c->extra_fields_data = TAKE_PTR(data); + c->extra_fields_mtime = timespec_load_nsec(&st.st_mtim); + + return 0; +} + +static int client_context_read_log_ratelimit_interval(ClientContext *c) { + _cleanup_free_ char *value = NULL; + const char *p; + int r; + + assert(c); + + if (!c->unit) + return 0; + + p = strjoina("/run/systemd/units/log-rate-limit-interval:", c->unit); + r = readlink_malloc(p, &value); + if (r < 0) + return r; + + return safe_atou64(value, &c->log_ratelimit_interval); +} + +static int client_context_read_log_ratelimit_burst(ClientContext *c) { + _cleanup_free_ char *value = NULL; + const char *p; + int r; + + assert(c); + + if (!c->unit) + return 0; + + p = strjoina("/run/systemd/units/log-rate-limit-burst:", c->unit); + r = readlink_malloc(p, &value); + if (r < 0) + return r; + + return safe_atou(value, &c->log_ratelimit_burst); +} + +static void client_context_really_refresh( + Server *s, + ClientContext *c, + const struct ucred *ucred, + const char *label, size_t label_size, + const char *unit_id, + usec_t timestamp) { + + assert(s); + assert(c); + assert(pid_is_valid(c->pid)); + + if (timestamp == USEC_INFINITY) + timestamp = now(CLOCK_MONOTONIC); + + client_context_read_uid_gid(c, ucred); + client_context_read_basic(c); + (void) client_context_read_label(c, label, label_size); + + 
(void) audit_session_from_pid(c->pid, &c->auditid); + (void) audit_loginuid_from_pid(c->pid, &c->loginuid); + + (void) client_context_read_cgroup(s, c, unit_id); + (void) client_context_read_invocation_id(s, c); + (void) client_context_read_log_level_max(s, c); + (void) client_context_read_extra_fields(s, c); + (void) client_context_read_log_ratelimit_interval(c); + (void) client_context_read_log_ratelimit_burst(c); + + c->timestamp = timestamp; + + if (c->in_lru) { + assert(c->n_ref == 0); + assert_se(prioq_reshuffle(s->client_contexts_lru, c, &c->lru_index) >= 0); + } +} + +void client_context_maybe_refresh( + Server *s, + ClientContext *c, + const struct ucred *ucred, + const char *label, size_t label_size, + const char *unit_id, + usec_t timestamp) { + + assert(s); + assert(c); + + if (timestamp == USEC_INFINITY) + timestamp = now(CLOCK_MONOTONIC); + + /* No cached data so far? Let's fill it up */ + if (c->timestamp == USEC_INFINITY) + goto refresh; + + /* If the data isn't pinned and if the cashed data is older than the upper limit, we flush it out + * entirely. This follows the logic that as long as an entry is pinned the PID reuse is unlikely. 
*/ + if (c->n_ref == 0 && c->timestamp + MAX_USEC < timestamp) { + client_context_reset(s, c); + goto refresh; + } + + /* If the data is older than the lower limit, we refresh, but keep the old data for all we can't update */ + if (c->timestamp + REFRESH_USEC < timestamp) + goto refresh; + + /* If the data passed along doesn't match the cached data we also do a refresh */ + if (ucred && uid_is_valid(ucred->uid) && c->uid != ucred->uid) + goto refresh; + + if (ucred && gid_is_valid(ucred->gid) && c->gid != ucred->gid) + goto refresh; + + if (label_size > 0 && (label_size != c->label_size || memcmp(label, c->label, label_size) != 0)) + goto refresh; + + return; + +refresh: + client_context_really_refresh(s, c, ucred, label, label_size, unit_id, timestamp); +} + +static void client_context_try_shrink_to(Server *s, size_t limit) { + ClientContext *c; + usec_t t; + + assert(s); + + /* Flush any cache entries for PIDs that have already moved on. Don't do this + * too often, since it's a slow process. */ + t = now(CLOCK_MONOTONIC); + if (s->last_cache_pid_flush + MAX_USEC < t) { + unsigned n = prioq_size(s->client_contexts_lru), idx = 0; + + /* We do a number of iterations based on the initial size of the prioq. When we remove an + * item, a new item is moved into its places, and items to the right might be reshuffled. + */ + for (unsigned i = 0; i < n; i++) { + c = prioq_peek_by_index(s->client_contexts_lru, idx); + + assert(c->n_ref == 0); + + if (!pid_is_unwaited(c->pid)) + client_context_free(s, c); + else + idx ++; + } + + s->last_cache_pid_flush = t; + } + + /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without + * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of + * cache entries may very well grow beyond the limit, if all entries stored remain pinned. 
*/ + + while (hashmap_size(s->client_contexts) > limit) { + c = prioq_pop(s->client_contexts_lru); + if (!c) + break; /* All remaining entries are pinned, give up */ + + assert(c->in_lru); + assert(c->n_ref == 0); + + c->in_lru = false; + + client_context_free(s, c); + } +} + +void client_context_flush_all(Server *s) { + assert(s); + + /* Flush out all remaining entries. This assumes all references are already dropped. */ + + s->my_context = client_context_release(s, s->my_context); + s->pid1_context = client_context_release(s, s->pid1_context); + + client_context_try_shrink_to(s, 0); + + assert(prioq_size(s->client_contexts_lru) == 0); + assert(hashmap_size(s->client_contexts) == 0); + + s->client_contexts_lru = prioq_free(s->client_contexts_lru); + s->client_contexts = hashmap_free(s->client_contexts); +} + +static int client_context_get_internal( + Server *s, + pid_t pid, + const struct ucred *ucred, + const char *label, size_t label_len, + const char *unit_id, + bool add_ref, + ClientContext **ret) { + + ClientContext *c; + int r; + + assert(s); + assert(ret); + + if (!pid_is_valid(pid)) + return -EINVAL; + + c = hashmap_get(s->client_contexts, PID_TO_PTR(pid)); + if (c) { + + if (add_ref) { + if (c->in_lru) { + /* The entry wasn't pinned so far, let's remove it from the LRU list then */ + assert(c->n_ref == 0); + assert_se(prioq_remove(s->client_contexts_lru, c, &c->lru_index) >= 0); + c->in_lru = false; + } + + c->n_ref++; + } + + client_context_maybe_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY); + + *ret = c; + return 0; + } + + client_context_try_shrink_to(s, cache_max()-1); + + r = client_context_new(s, pid, &c); + if (r < 0) + return r; + + if (add_ref) + c->n_ref++; + else { + r = prioq_put(s->client_contexts_lru, c, &c->lru_index); + if (r < 0) { + client_context_free(s, c); + return r; + } + + c->in_lru = true; + } + + client_context_really_refresh(s, c, ucred, label, label_len, unit_id, USEC_INFINITY); + + *ret = c; + return 0; +} + 
+int client_context_get( + Server *s, + pid_t pid, + const struct ucred *ucred, + const char *label, size_t label_len, + const char *unit_id, + ClientContext **ret) { + + return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, false, ret); +} + +int client_context_acquire( + Server *s, + pid_t pid, + const struct ucred *ucred, + const char *label, size_t label_len, + const char *unit_id, + ClientContext **ret) { + + return client_context_get_internal(s, pid, ucred, label, label_len, unit_id, true, ret); +}; + +ClientContext *client_context_release(Server *s, ClientContext *c) { + assert(s); + + if (!c) + return NULL; + + assert(c->n_ref > 0); + assert(!c->in_lru); + + c->n_ref--; + if (c->n_ref > 0) + return NULL; + + /* The entry is not pinned anymore, let's add it to the LRU prioq if we can. If we can't we'll drop it + * right-away */ + + if (prioq_put(s->client_contexts_lru, c, &c->lru_index) < 0) + client_context_free(s, c); + else + c->in_lru = true; + + return NULL; +} + +void client_context_acquire_default(Server *s) { + int r; + + assert(s); + + /* Ensure that our own and PID1's contexts are always pinned. Our own context is particularly useful to + * generate driver messages. */ + + if (!s->my_context) { + struct ucred ucred = { + .pid = getpid_cached(), + .uid = getuid(), + .gid = getgid(), + }; + + r = client_context_acquire(s, ucred.pid, &ucred, NULL, 0, NULL, &s->my_context); + if (r < 0) + log_warning_errno(r, "Failed to acquire our own context, ignoring: %m"); + } + + if (!s->namespace && !s->pid1_context) { + /* Acquire PID1's context, but only if we are in non-namespaced mode, since PID 1 is only + * going to log to the non-namespaced journal instance. 
*/ + + r = client_context_acquire(s, 1, NULL, NULL, 0, NULL, &s->pid1_context); + if (r < 0) + log_warning_errno(r, "Failed to acquire PID1's context, ignoring: %m"); + + } +} diff --git a/src/journal/journald-context.h b/src/journal/journald-context.h new file mode 100644 index 0000000..9bf74b2 --- /dev/null +++ b/src/journal/journald-context.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <inttypes.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "sd-id128.h" + +#include "time-util.h" + +typedef struct ClientContext ClientContext; + +#include "journald-server.h" + +struct ClientContext { + unsigned n_ref; + unsigned lru_index; + usec_t timestamp; + bool in_lru; + + pid_t pid; + uid_t uid; + gid_t gid; + + char *comm; + char *exe; + char *cmdline; + char *capeff; + + uint32_t auditid; + uid_t loginuid; + + char *cgroup; + char *session; + uid_t owner_uid; + + char *unit; + char *user_unit; + + char *slice; + char *user_slice; + + sd_id128_t invocation_id; + + char *label; + size_t label_size; + + int log_level_max; + + struct iovec *extra_fields_iovec; + size_t extra_fields_n_iovec; + void *extra_fields_data; + nsec_t extra_fields_mtime; + + usec_t log_ratelimit_interval; + unsigned log_ratelimit_burst; +}; + +int client_context_get( + Server *s, + pid_t pid, + const struct ucred *ucred, + const char *label, size_t label_len, + const char *unit_id, + ClientContext **ret); + +int client_context_acquire( + Server *s, + pid_t pid, + const struct ucred *ucred, + const char *label, size_t label_len, + const char *unit_id, + ClientContext **ret); + +ClientContext* client_context_release(Server *s, ClientContext *c); + +void client_context_maybe_refresh( + Server *s, + ClientContext *c, + const struct ucred *ucred, + const char *label, size_t label_size, + const char *unit_id, + usec_t tstamp); + +void client_context_acquire_default(Server *s); +void client_context_flush_all(Server *s); + +static inline 
size_t client_context_extra_fields_n_iovec(const ClientContext *c) { + return c ? c->extra_fields_n_iovec : 0; +} + +static inline bool client_context_test_priority(const ClientContext *c, int priority) { + if (!c) + return true; + + if (c->log_level_max < 0) + return true; + + return LOG_PRI(priority) <= c->log_level_max; +} diff --git a/src/journal/journald-gperf.gperf b/src/journal/journald-gperf.gperf new file mode 100644 index 0000000..9076597 --- /dev/null +++ b/src/journal/journald-gperf.gperf @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +%{ +#if __GNUC__ >= 7 +_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +#endif +#include <stddef.h> +#include <sys/socket.h> +#include "conf-parser.h" +#include "journald-server.h" +%} +struct ConfigPerfItem; +%null_strings +%language=ANSI-C +%define slot-name section_and_lvalue +%define hash-function-name journald_gperf_hash +%define lookup-function-name journald_gperf_lookup +%readonly-tables +%omit-struct-type +%struct-type +%includes +%% +Journal.Storage, config_parse_storage, 0, offsetof(Server, storage) +Journal.Compress, config_parse_compress, 0, offsetof(Server, compress) +Journal.Seal, config_parse_bool, 0, offsetof(Server, seal) +Journal.ReadKMsg, config_parse_bool, 0, offsetof(Server, read_kmsg) +Journal.Audit, config_parse_tristate, 0, offsetof(Server, set_audit) +Journal.SyncIntervalSec, config_parse_sec, 0, offsetof(Server, sync_interval_usec) +# The following is a legacy name for compatibility +Journal.RateLimitInterval, config_parse_sec, 0, offsetof(Server, ratelimit_interval) +Journal.RateLimitIntervalSec,config_parse_sec, 0, offsetof(Server, ratelimit_interval) +Journal.RateLimitBurst, config_parse_unsigned, 0, offsetof(Server, ratelimit_burst) +Journal.SystemMaxUse, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_use) +Journal.SystemMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.max_size) +Journal.SystemKeepFree, 
config_parse_iec_uint64, 0, offsetof(Server, system_storage.metrics.keep_free) +Journal.SystemMaxFiles, config_parse_uint64, 0, offsetof(Server, system_storage.metrics.n_max_files) +Journal.RuntimeMaxUse, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_use) +Journal.RuntimeMaxFileSize, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.max_size) +Journal.RuntimeKeepFree, config_parse_iec_uint64, 0, offsetof(Server, runtime_storage.metrics.keep_free) +Journal.RuntimeMaxFiles, config_parse_uint64, 0, offsetof(Server, runtime_storage.metrics.n_max_files) +Journal.MaxRetentionSec, config_parse_sec, 0, offsetof(Server, max_retention_usec) +Journal.MaxFileSec, config_parse_sec, 0, offsetof(Server, max_file_usec) +Journal.ForwardToSyslog, config_parse_bool, 0, offsetof(Server, forward_to_syslog) +Journal.ForwardToKMsg, config_parse_bool, 0, offsetof(Server, forward_to_kmsg) +Journal.ForwardToConsole, config_parse_bool, 0, offsetof(Server, forward_to_console) +Journal.ForwardToWall, config_parse_bool, 0, offsetof(Server, forward_to_wall) +Journal.TTYPath, config_parse_path, 0, offsetof(Server, tty_path) +Journal.MaxLevelStore, config_parse_log_level, 0, offsetof(Server, max_level_store) +Journal.MaxLevelSyslog, config_parse_log_level, 0, offsetof(Server, max_level_syslog) +Journal.MaxLevelKMsg, config_parse_log_level, 0, offsetof(Server, max_level_kmsg) +Journal.MaxLevelConsole, config_parse_log_level, 0, offsetof(Server, max_level_console) +Journal.MaxLevelWall, config_parse_log_level, 0, offsetof(Server, max_level_wall) +Journal.SplitMode, config_parse_split_mode, 0, offsetof(Server, split_mode) +Journal.LineMax, config_parse_line_max, 0, offsetof(Server, line_max) diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c new file mode 100644 index 0000000..8ae7a23 --- /dev/null +++ b/src/journal/journald-kmsg.c @@ -0,0 +1,467 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include 
<sys/epoll.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <unistd.h> + +#include "sd-device.h" +#include "sd-messages.h" + +#include "alloc-util.h" +#include "device-util.h" +#include "escape.h" +#include "fd-util.h" +#include "format-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "journald-kmsg.h" +#include "journald-server.h" +#include "journald-syslog.h" +#include "log.h" +#include "parse-util.h" +#include "process-util.h" +#include "stdio-util.h" +#include "string-util.h" + +void server_forward_kmsg( + Server *s, + int priority, + const char *identifier, + const char *message, + const struct ucred *ucred) { + + _cleanup_free_ char *ident_buf = NULL; + struct iovec iovec[5]; + char header_priority[DECIMAL_STR_MAX(priority) + 3], + header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1]; + int n = 0; + + assert(s); + assert(priority >= 0); + assert(priority <= 999); + assert(message); + + if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg)) + return; + + if (_unlikely_(s->dev_kmsg_fd < 0)) + return; + + /* Never allow messages with kernel facility to be written to + * kmsg, regardless where the data comes from. 
*/ + priority = syslog_fixup_facility(priority); + + /* First: priority field */ + xsprintf(header_priority, "<%i>", priority); + iovec[n++] = IOVEC_MAKE_STRING(header_priority); + + /* Second: identifier and PID */ + if (ucred) { + if (!identifier) { + (void) get_process_comm(ucred->pid, &ident_buf); + identifier = ident_buf; + } + + xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid); + + if (identifier) + iovec[n++] = IOVEC_MAKE_STRING(identifier); + + iovec[n++] = IOVEC_MAKE_STRING(header_pid); + } else if (identifier) { + iovec[n++] = IOVEC_MAKE_STRING(identifier); + iovec[n++] = IOVEC_MAKE_STRING(": "); + } + + /* Fourth: message */ + iovec[n++] = IOVEC_MAKE_STRING(message); + iovec[n++] = IOVEC_MAKE_STRING("\n"); + + if (writev(s->dev_kmsg_fd, iovec, n) < 0) + log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m"); +} + +static bool is_us(const char *identifier, const char *pid) { + pid_t pid_num; + + if (!identifier || !pid) + return false; + + if (parse_pid(pid, &pid_num) < 0) + return false; + + return pid_num == getpid_cached() && + streq(identifier, program_invocation_short_name); +} + +void dev_kmsg_record(Server *s, char *p, size_t l) { + + _cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL; + struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS]; + char *kernel_device = NULL; + unsigned long long usec; + size_t n = 0, z = 0, j; + int priority, r; + char *e, *f, *k; + uint64_t serial; + size_t pl; + int saved_log_max_level = INT_MAX; + ClientContext *c = NULL; + + assert(s); + assert(p); + + if (l <= 0) + return; + + e = memchr(p, ',', l); + if (!e) + return; + *e = 0; + + r = safe_atoi(p, &priority); + if (r < 0 || priority < 0 || priority > 999) + return; + + if (s->forward_to_kmsg && LOG_FAC(priority) != LOG_KERN) + return; + + l -= (e - p) + 1; + p = e + 1; + e 
= memchr(p, ',', l); + if (!e) + return; + *e = 0; + + r = safe_atou64(p, &serial); + if (r < 0) + return; + + if (s->kernel_seqnum) { + /* We already read this one? */ + if (serial < *s->kernel_seqnum) + return; + + /* Did we lose any? */ + if (serial > *s->kernel_seqnum) + server_driver_message(s, 0, + "MESSAGE_ID=" SD_MESSAGE_JOURNAL_MISSED_STR, + LOG_MESSAGE("Missed %"PRIu64" kernel messages", + serial - *s->kernel_seqnum), + NULL); + + /* Make sure we never read this one again. Note that + * we always store the next message serial we expect + * here, simply because this makes handling the first + * message with serial 0 easy. */ + *s->kernel_seqnum = serial + 1; + } + + l -= (e - p) + 1; + p = e + 1; + f = memchr(p, ';', l); + if (!f) + return; + /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */ + e = memchr(p, ',', l); + if (!e || f < e) + e = f; + *e = 0; + + r = safe_atollu(p, &usec); + if (r < 0) + return; + + l -= (f - p) + 1; + p = f + 1; + e = memchr(p, '\n', l); + if (!e) + return; + *e = 0; + + pl = e - p; + l -= (e - p) + 1; + k = e + 1; + + for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) { + char *m; + /* Metadata fields attached */ + + if (*k != ' ') + break; + + k++, l--; + + e = memchr(k, '\n', l); + if (!e) + goto finish; + + *e = 0; + + if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &m) < 0) + break; + + if (startswith(m, "_KERNEL_DEVICE=")) + kernel_device = m + 15; + + iovec[n++] = IOVEC_MAKE_STRING(m); + z++; + + l -= (e - k) + 1; + k = e + 1; + } + + if (kernel_device) { + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + + if (sd_device_new_from_device_id(&d, kernel_device) >= 0) { + const char *g; + char *b; + + if (sd_device_get_devname(d, &g) >= 0) { + b = strjoin("_UDEV_DEVNODE=", g); + if (b) { + iovec[n++] = IOVEC_MAKE_STRING(b); + z++; + } + } + + if (sd_device_get_sysname(d, &g) >= 0) { + b = strjoin("_UDEV_SYSNAME=", g); + if (b) { + iovec[n++] = IOVEC_MAKE_STRING(b); + z++; + } + } + + j = 
0; + FOREACH_DEVICE_DEVLINK(d, g) { + + if (j >= N_IOVEC_UDEV_FIELDS) + break; + + b = strjoin("_UDEV_DEVLINK=", g); + if (b) { + iovec[n++] = IOVEC_MAKE_STRING(b); + z++; + } + + j++; + } + } + } + + if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0) + iovec[n++] = IOVEC_MAKE_STRING(source_time); + + iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel"); + + if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0) + iovec[n++] = IOVEC_MAKE_STRING(syslog_priority); + + if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0) + iovec[n++] = IOVEC_MAKE_STRING(syslog_facility); + + if (LOG_FAC(priority) == LOG_KERN) + iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel"); + else { + pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid); + + /* Avoid logging any new messages when we're processing messages generated by ourselves via + * log_info() and friends to avoid infinite loops. */ + if (is_us(identifier, pid)) { + if (!ratelimit_below(&s->kmsg_own_ratelimit)) + return; + + saved_log_max_level = log_get_max_level(); + c = s->my_context; + log_set_max_level(LOG_NULL); + } + + if (identifier) { + syslog_identifier = strjoin("SYSLOG_IDENTIFIER=", identifier); + if (syslog_identifier) + iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier); + } + + if (pid) { + syslog_pid = strjoin("SYSLOG_PID=", pid); + if (syslog_pid) + iovec[n++] = IOVEC_MAKE_STRING(syslog_pid); + } + } + + if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0) + iovec[n++] = IOVEC_MAKE_STRING(message); + + + server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), c, NULL, priority, 0); + + if (saved_log_max_level != INT_MAX) + log_set_max_level(saved_log_max_level); + +finish: + for (j = 0; j < z; j++) + free(iovec[j].iov_base); +} + +static int server_read_dev_kmsg(Server *s) { + char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */ + ssize_t l; + + assert(s); + 
assert(s->dev_kmsg_fd >= 0); + + l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1); + if (l == 0) + return 0; + if (l < 0) { + /* Old kernels who don't allow reading from /dev/kmsg + * return EINVAL when we try. So handle this cleanly, + * but don' try to ever read from it again. */ + if (errno == EINVAL) { + s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source); + return 0; + } + + if (ERRNO_IS_TRANSIENT(errno) || errno == EPIPE) + return 0; + + return log_error_errno(errno, "Failed to read from /dev/kmsg: %m"); + } + + dev_kmsg_record(s, buffer, l); + return 1; +} + +int server_flush_dev_kmsg(Server *s) { + int r; + + assert(s); + + if (s->dev_kmsg_fd < 0) + return 0; + + if (!s->dev_kmsg_readable) + return 0; + + log_debug("Flushing /dev/kmsg..."); + + for (;;) { + r = server_read_dev_kmsg(s); + if (r < 0) + return r; + + if (r == 0) + break; + } + + return 0; +} + +static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(es); + assert(fd == s->dev_kmsg_fd); + + if (revents & EPOLLERR) + log_warning("/dev/kmsg buffer overrun, some messages lost."); + + if (!(revents & EPOLLIN)) + log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents); + + return server_read_dev_kmsg(s); +} + +int server_open_dev_kmsg(Server *s) { + mode_t mode; + int r; + + assert(s); + + if (s->read_kmsg) + mode = O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY; + else + mode = O_WRONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY; + + s->dev_kmsg_fd = open("/dev/kmsg", mode); + if (s->dev_kmsg_fd < 0) { + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, + errno, "Failed to open /dev/kmsg, ignoring: %m"); + return 0; + } + + if (!s->read_kmsg) + return 0; + + r = sd_event_add_io(s->event, &s->dev_kmsg_event_source, s->dev_kmsg_fd, EPOLLIN, dispatch_dev_kmsg, s); + if (r < 0) { + + /* This will fail with EPERM on older kernels where + * /dev/kmsg is not readable. 
*/ + if (r == -EPERM) { + r = 0; + goto fail; + } + + log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m"); + goto fail; + } + + r = sd_event_source_set_priority(s->dev_kmsg_event_source, SD_EVENT_PRIORITY_IMPORTANT+10); + if (r < 0) { + log_error_errno(r, "Failed to adjust priority of kmsg event source: %m"); + goto fail; + } + + s->dev_kmsg_readable = true; + + return 0; + +fail: + s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source); + s->dev_kmsg_fd = safe_close(s->dev_kmsg_fd); + + return r; +} + +int server_open_kernel_seqnum(Server *s) { + _cleanup_close_ int fd = -1; + const char *fn; + uint64_t *p; + int r; + + assert(s); + + /* We store the seqnum we last read in an mmapped file. That way we can just use it like a variable, + * but it is persistent and automatically flushed at reboot. */ + + if (!s->read_kmsg) + return 0; + + fn = strjoina(s->runtime_directory, "/kernel-seqnum"); + fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644); + if (fd < 0) { + log_error_errno(errno, "Failed to open %s, ignoring: %m", fn); + return 0; + } + + r = posix_fallocate_loop(fd, 0, sizeof(uint64_t)); + if (r < 0) { + log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m"); + return 0; + } + + p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (p == MAP_FAILED) { + log_error_errno(errno, "Failed to map sequential number file, ignoring: %m"); + return 0; + } + + s->kernel_seqnum = p; + + return 0; +} diff --git a/src/journal/journald-kmsg.h b/src/journal/journald-kmsg.h new file mode 100644 index 0000000..bd288c5 --- /dev/null +++ b/src/journal/journald-kmsg.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journald-server.h" + +int server_open_dev_kmsg(Server *s); +int server_flush_dev_kmsg(Server *s); + +void server_forward_kmsg(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred); + 
+int server_open_kernel_seqnum(Server *s); + +void dev_kmsg_record(Server *s, char *p, size_t l); diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c new file mode 100644 index 0000000..0325788 --- /dev/null +++ b/src/journal/journald-native.c @@ -0,0 +1,505 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stddef.h> +#include <sys/epoll.h> +#include <sys/mman.h> +#include <sys/statvfs.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "journal-importer.h" +#include "journal-util.h" +#include "journald-console.h" +#include "journald-kmsg.h" +#include "journald-native.h" +#include "journald-server.h" +#include "journald-syslog.h" +#include "journald-wall.h" +#include "memfd-util.h" +#include "memory-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "selinux-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "unaligned.h" + +static bool allow_object_pid(const struct ucred *ucred) { + return ucred && ucred->uid == 0; +} + +static void server_process_entry_meta( + const char *p, size_t l, + const struct ucred *ucred, + int *priority, + char **identifier, + char **message, + pid_t *object_pid) { + + /* We need to determine the priority of this entry for the rate limiting logic */ + + if (l == 10 && + startswith(p, "PRIORITY=") && + p[9] >= '0' && p[9] <= '9') + *priority = (*priority & LOG_FACMASK) | (p[9] - '0'); + + else if (l == 17 && + startswith(p, "SYSLOG_FACILITY=") && + p[16] >= '0' && p[16] <= '9') + *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3); + + else if (l == 18 && + startswith(p, "SYSLOG_FACILITY=") && + p[16] >= '0' && p[16] <= '9' && + p[17] >= '0' && p[17] <= '9') + *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3); + + else if (l >= 19 && + startswith(p, "SYSLOG_IDENTIFIER=")) { + char *t; + + t = 
memdup_suffix0(p + 18, l - 18); + if (t) { + free(*identifier); + *identifier = t; + } + + } else if (l >= 8 && + startswith(p, "MESSAGE=")) { + char *t; + + t = memdup_suffix0(p + 8, l - 8); + if (t) { + free(*message); + *message = t; + } + + } else if (l > STRLEN("OBJECT_PID=") && + l < STRLEN("OBJECT_PID=") + DECIMAL_STR_MAX(pid_t) && + startswith(p, "OBJECT_PID=") && + allow_object_pid(ucred)) { + char buf[DECIMAL_STR_MAX(pid_t)]; + memcpy(buf, p + STRLEN("OBJECT_PID="), + l - STRLEN("OBJECT_PID=")); + buf[l-STRLEN("OBJECT_PID=")] = '\0'; + + (void) parse_pid(buf, object_pid); + } +} + +static int server_process_entry( + Server *s, + const void *buffer, size_t *remaining, + ClientContext *context, + const struct ucred *ucred, + const struct timeval *tv, + const char *label, size_t label_len) { + + /* Process a single entry from a native message. Returns 0 if nothing special happened and the message + * processing should continue, and a negative or positive value otherwise. + * + * Note that *remaining is altered on both success and failure. */ + + size_t n = 0, j, tn = SIZE_MAX, entry_size = 0; + char *identifier = NULL, *message = NULL; + struct iovec *iovec = NULL; + int priority = LOG_INFO; + pid_t object_pid = 0; + const char *p; + int r = 1; + + p = buffer; + + while (*remaining > 0) { + const char *e, *q; + + e = memchr(p, '\n', *remaining); + + if (!e) { + /* Trailing noise, let's ignore it, and flush what we collected */ + log_debug("Received message with trailing noise, ignoring."); + break; /* finish processing of the message */ + } + + if (e == p) { + /* Entry separator */ + *remaining -= 1; + break; + } + + if (IN_SET(*p, '.', '#')) { + /* Ignore control commands for now, and comments too. 
*/ + *remaining -= (e - p) + 1; + p = e + 1; + continue; + } + + /* A property follows */ + if (n > ENTRY_FIELD_COUNT_MAX) { + log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry."); + goto finish; + } + + /* n existing properties, 1 new, +1 for _TRANSPORT */ + if (!GREEDY_REALLOC(iovec, + n + 2 + + N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS + + client_context_extra_fields_n_iovec(context))) { + r = log_oom(); + goto finish; + } + + q = memchr(p, '=', e - p); + if (q) { + if (journal_field_valid(p, q - p, false)) { + size_t l; + + l = e - p; + if (l > DATA_SIZE_MAX) { + log_debug("Received text block of %zu bytes is too large, ignoring entry.", l); + goto finish; + } + + if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */ + log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.", + entry_size + l, n + 1); + goto finish; + } + + /* If the field name starts with an underscore, skip the variable, since that indicates + * a trusted field */ + iovec[n++] = IOVEC_MAKE((char*) p, l); + entry_size += l; + + server_process_entry_meta(p, l, ucred, + &priority, + &identifier, + &message, + &object_pid); + } + + *remaining -= (e - p) + 1; + p = e + 1; + continue; + } else { + uint64_t l, total; + char *k; + + if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) { + log_debug("Failed to parse message, ignoring."); + break; + } + + l = unaligned_read_le64(e + 1); + if (l > DATA_SIZE_MAX) { + log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l); + goto finish; + } + + total = (e - p) + 1 + l; + if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */ + log_debug("Entry is too big (%"PRIu64"bytes after processing %zu fields), ignoring.", + entry_size + total, n + 1); + goto finish; + } + + if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 || + e[1+sizeof(uint64_t)+l] != '\n') { + 
log_debug("Failed to parse message, ignoring."); + break; + } + + k = malloc(total); + if (!k) { + log_oom(); + break; + } + + memcpy(k, p, e - p); + k[e - p] = '='; + memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l); + + if (journal_field_valid(p, e - p, false)) { + iovec[n] = IOVEC_MAKE(k, (e - p) + 1 + l); + entry_size += iovec[n].iov_len; + n++; + + server_process_entry_meta(k, (e - p) + 1 + l, ucred, + &priority, + &identifier, + &message, + &object_pid); + } else + free(k); + + *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1; + p = e + 1 + sizeof(uint64_t) + l + 1; + } + } + + if (n <= 0) + goto finish; + + tn = n++; + iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal"); + entry_size += STRLEN("_TRANSPORT=journal"); + + if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */ + log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size); + goto finish; + } + + r = 0; /* Success, we read the message. */ + + if (!client_context_test_priority(context, priority)) + goto finish; + + if (message) { + if (s->forward_to_syslog) + server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv); + + if (s->forward_to_kmsg) + server_forward_kmsg(s, priority, identifier, message, ucred); + + if (s->forward_to_console) + server_forward_console(s, priority, identifier, message, ucred); + + if (s->forward_to_wall) + server_forward_wall(s, priority, identifier, message, ucred); + } + + server_dispatch_message(s, iovec, n, MALLOC_ELEMENTSOF(iovec), context, tv, priority, object_pid); + +finish: + for (j = 0; j < n; j++) { + if (j == tn) + continue; + + if (iovec[j].iov_base < buffer || + (const char*) iovec[j].iov_base >= p + *remaining) + free(iovec[j].iov_base); + } + + free(iovec); + free(identifier); + free(message); + + return r; +} + +void server_process_native_message( + Server *s, + const char *buffer, size_t buffer_size, + const struct ucred *ucred, + const struct timeval *tv, + 
const char *label, size_t label_len) { + + size_t remaining = buffer_size; + ClientContext *context = NULL; + int r; + + assert(s); + assert(buffer || buffer_size == 0); + + if (ucred && pid_is_valid(ucred->pid)) { + r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context); + if (r < 0) + log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid); + } + + do { + r = server_process_entry(s, + (const uint8_t*) buffer + (buffer_size - remaining), &remaining, + context, ucred, tv, label, label_len); + } while (r == 0); +} + +void server_process_native_file( + Server *s, + int fd, + const struct ucred *ucred, + const struct timeval *tv, + const char *label, size_t label_len) { + + struct stat st; + bool sealed; + int r; + + /* Data is in the passed fd, probably it didn't fit in a datagram. */ + + assert(s); + assert(fd >= 0); + + /* If it's a memfd, check if it is sealed. If so, we can just + * mmap it and use it, and do not need to copy the data out. */ + sealed = memfd_get_sealed(fd) > 0; + + if (!sealed && (!ucred || ucred->uid != 0)) { + _cleanup_free_ char *k = NULL; + const char *e; + + /* If this is not a sealed memfd, and the peer is unknown or + * unprivileged, then verify the path. */ + + r = fd_get_path(fd, &k); + if (r < 0) { + log_error_errno(r, "readlink(/proc/self/fd/%i) failed: %m", fd); + return; + } + + e = PATH_STARTSWITH_SET(k, "/dev/shm/", "/tmp/", "/var/tmp/"); + if (!e) { + log_error("Received file outside of allowed directories. Refusing."); + return; + } + + if (!filename_is_valid(e)) { + log_error("Received file in subdirectory of allowed directories. Refusing."); + return; + } + } + + if (fstat(fd, &st) < 0) { + log_error_errno(errno, "Failed to stat passed file, ignoring: %m"); + return; + } + + if (!S_ISREG(st.st_mode)) { + log_error("File passed is not regular. Ignoring."); + return; + } + + if (st.st_size <= 0) + return; + + /* When !sealed, set a lower memory limit. 
We have to read the file, + * effectively doubling memory use. */ + if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2)) { + log_error("File passed too large (%"PRIu64" bytes). Ignoring.", (uint64_t) st.st_size); + return; + } + + if (sealed) { + void *p; + size_t ps; + + /* The file is sealed, we can just map it and use it. */ + + ps = PAGE_ALIGN(st.st_size); + p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0); + if (p == MAP_FAILED) { + log_error_errno(errno, "Failed to map memfd, ignoring: %m"); + return; + } + + server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len); + assert_se(munmap(p, ps) >= 0); + } else { + _cleanup_free_ void *p = NULL; + struct statvfs vfs; + ssize_t n; + + if (fstatvfs(fd, &vfs) < 0) { + log_error_errno(errno, "Failed to stat file system of passed file, not processing it: %m"); + return; + } + + /* Refuse operating on file systems that have + * mandatory locking enabled, see: + * + * https://github.com/systemd/systemd/issues/1822 + */ + if (vfs.f_flag & ST_MANDLOCK) { + log_error("Received file descriptor from file system with mandatory locking enabled, not processing it."); + return; + } + + /* Make the fd non-blocking. On regular files this has + * the effect of bypassing mandatory locking. Of + * course, this should normally not be necessary given + * the check above, but let's better be safe than + * sorry, after all NFS is pretty confusing regarding + * file system flags, and we better don't trust it, + * and so is SMB. */ + r = fd_nonblock(fd, true); + if (r < 0) { + log_error_errno(r, "Failed to make fd non-blocking, not processing it: %m"); + return; + } + + /* The file is not sealed, we can't map the file here, since + * clients might then truncate it and trigger a SIGBUS for + * us. So let's stupidly read it. 
*/ + + p = malloc(st.st_size); + if (!p) { + log_oom(); + return; + } + + n = pread(fd, p, st.st_size, 0); + if (n < 0) + log_error_errno(errno, "Failed to read file, ignoring: %m"); + else if (n > 0) + server_process_native_message(s, p, n, ucred, tv, label, label_len); + } +} + +int server_open_native_socket(Server *s, const char *native_socket) { + int r; + + assert(s); + assert(native_socket); + + if (s->native_fd < 0) { + union sockaddr_union sa; + size_t sa_len; + + r = sockaddr_un_set_path(&sa.un, native_socket); + if (r < 0) + return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", native_socket); + sa_len = r; + + s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (s->native_fd < 0) + return log_error_errno(errno, "socket() failed: %m"); + + (void) sockaddr_un_unlink(&sa.un); + + r = bind(s->native_fd, &sa.sa, sa_len); + if (r < 0) + return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); + + (void) chmod(sa.un.sun_path, 0666); + } else + (void) fd_nonblock(s->native_fd, true); + + r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_error_errno(r, "SO_PASSCRED failed: %m"); + + if (mac_selinux_use()) { + r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSSEC, true); + if (r < 0) + log_warning_errno(r, "SO_PASSSEC failed: %m"); + } + + r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, true); + if (r < 0) + return log_error_errno(r, "SO_TIMESTAMP failed: %m"); + + r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s); + if (r < 0) + return log_error_errno(r, "Failed to add native server fd to event loop: %m"); + + r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5); + if (r < 0) + return log_error_errno(r, "Failed to adjust native event source priority: %m"); + + return 0; +} diff --git a/src/journal/journald-native.h b/src/journal/journald-native.h new file 
/* Maps a syslog level (used as the array index) to the index of the rate-limit pool that level is
 * accounted in. Several levels share a pool: EMERG, ALERT and CRIT all map to pool 0, NOTICE and
 * INFO both map to pool 3. The largest index used is 4, matching the POOLS_MAX (5) pools that
 * each JournalRateLimitGroup carries. */
static const int priority_map[] = {
        [LOG_EMERG] = 0,
        [LOG_ALERT] = 0,
        [LOG_CRIT] = 0,
        [LOG_ERR] = 1,
        [LOG_WARNING] = 2,
        [LOG_NOTICE] = 3,
        [LOG_INFO] = 3,
        [LOG_DEBUG] = 4
};
unsigned n_groups; + + uint8_t hash_key[16]; +}; + +JournalRateLimit *journal_ratelimit_new(void) { + JournalRateLimit *r; + + r = new0(JournalRateLimit, 1); + if (!r) + return NULL; + + random_bytes(r->hash_key, sizeof(r->hash_key)); + + return r; +} + +static void journal_ratelimit_group_free(JournalRateLimitGroup *g) { + assert(g); + + if (g->parent) { + assert(g->parent->n_groups > 0); + + if (g->parent->lru_tail == g) + g->parent->lru_tail = g->lru_prev; + + LIST_REMOVE(lru, g->parent->lru, g); + LIST_REMOVE(bucket, g->parent->buckets[g->hash % BUCKETS_MAX], g); + + g->parent->n_groups--; + } + + free(g->id); + free(g); +} + +void journal_ratelimit_free(JournalRateLimit *r) { + assert(r); + + while (r->lru) + journal_ratelimit_group_free(r->lru); + + free(r); +} + +static bool journal_ratelimit_group_expired(JournalRateLimitGroup *g, usec_t ts) { + unsigned i; + + assert(g); + + for (i = 0; i < POOLS_MAX; i++) + if (g->pools[i].begin + g->interval >= ts) + return false; + + return true; +} + +static void journal_ratelimit_vacuum(JournalRateLimit *r, usec_t ts) { + assert(r); + + /* Makes room for at least one new item, but drop all + * expored items too. 
*/ + + while (r->n_groups >= GROUPS_MAX || + (r->lru_tail && journal_ratelimit_group_expired(r->lru_tail, ts))) + journal_ratelimit_group_free(r->lru_tail); +} + +static JournalRateLimitGroup* journal_ratelimit_group_new(JournalRateLimit *r, const char *id, usec_t interval, usec_t ts) { + JournalRateLimitGroup *g; + + assert(r); + assert(id); + + g = new0(JournalRateLimitGroup, 1); + if (!g) + return NULL; + + g->id = strdup(id); + if (!g->id) + goto fail; + + g->hash = siphash24_string(g->id, r->hash_key); + + g->interval = interval; + + journal_ratelimit_vacuum(r, ts); + + LIST_PREPEND(bucket, r->buckets[g->hash % BUCKETS_MAX], g); + LIST_PREPEND(lru, r->lru, g); + if (!g->lru_next) + r->lru_tail = g; + r->n_groups++; + + g->parent = r; + return g; + +fail: + journal_ratelimit_group_free(g); + return NULL; +} + +static unsigned burst_modulate(unsigned burst, uint64_t available) { + unsigned k; + + /* Modulates the burst rate a bit with the amount of available + * disk space */ + + k = log2u64(available); + + /* 1MB */ + if (k <= 20) + return burst; + + burst = (burst * (k-16)) / 4; + + /* + * Example: + * + * <= 1MB = rate * 1 + * 16MB = rate * 2 + * 256MB = rate * 3 + * 4GB = rate * 4 + * 64GB = rate * 5 + * 1TB = rate * 6 + */ + + return burst; +} + +int journal_ratelimit_test(JournalRateLimit *r, const char *id, usec_t rl_interval, unsigned rl_burst, int priority, uint64_t available) { + JournalRateLimitGroup *g, *found = NULL; + JournalRateLimitPool *p; + unsigned burst; + uint64_t h; + usec_t ts; + + assert(id); + + /* Returns: + * + * 0 → the log message shall be suppressed, + * 1 + n → the log message shall be permitted, and n messages were dropped from the peer before + * < 0 → error + */ + + if (!r) + return 1; + + ts = now(CLOCK_MONOTONIC); + + h = siphash24_string(id, r->hash_key); + g = r->buckets[h % BUCKETS_MAX]; + + LIST_FOREACH(bucket, i, g) + if (streq(i->id, id)) { + found = i; + break; + } + + if (!found) { + found = 
journal_ratelimit_group_new(r, id, rl_interval, ts); + if (!found) + return -ENOMEM; + } else + found->interval = rl_interval; + + if (rl_interval == 0 || rl_burst == 0) + return 1; + + burst = burst_modulate(rl_burst, available); + + p = &found->pools[priority_map[priority]]; + + if (p->begin <= 0) { + p->suppressed = 0; + p->num = 1; + p->begin = ts; + return 1; + } + + if (p->begin + rl_interval < ts) { + unsigned s; + + s = p->suppressed; + p->suppressed = 0; + p->num = 1; + p->begin = ts; + + return 1 + s; + } + + if (p->num < burst) { + p->num++; + return 1; + } + + p->suppressed++; + return 0; +} diff --git a/src/journal/journald-rate-limit.h b/src/journal/journald-rate-limit.h new file mode 100644 index 0000000..8def60f --- /dev/null +++ b/src/journal/journald-rate-limit.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "time-util.h" + +typedef struct JournalRateLimit JournalRateLimit; + +JournalRateLimit *journal_ratelimit_new(void); +void journal_ratelimit_free(JournalRateLimit *r); +int journal_ratelimit_test(JournalRateLimit *r, const char *id, usec_t rl_interval, unsigned rl_burst, int priority, uint64_t available); diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c new file mode 100644 index 0000000..77aef79 --- /dev/null +++ b/src/journal/journald-server.c @@ -0,0 +1,2714 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#if HAVE_SELINUX +#include <selinux/selinux.h> +#endif +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/signalfd.h> +#include <sys/statvfs.h> +#include <linux/sockios.h> + +#include "sd-daemon.h" +#include "sd-journal.h" +#include "sd-messages.h" + +#include "acl-util.h" +#include "alloc-util.h" +#include "audit-util.h" +#include "cgroup-util.h" +#include "conf-parser.h" +#include "dirent-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include 
"hostname-util.h" +#include "id128-util.h" +#include "io-util.h" +#include "journal-authenticate.h" +#include "journal-internal.h" +#include "journal-vacuum.h" +#include "journald-audit.h" +#include "journald-context.h" +#include "journald-kmsg.h" +#include "journald-native.h" +#include "journald-rate-limit.h" +#include "journald-server.h" +#include "journald-stream.h" +#include "journald-syslog.h" +#include "log.h" +#include "missing_audit.h" +#include "mkdir.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "rm-rf.h" +#include "selinux-util.h" +#include "signal-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "syslog-util.h" +#include "uid-alloc-range.h" +#include "user-util.h" + +#define USER_JOURNALS_MAX 1024 + +#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE) +#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC) +#define DEFAULT_RATE_LIMIT_BURST 10000 +#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH + +#define DEFAULT_KMSG_OWN_INTERVAL (5 * USEC_PER_SEC) +#define DEFAULT_KMSG_OWN_BURST 50 + +#define RECHECK_SPACE_USEC (30*USEC_PER_SEC) + +#define NOTIFY_SNDBUF_SIZE (8*1024*1024) + +/* The period to insert between posting changes for coalescing */ +#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC) + +/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room + * for a bit of additional metadata. */ +#define DEFAULT_LINE_MAX (48*1024) + +#define DEFERRED_CLOSES_MAX (4096) + +#define IDLE_TIMEOUT_USEC (30*USEC_PER_SEC) + +static int determine_path_usage( + Server *s, + const char *path, + uint64_t *ret_used, + uint64_t *ret_free) { + + _cleanup_closedir_ DIR *d = NULL; + struct statvfs ss; + + assert(s); + assert(path); + assert(ret_used); + assert(ret_free); + + d = opendir(path); + if (!d) + return log_full_errno(errno == ENOENT ? 
LOG_DEBUG : LOG_ERR, + errno, "Failed to open %s: %m", path); + + if (fstatvfs(dirfd(d), &ss) < 0) + return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path); + + *ret_free = ss.f_bsize * ss.f_bavail; + *ret_used = 0; + FOREACH_DIRENT_ALL(de, d, break) { + struct stat st; + + if (!endswith(de->d_name, ".journal") && + !endswith(de->d_name, ".journal~")) + continue; + + if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name); + continue; + } + + if (!S_ISREG(st.st_mode)) + continue; + + *ret_used += (uint64_t) st.st_blocks * 512UL; + } + + return 0; +} + +static void cache_space_invalidate(JournalStorageSpace *space) { + zero(*space); +} + +static int cache_space_refresh(Server *s, JournalStorage *storage) { + JournalStorageSpace *space; + JournalMetrics *metrics; + uint64_t vfs_used, vfs_avail, avail; + usec_t ts; + int r; + + assert(s); + + metrics = &storage->metrics; + space = &storage->space; + + ts = now(CLOCK_MONOTONIC); + + if (space->timestamp != 0 && usec_add(space->timestamp, RECHECK_SPACE_USEC) > ts) + return 0; + + r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail); + if (r < 0) + return r; + + space->vfs_used = vfs_used; + space->vfs_available = vfs_avail; + + avail = LESS_BY(vfs_avail, metrics->keep_free); + + space->limit = CLAMP(vfs_used + avail, metrics->min_use, metrics->max_use); + space->available = LESS_BY(space->limit, vfs_used); + space->timestamp = ts; + return 1; +} + +static void patch_min_use(JournalStorage *storage) { + assert(storage); + + /* Let's bump the min_use limit to the current usage on disk. We do + * this when starting up and first opening the journal files. This way + * sudden spikes in disk usage will not cause journald to vacuum files + * without bounds. Note that this means that only a restart of journald + * will make it reset this value. 
*/ + + storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used); +} + +static JournalStorage* server_current_storage(Server *s) { + assert(s); + + return s->system_journal ? &s->system_storage : &s->runtime_storage; +} + +static int determine_space(Server *s, uint64_t *available, uint64_t *limit) { + JournalStorage *js; + int r; + + assert(s); + + js = server_current_storage(s); + + r = cache_space_refresh(s, js); + if (r >= 0) { + if (available) + *available = js->space.available; + if (limit) + *limit = js->space.limit; + } + return r; +} + +void server_space_usage_message(Server *s, JournalStorage *storage) { + assert(s); + + if (!storage) + storage = server_current_storage(s); + + if (cache_space_refresh(s, storage) < 0) + return; + + const JournalMetrics *metrics = &storage->metrics; + + server_driver_message(s, 0, + "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR, + LOG_MESSAGE("%s (%s) is %s, max %s, %s free.", + storage->name, storage->path, + FORMAT_BYTES(storage->space.vfs_used), + FORMAT_BYTES(storage->space.limit), + FORMAT_BYTES(storage->space.available)), + "JOURNAL_NAME=%s", storage->name, + "JOURNAL_PATH=%s", storage->path, + "CURRENT_USE=%"PRIu64, storage->space.vfs_used, + "CURRENT_USE_PRETTY=%s", FORMAT_BYTES(storage->space.vfs_used), + "MAX_USE=%"PRIu64, metrics->max_use, + "MAX_USE_PRETTY=%s", FORMAT_BYTES(metrics->max_use), + "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free, + "DISK_KEEP_FREE_PRETTY=%s", FORMAT_BYTES(metrics->keep_free), + "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available, + "DISK_AVAILABLE_PRETTY=%s", FORMAT_BYTES(storage->space.vfs_available), + "LIMIT=%"PRIu64, storage->space.limit, + "LIMIT_PRETTY=%s", FORMAT_BYTES(storage->space.limit), + "AVAILABLE=%"PRIu64, storage->space.available, + "AVAILABLE_PRETTY=%s", FORMAT_BYTES(storage->space.available), + NULL); +} + +static bool uid_for_system_journal(uid_t uid) { + + /* Returns true if the specified UID shall get its data stored in the system journal. 
*/ + + return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY; +} + +static void server_add_acls(ManagedJournalFile *f, uid_t uid) { + assert(f); + +#if HAVE_ACL + int r; + + if (uid_for_system_journal(uid)) + return; + + r = fd_add_uid_acl_permission(f->file->fd, uid, ACL_READ); + if (r < 0) + log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->file->path); +#endif +} + +static int open_journal( + Server *s, + bool reliably, + const char *fname, + int open_flags, + bool seal, + JournalMetrics *metrics, + ManagedJournalFile **ret) { + + _cleanup_(managed_journal_file_closep) ManagedJournalFile *f = NULL; + JournalFileFlags file_flags; + int r; + + assert(s); + assert(fname); + assert(ret); + + file_flags = (s->compress.enabled ? JOURNAL_COMPRESS : 0) | (seal ? JOURNAL_SEAL : 0); + + if (reliably) + r = managed_journal_file_open_reliably( + fname, + open_flags, + file_flags, + 0640, + s->compress.threshold_bytes, + metrics, + s->mmap, + s->deferred_closes, + NULL, + &f); + else + r = managed_journal_file_open( + -1, + fname, + open_flags, + file_flags, + 0640, + s->compress.threshold_bytes, + metrics, + s->mmap, + s->deferred_closes, + NULL, + &f); + + if (r < 0) + return r; + + r = journal_file_enable_post_change_timer(f->file, s->event, POST_CHANGE_TIMER_INTERVAL_USEC); + if (r < 0) + return r; + + *ret = TAKE_PTR(f); + return r; +} + +static bool flushed_flag_is_set(Server *s) { + const char *fn; + + assert(s); + + /* We don't support the "flushing" concept for namespace instances, we assume them to always have + * access to /var */ + if (s->namespace) + return true; + + fn = strjoina(s->runtime_directory, "/flushed"); + return access(fn, F_OK) >= 0; +} + +static int system_journal_open(Server *s, bool flush_requested, bool relinquish_requested) { + const char *fn; + int r = 0; + + if (!s->system_journal && + IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) && + (flush_requested || flushed_flag_is_set(s)) && + 
!relinquish_requested) { + + /* If in auto mode: first try to create the machine path, but not the prefix. + * + * If in persistent mode: create /var/log/journal and the machine path */ + + if (s->storage == STORAGE_PERSISTENT) + (void) mkdir_parents(s->system_storage.path, 0755); + + (void) mkdir(s->system_storage.path, 0755); + + fn = strjoina(s->system_storage.path, "/system.journal"); + r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal); + if (r >= 0) { + server_add_acls(s->system_journal, 0); + (void) cache_space_refresh(s, &s->system_storage); + patch_min_use(&s->system_storage); + } else { + if (!IN_SET(r, -ENOENT, -EROFS)) + log_warning_errno(r, "Failed to open system journal: %m"); + + r = 0; + } + + /* If the runtime journal is open, and we're post-flush, we're recovering from a failed + * system journal rotate (ENOSPC) for which the runtime journal was reopened. + * + * Perform an implicit flush to var, leaving the runtime journal closed, now that the system + * journal is back. + */ + if (!flush_requested) + (void) server_flush_to_var(s, true); + } + + if (!s->runtime_journal && + (s->storage != STORAGE_NONE)) { + + fn = strjoina(s->runtime_storage.path, "/system.journal"); + + if (!s->system_journal || relinquish_requested) { + /* OK, we really need the runtime journal, so create it if necessary. 
*/ + + (void) mkdir_parents(s->runtime_storage.path, 0755); + (void) mkdir(s->runtime_storage.path, 0750); + + r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal); + if (r < 0) + return log_error_errno(r, "Failed to open runtime journal: %m"); + + } else if (!flushed_flag_is_set(s)) { + /* Try to open the runtime journal, but only + * if it already exists, so that we can flush + * it into the system journal */ + + r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal); + if (r < 0) { + if (r != -ENOENT) + log_warning_errno(r, "Failed to open runtime journal: %m"); + + r = 0; + } + } + + if (s->runtime_journal) { + server_add_acls(s->runtime_journal, 0); + (void) cache_space_refresh(s, &s->runtime_storage); + patch_min_use(&s->runtime_storage); + } + } + + return r; +} + +static ManagedJournalFile* find_journal(Server *s, uid_t uid) { + _cleanup_free_ char *p = NULL; + ManagedJournalFile *f; + int r; + + assert(s); + + /* A rotate that fails to create the new journal (ENOSPC) leaves the rotated journal as NULL. Unless + * we revisit opening, even after space is made available we'll continue to return NULL indefinitely. + * + * system_journal_open() is a noop if the journals are already open, so we can just call it here to + * recover from failed rotates (or anything else that's left the journals as NULL). + * + * Fixes https://github.com/systemd/systemd/issues/3968 */ + (void) system_journal_open(s, false, false); + + /* We split up user logs only on /var, not on /run. If the runtime file is open, we write to it + * exclusively, in order to guarantee proper order as soon as we flush /run to /var and close the + * runtime file. */ + + if (s->runtime_journal) + return s->runtime_journal; + + /* If we are not in persistent mode, then we need return NULL immediately rather than opening a + * persistent journal of any sort. 
+ * + * Fixes https://github.com/systemd/systemd/issues/20390 */ + if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT)) + return NULL; + + if (uid_for_system_journal(uid)) + return s->system_journal; + + f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid)); + if (f) + return f; + + if (asprintf(&p, "%s/user-" UID_FMT ".journal", s->system_storage.path, uid) < 0) { + log_oom(); + return s->system_journal; + } + + /* Too many open? Then let's close one (or more) */ + while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) { + assert_se(f = ordered_hashmap_steal_first(s->user_journals)); + (void) managed_journal_file_close(f); + } + + r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f); + if (r < 0) + return s->system_journal; + + r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f); + if (r < 0) { + (void) managed_journal_file_close(f); + return s->system_journal; + } + + server_add_acls(f, uid); + return f; +} + +static int do_rotate( + Server *s, + ManagedJournalFile **f, + const char* name, + bool seal, + uint32_t uid) { + + JournalFileFlags file_flags; + int r; + + assert(s); + + if (!*f) + return -EINVAL; + + file_flags = + (s->compress.enabled ? JOURNAL_COMPRESS : 0)| + (seal ? JOURNAL_SEAL : 0); + + r = managed_journal_file_rotate(f, s->mmap, file_flags, s->compress.threshold_bytes, s->deferred_closes); + if (r < 0) { + if (*f) + return log_error_errno(r, "Failed to rotate %s: %m", (*f)->file->path); + else + return log_error_errno(r, "Failed to create new %s journal: %m", name); + } + + server_add_acls(*f, uid); + return r; +} + +static void server_process_deferred_closes(Server *s) { + ManagedJournalFile *f; + + /* Perform any deferred closes which aren't still offlining. 
*/ + SET_FOREACH(f, s->deferred_closes) { + if (managed_journal_file_is_offlining(f)) + continue; + + (void) set_remove(s->deferred_closes, f); + (void) managed_journal_file_close(f); + } +} + +static void server_vacuum_deferred_closes(Server *s) { + assert(s); + + /* Make some room in the deferred closes list, so that it doesn't grow without bounds */ + if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX) + return; + + /* Let's first remove all journal files that might already have completed closing */ + server_process_deferred_closes(s); + + /* And now, let's close some more until we reach the limit again. */ + while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) { + ManagedJournalFile *f; + + assert_se(f = set_steal_first(s->deferred_closes)); + managed_journal_file_close(f); + } +} + +static int vacuum_offline_user_journals(Server *s) { + _cleanup_closedir_ DIR *d = NULL; + int r; + + assert(s); + + d = opendir(s->system_storage.path); + if (!d) { + if (errno == ENOENT) + return 0; + + return log_error_errno(errno, "Failed to open %s: %m", s->system_storage.path); + } + + for (;;) { + _cleanup_free_ char *u = NULL, *full = NULL; + _cleanup_close_ int fd = -1; + const char *a, *b; + struct dirent *de; + ManagedJournalFile *f; + uid_t uid; + + errno = 0; + de = readdir_no_dot(d); + if (!de) { + if (errno != 0) + log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", s->system_storage.path); + + break; + } + + a = startswith(de->d_name, "user-"); + if (!a) + continue; + b = endswith(de->d_name, ".journal"); + if (!b) + continue; + + u = strndup(a, b-a); + if (!u) + return log_oom(); + + r = parse_uid(u, &uid); + if (r < 0) { + log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name); + continue; + } + + /* Already rotated in the above loop? i.e. is it an open user journal? 
/* Rotates the runtime journal, the system journal and every user journal that is currently open. If no
 * runtime journal is open afterwards, user journals that are not open are handled via
 * vacuum_offline_user_journals() as well. Finally processes the deferred closes. Failures of the
 * individual rotations are not propagated to the caller. */
void server_rotate(Server *s) {
        ManagedJournalFile *f;
        void *k;
        int r;

        log_debug("Rotating...");

        /* First, rotate the system journal (either in its runtime flavour or in its system flavour) */
        (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
        (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);

        /* Then, rotate all user journals we have open (keeping them open) */
        ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals) {
                r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
                if (r >= 0)
                        /* Store f back under the same key, as do_rotate() may have changed it */
                        ordered_hashmap_replace(s->user_journals, k, f);
                else if (!f)
                        /* Old file has been closed and deallocated */
                        ordered_hashmap_remove(s->user_journals, k);
        }

        /* Finally, also rotate all user journals we currently do not have open. (But do so only if we
         * actually have access to /var, i.e. are not in the log-to-runtime-journal mode). */
        if (!s->runtime_journal)
                (void) vacuum_offline_user_journals(s);

        server_process_deferred_closes(s);
}
/* Stores a freshly built "_HOSTNAME=" field string in s->hostname_field. If the hostname cannot be
 * obtained or the field string cannot be allocated, the previously cached field is left as it is. */
static void server_cache_hostname(Server *s) {
        _cleanup_free_ char *hostname = NULL;
        char *field;

        assert(s);

        hostname = gethostname_malloc();
        field = hostname ? strjoin("_HOSTNAME=", hostname) : NULL;
        if (!field)
                return;

        free_and_replace(s->hostname_field, field);
}
*/ + log_warning("%s: Read-only file system, rotating.", f->path); + return true; + + case -EIO: /* I/O error of some kind (mmap) */ + log_warning("%s: IO error, rotating.", f->path); + return true; + + case -EHOSTDOWN: /* Other machine */ + log_info("%s: Journal file from other machine, rotating.", f->path); + return true; + + case -EBUSY: /* Unclean shutdown */ + log_info("%s: Unclean shutdown, rotating.", f->path); + return true; + + case -EPROTONOSUPPORT: /* Unsupported feature */ + log_info("%s: Unsupported feature, rotating.", f->path); + return true; + + case -EBADMSG: /* Corrupted */ + case -ENODATA: /* Truncated */ + case -ESHUTDOWN: /* Already archived */ + log_warning("%s: Journal file corrupted, rotating.", f->path); + return true; + + case -EIDRM: /* Journal file has been deleted */ + log_warning("%s: Journal file has been deleted, rotating.", f->path); + return true; + + case -ETXTBSY: /* Journal file is from the future */ + log_warning("%s: Journal file is from the future, rotating.", f->path); + return true; + + case -EAFNOSUPPORT: + log_warning("%s: underlying file system does not support memory mapping or another required file system feature.", f->path); + return false; + + default: + return false; + } +} + +static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) { + bool vacuumed = false, rotate = false; + struct dual_timestamp ts; + ManagedJournalFile *f; + int r; + + assert(s); + assert(iovec); + assert(n > 0); + + /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use + * the source time, and not even the time the event was originally seen, but instead simply the time we started + * processing it, as we want strictly linear ordering in what we write out.) 
*/ + assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0); + assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0); + + if (ts.realtime < s->last_realtime_clock) { + /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during + * regular operation. However, when it does happen, then we should make sure that we start fresh files + * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure + * bisection works correctly. */ + + log_info("Time jumped backwards, rotating."); + rotate = true; + } else { + + f = find_journal(s, uid); + if (!f) + return; + + if (journal_file_rotate_suggested(f->file, s->max_file_usec, LOG_INFO)) { + log_info("%s: Journal header limits reached or header out-of-date, rotating.", f->file->path); + rotate = true; + } + } + + if (rotate) { + server_rotate(s); + server_vacuum(s, false); + vacuumed = true; + + f = find_journal(s, uid); + if (!f) + return; + } + + s->last_realtime_clock = ts.realtime; + + r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL); + if (r >= 0) { + server_schedule_sync(s, priority); + return; + } + + if (vacuumed || !shall_try_append_again(f->file, r)) { + log_ratelimit_full_errno(LOG_ERR, r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n)); + return; + } + + if (r == -E2BIG) + log_debug("Journal file %s is full, rotating to a new file", f->file->path); + else + log_ratelimit_full_errno(LOG_INFO, r, "Failed to write entry to %s (%zu items, %zu bytes), rotating before retrying: %m", f->file->path, n, IOVEC_TOTAL_SIZE(iovec, n)); + + server_rotate(s); + server_vacuum(s, false); + + f = find_journal(s, uid); + if (!f) + return; + + log_debug("Retrying write."); + r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL); + if (r < 0) + log_ratelimit_full_errno(LOG_ERR, r, "Failed to write entry to %s (%zu items, 
%zu bytes) despite vacuuming, ignoring: %m", f->file->path, n, IOVEC_TOTAL_SIZE(iovec, n)); + else + server_schedule_sync(s, priority); +} + +#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \ + if (isset(value)) { \ + char *k; \ + k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \ + sprintf(k, field "=" format, value); \ + iovec[n++] = IOVEC_MAKE_STRING(k); \ + } + +#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \ + if (!isempty(value)) { \ + char *k; \ + k = strjoina(field "=", value); \ + iovec[n++] = IOVEC_MAKE_STRING(k); \ + } + +#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \ + if (!sd_id128_is_null(value)) { \ + char *k; \ + k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \ + sd_id128_to_string(value, stpcpy(k, field "=")); \ + iovec[n++] = IOVEC_MAKE_STRING(k); \ + } + +#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \ + if (value_size > 0) { \ + char *k; \ + k = newa(char, STRLEN(field "=") + value_size + 1); \ + *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \ + iovec[n++] = IOVEC_MAKE_STRING(k); \ + } \ + +static void dispatch_message_real( + Server *s, + struct iovec *iovec, size_t n, size_t m, + const ClientContext *c, + const struct timeval *tv, + int priority, + pid_t object_pid) { + + char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)]; + _unused_ _cleanup_free_ char *cmdline1 = NULL, *cmdline2 = NULL; + uid_t journal_uid; + ClientContext *o; + + assert(s); + assert(iovec); + assert(n > 0); + assert(n + + N_IOVEC_META_FIELDS + + (pid_is_valid(object_pid) ? 
N_IOVEC_OBJECT_FIELDS : 0) + + client_context_extra_fields_n_iovec(c) <= m); + + if (c) { + IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID"); + + IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */ + IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */ + + if (c->cmdline) + /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack. + * Let's use a heap allocation for this one. */ + cmdline1 = set_iovec_string_field(iovec, &n, "_CMDLINE=", c->cmdline); + + IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */ + IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID"); + + IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP"); /* A path */ + IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID"); + IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */ + IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT"); + IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE"); + IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE"); + + IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID"); + + if (c->extra_fields_n_iovec > 0) { + memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec)); + n += c->extra_fields_n_iovec; + } + } + + assert(n <= m); + + if 
(pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) { + + IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID"); + + /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */ + IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM"); + IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE"); + if (o->cmdline) + cmdline2 = set_iovec_string_field(iovec, &n, "OBJECT_CMDLINE=", o->cmdline); + + IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE"); + IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID"); + + IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP"); + IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION"); + IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID"); + IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT"); + IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT"); + IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE"); + IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE"); + + IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID="); + } + + assert(n <= m); + + if (tv) { + sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv)); + iovec[n++] = IOVEC_MAKE_STRING(source_time); + } + + /* Note that strictly speaking storing the boot id here is + * redundant since the entry includes this in-line + * 
anyway. However, we need this indexed, too. */ + if (!isempty(s->boot_id_field)) + iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field); + + if (!isempty(s->machine_id_field)) + iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field); + + if (!isempty(s->hostname_field)) + iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field); + + if (!isempty(s->namespace_field)) + iovec[n++] = IOVEC_MAKE_STRING(s->namespace_field); + + iovec[n++] = in_initrd() ? IOVEC_MAKE_STRING("_RUNTIME_SCOPE=initrd") : IOVEC_MAKE_STRING("_RUNTIME_SCOPE=system"); + assert(n <= m); + + if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid)) + /* Split up strictly by (non-root) UID */ + journal_uid = c->uid; + else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid)) + /* Split up by login UIDs. We do this only if the + * realuid is not root, in order not to accidentally + * leak privileged information to the user that is + * logged by a privileged process that is part of an + * unprivileged session. */ + journal_uid = c->owner_uid; + else + journal_uid = 0; + + write_to_journal(s, journal_uid, iovec, n, priority); +} + +void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) 
{ + + struct iovec *iovec; + size_t n = 0, k, m; + va_list ap; + int r; + + assert(s); + assert(format); + + m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS; + iovec = newa(struct iovec, m); + + assert_cc(3 == LOG_FAC(LOG_DAEMON)); + iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3"); + iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald"); + + iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver"); + assert_cc(6 == LOG_INFO); + iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6"); + + if (message_id) + iovec[n++] = IOVEC_MAKE_STRING(message_id); + k = n; + + va_start(ap, format); + r = log_format_iovec(iovec, m, &n, false, 0, format, ap); + /* Error handling below */ + va_end(ap); + + if (r >= 0) + dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid); + + while (k < n) + free(iovec[k++].iov_base); + + if (r < 0) { + /* We failed to format the message. Emit a warning instead. */ + char buf[LINE_MAX]; + + errno = -r; + xsprintf(buf, "MESSAGE=Entry printing failed: %m"); + + n = 3; + iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4"); + iovec[n++] = IOVEC_MAKE_STRING(buf); + dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid); + } +} + +void server_dispatch_message( + Server *s, + struct iovec *iovec, size_t n, size_t m, + ClientContext *c, + const struct timeval *tv, + int priority, + pid_t object_pid) { + + uint64_t available = 0; + int rl; + + assert(s); + assert(iovec || n == 0); + + if (n == 0) + return; + + if (LOG_PRI(priority) > s->max_level_store) + return; + + /* Stop early in case the information will not be stored + * in a journal. 
*/ + if (s->storage == STORAGE_NONE) + return; + + if (c && c->unit) { + (void) determine_space(s, &available, NULL); + + rl = journal_ratelimit_test(s->ratelimit, c->unit, c->log_ratelimit_interval, c->log_ratelimit_burst, priority & LOG_PRIMASK, available); + if (rl == 0) + return; + + /* Write a suppression message if we suppressed something */ + if (rl > 1) + server_driver_message(s, c->pid, + "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR, + LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit), + "N_DROPPED=%i", rl - 1, + NULL); + } + + dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid); +} + +int server_flush_to_var(Server *s, bool require_flag_file) { + sd_journal *j = NULL; + const char *fn; + unsigned n = 0; + usec_t start; + int r, k; + + assert(s); + + if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT)) + return 0; + + if (s->namespace) /* Flushing concept does not exist for namespace instances */ + return 0; + + if (!s->runtime_journal) /* Nothing to flush? 
*/ + return 0; + + if (require_flag_file && !flushed_flag_is_set(s)) + return 0; + + (void) system_journal_open(s, true, false); + + if (!s->system_journal) + return 0; + + log_debug("Flushing to %s...", s->system_storage.path); + + start = now(CLOCK_MONOTONIC); + + r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY); + if (r < 0) + return log_error_errno(r, "Failed to read runtime journal: %m"); + + sd_journal_set_data_threshold(j, 0); + + SD_JOURNAL_FOREACH(j) { + Object *o = NULL; + JournalFile *f; + + f = j->current_file; + assert(f && f->current_offset > 0); + + n++; + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) { + log_error_errno(r, "Can't read entry: %m"); + goto finish; + } + + r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset); + if (r >= 0) + continue; + + if (!shall_try_append_again(s->system_journal->file, r)) { + log_error_errno(r, "Can't write entry: %m"); + goto finish; + } + + log_info("Rotating system journal."); + + server_rotate(s); + server_vacuum(s, false); + + if (!s->system_journal) { + log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful."); + r = -EIO; + goto finish; + } + + log_debug("Retrying write."); + r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset); + if (r < 0) { + log_error_errno(r, "Can't write entry: %m"); + goto finish; + } + } + + r = 0; + +finish: + if (s->system_journal) + journal_file_post_change(s->system_journal->file); + + s->runtime_journal = managed_journal_file_close(s->runtime_journal); + + if (r >= 0) + (void) rm_rf(s->runtime_storage.path, REMOVE_ROOT); + + sd_journal_close(j); + + server_driver_message(s, 0, NULL, + LOG_MESSAGE("Time spent on flushing to %s is %s for %u entries.", + s->system_storage.path, + FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0), + n), + NULL); + + fn = strjoina(s->runtime_directory, "/flushed"); + k = touch(fn); + if (k < 0) + 
log_warning_errno(k, "Failed to touch %s, ignoring: %m", fn); + + server_refresh_idle_timer(s); + return r; +} + +static int server_relinquish_var(Server *s) { + const char *fn; + assert(s); + + if (s->storage == STORAGE_NONE) + return 0; + + if (s->namespace) /* Concept does not exist for namespaced instances */ + return -EOPNOTSUPP; + + if (s->runtime_journal && !s->system_journal) + return 0; + + log_debug("Relinquishing %s...", s->system_storage.path); + + (void) system_journal_open(s, false, true); + + s->system_journal = managed_journal_file_close(s->system_journal); + ordered_hashmap_clear_with_destructor(s->user_journals, managed_journal_file_close); + set_clear_with_destructor(s->deferred_closes, managed_journal_file_close); + + fn = strjoina(s->runtime_directory, "/flushed"); + if (unlink(fn) < 0 && errno != ENOENT) + log_warning_errno(errno, "Failed to unlink %s, ignoring: %m", fn); + + server_refresh_idle_timer(s); + return 0; +} + +int server_process_datagram( + sd_event_source *es, + int fd, + uint32_t revents, + void *userdata) { + + size_t label_len = 0, m; + Server *s = ASSERT_PTR(userdata); + struct ucred *ucred = NULL; + struct timeval *tv = NULL; + struct cmsghdr *cmsg; + char *label = NULL; + struct iovec iovec; + ssize_t n; + int *fds = NULL, v = 0; + size_t n_fds = 0; + + /* We use NAME_MAX space for the SELinux label here. The kernel currently enforces no limit, but + * according to suggestions from the SELinux people this will change and it will probably be + * identical to NAME_MAX. For now we use that, but this should be updated one day when the final + * limit is known. + * + * Here, we need to explicitly initialize the buffer with zero, as glibc has a bug in + * __convert_scm_timestamps(), which assumes the buffer is initialized. See #20741. 
*/ + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE_TIMEVAL + + CMSG_SPACE(sizeof(int)) + /* fd */ + CMSG_SPACE(NAME_MAX) /* selinux label */) control = {}; + + union sockaddr_union sa = {}; + + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_name = &sa, + .msg_namelen = sizeof(sa), + }; + + assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd); + + if (revents != EPOLLIN) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Got invalid event from epoll for datagram fd: %" PRIx32, + revents); + + /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on + * it.) */ + (void) ioctl(fd, SIOCINQ, &v); + + /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */ + m = PAGE_ALIGN(MAX3((size_t) v + 1, + (size_t) LINE_MAX, + ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1); + + if (!GREEDY_REALLOC(s->buffer, m)) + return log_oom(); + + iovec = IOVEC_MAKE(s->buffer, MALLOC_ELEMENTSOF(s->buffer) - 1); /* Leave room for trailing NUL we add later */ + + /* Receive without blocking (MSG_DONTWAIT); MSG_CMSG_CLOEXEC is requested for any file descriptors that come along. Transient errors and truncated control data (-EXFULL) drop the message and return 0. */ n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + if (n < 0) { + if (ERRNO_IS_TRANSIENT(n)) + return 0; + if (n == -EXFULL) { + log_warning("Got message with truncated control data (too many fds sent?), ignoring."); + return 0; + } + return log_error_errno(n, "recvmsg() failed: %m"); + } + + /* Walk the control messages and remember pointers to the sender credentials, the security label, the receive timestamp and the passed file descriptors; each kind is asserted to show up at most once. */ CMSG_FOREACH(cmsg, &msghdr) + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS && + cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) { + assert(!ucred); + ucred = (struct ucred*) CMSG_DATA(cmsg); + } else if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_SECURITY) { + assert(!label); + label = (char*) CMSG_DATA(cmsg); + label_len = cmsg->cmsg_len - CMSG_LEN(0); + } else if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SO_TIMESTAMP && + 
cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval))) { + assert(!tv); + tv = (struct timeval*) CMSG_DATA(cmsg); + } else if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + assert(!fds); + fds = (int*) CMSG_DATA(cmsg); + n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + } + + /* And a trailing NUL, just in case */ + s->buffer[n] = 0; + + /* Dispatch by receiving socket. Only the native socket accepts a passed fd, and then exactly one together with an empty payload; any other combination involving fds is warned about and ignored. */ if (fd == s->syslog_fd) { + if (n > 0 && n_fds == 0) + server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len); + else if (n_fds > 0) + log_warning("Got file descriptors via syslog socket. Ignoring."); + + } else if (fd == s->native_fd) { + if (n > 0 && n_fds == 0) + server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len); + else if (n == 0 && n_fds == 1) + server_process_native_file(s, fds[0], ucred, tv, label, label_len); + else if (n_fds > 0) + log_warning("Got too many file descriptors via native socket. Ignoring."); + + } else { + assert(fd == s->audit_fd); + + if (n > 0 && n_fds == 0) + server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen); + else if (n_fds > 0) + log_warning("Got file descriptors via audit socket. 
Ignoring."); + } + + /* Reached on every path that got past the receive, so the passed fds are closed no matter which branch ran. */ close_many(fds, n_fds); + + server_refresh_idle_timer(s); + return 0; +} + +/* Flushes the runtime journal to persistent storage without requiring the flushed flag file (second argument false), then syncs, vacuums, emits the space usage message and refreshes the idle timer. The result of the flush itself is deliberately ignored. */ static void server_full_flush(Server *s) { + assert(s); + + (void) server_flush_to_var(s, false); + server_sync(s); + server_vacuum(s, false); + + server_space_usage_message(s, NULL); + + server_refresh_idle_timer(s); +} + +/* SIGUSR1 handler: runs a full flush of the runtime journal. Namespaced instances refuse the request with an error message and do nothing. Always returns 0. */ static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + if (s->namespace) { + log_error("Received SIGUSR1 signal from PID %u, but flushing runtime journals not supported for namespaced instances.", si->ssi_pid); + return 0; + } + + log_info("Received SIGUSR1 signal from PID %u, as request to flush runtime journal.", si->ssi_pid); + server_full_flush(s); + + return 0; +} + +/* Rotates and vacuums the journals, calls patch_min_use() for each storage that currently has an open journal, and records the current monotonic time in the rotated file below the runtime directory. A failure to write that file is only warned about. */ static void server_full_rotate(Server *s) { + const char *fn; + int r; + + assert(s); + + server_rotate(s); + server_vacuum(s, true); + + if (s->system_journal) + patch_min_use(&s->system_storage); + if (s->runtime_journal) + patch_min_use(&s->runtime_storage); + + /* Let clients know when the most recent rotation happened. 
*/ + fn = strjoina(s->runtime_directory, "/rotated"); + r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC)); + if (r < 0) + log_warning_errno(r, "Failed to write %s, ignoring: %m", fn); +} + +/* SIGUSR2 handler: logs the request and performs a full rotation. Always returns 0. */ static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + log_info("Received SIGUSR2 signal from PID %u, as request to rotate journal, rotating.", si->ssi_pid); + server_full_rotate(s); + + return 0; +} + +/* Handler registered for SIGTERM and SIGINT: disables its own event source and then queues two floating event sources with a NULL handler, a low priority defer source and a high priority 10s timer, so that the event loop exits once pending work is processed or the timeout hits. Always returns 0; if either source cannot be set up the event loop is asked to exit right away. */ static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) { + _cleanup_(sd_event_source_disable_unrefp) sd_event_source *news = NULL; + Server *s = ASSERT_PTR(userdata); + int r; + + log_received_signal(LOG_INFO, si); + + (void) sd_event_source_set_enabled(es, SD_EVENT_OFF); /* Make sure this handler is called at most once */ + + /* So on one hand we want to ensure that SIGTERMs are definitely handled in appropriate, bounded + * time. On the other hand we want that everything pending is first comprehensively processed and + * written to disk. These goals are incompatible, hence we try to find a middle ground: we'll process + * SIGTERM with high priority, but from the handler (this one right here) we'll install two new event + * sources: one low priority idle one that will issue the exit once everything else is processed (and + * which is hopefully the regular, clean codepath); and one high priority timer that acts as safety + * net: if our idle handler isn't run within 10s, we'll exit anyway. + * + * TLDR: we'll exit either when everything is processed, or after 10s max, depending on what happens + * first. + * + * Note that exiting before the idle event is hit doesn't typically mean that we lose any data, as + * messages will remain queued in the sockets they came in from, and thus can be processed when we + * start up next – unless we are going down for the final system shutdown, in which case everything + * is lost. 
*/ + + r = sd_event_add_defer(s->event, &news, NULL, NULL); /* NULL handler means → exit when triggered */ + if (r < 0) { + log_error_errno(r, "Failed to allocate exit idle event handler: %m"); + goto fail; + } + + (void) sd_event_source_set_description(news, "exit-idle"); + + /* Run everything relevant before this. */ + r = sd_event_source_set_priority(news, SD_EVENT_PRIORITY_NORMAL+20); + if (r < 0) { + log_error_errno(r, "Failed to adjust priority of exit idle event handler: %m"); + goto fail; + } + + /* Give up ownership, so that this event source is freed automatically when the event loop is freed. */ + r = sd_event_source_set_floating(news, true); + if (r < 0) { + log_error_errno(r, "Failed to make exit idle event handler floating: %m"); + goto fail; + } + + news = sd_event_source_unref(news); + + /* Second source: the safety-net timer, 10s from now, again with a NULL handler. The news variable is reused, which is why it was reset just above. */ r = sd_event_add_time_relative(s->event, &news, CLOCK_MONOTONIC, 10 * USEC_PER_SEC, 0, NULL, NULL); + if (r < 0) { + log_error_errno(r, "Failed to allocate exit timeout event handler: %m"); + goto fail; + } + + (void) sd_event_source_set_description(news, "exit-timeout"); + + r = sd_event_source_set_priority(news, SD_EVENT_PRIORITY_IMPORTANT-20); /* This is a safety net, with highest priority */ + if (r < 0) { + log_error_errno(r, "Failed to adjust priority of exit timeout event handler: %m"); + goto fail; + } + + r = sd_event_source_set_floating(news, true); + if (r < 0) { + log_error_errno(r, "Failed to make exit timeout event handler floating: %m"); + goto fail; + } + + news = sd_event_source_unref(news); + + log_debug("Exit event sources are now pending."); + return 0; + +fail: /* The deferred exit could not be set up, so request the exit immediately. The handler still returns 0. */ + sd_event_exit(s->event, 0); + return 0; +} + +/* Syncs the journals and then records the current monotonic time in the synced file below the runtime directory. A failure to write that file is only warned about. */ static void server_full_sync(Server *s) { + const char *fn; + int r; + + assert(s); + + server_sync(s); + + /* Let clients know when the most recent sync happened. 
*/ + fn = strjoina(s->runtime_directory, "/synced"); + r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC)); + if (r < 0) + log_warning_errno(r, "Failed to write %s, ignoring: %m", fn); + + return; +} + +/* SIGRTMIN+1 handler: logs the request and runs a full sync. Always returns 0. */ static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + log_debug("Received SIGRTMIN1 signal from PID %u, as request to sync.", si->ssi_pid); + server_full_sync(s); + + return 0; +} + +/* Registers event loop signal sources for SIGUSR1, SIGUSR2, SIGTERM, SIGINT and SIGRTMIN+1 and adjusts the priority of the SIGTERM, SIGINT and SIGRTMIN+1 sources. Returns 0 on success or the negative error of the first sd-event call that fails. NOTE(review): the sigprocmask_many() call presumably blocks the listed signals so that they only arrive through the event loop - confirm against its definition. */ static int setup_signals(Server *s) { + int r; + + assert(s); + + assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0); + + r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s); + if (r < 0) + return r; + + r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s); + if (r < 0) + return r; + + r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s); + if (r < 0) + return r; + + /* Let's process SIGTERM early, so that we definitely react to it */ + r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_IMPORTANT-10); + if (r < 0) + return r; + + /* When journald is invoked on the terminal (when debugging), it's useful if C-c is handled + * equivalent to SIGTERM. */ + r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s); + if (r < 0) + return r; + + r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_IMPORTANT-10); + if (r < 0) + return r; + + /* SIGRTMIN+1 causes an immediate sync. We process this very late, so that everything else queued at + * this point is really written to disk. Clients can watch /run/systemd/journal/synced with inotify + * until its mtime changes to see when a sync happened. 
*/ + r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s); + if (r < 0) + return r; + + r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15); + if (r < 0) + return r; + + return 0; +} + +/* Callback handed to proc_cmdline_parse(): applies the systemd.journald.forward_to_* and systemd.journald.max_level_* kernel command line options to the Server passed as data. Values that fail to parse and unknown keys starting with systemd.journald are only warned about. Always returns 0. */ static int parse_proc_cmdline_item(const char *key, const char *value, void *data) { + Server *s = ASSERT_PTR(data); + int r; + + if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) { + + /* For the forward_to_* switches a key given without a value counts as true. */ r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value); + else + s->forward_to_syslog = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) { + + r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value); + else + s->forward_to_kmsg = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) { + + r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value); + else + s->forward_to_console = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) { + + r = value ? parse_boolean(value) : true; + if (r < 0) + log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value); + else + s->forward_to_wall = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) { + + /* The max_level_* options need a value; without one the option is skipped. */ if (proc_cmdline_value_missing(key, value)) + return 0; + + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level console value \"%s\". Ignoring.", value); + else + s->max_level_console = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level store value \"%s\". 
Ignoring.", value); + else + s->max_level_store = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value); + else + s->max_level_syslog = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value); + else + s->max_level_kmsg = r; + + } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + r = log_level_from_string(value); + if (r < 0) + log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value); + else + s->max_level_wall = r; + + } else if (startswith(key, "systemd.journald")) + log_warning("Unknown journald kernel command line option \"%s\". 
Ignoring.", key); + + /* do not warn about state here, since probably systemd already did */ + return 0; +} + +/* Loads the journald configuration into s. A namespaced instance reads only journald@NAMESPACE.conf below PKGSYSCONFDIR together with its journald@NAMESPACE.conf.d drop-ins and returns 0 or a negative error; otherwise journald.conf and the journald.conf.d drop-ins are read and the result of config_parse_many_nulstr() is returned as is. */ static int server_parse_config_file(Server *s) { + int r; + + assert(s); + + if (s->namespace) { + const char *namespaced, *dropin_dirname; + + /* If we are running in namespace mode, load the namespace specific configuration file, and nothing else */ + namespaced = strjoina(PKGSYSCONFDIR "/journald@", s->namespace, ".conf"); + dropin_dirname = strjoina("journald@", s->namespace, ".conf.d"); + + r = config_parse_many( + STRV_MAKE_CONST(namespaced), + (const char* const*) CONF_PATHS_STRV("systemd"), + dropin_dirname, + "Journal\0", + config_item_perf_lookup, journald_gperf_lookup, + CONFIG_PARSE_WARN, s, NULL, NULL); + if (r < 0) + return r; + + return 0; + } + + return config_parse_many_nulstr( + PKGSYSCONFDIR "/journald.conf", + CONF_PATHS_NULSTR("systemd/journald.conf.d"), + "Journal\0", + config_item_perf_lookup, journald_gperf_lookup, + CONFIG_PARSE_WARN, s, NULL); +} + +/* Timer callback installed by server_schedule_sync(): syncs the journals. Always returns 0. */ static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + server_sync(s); + return 0; +} + +/* Requests a sync for a message of the given syslog priority. Syncs immediately for priority <= LOG_CRIT, or when there is no event loop or it has finished. Otherwise, unless a sync is already scheduled, arms the sync timer sync_interval_usec from now; with sync_interval_usec == 0 nothing is scheduled. Returns 0 or a negative sd-event error. */ int server_schedule_sync(Server *s, int priority) { + int r; + + assert(s); + + if (priority <= LOG_CRIT) { + /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */ + server_sync(s); + return 0; + } + + if (!s->event || sd_event_get_state(s->event) == SD_EVENT_FINISHED) { + /* Shutting down the server? Let's sync immediately. 
*/ + server_sync(s); + return 0; + } + + /* A sync is already pending, nothing to do. */ if (s->sync_scheduled) + return 0; + + if (s->sync_interval_usec > 0) { + + /* Create the timer on first use, afterwards just move it and switch it back on. */ if (!s->sync_event_source) { + r = sd_event_add_time_relative( + s->event, + &s->sync_event_source, + CLOCK_MONOTONIC, + s->sync_interval_usec, 0, + server_dispatch_sync, s); + if (r < 0) + return r; + + r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT); + } else { + r = sd_event_source_set_time_relative(s->sync_event_source, s->sync_interval_usec); + if (r < 0) + return r; + + r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT); + } + if (r < 0) + return r; + + s->sync_scheduled = true; + } + + return 0; +} + +/* IO callback for the hostname fd: refreshes the cached hostname. Always returns 0. */ static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + server_cache_hostname(s); + return 0; +} + +/* Opens /proc/sys/kernel/hostname and registers the fd with the event loop so that dispatch_hostname_change() runs on events for it. An -EPERM from the registration is tolerated: the fd is closed again and 0 is returned. Other failures are logged and returned as negative errno. */ static int server_open_hostname(Server *s) { + int r; + + assert(s); + + s->hostname_fd = open("/proc/sys/kernel/hostname", + O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY); + if (s->hostname_fd < 0) + return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m"); + + r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s); + if (r < 0) { + /* kernels prior to 3.2 don't support polling this file. Ignore + * the failure. 
*/ + if (r == -EPERM) { + log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m"); + s->hostname_fd = safe_close(s->hostname_fd); + return 0; + } + + return log_error_errno(r, "Failed to register hostname fd in event loop: %m"); + } + + r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10); + if (r < 0) + return log_error_errno(r, "Failed to adjust priority of hostname event source: %m"); + + return 0; +} + +/* IO callback for the notify socket: sends at most one pending notification per invocation, in the order READY=1, WATCHDOG=1, one stdout stream notification, and switches the event source off once nothing is left to send. Returns 0, also when send() reports EAGAIN so that the message is retried on the next wakeup, or a negative errno on other failures. */ static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) { + Server *s = ASSERT_PTR(userdata); + int r; + + assert(s->notify_event_source == es); + assert(s->notify_fd == fd); + + /* The $NOTIFY_SOCKET is writable again, now send exactly one + * message on it. Either it's the watchdog event, the initial + * READY=1 event or an stdout stream event. If there's nothing + * to write anymore, turn our event source off. The next time + * there's something to send it will be turned on again. */ + + if (!s->sent_notify_ready) { + static const char p[] = "READY=1\n" + "STATUS=Processing requests..."; + + if (send(s->notify_fd, p, strlen(p), MSG_DONTWAIT) < 0) { + if (errno == EAGAIN) + return 0; + + return log_error_errno(errno, "Failed to send READY=1 notification message: %m"); + } + + s->sent_notify_ready = true; + log_debug("Sent READY=1 notification."); + + } else if (s->send_watchdog) { + static const char p[] = "WATCHDOG=1"; + + if (send(s->notify_fd, p, strlen(p), MSG_DONTWAIT) < 0) { + if (errno == EAGAIN) + return 0; + + return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m"); + } + + s->send_watchdog = false; + log_debug("Sent WATCHDOG=1 notification."); + + } else if (s->stdout_streams_notify_queue) + /* Dispatch one stream notification event */ + stdout_stream_send_notify(s->stdout_streams_notify_queue); + + /* Leave us enabled if there's still more to do. 
*/ + if (s->send_watchdog || s->stdout_streams_notify_queue) + return 0; + + /* There was nothing to do anymore, let's turn ourselves off. */ + r = sd_event_source_set_enabled(es, SD_EVENT_OFF); + if (r < 0) + return log_error_errno(r, "Failed to turn off notify event source: %m"); + + return 0; +} + +/* Watchdog timer callback: marks a WATCHDOG=1 message as pending, switches the notify event source on so that it gets sent, and re-arms this timer at usec + watchdog_usec / 2. Failing to enable the notify source is only warned about; failing to re-arm the timer is returned as negative errno. */ static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) { + Server *s = ASSERT_PTR(userdata); + int r; + + s->send_watchdog = true; + + r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON); + if (r < 0) + log_warning_errno(r, "Failed to turn on notify event source: %m"); + + r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2); + if (r < 0) + return log_error_errno(r, "Failed to restart watchdog event source: %m"); + + r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON); + if (r < 0) + return log_error_errno(r, "Failed to enable watchdog event source: %m"); + + return 0; +} + +/* Connects a non-blocking datagram socket to the address in the NOTIFY_SOCKET environment variable and watches it for EPOLLOUT with dispatch_notify_event(); also installs the watchdog timer when sd_watchdog_enabled() reports a timeout. Returns 0, also when NOTIFY_SOCKET is unset, or a negative errno. Must only be called while notify_fd is unset and no notify event source exists. */ static int server_connect_notify(Server *s) { + union sockaddr_union sa; + socklen_t sa_len; + const char *e; + int r; + + assert(s); + assert(s->notify_fd < 0); + assert(!s->notify_event_source); + + /* + * So here's the problem: we'd like to send notification messages to PID 1, but we cannot do that via + * sd_notify(), since that's synchronous, and we might end up blocking on it. Specifically: given + * that PID 1 might block on dbus-daemon during IPC, and dbus-daemon is logging to us, and might + * hence block on us, we might end up in a deadlock if we block on sending PID 1 notification + * messages — by generating a full blocking circle. To avoid this, let's create a non-blocking + * socket, and connect it to the notification socket, and then wait for POLLOUT before we send + * anything. This should efficiently avoid any deadlocks, as we'll never block on PID 1, hence PID 1 + * can safely block on dbus-daemon which can safely block on us again. + * + * Don't think that this issue is real? 
It is, see: https://github.com/systemd/systemd/issues/1505 + */ + + e = getenv("NOTIFY_SOCKET"); + if (!e) + return 0; + + r = sockaddr_un_set_path(&sa.un, e); + if (r < 0) + return log_error_errno(r, "NOTIFY_SOCKET set to invalid value '%s': %m", e); + sa_len = r; + + s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (s->notify_fd < 0) + return log_error_errno(errno, "Failed to create notify socket: %m"); + + /* Best effort, the result is ignored. */ (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE); + + r = connect(s->notify_fd, &sa.sa, sa_len); + if (r < 0) + return log_error_errno(errno, "Failed to connect to notify socket: %m"); + + r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s); + if (r < 0) + return log_error_errno(r, "Failed to watch notification socket: %m"); + + /* With a watchdog configured, queue a first WATCHDOG=1 and ping again every watchdog_usec / 2, with watchdog_usec / 4 passed as timer accuracy. */ if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) { + s->send_watchdog = true; + + r = sd_event_add_time_relative(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s); + if (r < 0) + return log_error_errno(r, "Failed to add watchdog time event: %m"); + } + + /* This should fire pretty soon, which we'll use to send the READY=1 event. */ + + return 0; +} + +/* Deferred event handler that completes the Synchronize() varlink call. userdata is the Varlink connection to reply on; the Server is taken from its userdata. Runs a full sync, makes its own event source non-floating again and then sends the reply. Returns the result of varlink_reply() or a negative errno. */ static int synchronize_second_half(sd_event_source *event_source, void *userdata) { + Varlink *link = ASSERT_PTR(userdata); + Server *s; + int r; + + assert_se(s = varlink_get_userdata(link)); + + /* This is the "second half" of the Synchronize() varlink method. This function is called as deferred + * event source at a low priority to ensure the synchronization completes after all queued log + * messages are processed. */ + server_full_sync(s); + + /* Let's get rid of the event source now, by marking it as non-floating again. It then has no ref + * anymore and is immediately destroyed after we return from this function, i.e. from this event + * source handler at the end. 
*/ + r = sd_event_source_set_floating(event_source, false); + if (r < 0) + return log_error_errno(r, "Failed to mark event source as non-floating: %m"); + + return varlink_reply(link, NULL); +} + +/* Destroy callback of the deferred sync event source: drops the Varlink reference that vl_method_synchronize() took for it. */ static void synchronize_destroy(void *userdata) { + varlink_unref(userdata); +} + +/* Varlink method io.systemd.Journal.Synchronize. Rejects a call that carries any parameters; otherwise queues synchronize_second_half() as a floating defer event source at priority NORMAL+15 and returns 0 without replying, since the reply is sent from that handler. Returns a negative errno if the event source cannot be set up. */ static int vl_method_synchronize(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) { + _cleanup_(sd_event_source_unrefp) sd_event_source *event_source = NULL; + Server *s = ASSERT_PTR(userdata); + int r; + + assert(link); + + if (json_variant_elements(parameters) > 0) + return varlink_error_invalid_parameter(link, parameters); + + log_info("Received client request to sync journal."); + + /* We don't do the main work now, but instead enqueue a deferred event loop job which will do + * it. That job is scheduled at low priority, so that we return from this method call only after all + * queued but not processed log messages are written to disk, so that this method call returning can + * be used as nice synchronization point. */ + r = sd_event_add_defer(s->event, &event_source, synchronize_second_half, link); + if (r < 0) + return log_error_errno(r, "Failed to allocate defer event source: %m"); + + r = sd_event_source_set_destroy_callback(event_source, synchronize_destroy); + if (r < 0) + return log_error_errno(r, "Failed to set event source destroy callback: %m"); + + /* The reference is taken only after the destroy callback is installed, so every later error path, which unrefs event_source through the cleanup attribute, releases it again. */ varlink_ref(link); /* The varlink object is now left to the destroy callback to unref */ + + r = sd_event_source_set_priority(event_source, SD_EVENT_PRIORITY_NORMAL+15); + if (r < 0) + return log_error_errno(r, "Failed to set defer event source priority: %m"); + + /* Give up ownership of this event source. It will now be destroyed along with event loop itself, + * unless it destroys itself earlier. 
*/ + r = sd_event_source_set_floating(event_source, true); + if (r < 0) + return log_error_errno(r, "Failed to mark event source as floating: %m"); + + (void) sd_event_source_set_description(event_source, "deferred-sync"); + + return 0; +} + +/* Varlink method io.systemd.Journal.Rotate: rejects a call that carries any parameters, otherwise performs a full rotation and sends an empty reply. */ static int vl_method_rotate(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(link); + + if (json_variant_elements(parameters) > 0) + return varlink_error_invalid_parameter(link, parameters); + + log_info("Received client request to rotate journal, rotating."); + server_full_rotate(s); + + return varlink_reply(link, NULL); +} + +/* Varlink method io.systemd.Journal.FlushToVar: rejects a call that carries any parameters, answers namespaced instances with the NotSupportedByNamespaces error, otherwise performs a full flush and sends an empty reply. */ static int vl_method_flush_to_var(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(link); + + if (json_variant_elements(parameters) > 0) + return varlink_error_invalid_parameter(link, parameters); + if (s->namespace) + return varlink_error(link, "io.systemd.Journal.NotSupportedByNamespaces", NULL); + + log_info("Received client request to flush runtime journal."); + server_full_flush(s); + + return varlink_reply(link, NULL); +} + +/* Varlink method io.systemd.Journal.RelinquishVar: same parameter and namespace checks as for flushing, then calls server_relinquish_var() and sends an empty reply. NOTE(review): the return value of server_relinquish_var() is not checked here, so the client gets a success reply regardless. */ static int vl_method_relinquish_var(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(link); + + if (json_variant_elements(parameters) > 0) + return varlink_error_invalid_parameter(link, parameters); + if (s->namespace) + return varlink_error(link, "io.systemd.Journal.NotSupportedByNamespaces", NULL); + + log_info("Received client request to relinquish %s access.", s->system_storage.path); + server_relinquish_var(s); + + return varlink_reply(link, NULL); +} + +/* Connect callback of the varlink server: re-evaluates the idle timer and ignores the result. Always returns 0. */ static int vl_connect(VarlinkServer *server, Varlink *link, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(server); + assert(link); + + (void) server_start_or_stop_idle_timer(s); /* maybe we are no longer idle */ + + return 0; +} + +/* Disconnect callback of the varlink server: re-evaluates the idle timer and ignores the result. */ static void 
vl_disconnect(VarlinkServer *server, Varlink *link, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(server); + assert(link); + + (void) server_start_or_stop_idle_timer(s); /* maybe we are idle now */ +} + +/* Creates the varlink server with the ROOT_ONLY and INHERIT_USERDATA flags, binds the four io.systemd.Journal methods and the connect and disconnect callbacks, listens on fd when it is >= 0 and on the socket path with mode 0600 otherwise, and attaches the server to the event loop at normal priority. Returns 0 or the negative error of the first step that fails. */ static int server_open_varlink(Server *s, const char *socket, int fd) { + int r; + + assert(s); + + r = varlink_server_new(&s->varlink_server, VARLINK_SERVER_ROOT_ONLY|VARLINK_SERVER_INHERIT_USERDATA); + if (r < 0) + return r; + + varlink_server_set_userdata(s->varlink_server, s); + + r = varlink_server_bind_method_many( + s->varlink_server, + "io.systemd.Journal.Synchronize", vl_method_synchronize, + "io.systemd.Journal.Rotate", vl_method_rotate, + "io.systemd.Journal.FlushToVar", vl_method_flush_to_var, + "io.systemd.Journal.RelinquishVar", vl_method_relinquish_var); + if (r < 0) + return r; + + r = varlink_server_bind_connect(s->varlink_server, vl_connect); + if (r < 0) + return r; + + r = varlink_server_bind_disconnect(s->varlink_server, vl_disconnect); + if (r < 0) + return r; + + /* Prefer an fd that was passed in over binding the socket path ourselves. */ if (fd < 0) + r = varlink_server_listen_address(s->varlink_server, socket, 0600); + else + r = varlink_server_listen_fd(s->varlink_server, fd); + if (r < 0) + return r; + + r = varlink_server_attach_event(s->varlink_server, s->event, SD_EVENT_PRIORITY_NORMAL); + if (r < 0) + return r; + + return 0; +} + +/* Returns true only for a namespaced instance whose max_retention_usec does not exceed IDLE_TIMEOUT_USEC and which has neither varlink connections nor stdout streams. */ static bool server_is_idle(Server *s) { + assert(s); + + /* The server for the main namespace is never idle */ + if (!s->namespace) + return false; + + /* If a retention maximum is set larger than the idle time we need to be running to enforce it, hence + * turn off the idle logic. 
*/ + if (s->max_retention_usec > IDLE_TIMEOUT_USEC) + return false; + + /* We aren't idle if we have a varlink client */ + if (varlink_server_current_connections(s->varlink_server) > 0) + return false; + + /* If we have stdout streams we aren't idle */ + if (s->n_stdout_streams > 0) + return false; + + return true; +} + +/* Idle timer callback: asks the event loop to exit with code 0. Always returns 0. */ static int server_idle_handler(sd_event_source *source, uint64_t usec, void *userdata) { + Server *s = ASSERT_PTR(userdata); + + assert(source); + + log_debug("Server is idle, exiting."); + sd_event_exit(s->event, 0); + return 0; +} + +/* Brings the idle timer in line with server_is_idle(). If the server is not idle the timer is disabled and dropped and 0 is returned. If it is idle and a timer already exists, 1 is returned without touching it; otherwise a new timer firing after IDLE_TIMEOUT_USEC is created at SD_EVENT_PRIORITY_IDLE and 1 is returned. Returns a negative errno if the timer cannot be set up. */ int server_start_or_stop_idle_timer(Server *s) { + _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL; + int r; + + assert(s); + + if (!server_is_idle(s)) { + s->idle_event_source = sd_event_source_disable_unref(s->idle_event_source); + return 0; + } + + if (s->idle_event_source) + return 1; + + r = sd_event_add_time_relative(s->event, &source, CLOCK_MONOTONIC, IDLE_TIMEOUT_USEC, 0, server_idle_handler, s); + if (r < 0) + return log_error_errno(r, "Failed to allocate idle timer: %m"); + + r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IDLE); + if (r < 0) + return log_error_errno(r, "Failed to set idle timer priority: %m"); + + (void) sd_event_source_set_description(source, "idle-timer"); + + /* Ownership moves to the Server only once the timer is fully set up; on the error paths above the cleanup attribute releases it. */ s->idle_event_source = TAKE_PTR(source); + return 1; +} + +/* Pushes an existing idle timer back to IDLE_TIMEOUT_USEC from now. Returns 0 if there is no idle timer, 1 if it was refreshed, or a negative errno on failure. */ int server_refresh_idle_timer(Server *s) { + int r; + + assert(s); + + if (!s->idle_event_source) + return 0; + + r = sd_event_source_set_time_relative(s->idle_event_source, IDLE_TIMEOUT_USEC); + if (r < 0) + return log_error_errno(r, "Failed to refresh idle timer: %m"); + + return 1; +} + +/* Stores a copy of the namespace name and the matching _NAMESPACE= journal field in s. Returns 0 if namespace is NULL, 1 if the namespace was set, -EINVAL (through SYNTHETIC_ERRNO) if log_namespace_name_valid() rejects the name, or the result of log_oom() if an allocation fails. */ static int set_namespace(Server *s, const char *namespace) { + assert(s); + + if (!namespace) + return 0; + + if (!log_namespace_name_valid(namespace)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified namespace name not valid, refusing: %s", namespace); + + s->namespace = strdup(namespace); + if (!s->namespace) + return log_oom(); + + 
s->namespace_field = strjoin("_NAMESPACE=", namespace); + if (!s->namespace_field) + return log_oom(); + + /* 1 tells the caller that a namespace was actually configured, as opposed to 0 for none. */ return 1; +} + +int server_init(Server *s, const char *namespace) { + const char *native_socket, *syslog_socket, *stdout_socket, *varlink_socket, *e; + _cleanup_fdset_free_ FDSet *fds = NULL; + int n, r, fd, varlink_fd = -1; + bool no_sockets; + + assert(s); + + *s = (Server) { + .syslog_fd = -1, + .native_fd = -1, + .stdout_fd = -1, + .dev_kmsg_fd = -1, + .audit_fd = -1, + .hostname_fd = -1, + .notify_fd = -1, + + .compress.enabled = true, + .compress.threshold_bytes = UINT64_MAX, + .seal = true, + + .set_audit = true, + + .watchdog_usec = USEC_INFINITY, + + .sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC, + .sync_scheduled = false, + + .ratelimit_interval = DEFAULT_RATE_LIMIT_INTERVAL, + .ratelimit_burst = DEFAULT_RATE_LIMIT_BURST, + + .forward_to_wall = true, + + .max_file_usec = DEFAULT_MAX_FILE_USEC, + + .max_level_store = LOG_DEBUG, + .max_level_syslog = LOG_DEBUG, + .max_level_kmsg = LOG_NOTICE, + .max_level_console = LOG_INFO, + .max_level_wall = LOG_EMERG, + + .line_max = DEFAULT_LINE_MAX, + + .runtime_storage.name = "Runtime Journal", + .system_storage.name = "System Journal", + + .kmsg_own_ratelimit = { + .interval = DEFAULT_KMSG_OWN_INTERVAL, + .burst = DEFAULT_KMSG_OWN_BURST, + }, + }; + + r = set_namespace(s, namespace); + if (r < 0) + return r; + + /* By default, only read from /dev/kmsg if we are the main namespace */ + s->read_kmsg = !s->namespace; + s->storage = s->namespace ? 
STORAGE_PERSISTENT : STORAGE_AUTO; + + journal_reset_metrics(&s->system_storage.metrics); + journal_reset_metrics(&s->runtime_storage.metrics); + + server_parse_config_file(s); + + if (!s->namespace) { + /* Parse kernel command line, but only if we are not a namespace instance */ + r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX); + if (r < 0) + log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); + } + + if (!!s->ratelimit_interval != !!s->ratelimit_burst) { /* One set to 0 and the other not? */ + log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0", + s->ratelimit_interval, s->ratelimit_burst); + s->ratelimit_interval = s->ratelimit_burst = 0; + } + + e = getenv("RUNTIME_DIRECTORY"); + if (e) + s->runtime_directory = strdup(e); + else if (s->namespace) + s->runtime_directory = strjoin("/run/systemd/journal.", s->namespace); + else + s->runtime_directory = strdup("/run/systemd/journal"); + if (!s->runtime_directory) + return log_oom(); + + (void) mkdir_p(s->runtime_directory, 0755); + + s->user_journals = ordered_hashmap_new(NULL); + if (!s->user_journals) + return log_oom(); + + s->mmap = mmap_cache_new(); + if (!s->mmap) + return log_oom(); + + s->deferred_closes = set_new(NULL); + if (!s->deferred_closes) + return log_oom(); + + r = sd_event_default(&s->event); + if (r < 0) + return log_error_errno(r, "Failed to create event loop: %m"); + + n = sd_listen_fds(true); + if (n < 0) + return log_error_errno(n, "Failed to read listening file descriptors from environment: %m"); + + native_socket = strjoina(s->runtime_directory, "/socket"); + stdout_socket = strjoina(s->runtime_directory, "/stdout"); + syslog_socket = strjoina(s->runtime_directory, "/dev-log"); + varlink_socket = strjoina(s->runtime_directory, "/io.systemd.journal"); + + for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) { + + if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, native_socket, 0) > 0) { + + if 
(s->native_fd >= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many native sockets passed."); + + s->native_fd = fd; + + } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, stdout_socket, 0) > 0) { + + if (s->stdout_fd >= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many stdout sockets passed."); + + s->stdout_fd = fd; + + } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, syslog_socket, 0) > 0) { + + if (s->syslog_fd >= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many /dev/log sockets passed."); + + s->syslog_fd = fd; + + } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, varlink_socket, 0) > 0) { + + if (varlink_fd >= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many varlink sockets passed."); + + varlink_fd = fd; + } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) { + + if (s->audit_fd >= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many audit sockets passed."); + + s->audit_fd = fd; + + } else { + + if (!fds) { + fds = fdset_new(); + if (!fds) + return log_oom(); + } + + r = fdset_put(fds, fd); + if (r < 0) + return log_oom(); + } + } + + /* Try to restore streams, but don't bother if this fails */ + (void) server_restore_streams(s, fds); + + if (fdset_size(fds) > 0) { + log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds)); + fds = fdset_free(fds); + } + + no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0 && varlink_fd < 0; + + /* always open stdout, syslog, native, and kmsg sockets */ + + /* systemd-journald.socket: /run/systemd/journal/stdout */ + r = server_open_stdout_socket(s, stdout_socket); + if (r < 0) + return r; + + /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */ + r = server_open_syslog_socket(s, syslog_socket); + if (r < 0) + return r; + + /* systemd-journald.socket: /run/systemd/journal/socket */ + r = server_open_native_socket(s, native_socket); + if (r < 0) + return r; + + /* /dev/kmsg */ 
+ r = server_open_dev_kmsg(s); + if (r < 0) + return r; + + /* Unless we got *some* sockets and not audit, open audit socket */ + if (s->audit_fd >= 0 || no_sockets) { + r = server_open_audit(s); + if (r < 0) + return r; + } + + r = server_open_varlink(s, varlink_socket, varlink_fd); + if (r < 0) + return r; + + r = server_open_kernel_seqnum(s); + if (r < 0) + return r; + + r = server_open_hostname(s); + if (r < 0) + return r; + + r = setup_signals(s); + if (r < 0) + return r; + + s->ratelimit = journal_ratelimit_new(); + if (!s->ratelimit) + return log_oom(); + + r = cg_get_root_path(&s->cgroup_root); + if (r < 0) + return log_error_errno(r, "Failed to acquire cgroup root path: %m"); + + server_cache_hostname(s); + server_cache_boot_id(s); + server_cache_machine_id(s); + + if (s->namespace) + s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), ".", s->namespace); + else + s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s)); + if (!s->runtime_storage.path) + return log_oom(); + + e = getenv("LOGS_DIRECTORY"); + if (e) + s->system_storage.path = strdup(e); + else if (s->namespace) + s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), ".", s->namespace); + else + s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s)); + if (!s->system_storage.path) + return log_oom(); + + (void) server_connect_notify(s); + + (void) client_context_acquire_default(s); + + r = system_journal_open(s, false, false); + if (r < 0) + return r; + + server_start_or_stop_idle_timer(s); + return 0; +} + +void server_maybe_append_tags(Server *s) { +#if HAVE_GCRYPT + ManagedJournalFile *f; + usec_t n; + + n = now(CLOCK_REALTIME); + + if (s->system_journal) + journal_file_maybe_append_tag(s->system_journal->file, n); + + ORDERED_HASHMAP_FOREACH(f, s->user_journals) + journal_file_maybe_append_tag(f->file, n); +#endif +} + +void server_done(Server *s) { + assert(s); + + free(s->namespace); + 
free(s->namespace_field); + + set_free_with_destructor(s->deferred_closes, managed_journal_file_close); + + while (s->stdout_streams) + stdout_stream_free(s->stdout_streams); + + client_context_flush_all(s); + + (void) managed_journal_file_close(s->system_journal); + (void) managed_journal_file_close(s->runtime_journal); + + ordered_hashmap_free_with_destructor(s->user_journals, managed_journal_file_close); + + varlink_server_unref(s->varlink_server); + + sd_event_source_unref(s->syslog_event_source); + sd_event_source_unref(s->native_event_source); + sd_event_source_unref(s->stdout_event_source); + sd_event_source_unref(s->dev_kmsg_event_source); + sd_event_source_unref(s->audit_event_source); + sd_event_source_unref(s->sync_event_source); + sd_event_source_unref(s->sigusr1_event_source); + sd_event_source_unref(s->sigusr2_event_source); + sd_event_source_unref(s->sigterm_event_source); + sd_event_source_unref(s->sigint_event_source); + sd_event_source_unref(s->sigrtmin1_event_source); + sd_event_source_unref(s->hostname_event_source); + sd_event_source_unref(s->notify_event_source); + sd_event_source_unref(s->watchdog_event_source); + sd_event_source_unref(s->idle_event_source); + sd_event_unref(s->event); + + safe_close(s->syslog_fd); + safe_close(s->native_fd); + safe_close(s->stdout_fd); + safe_close(s->dev_kmsg_fd); + safe_close(s->audit_fd); + safe_close(s->hostname_fd); + safe_close(s->notify_fd); + + if (s->ratelimit) + journal_ratelimit_free(s->ratelimit); + + if (s->kernel_seqnum) + munmap(s->kernel_seqnum, sizeof(uint64_t)); + + free(s->buffer); + free(s->tty_path); + free(s->cgroup_root); + free(s->hostname_field); + free(s->runtime_storage.path); + free(s->system_storage.path); + free(s->runtime_directory); + + mmap_cache_unref(s->mmap); +} + +static const char* const storage_table[_STORAGE_MAX] = { + [STORAGE_AUTO] = "auto", + [STORAGE_VOLATILE] = "volatile", + [STORAGE_PERSISTENT] = "persistent", + [STORAGE_NONE] = "none" +}; + 
+DEFINE_STRING_TABLE_LOOKUP(storage, Storage); +DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting"); + +static const char* const split_mode_table[_SPLIT_MAX] = { + [SPLIT_LOGIN] = "login", + [SPLIT_UID] = "uid", + [SPLIT_NONE] = "none", +}; + +DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode); +DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting"); + +int config_parse_line_max( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + size_t *sz = ASSERT_PTR(data); + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) + /* Empty assignment means default */ + *sz = DEFAULT_LINE_MAX; + else { + uint64_t v; + + r = parse_size(rvalue, 1024, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue); + return 0; + } + + if (v < 79) { + /* Why specify 79 here as minimum line length? Simply, because the most common traditional + * terminal size is 80ch, and it might make sense to break one character before the natural + * line break would occur on that. */ + log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue); + *sz = 79; + } else if (v > (uint64_t) (SSIZE_MAX-1)) { + /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read() + * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large + * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll + * fail much earlier anyway. 
*/ + log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue); + *sz = SSIZE_MAX-1; + } else + *sz = (size_t) v; + } + + return 0; +} + +int config_parse_compress( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + JournalCompressOptions* compress = data; + int r; + + if (isempty(rvalue)) { + compress->enabled = true; + compress->threshold_bytes = UINT64_MAX; + } else if (streq(rvalue, "1")) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Compress= ambiguously specified as 1, enabling compression with default threshold"); + compress->enabled = true; + } else if (streq(rvalue, "0")) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Compress= ambiguously specified as 0, disabling compression"); + compress->enabled = false; + } else { + r = parse_boolean(rvalue); + if (r < 0) { + r = parse_size(rvalue, 1024, &compress->threshold_bytes); + if (r < 0) + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse Compress= value, ignoring: %s", rvalue); + else + compress->enabled = true; + } else + compress->enabled = r; + } + + return 0; +} diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h new file mode 100644 index 0000000..ee8f374 --- /dev/null +++ b/src/journal/journald-server.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-event.h" + +typedef struct Server Server; + +#include "conf-parser.h" +#include "hashmap.h" +#include "journald-context.h" +#include "journald-rate-limit.h" +#include "journald-stream.h" +#include "list.h" +#include "managed-journal-file.h" +#include "prioq.h" +#include "ratelimit.h" +#include "time-util.h" +#include "varlink.h" + +typedef enum Storage { + STORAGE_AUTO, + 
STORAGE_VOLATILE, + STORAGE_PERSISTENT, + STORAGE_NONE, + _STORAGE_MAX, + _STORAGE_INVALID = -EINVAL, +} Storage; + +typedef enum SplitMode { + SPLIT_UID, + SPLIT_LOGIN, /* deprecated */ + SPLIT_NONE, + _SPLIT_MAX, + _SPLIT_INVALID = -EINVAL, +} SplitMode; + +typedef struct JournalCompressOptions { + bool enabled; + uint64_t threshold_bytes; +} JournalCompressOptions; + +typedef struct JournalStorageSpace { + usec_t timestamp; + + uint64_t available; + uint64_t limit; + + uint64_t vfs_used; /* space used by journal files */ + uint64_t vfs_available; +} JournalStorageSpace; + +typedef struct JournalStorage { + const char *name; + char *path; + + JournalMetrics metrics; + JournalStorageSpace space; +} JournalStorage; + +struct Server { + char *namespace; + + int syslog_fd; + int native_fd; + int stdout_fd; + int dev_kmsg_fd; + int audit_fd; + int hostname_fd; + int notify_fd; + + sd_event *event; + + sd_event_source *syslog_event_source; + sd_event_source *native_event_source; + sd_event_source *stdout_event_source; + sd_event_source *dev_kmsg_event_source; + sd_event_source *audit_event_source; + sd_event_source *sync_event_source; + sd_event_source *sigusr1_event_source; + sd_event_source *sigusr2_event_source; + sd_event_source *sigterm_event_source; + sd_event_source *sigint_event_source; + sd_event_source *sigrtmin1_event_source; + sd_event_source *hostname_event_source; + sd_event_source *notify_event_source; + sd_event_source *watchdog_event_source; + sd_event_source *idle_event_source; + + ManagedJournalFile *runtime_journal; + ManagedJournalFile *system_journal; + OrderedHashmap *user_journals; + + uint64_t seqnum; + + char *buffer; + + JournalRateLimit *ratelimit; + usec_t sync_interval_usec; + usec_t ratelimit_interval; + unsigned ratelimit_burst; + + JournalStorage runtime_storage; + JournalStorage system_storage; + + JournalCompressOptions compress; + bool seal; + bool read_kmsg; + int set_audit; + + bool forward_to_kmsg; + bool forward_to_syslog; + bool 
forward_to_console; + bool forward_to_wall; + + unsigned n_forward_syslog_missed; + usec_t last_warn_forward_syslog_missed; + + usec_t max_retention_usec; + usec_t max_file_usec; + usec_t oldest_file_usec; + + LIST_HEAD(StdoutStream, stdout_streams); + LIST_HEAD(StdoutStream, stdout_streams_notify_queue); + unsigned n_stdout_streams; + + char *tty_path; + + int max_level_store; + int max_level_syslog; + int max_level_kmsg; + int max_level_console; + int max_level_wall; + + Storage storage; + SplitMode split_mode; + + MMapCache *mmap; + + Set *deferred_closes; + + uint64_t *kernel_seqnum; + bool dev_kmsg_readable:1; + RateLimit kmsg_own_ratelimit; + + bool send_watchdog:1; + bool sent_notify_ready:1; + bool sync_scheduled:1; + + char machine_id_field[sizeof("_MACHINE_ID=") + 32]; + char boot_id_field[sizeof("_BOOT_ID=") + 32]; + char *hostname_field; + char *namespace_field; + char *runtime_directory; + + /* Cached cgroup root, so that we don't have to query that all the time */ + char *cgroup_root; + + usec_t watchdog_usec; + + usec_t last_realtime_clock; + + size_t line_max; + + /* Caching of client metadata */ + Hashmap *client_contexts; + Prioq *client_contexts_lru; + + usec_t last_cache_pid_flush; + + ClientContext *my_context; /* the context of journald itself */ + ClientContext *pid1_context; /* the context of PID 1 */ + + VarlinkServer *varlink_server; +}; + +#define SERVER_MACHINE_ID(s) ((s)->machine_id_field + STRLEN("_MACHINE_ID=")) + +/* Extra fields for any log messages */ +#define N_IOVEC_META_FIELDS 24 + +/* Extra fields for log messages that contain OBJECT_PID= (i.e. log about another process) */ +#define N_IOVEC_OBJECT_FIELDS 18 + +/* Maximum number of fields we'll add in for driver (i.e. 
internal) messages */ +#define N_IOVEC_PAYLOAD_FIELDS 16 + +/* kmsg: Maximum number of extra fields we'll import from the kernel's /dev/kmsg */ +#define N_IOVEC_KERNEL_FIELDS 64 + +/* kmsg: Maximum number of extra fields we'll import from udev's devices */ +#define N_IOVEC_UDEV_FIELDS 32 + +/* audit: Maximum number of extra fields we'll import from audit messages */ +#define N_IOVEC_AUDIT_FIELDS 64 + +void server_dispatch_message(Server *s, struct iovec *iovec, size_t n, size_t m, ClientContext *c, const struct timeval *tv, int priority, pid_t object_pid); +void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) _sentinel_ _printf_(4,0); + +/* gperf lookup function */ +const struct ConfigPerfItem* journald_gperf_lookup(const char *key, GPERF_LEN_TYPE length); + +CONFIG_PARSER_PROTOTYPE(config_parse_storage); +CONFIG_PARSER_PROTOTYPE(config_parse_line_max); +CONFIG_PARSER_PROTOTYPE(config_parse_compress); + +const char *storage_to_string(Storage s) _const_; +Storage storage_from_string(const char *s) _pure_; + +CONFIG_PARSER_PROTOTYPE(config_parse_split_mode); + +const char *split_mode_to_string(SplitMode s) _const_; +SplitMode split_mode_from_string(const char *s) _pure_; + +int server_init(Server *s, const char *namespace); +void server_done(Server *s); +void server_sync(Server *s); +void server_vacuum(Server *s, bool verbose); +void server_rotate(Server *s); +int server_schedule_sync(Server *s, int priority); +int server_flush_to_var(Server *s, bool require_flag_file); +void server_maybe_append_tags(Server *s); +int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata); +void server_space_usage_message(Server *s, JournalStorage *storage); + +int server_start_or_stop_idle_timer(Server *s); +int server_refresh_idle_timer(Server *s); diff --git a/src/journal/journald-stream.c b/src/journal/journald-stream.c new file mode 100644 index 0000000..f2f3f0a --- /dev/null +++ 
b/src/journal/journald-stream.c @@ -0,0 +1,990 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stddef.h> +#include <unistd.h> + +#if HAVE_SELINUX +#include <selinux/selinux.h> +#endif + +#include "sd-daemon.h" +#include "sd-event.h" + +#include "alloc-util.h" +#include "dirent-util.h" +#include "env-file.h" +#include "errno-util.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "io-util.h" +#include "journald-console.h" +#include "journald-context.h" +#include "journald-kmsg.h" +#include "journald-server.h" +#include "journald-stream.h" +#include "journald-syslog.h" +#include "journald-wall.h" +#include "mkdir.h" +#include "parse-util.h" +#include "process-util.h" +#include "selinux-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "syslog-util.h" +#include "tmpfile-util.h" +#include "unit-name.h" +#include "user-util.h" + +#define STDOUT_STREAMS_MAX 4096 + +/* During the "setup" protocol phase of the stream logic let's define a different maximum line length than + * during the actual operational phase. We want to allow users to specify very short line lengths after all, + * but the unit name we embed in the setup protocol might be longer than that. 
Hence, during the setup phase + * let's enforce a line length matching the maximum unit name length (255) */ +#define STDOUT_STREAM_SETUP_PROTOCOL_LINE_MAX (UNIT_NAME_MAX-1U) + +typedef enum StdoutStreamState { + STDOUT_STREAM_IDENTIFIER, + STDOUT_STREAM_UNIT_ID, + STDOUT_STREAM_PRIORITY, + STDOUT_STREAM_LEVEL_PREFIX, + STDOUT_STREAM_FORWARD_TO_SYSLOG, + STDOUT_STREAM_FORWARD_TO_KMSG, + STDOUT_STREAM_FORWARD_TO_CONSOLE, + STDOUT_STREAM_RUNNING, +} StdoutStreamState; + +/* The different types of log record terminators: a real \n was read, a NUL character was read, the maximum line length + * was reached, or the end of the stream was reached */ + +typedef enum LineBreak { + LINE_BREAK_NEWLINE, + LINE_BREAK_NUL, + LINE_BREAK_LINE_MAX, + LINE_BREAK_EOF, + LINE_BREAK_PID_CHANGE, + _LINE_BREAK_MAX, + _LINE_BREAK_INVALID = -EINVAL, +} LineBreak; + +struct StdoutStream { + Server *server; + StdoutStreamState state; + + int fd; + + struct ucred ucred; + char *label; + char *identifier; + char *unit_id; + int priority; + bool level_prefix:1; + bool forward_to_syslog:1; + bool forward_to_kmsg:1; + bool forward_to_console:1; + + bool fdstore:1; + bool in_notify_queue:1; + + char *buffer; + size_t length; + + sd_event_source *event_source; + + char *state_file; + + ClientContext *context; + + LIST_FIELDS(StdoutStream, stdout_stream); + LIST_FIELDS(StdoutStream, stdout_stream_notify_queue); + + char id_field[STRLEN("_STREAM_ID=") + SD_ID128_STRING_MAX]; +}; + +StdoutStream* stdout_stream_free(StdoutStream *s) { + if (!s) + return NULL; + + if (s->server) { + if (s->context) + client_context_release(s->server, s->context); + + assert(s->server->n_stdout_streams > 0); + s->server->n_stdout_streams--; + LIST_REMOVE(stdout_stream, s->server->stdout_streams, s); + + if (s->in_notify_queue) + LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s); + + (void) server_start_or_stop_idle_timer(s->server); /* Maybe we are idle now? 
*/ + } + + sd_event_source_disable_unref(s->event_source); + safe_close(s->fd); + free(s->label); + free(s->identifier); + free(s->unit_id); + free(s->state_file); + free(s->buffer); + + return mfree(s); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(StdoutStream*, stdout_stream_free); + +void stdout_stream_destroy(StdoutStream *s) { + if (!s) + return; + + if (s->state_file) + (void) unlink(s->state_file); + + stdout_stream_free(s); +} + +static int stdout_stream_save(StdoutStream *s) { + _cleanup_(unlink_and_freep) char *temp_path = NULL; + _cleanup_fclose_ FILE *f = NULL; + int r; + + assert(s); + + if (s->state != STDOUT_STREAM_RUNNING) + return 0; + + if (!s->state_file) { + struct stat st; + + r = fstat(s->fd, &st); + if (r < 0) + return log_warning_errno(errno, "Failed to stat connected stream: %m"); + + /* We use device and inode numbers as identifier for the stream */ + r = asprintf(&s->state_file, "%s/streams/%lu:%lu", s->server->runtime_directory, (unsigned long) st.st_dev, (unsigned long) st.st_ino); + if (r < 0) + return log_oom(); + } + + (void) mkdir_parents(s->state_file, 0755); + + r = fopen_temporary(s->state_file, &f, &temp_path); + if (r < 0) + goto fail; + + fprintf(f, + "# This is private data. 
Do not parse\n" + "PRIORITY=%i\n" + "LEVEL_PREFIX=%i\n" + "FORWARD_TO_SYSLOG=%i\n" + "FORWARD_TO_KMSG=%i\n" + "FORWARD_TO_CONSOLE=%i\n" + "STREAM_ID=%s\n", + s->priority, + s->level_prefix, + s->forward_to_syslog, + s->forward_to_kmsg, + s->forward_to_console, + s->id_field + STRLEN("_STREAM_ID=")); + + if (!isempty(s->identifier)) { + _cleanup_free_ char *escaped = NULL; + + escaped = cescape(s->identifier); + if (!escaped) { + r = -ENOMEM; + goto fail; + } + + fprintf(f, "IDENTIFIER=%s\n", escaped); + } + + if (!isempty(s->unit_id)) { + _cleanup_free_ char *escaped = NULL; + + escaped = cescape(s->unit_id); + if (!escaped) { + r = -ENOMEM; + goto fail; + } + + fprintf(f, "UNIT=%s\n", escaped); + } + + r = fflush_and_check(f); + if (r < 0) + goto fail; + + if (rename(temp_path, s->state_file) < 0) { + r = -errno; + goto fail; + } + + temp_path = mfree(temp_path); + + if (!s->fdstore && !s->in_notify_queue) { + LIST_PREPEND(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s); + s->in_notify_queue = true; + + if (s->server->notify_event_source) { + r = sd_event_source_set_enabled(s->server->notify_event_source, SD_EVENT_ON); + if (r < 0) + log_warning_errno(r, "Failed to enable notify event source: %m"); + } + } + + return 0; + +fail: + (void) unlink(s->state_file); + return log_error_errno(r, "Failed to save stream data %s: %m", s->state_file); +} + +static int stdout_stream_log( + StdoutStream *s, + const char *p, + LineBreak line_break) { + + struct iovec *iovec; + int priority; + char syslog_priority[] = "PRIORITY=\0"; + char syslog_facility[STRLEN("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int) + 1]; + _cleanup_free_ char *message = NULL, *syslog_identifier = NULL; + size_t n = 0, m; + int r; + + assert(s); + assert(p); + + assert(line_break >= 0); + assert(line_break < _LINE_BREAK_MAX); + + if (s->context) + (void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY); + else if (pid_is_valid(s->ucred.pid)) { + r 
= client_context_acquire(s->server, s->ucred.pid, &s->ucred, s->label, strlen_ptr(s->label), s->unit_id, &s->context); + if (r < 0) + log_warning_errno(r, "Failed to acquire client context, ignoring: %m"); + } + + priority = s->priority; + + if (s->level_prefix) + syslog_parse_priority(&p, &priority, false); + + if (!client_context_test_priority(s->context, priority)) + return 0; + + if (isempty(p)) + return 0; + + if (s->forward_to_syslog || s->server->forward_to_syslog) + server_forward_syslog(s->server, syslog_fixup_facility(priority), s->identifier, p, &s->ucred, NULL); + + if (s->forward_to_kmsg || s->server->forward_to_kmsg) + server_forward_kmsg(s->server, priority, s->identifier, p, &s->ucred); + + if (s->forward_to_console || s->server->forward_to_console) + server_forward_console(s->server, priority, s->identifier, p, &s->ucred); + + if (s->server->forward_to_wall) + server_forward_wall(s->server, priority, s->identifier, p, &s->ucred); + + m = N_IOVEC_META_FIELDS + 7 + client_context_extra_fields_n_iovec(s->context); + iovec = newa(struct iovec, m); + + iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout"); + iovec[n++] = IOVEC_MAKE_STRING(s->id_field); + + syslog_priority[STRLEN("PRIORITY=")] = '0' + LOG_PRI(priority); + iovec[n++] = IOVEC_MAKE_STRING(syslog_priority); + + if (priority & LOG_FACMASK) { + xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)); + iovec[n++] = IOVEC_MAKE_STRING(syslog_facility); + } + + if (s->identifier) { + syslog_identifier = strjoin("SYSLOG_IDENTIFIER=", s->identifier); + if (syslog_identifier) + iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier); + } + + static const char * const line_break_field_table[_LINE_BREAK_MAX] = { + [LINE_BREAK_NEWLINE] = NULL, /* Do not add field if traditional newline */ + [LINE_BREAK_NUL] = "_LINE_BREAK=nul", + [LINE_BREAK_LINE_MAX] = "_LINE_BREAK=line-max", + [LINE_BREAK_EOF] = "_LINE_BREAK=eof", + [LINE_BREAK_PID_CHANGE] = "_LINE_BREAK=pid-change", + }; + + const char *c = 
line_break_field_table[line_break]; + + /* If this log message was generated due to an uncommon line break then mention this in the log + * entry */ + if (c) + iovec[n++] = IOVEC_MAKE_STRING(c); + + message = strjoin("MESSAGE=", p); + if (message) + iovec[n++] = IOVEC_MAKE_STRING(message); + + server_dispatch_message(s->server, iovec, n, m, s->context, NULL, priority, 0); + return 0; +} + +static int syslog_parse_priority_and_facility(const char *s) { + int prio, r; + + /* Parses both facility and priority in one value, i.e. is different from log_level_from_string() + * which only parses the priority and refuses any facility value */ + + r = safe_atoi(s, &prio); + if (r < 0) + return r; + + if (prio < 0 || prio > 999) + return -ERANGE; + + return prio; +} + +static int stdout_stream_line(StdoutStream *s, char *p, LineBreak line_break) { + char *orig; + int r; + + assert(s); + assert(p); + + orig = p; + p = strstrip(p); + + /* line breaks by NUL, line max length or EOF are not permissible during the negotiation part of the protocol */ + if (line_break != LINE_BREAK_NEWLINE && s->state != STDOUT_STREAM_RUNNING) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "Control protocol line not properly terminated."); + + switch (s->state) { + + case STDOUT_STREAM_IDENTIFIER: + if (!isempty(p)) { + s->identifier = strdup(p); + if (!s->identifier) + return log_oom(); + } + + s->state = STDOUT_STREAM_UNIT_ID; + return 0; + + case STDOUT_STREAM_UNIT_ID: + if (s->ucred.uid == 0 && + unit_name_is_valid(p, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE)) { + + s->unit_id = strdup(p); + if (!s->unit_id) + return log_oom(); + } + + s->state = STDOUT_STREAM_PRIORITY; + return 0; + + case STDOUT_STREAM_PRIORITY: { + int priority; + + priority = syslog_parse_priority_and_facility(p); + if (priority < 0) + return log_warning_errno(priority, "Failed to parse log priority line: %m"); + + s->priority = priority; + s->state = STDOUT_STREAM_LEVEL_PREFIX; + return 0; + } + + case 
STDOUT_STREAM_LEVEL_PREFIX: + r = parse_boolean(p); + if (r < 0) + return log_warning_errno(r, "Failed to parse level prefix line: %m"); + + s->level_prefix = r; + s->state = STDOUT_STREAM_FORWARD_TO_SYSLOG; + return 0; + + case STDOUT_STREAM_FORWARD_TO_SYSLOG: + r = parse_boolean(p); + if (r < 0) + return log_warning_errno(r, "Failed to parse forward to syslog line: %m"); + + s->forward_to_syslog = r; + s->state = STDOUT_STREAM_FORWARD_TO_KMSG; + return 0; + + case STDOUT_STREAM_FORWARD_TO_KMSG: + r = parse_boolean(p); + if (r < 0) + return log_warning_errno(r, "Failed to parse copy to kmsg line: %m"); + + s->forward_to_kmsg = r; + s->state = STDOUT_STREAM_FORWARD_TO_CONSOLE; + return 0; + + case STDOUT_STREAM_FORWARD_TO_CONSOLE: + r = parse_boolean(p); + if (r < 0) + return log_warning_errno(r, "Failed to parse copy to console line."); + + s->forward_to_console = r; + s->state = STDOUT_STREAM_RUNNING; + + /* Try to save the stream, so that journald can be restarted and we can recover */ + (void) stdout_stream_save(s); + return 0; + + case STDOUT_STREAM_RUNNING: + return stdout_stream_log(s, orig, line_break); + } + + assert_not_reached(); +} + +static int stdout_stream_found( + StdoutStream *s, + char *p, + size_t l, + LineBreak line_break) { + + char saved; + int r; + + assert(s); + assert(p); + + /* Let's NUL terminate the specified buffer for this call, and revert back afterwards */ + saved = p[l]; + p[l] = 0; + r = stdout_stream_line(s, p, line_break); + p[l] = saved; + + return r; +} + +static size_t stdout_stream_line_max(StdoutStream *s) { + assert(s); + + /* During the "setup" phase of our protocol, let's ensure we use a line length where a full unit name + * can fit in */ + if (s->state != STDOUT_STREAM_RUNNING) + return STDOUT_STREAM_SETUP_PROTOCOL_LINE_MAX; + + /* After the protocol's "setup" phase is complete, let's use whatever the user configured */ + return s->server->line_max; +} + +static int stdout_stream_scan( + StdoutStream *s, + char *p, + 
size_t remaining, + LineBreak force_flush, + size_t *ret_consumed) { + + size_t consumed = 0; + int r; + + assert(s); + assert(p); + + + for (;;) { + LineBreak line_break; + size_t skip, found; + char *end1, *end2; + size_t tmp_remaining, line_max; + + line_max = stdout_stream_line_max(s); + tmp_remaining = MIN(remaining, line_max); + + end1 = memchr(p, '\n', tmp_remaining); + end2 = memchr(p, 0, end1 ? (size_t) (end1 - p) : tmp_remaining); + + if (end2) { + /* We found a NUL terminator */ + found = end2 - p; + skip = found + 1; + line_break = LINE_BREAK_NUL; + } else if (end1) { + /* We found a \n terminator */ + found = end1 - p; + skip = found + 1; + line_break = LINE_BREAK_NEWLINE; + } else if (remaining >= line_max) { + /* Force a line break after the maximum line length */ + found = skip = line_max; + line_break = LINE_BREAK_LINE_MAX; + } else + break; + + r = stdout_stream_found(s, p, found, line_break); + if (r < 0) + return r; + + p += skip; + consumed += skip; + remaining -= skip; + } + + if (force_flush >= 0 && remaining > 0) { + r = stdout_stream_found(s, p, remaining, force_flush); + if (r < 0) + return r; + + consumed += remaining; + } + + if (ret_consumed) + *ret_consumed = consumed; + + return 0; +} + +static int stdout_stream_process(sd_event_source *es, int fd, uint32_t revents, void *userdata) { + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; + size_t limit, consumed, allocated; + StdoutStream *s = ASSERT_PTR(userdata); + struct ucred *ucred; + struct iovec iovec; + ssize_t l; + char *p; + int r; + + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + + if ((revents|EPOLLIN|EPOLLHUP) != (EPOLLIN|EPOLLHUP)) { + log_error("Got invalid event from epoll for stdout stream: %"PRIx32, revents); + goto terminate; + } + + /* If the buffer is almost full, add room for another 1K */ + allocated = MALLOC_ELEMENTSOF(s->buffer); + if (s->length + 512 >= allocated) 
{ + if (!GREEDY_REALLOC(s->buffer, s->length + 1 + 1024)) { + log_oom(); + goto terminate; + } + + allocated = MALLOC_ELEMENTSOF(s->buffer); + } + + /* Try to make use of the allocated buffer in full, but never read more than the configured line size. Also, + * always leave room for a terminating NUL we might need to add. */ + limit = MIN(allocated - 1, MAX(s->server->line_max, STDOUT_STREAM_SETUP_PROTOCOL_LINE_MAX)); + assert(s->length <= limit); + iovec = IOVEC_MAKE(s->buffer + s->length, limit - s->length); + + l = recvmsg(s->fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + if (l < 0) { + if (ERRNO_IS_TRANSIENT(errno)) + return 0; + + log_warning_errno(errno, "Failed to read from stream: %m"); + goto terminate; + } + cmsg_close_all(&msghdr); + + if (l == 0) { + (void) stdout_stream_scan(s, s->buffer, s->length, /* force_flush = */ LINE_BREAK_EOF, NULL); + goto terminate; + } + + /* Invalidate the context if the PID of the sender changed. This happens when a forked process + * inherits stdout/stderr from a parent. In this case getpeercred() returns the ucred of the parent, + * which can be invalid if the parent has exited in the meantime. 
*/ + ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred); + if (ucred && ucred->pid != s->ucred.pid) { + /* Force out any previously half-written lines from a different process, before we switch to + * the new ucred structure for everything we just added */ + r = stdout_stream_scan(s, s->buffer, s->length, /* force_flush = */ LINE_BREAK_PID_CHANGE, NULL); + if (r < 0) + goto terminate; + + s->context = client_context_release(s->server, s->context); + + p = s->buffer + s->length; + } else { + p = s->buffer; + l += s->length; + } + + /* Always copy in the new credentials */ + if (ucred) + s->ucred = *ucred; + + r = stdout_stream_scan(s, p, l, _LINE_BREAK_INVALID, &consumed); + if (r < 0) + goto terminate; + + /* Move what wasn't consumed to the front of the buffer */ + assert(consumed <= (size_t) l); + s->length = l - consumed; + memmove(s->buffer, p + consumed, s->length); + + return 1; + +terminate: + stdout_stream_destroy(s); + return 0; +} + +int stdout_stream_install(Server *s, int fd, StdoutStream **ret) { + _cleanup_(stdout_stream_freep) StdoutStream *stream = NULL; + sd_id128_t id; + int r; + + assert(s); + assert(fd >= 0); + + r = sd_id128_randomize(&id); + if (r < 0) + return log_error_errno(r, "Failed to generate stream ID: %m"); + + stream = new(StdoutStream, 1); + if (!stream) + return log_oom(); + + *stream = (StdoutStream) { + .fd = -1, + .priority = LOG_INFO, + .ucred = UCRED_INVALID, + }; + + xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id)); + + r = getpeercred(fd, &stream->ucred); + if (r < 0) + return log_error_errno(r, "Failed to determine peer credentials: %m"); + + r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_error_errno(r, "SO_PASSCRED failed: %m"); + + if (mac_selinux_use()) { + r = getpeersec(fd, &stream->label); + if (r < 0 && r != -EOPNOTSUPP) + (void) log_warning_errno(r, "Failed to determine peer security context: %m"); + } + + (void) 
shutdown(fd, SHUT_WR); + + r = sd_event_add_io(s->event, &stream->event_source, fd, EPOLLIN, stdout_stream_process, stream); + if (r < 0) + return log_error_errno(r, "Failed to add stream to event loop: %m"); + + r = sd_event_source_set_priority(stream->event_source, SD_EVENT_PRIORITY_NORMAL+5); + if (r < 0) + return log_error_errno(r, "Failed to adjust stdout event source priority: %m"); + + stream->fd = fd; + + stream->server = s; + LIST_PREPEND(stdout_stream, s->stdout_streams, stream); + s->n_stdout_streams++; + + (void) server_start_or_stop_idle_timer(s); /* Maybe no longer idle? */ + + if (ret) + *ret = stream; + + TAKE_PTR(stream); + return 0; +} + +/* Event loop callback for the listening stdout socket: accepts one pending connection and hands it to stdout_stream_install(). Any event mask other than plain EPOLLIN is reported as an EIO error. Errors matched by ERRNO_IS_ACCEPT_AGAIN() are ignored by returning 0. When s->n_stdout_streams has already reached STDOUT_STREAMS_MAX the new connection is closed right away and a driver message is logged instead. The accepted fd is released from automatic closing only after the stream was installed. */ static int stdout_stream_new(sd_event_source *es, int listen_fd, uint32_t revents, void *userdata) { + _cleanup_close_ int fd = -1; + Server *s = ASSERT_PTR(userdata); + int r; + + if (revents != EPOLLIN) + return log_error_errno(SYNTHETIC_ERRNO(EIO), + "Got invalid event from epoll for stdout server fd: %" PRIx32, + revents); + + fd = accept4(s->stdout_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC); + if (fd < 0) { + if (ERRNO_IS_ACCEPT_AGAIN(errno)) + return 0; + + return log_error_errno(errno, "Failed to accept stdout connection: %m"); + } + + if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) { + struct ucred u = UCRED_INVALID; + + (void) getpeercred(fd, &u); + + /* By closing fd here we make sure that the client won't wait too long for journald to + * gather all the data it adds to the error message to find out that the connection has + * just been refused. 
+ */ + fd = safe_close(fd); + + server_driver_message(s, u.pid, NULL, LOG_MESSAGE("Too many stdout streams, refusing connection."), NULL); + return 0; + } + + r = stdout_stream_install(s, fd, NULL); + if (r < 0) + return r; + + TAKE_FD(fd); + return 0; +} + +/* Reads the saved per-stream settings from the state file runtime_directory/streams/fname (the path is computed once and cached in stream->state_file) and applies them to the stream. Values that are missing or fail to parse are skipped and leave the current setting untouched; only an allocation failure for the path or a failure of parse_env_file() is returned as an error. */ static int stdout_stream_load(StdoutStream *stream, const char *fname) { + _cleanup_free_ char + *priority = NULL, + *level_prefix = NULL, + *forward_to_syslog = NULL, + *forward_to_kmsg = NULL, + *forward_to_console = NULL, + *stream_id = NULL; + int r; + + assert(stream); + assert(fname); + + if (!stream->state_file) { + stream->state_file = path_join(stream->server->runtime_directory, "streams", fname); + if (!stream->state_file) + return log_oom(); + } + + r = parse_env_file(NULL, stream->state_file, + "PRIORITY", &priority, + "LEVEL_PREFIX", &level_prefix, + "FORWARD_TO_SYSLOG", &forward_to_syslog, + "FORWARD_TO_KMSG", &forward_to_kmsg, + "FORWARD_TO_CONSOLE", &forward_to_console, + "IDENTIFIER", &stream->identifier, + "UNIT", &stream->unit_id, + "STREAM_ID", &stream_id); + if (r < 0) + return log_error_errno(r, "Failed to read: %s", stream->state_file); + + if (priority) { + int p; + + p = syslog_parse_priority_and_facility(priority); + if (p >= 0) + stream->priority = p; + } + + if (level_prefix) { + r = parse_boolean(level_prefix); + if (r >= 0) + stream->level_prefix = r; + } + + if (forward_to_syslog) { + r = parse_boolean(forward_to_syslog); + if (r >= 0) + stream->forward_to_syslog = r; + } + + if (forward_to_kmsg) { + r = parse_boolean(forward_to_kmsg); + if (r >= 0) + stream->forward_to_kmsg = r; + } + + if (forward_to_console) { + r = parse_boolean(forward_to_console); + if (r >= 0) + stream->forward_to_console = r; + } + + if (stream_id) { + sd_id128_t id; + + r = sd_id128_from_string(stream_id, &id); + if (r >= 0) + xsprintf(stream->id_field, "_STREAM_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id)); + } + + return 0; +} + +/* Installs a stream for an fd that was handed back to us, marks it as running and as already stored in the fd store, and reloads its saved settings from the state file fname (load errors are ignored). Refuses with -ENOBUFS once STDOUT_STREAMS_MAX streams exist. */ static int stdout_stream_restore(Server *s, const char *fname, int 
fd) { + StdoutStream *stream; + int r; + + assert(s); + assert(fname); + assert(fd >= 0); + + if (s->n_stdout_streams >= STDOUT_STREAMS_MAX) { + log_warning("Too many stdout streams, refusing restoring of stream."); + return -ENOBUFS; + } + + r = stdout_stream_install(s, fd, &stream); + if (r < 0) + return r; + + stream->state = STDOUT_STREAM_RUNNING; + stream->fdstore = true; + + /* Ignore all parsing errors */ + (void) stdout_stream_load(stream, fname); + + return 0; +} + +/* Walks runtime_directory/streams and, for every entry whose name parses as DEV:INO, looks for a socket in fds with the same st_dev and st_ino. A matching fd is removed from the set and restored as a stream (and closed again if restoring fails); a state file without a matching fd is deleted. Entries with other names are skipped. A missing directory is not an error, but an fstat() failure on any fd of the set aborts the whole scan. */ int server_restore_streams(Server *s, FDSet *fds) { + _cleanup_closedir_ DIR *d = NULL; + const char *path; + int r; + + path = strjoina(s->runtime_directory, "/streams"); + d = opendir(path); + if (!d) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to enumerate %s: %m", path); + } + + FOREACH_DIRENT(de, d, goto fail) { + unsigned long st_dev, st_ino; + bool found = false; + int fd; + + if (sscanf(de->d_name, "%lu:%lu", &st_dev, &st_ino) != 2) + continue; + + FDSET_FOREACH(fd, fds) { + struct stat st; + + if (fstat(fd, &st) < 0) + return log_error_errno(errno, "Failed to stat %s: %m", de->d_name); + + if (S_ISSOCK(st.st_mode) && st.st_dev == st_dev && st.st_ino == st_ino) { + found = true; + break; + } + } + + if (!found) { + /* No file descriptor? 
Then let's delete the state file */ + log_debug("Cannot restore stream file %s", de->d_name); + if (unlinkat(dirfd(d), de->d_name, 0) < 0) + log_warning_errno(errno, "Failed to remove %s/%s: %m", path, de->d_name); + continue; + } + + fdset_remove(fds, fd); + + r = stdout_stream_restore(s, de->d_name, fd); + if (r < 0) + safe_close(fd); + } + + return 0; + +fail: + return log_error_errno(errno, "Failed to read streams directory: %m"); +} + +/* Sets up the listening socket for stdout stream connections. If s->stdout_fd is negative, a non-blocking AF_UNIX SOCK_STREAM socket is created, bound to stdout_socket after unlinking any old socket node, given mode 0666 and put into listening state; otherwise the existing fd is only switched to non-blocking mode. In both cases the fd is then added to the event loop with stdout_stream_new() as callback. */ int server_open_stdout_socket(Server *s, const char *stdout_socket) { + int r; + + assert(s); + assert(stdout_socket); + + if (s->stdout_fd < 0) { + union sockaddr_union sa; + socklen_t sa_len; + + r = sockaddr_un_set_path(&sa.un, stdout_socket); + if (r < 0) + return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", stdout_socket); + sa_len = r; + + s->stdout_fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (s->stdout_fd < 0) + return log_error_errno(errno, "socket() failed: %m"); + + (void) sockaddr_un_unlink(&sa.un); + + r = bind(s->stdout_fd, &sa.sa, sa_len); + if (r < 0) + return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); + + (void) chmod(sa.un.sun_path, 0666); + + if (listen(s->stdout_fd, SOMAXCONN_DELUXE) < 0) + return log_error_errno(errno, "listen(%s) failed: %m", sa.un.sun_path); + } else + (void) fd_nonblock(s->stdout_fd, true); + + r = sd_event_add_io(s->event, &s->stdout_event_source, s->stdout_fd, EPOLLIN, stdout_stream_new, s); + if (r < 0) + return log_error_errno(r, "Failed to add stdout server fd to event source: %m"); + + r = sd_event_source_set_priority(s->stdout_event_source, SD_EVENT_PRIORITY_NORMAL+5); + if (r < 0) + return log_error_errno(r, "Failed to adjust priority of stdout server event source: %m"); + + return 0; +} + +/* Sends the stream fd as SCM_RIGHTS together with an FDSTORE=1 message over the server notify socket. On EAGAIN the function returns early and the stream stays in the notify queue; in every other case (success or hard error) the stream is removed from the queue, and fdstore is set only on success. The stream must be queued and must not yet be marked as stored. */ void stdout_stream_send_notify(StdoutStream *s) { + struct iovec iovec = { + .iov_base = (char*) "FDSTORE=1", + .iov_len = STRLEN("FDSTORE=1"), + }; + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + 
}; + struct cmsghdr *cmsg; + ssize_t l; + + assert(s); + assert(!s->fdstore); + assert(s->in_notify_queue); + assert(s->server); + assert(s->server->notify_fd >= 0); + + /* Store the connection fd in PID 1, so that we get it passed + * in again on next start */ + + msghdr.msg_controllen = CMSG_SPACE(sizeof(int)); + msghdr.msg_control = alloca0(msghdr.msg_controllen); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + memcpy(CMSG_DATA(cmsg), &s->fd, sizeof(int)); + + l = sendmsg(s->server->notify_fd, &msghdr, MSG_DONTWAIT|MSG_NOSIGNAL); + if (l < 0) { + if (errno == EAGAIN) + return; + + log_error_errno(errno, "Failed to send stream file descriptor to service manager: %m"); + } else { + log_debug("Successfully sent stream file descriptor to service manager."); + s->fdstore = 1; + } + + LIST_REMOVE(stdout_stream_notify_queue, s->server->stdout_streams_notify_queue, s); + s->in_notify_queue = false; + +} diff --git a/src/journal/journald-stream.h b/src/journal/journald-stream.h new file mode 100644 index 0000000..7b756c0 --- /dev/null +++ b/src/journal/journald-stream.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +typedef struct StdoutStream StdoutStream; + +#include "fdset.h" +#include "journald-server.h" + +int server_open_stdout_socket(Server *s, const char *stdout_socket); +int server_restore_streams(Server *s, FDSet *fds); + +StdoutStream* stdout_stream_free(StdoutStream *s); +int stdout_stream_install(Server *s, int fd, StdoutStream **ret); +void stdout_stream_destroy(StdoutStream *s); +void stdout_stream_send_notify(StdoutStream *s); diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c new file mode 100644 index 0000000..ce02378 --- /dev/null +++ b/src/journal/journald-syslog.c @@ -0,0 +1,526 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stddef.h> +#include <sys/epoll.h> +#include 
<unistd.h> + +#include "sd-messages.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "io-util.h" +#include "journald-console.h" +#include "journald-kmsg.h" +#include "journald-server.h" +#include "journald-syslog.h" +#include "journald-wall.h" +#include "process-util.h" +#include "selinux-util.h" +#include "socket-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "syslog-util.h" + +/* Warn once every 30s if we missed syslog messages */ +#define WARN_FORWARD_SYSLOG_MISSED_USEC (30 * USEC_PER_SEC) + +/* Sends the given iovec array as one message from s->syslog_fd to the socket runtime_directory/syslog, attaching ucred as SCM_CREDENTIALS when it is provided. EAGAIN only increments n_forward_syslog_missed. If sending with credentials fails with ESRCH or EPERM, the send is retried once with the PID replaced by our own. Other errors except ENOENT are logged at debug level. The tv parameter is not used in this function. */ static void forward_syslog_iovec( + Server *s, + const struct iovec *iovec, + unsigned n_iovec, + const struct ucred *ucred, + const struct timeval *tv) { + + union sockaddr_union sa; + + struct msghdr msghdr = { + .msg_iov = (struct iovec *) iovec, + .msg_iovlen = n_iovec, + }; + struct cmsghdr *cmsg; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; + const char *j; + int r; + + assert(s); + assert(iovec); + assert(n_iovec > 0); + + j = strjoina(s->runtime_directory, "/syslog"); + r = sockaddr_un_set_path(&sa.un, j); + if (r < 0) { + log_debug_errno(r, "Forwarding socket path %s too long for AF_UNIX, not forwarding: %m", j); + return; + } + + msghdr.msg_name = &sa.sa; + msghdr.msg_namelen = r; + + if (ucred) { + zero(control); + msghdr.msg_control = &control; + msghdr.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDENTIALS; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); + memcpy(CMSG_DATA(cmsg), ucred, sizeof(struct ucred)); + msghdr.msg_controllen = cmsg->cmsg_len; + } + + /* Forward the syslog message we received via /dev/log to /run/systemd/syslog. Unfortunately we + * currently can't set the SO_TIMESTAMP auxiliary data, and hence we don't. */ + + if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0) + return; + + /* The socket is full? 
I guess the syslog implementation is + * too slow, and we shouldn't wait for that... */ + if (errno == EAGAIN) { + s->n_forward_syslog_missed++; + return; + } + + if (ucred && IN_SET(errno, ESRCH, EPERM)) { + struct ucred u; + + /* Hmm, presumably the sender process vanished + * by now, or we don't have CAP_SYS_ADMIN, so + * let's fix it as well as we can, and retry */ + + u = *ucred; + u.pid = getpid_cached(); + memcpy(CMSG_DATA(cmsg), &u, sizeof(struct ucred)); + + if (sendmsg(s->syslog_fd, &msghdr, MSG_NOSIGNAL) >= 0) + return; + + if (errno == EAGAIN) { + s->n_forward_syslog_missed++; + return; + } + } + + if (errno != ENOENT) + log_debug_errno(errno, "Failed to forward syslog message: %m"); +} + +/* Forwards an unmodified message buffer of buffer_len bytes to the syslog socket, unless the level of priority is above s->max_level_syslog. */ static void forward_syslog_raw(Server *s, int priority, const char *buffer, size_t buffer_len, const struct ucred *ucred, const struct timeval *tv) { + struct iovec iovec; + + assert(s); + assert(buffer); + + if (LOG_PRI(priority) > s->max_level_syslog) + return; + + iovec = IOVEC_MAKE((char *) buffer, buffer_len); + forward_syslog_iovec(s, &iovec, 1, ucred, tv); +} + +/* Builds a syslog-style line out of up to five parts (priority in angle brackets, local timestamp, identifier, PID in square brackets, message) and forwards it to the syslog socket, unless the level of priority is above s->max_level_syslog. The timestamp is taken from tv if given, otherwise from the current realtime clock. If ucred is given but identifier is NULL, the comm name of ucred->pid is looked up and used when available. */ void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv) { + struct iovec iovec[5]; + char header_priority[DECIMAL_STR_MAX(priority) + 3], header_time[64], + header_pid[STRLEN("[]: ") + DECIMAL_STR_MAX(pid_t) + 1]; + int n = 0; + time_t t; + struct tm tm; + _cleanup_free_ char *ident_buf = NULL; + + assert(s); + assert(priority >= 0); + assert(priority <= 999); + assert(message); + + if (LOG_PRI(priority) > s->max_level_syslog) + return; + + /* First: priority field */ + xsprintf(header_priority, "<%i>", priority); + iovec[n++] = IOVEC_MAKE_STRING(header_priority); + + /* Second: timestamp */ + t = tv ? 
tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC)); + if (!localtime_r(&t, &tm)) + return; + if (strftime(header_time, sizeof(header_time), "%h %e %T ", &tm) <= 0) + return; + iovec[n++] = IOVEC_MAKE_STRING(header_time); + + /* Third: identifier and PID */ + if (ucred) { + if (!identifier) { + (void) get_process_comm(ucred->pid, &ident_buf); + identifier = ident_buf; + } + + xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid); + + if (identifier) + iovec[n++] = IOVEC_MAKE_STRING(identifier); + + iovec[n++] = IOVEC_MAKE_STRING(header_pid); + } else if (identifier) { + iovec[n++] = IOVEC_MAKE_STRING(identifier); + iovec[n++] = IOVEC_MAKE_STRING(": "); + } + + /* Fourth: message */ + iovec[n++] = IOVEC_MAKE_STRING(message); + + forward_syslog_iovec(s, iovec, n, ucred, tv); +} + +/* If priority carries no facility bits, returns its level bits combined with LOG_USER; otherwise returns priority unchanged. */ int syslog_fixup_facility(int priority) { + + if ((priority & LOG_FACMASK) == 0) + return (priority & LOG_PRIMASK) | LOG_USER; + + return priority; +} + +/* Parses a leading tag of the form IDENT: or IDENT[PID]: at *buf. Leading whitespace is skipped and the first whitespace-delimited word must end in a colon, otherwise 0 is returned and neither *buf nor the outputs are touched. On a match, newly allocated copies of the identifier and, if present, of the bracketed PID text are stored in *identifier and *pid (an output is simply left alone when its strndup() fails), *buf is advanced past the tag plus one following whitespace character, and the number of bytes consumed from the original *buf is returned. */ size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid) { + const char *p; + char *t; + size_t l, e; + + assert(buf); + assert(identifier); + assert(pid); + + p = *buf; + + p += strspn(p, WHITESPACE); + l = strcspn(p, WHITESPACE); + + if (l <= 0 || + p[l-1] != ':') + return 0; + + e = l; + l--; + + if (l > 0 && p[l-1] == ']') { + size_t k = l-1; + + for (;;) { + + if (p[k] == '[') { + t = strndup(p+k+1, l-k-2); + if (t) + *pid = t; + + l = k; + break; + } + + if (k == 0) + break; + + k--; + } + } + + t = strndup(p, l); + if (t) + *identifier = t; + + /* Single space is used as separator */ + if (p[e] != '\0' && strchr(WHITESPACE, p[e])) + e++; + + l = (p - *buf) + e; + *buf = p + e; + return l; +} + +/* Checks whether *buf starts with a timestamp of the shape Mmm dd hh:mm:ss followed by one space, where the first position of each two-character numeric field may be a space instead of a digit. If the whole pattern matches, *buf is advanced past it and the number of characters skipped is returned; otherwise 0 is returned and *buf is left alone. */ static int syslog_skip_timestamp(const char **buf) { + enum { + LETTER, + SPACE, + NUMBER, + SPACE_OR_NUMBER, + COLON + } sequence[] = { + LETTER, LETTER, LETTER, + SPACE, + SPACE_OR_NUMBER, NUMBER, + SPACE, + SPACE_OR_NUMBER, NUMBER, + COLON, + SPACE_OR_NUMBER, NUMBER, + COLON, + SPACE_OR_NUMBER, NUMBER, + SPACE + 
}; + + const char *p, *t; + unsigned i; + + assert(buf); + assert(*buf); + + for (i = 0, p = *buf; i < ELEMENTSOF(sequence); i++, p++) { + if (!*p) + return 0; + + switch (sequence[i]) { + + case SPACE: + if (*p != ' ') + return 0; + break; + + case SPACE_OR_NUMBER: + if (*p == ' ') + break; + + _fallthrough_; + case NUMBER: + if (!ascii_isdigit(*p)) + return 0; + + break; + + case LETTER: + if (!ascii_isalpha(*p)) + return 0; + + break; + + case COLON: + if (*p != ':') + return 0; + break; + + } + } + + t = *buf; + *buf = p; + return p - t; +} + +/* Handles one message received in syslog format. Surrounding whitespace is stripped, then the priority, the timestamp and the identifier/PID tag are parsed off the front. Messages rejected by client_context_test_priority() are dropped. The raw buffer is forwarded to syslog and the parsed message to kmsg, console and wall, each only if enabled on the server. Finally the message is dispatched as a list of fields (_TRANSPORT=syslog, PRIORITY=, optional SYSLOG_FACILITY=, SYSLOG_IDENTIFIER=, SYSLOG_PID=, SYSLOG_TIMESTAMP=, MESSAGE=, optional SYSLOG_RAW=). buf must be NUL-terminated at buf[raw_len] and raw_len must be greater than zero. */ void server_process_syslog_message( + Server *s, + const char *buf, + size_t raw_len, + const struct ucred *ucred, + const struct timeval *tv, + const char *label, + size_t label_len) { + + char *t, syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)], + syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)]; + const char *msg, *syslog_ts, *a; + _cleanup_free_ char *identifier = NULL, *pid = NULL, + *dummy = NULL, *msg_msg = NULL, *msg_raw = NULL; + int priority = LOG_USER | LOG_INFO, r; + ClientContext *context = NULL; + struct iovec *iovec; + size_t n = 0, m, i, leading_ws, syslog_ts_len; + bool store_raw; + + assert(s); + assert(buf); + /* The message cannot be empty. */ + assert(raw_len > 0); + /* The buffer is NUL-terminated and can be used as a string. raw_len is the length + * without the terminating NUL byte, the buffer is actually one bigger. 
*/ + assert(buf[raw_len] == '\0'); + + if (ucred && pid_is_valid(ucred->pid)) { + r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context); + if (r < 0) + log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid); + } + + /* We are creating a copy of the message because we want to forward the original message + verbatim to the legacy syslog implementation */ + for (i = raw_len; i > 0; i--) + if (!strchr(WHITESPACE, buf[i-1])) + break; + + leading_ws = strspn(buf, WHITESPACE); + + if (i == 0) + /* The message contains only whitespace */ + msg = buf + raw_len; + else if (i == raw_len) + /* Nice! No need to strip anything on the end, let's optimize this a bit */ + msg = buf + leading_ws; + else { + msg = dummy = new(char, i - leading_ws + 1); + if (!dummy) { + log_oom(); + return; + } + + memcpy(dummy, buf + leading_ws, i - leading_ws); + dummy[i - leading_ws] = 0; + } + + /* We will add the SYSLOG_RAW= field when we stripped anything + * _or_ if the input message contained NUL bytes. 
*/ + store_raw = msg != buf || strlen(msg) != raw_len; + + syslog_parse_priority(&msg, &priority, true); + + if (!client_context_test_priority(context, priority)) + return; + + syslog_ts = msg; + syslog_ts_len = syslog_skip_timestamp(&msg); + if (syslog_ts_len == 0) + /* We failed to parse the full timestamp, store the raw message too */ + store_raw = true; + + syslog_parse_identifier(&msg, &identifier, &pid); + + if (s->forward_to_syslog) + forward_syslog_raw(s, priority, buf, raw_len, ucred, tv); + + if (s->forward_to_kmsg) + server_forward_kmsg(s, priority, identifier, msg, ucred); + + if (s->forward_to_console) + server_forward_console(s, priority, identifier, msg, ucred); + + if (s->forward_to_wall) + server_forward_wall(s, priority, identifier, msg, ucred); + + m = N_IOVEC_META_FIELDS + 8 + client_context_extra_fields_n_iovec(context); + iovec = newa(struct iovec, m); + + iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog"); + + xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK); + iovec[n++] = IOVEC_MAKE_STRING(syslog_priority); + + if (priority & LOG_FACMASK) { + xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)); + iovec[n++] = IOVEC_MAKE_STRING(syslog_facility); + } + + if (identifier) { + a = strjoina("SYSLOG_IDENTIFIER=", identifier); + iovec[n++] = IOVEC_MAKE_STRING(a); + } + + if (pid) { + a = strjoina("SYSLOG_PID=", pid); + iovec[n++] = IOVEC_MAKE_STRING(a); + } + + if (syslog_ts_len > 0) { + const size_t hlen = STRLEN("SYSLOG_TIMESTAMP="); + + t = newa(char, hlen + syslog_ts_len); + memcpy(t, "SYSLOG_TIMESTAMP=", hlen); + memcpy(t + hlen, syslog_ts, syslog_ts_len); + + iovec[n++] = IOVEC_MAKE(t, hlen + syslog_ts_len); + } + + msg_msg = strjoin("MESSAGE=", msg); + if (!msg_msg) { + log_oom(); + return; + } + iovec[n++] = IOVEC_MAKE_STRING(msg_msg); + + if (store_raw) { + const size_t hlen = STRLEN("SYSLOG_RAW="); + + msg_raw = new(char, hlen + raw_len); + if (!msg_raw) { + log_oom(); + return; + } + + memcpy(msg_raw, 
"SYSLOG_RAW=", hlen); + memcpy(msg_raw + hlen, buf, raw_len); + + iovec[n++] = IOVEC_MAKE(msg_raw, hlen + raw_len); + } + + server_dispatch_message(s, iovec, n, m, context, tv, priority, 0); +} + +/* Sets up the datagram socket on which syslog messages are received. If s->syslog_fd is negative, a non-blocking AF_UNIX SOCK_DGRAM socket is created, bound to syslog_socket after unlinking any old socket node, and given mode 0666; otherwise the existing fd is only switched to non-blocking mode. Then SO_PASSCRED, SO_TIMESTAMP and (when SELinux is in use) SO_PASSSEC are enabled, where a failing SO_PASSSEC only produces a warning, and the fd is added to the event loop with server_process_datagram() as callback. */ int server_open_syslog_socket(Server *s, const char *syslog_socket) { + int r; + + assert(s); + assert(syslog_socket); + + if (s->syslog_fd < 0) { + union sockaddr_union sa; + socklen_t sa_len; + + r = sockaddr_un_set_path(&sa.un, syslog_socket); + if (r < 0) + return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", syslog_socket); + sa_len = r; + + s->syslog_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (s->syslog_fd < 0) + return log_error_errno(errno, "socket() failed: %m"); + + (void) sockaddr_un_unlink(&sa.un); + + r = bind(s->syslog_fd, &sa.sa, sa_len); + if (r < 0) + return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); + + (void) chmod(sa.un.sun_path, 0666); + } else + (void) fd_nonblock(s->syslog_fd, true); + + r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_error_errno(r, "SO_PASSCRED failed: %m"); + + if (mac_selinux_use()) { + r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, true); + if (r < 0) + log_warning_errno(r, "SO_PASSSEC failed: %m"); + } + + r = setsockopt_int(s->syslog_fd, SOL_SOCKET, SO_TIMESTAMP, true); + if (r < 0) + return log_error_errno(r, "SO_TIMESTAMP failed: %m"); + + r = sd_event_add_io(s->event, &s->syslog_event_source, s->syslog_fd, EPOLLIN, server_process_datagram, s); + if (r < 0) + return log_error_errno(r, "Failed to add syslog server fd to event loop: %m"); + + r = sd_event_source_set_priority(s->syslog_event_source, SD_EVENT_PRIORITY_NORMAL+5); + if (r < 0) + return log_error_errno(r, "Failed to adjust syslog event source priority: %m"); + + return 0; +} + +/* Logs a driver message stating how many syslog forwards were dropped, at most once per WARN_FORWARD_SYSLOG_MISSED_USEC of monotonic time, and resets the counter afterwards. Does nothing while the counter is zero. */ void server_maybe_warn_forward_syslog_missed(Server *s) { + usec_t n; + + assert(s); + + if (s->n_forward_syslog_missed <= 0) + return; + + n = 
now(CLOCK_MONOTONIC); + if (s->last_warn_forward_syslog_missed + WARN_FORWARD_SYSLOG_MISSED_USEC > n) + return; + + server_driver_message(s, 0, + "MESSAGE_ID=" SD_MESSAGE_FORWARD_SYSLOG_MISSED_STR, + LOG_MESSAGE("Forwarding to syslog missed %u messages.", + s->n_forward_syslog_missed), + NULL); + + s->n_forward_syslog_missed = 0; + s->last_warn_forward_syslog_missed = n; +} diff --git a/src/journal/journald-syslog.h b/src/journal/journald-syslog.h new file mode 100644 index 0000000..3bc3ffd --- /dev/null +++ b/src/journal/journald-syslog.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journald-server.h" + +int syslog_fixup_facility(int priority) _const_; + +size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid); + +void server_forward_syslog(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred, const struct timeval *tv); + +void server_process_syslog_message(Server *s, const char *buf, size_t buf_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len); +int server_open_syslog_socket(Server *s, const char *syslog_socket); + +void server_maybe_warn_forward_syslog_missed(Server *s); diff --git a/src/journal/journald-wall.c b/src/journal/journald-wall.c new file mode 100644 index 0000000..21ec5a7 --- /dev/null +++ b/src/journal/journald-wall.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "alloc-util.h" +#include "format-util.h" +#include "journald-server.h" +#include "journald-wall.h" +#include "process-util.h" +#include "string-util.h" +#include "utmp-wtmp.h" + +/* Sends message as a wall message through utmp_wall(), unless the level of priority is above s->max_level_wall. When ucred is given the text is prefixed with IDENT[PID]: (looking up the comm name of ucred->pid if identifier is NULL, and using an empty identifier if that yields nothing); when only identifier is given the prefix is IDENT: ; otherwise the message is sent as is. A failure of utmp_wall() is only logged at debug level. */ void server_forward_wall( + Server *s, + int priority, + const char *identifier, + const char *message, + const struct ucred *ucred) { + + _cleanup_free_ char *ident_buf = NULL, *l_buf = NULL; + const char *l; + int r; + + assert(s); + assert(message); + + if (LOG_PRI(priority) > s->max_level_wall) + return; + + if (ucred) { 
+ if (!identifier) { + (void) get_process_comm(ucred->pid, &ident_buf); + identifier = ident_buf; + } + + if (asprintf(&l_buf, "%s["PID_FMT"]: %s", strempty(identifier), ucred->pid, message) < 0) { + log_oom(); + return; + } + + l = l_buf; + + } else if (identifier) { + + l = l_buf = strjoin(identifier, ": ", message); + if (!l_buf) { + log_oom(); + return; + } + } else + l = message; + + r = utmp_wall(l, "systemd-journald", NULL, NULL, NULL); + if (r < 0) + log_debug_errno(r, "Failed to send wall message: %m"); +} diff --git a/src/journal/journald-wall.h b/src/journal/journald-wall.h new file mode 100644 index 0000000..3f98c35 --- /dev/null +++ b/src/journal/journald-wall.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include <sys/socket.h> + +#include "journald-server.h" + +void server_forward_wall(Server *s, int priority, const char *identifier, const char *message, const struct ucred *ucred); diff --git a/src/journal/journald.c b/src/journal/journald.c new file mode 100644 index 0000000..3d40442 --- /dev/null +++ b/src/journal/journald.c @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> + +#include "sd-daemon.h" +#include "sd-messages.h" + +#include "format-util.h" +#include "journal-authenticate.h" +#include "journald-kmsg.h" +#include "journald-server.h" +#include "journald-syslog.h" +#include "process-util.h" +#include "sigbus.h" + +/* Entry point of systemd-journald. Accepts at most one argument, the log namespace to serve; an absent or empty argument selects the system instance. Sets up logging accordingly, initializes the server, flushes and vacuums once, and then runs the event loop until it reports SD_EVENT_FINISHED, rotating and vacuuming whenever the configured retention time is exceeded. Exits with EXIT_FAILURE if the last operation returned a negative value, EXIT_SUCCESS otherwise. */ int main(int argc, char *argv[]) { + const char *namespace; + LogTarget log_target; + Server server; + int r; + + if (argc > 2) { + log_error("This program takes one or no arguments."); + return EXIT_FAILURE; + } + + namespace = argc > 1 ? empty_to_null(argv[1]) : NULL; + + log_set_facility(LOG_SYSLOG); + + if (namespace) + /* If we run for a log namespace, then we ourselves can log to the main journald. 
*/ + log_setup(); + else { + /* So here's the deal if we run as the main journald: we can't be considered as regular + * daemon when it comes to logging hence LOG_TARGET_AUTO won't do the right thing for + * us. Hence explicitly log to the console if we're started from a console or to kmsg + * otherwise. */ + log_target = isatty(STDERR_FILENO) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG; + + log_set_prohibit_ipc(true); /* better safe than sorry */ + log_set_target(log_target); + log_parse_environment(); + log_open(); + } + + umask(0022); + + sigbus_install(); + + r = server_init(&server, namespace); + if (r < 0) + goto finish; + + server_vacuum(&server, false); + server_flush_to_var(&server, true); + server_flush_dev_kmsg(&server); + + if (server.namespace) + log_debug("systemd-journald running as PID "PID_FMT" for namespace '%s'.", getpid_cached(), server.namespace); + else + log_debug("systemd-journald running as PID "PID_FMT" for the system.", getpid_cached()); + + server_driver_message(&server, 0, + "MESSAGE_ID=" SD_MESSAGE_JOURNAL_START_STR, + LOG_MESSAGE("Journal started"), + NULL); + + /* Make sure to send the usage message *after* flushing the + * journal so entries from the runtime journals are ordered + * before this message. See #4190 for some details. */ + server_space_usage_message(&server, NULL); + + for (;;) { + usec_t t = USEC_INFINITY, n; + + r = sd_event_get_state(server.event); + if (r < 0) { + log_error_errno(r, "Failed to get event loop state: %m"); + goto finish; + } + if (r == SD_EVENT_FINISHED) + break; + + n = now(CLOCK_REALTIME); + + if (server.max_retention_usec > 0 && server.oldest_file_usec > 0) { + + /* The retention time is reached, so let's vacuum! 
*/ + if (server.oldest_file_usec + server.max_retention_usec < n) { + log_info("Retention time reached, rotating."); + server_rotate(&server); + server_vacuum(&server, false); + continue; + } + + /* Calculate when to rotate the next time */ + t = server.oldest_file_usec + server.max_retention_usec - n; + } + +#if HAVE_GCRYPT + if (server.system_journal) { + usec_t u; + + if (journal_file_next_evolve_usec(server.system_journal->file, &u)) { + if (n >= u) + t = 0; + else + t = MIN(t, u - n); + } + } +#endif + + r = sd_event_run(server.event, t); + if (r < 0) { + log_error_errno(r, "Failed to run event loop: %m"); + goto finish; + } + + server_maybe_append_tags(&server); + server_maybe_warn_forward_syslog_missed(&server); + } + + if (server.namespace) + log_debug("systemd-journald stopped as PID "PID_FMT" for namespace '%s'.", getpid_cached(), server.namespace); + else + log_debug("systemd-journald stopped as PID "PID_FMT" for the system.", getpid_cached()); + + server_driver_message(&server, 0, + "MESSAGE_ID=" SD_MESSAGE_JOURNAL_STOP_STR, + LOG_MESSAGE("Journal stopped"), + NULL); + +finish: + server_done(&server); + + return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/journal/journald.conf b/src/journal/journald.conf new file mode 100644 index 0000000..5a60a9d --- /dev/null +++ b/src/journal/journald.conf @@ -0,0 +1,47 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 2.1 of the License, or (at your option) +# any later version. +# +# Entries in this file show the compile time defaults. Local configuration +# should be created by either modifying this file, or by creating "drop-ins" in +# the journald.conf.d/ subdirectory. The latter is generally recommended. +# Defaults can be restored by simply deleting this file and all drop-ins. 
+# +# Use 'systemd-analyze cat-config systemd/journald.conf' to display the full config. +# +# See journald.conf(5) for details. + +[Journal] +#Storage=auto +#Compress=yes +#Seal=yes +#SplitMode=uid +#SyncIntervalSec=5m +#RateLimitIntervalSec=30s +#RateLimitBurst=10000 +#SystemMaxUse= +#SystemKeepFree= +#SystemMaxFileSize= +#SystemMaxFiles=100 +#RuntimeMaxUse= +#RuntimeKeepFree= +#RuntimeMaxFileSize= +#RuntimeMaxFiles=100 +#MaxRetentionSec= +#MaxFileSec=1month +#ForwardToSyslog=no +#ForwardToKMsg=no +#ForwardToConsole=no +#ForwardToWall=yes +#TTYPath=/dev/console +#MaxLevelStore=debug +#MaxLevelSyslog=debug +#MaxLevelKMsg=notice +#MaxLevelConsole=info +#MaxLevelWall=emerg +#LineMax=48K +#ReadKMsg=yes +#Audit=yes diff --git a/src/journal/managed-journal-file.c b/src/journal/managed-journal-file.c new file mode 100644 index 0000000..8101677 --- /dev/null +++ b/src/journal/managed-journal-file.c @@ -0,0 +1,561 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <pthread.h> +#include <unistd.h> + +#include "chattr-util.h" +#include "copy.h" +#include "errno-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "journal-authenticate.h" +#include "managed-journal-file.h" +#include "path-util.h" +#include "random-util.h" +#include "set.h" +#include "stat-util.h" +#include "sync-util.h" + +#define PAYLOAD_BUFFER_SIZE (16U * 1024U) +#define MINIMUM_HOLE_SIZE (1U * 1024U * 1024U / 2U) + +/* Cuts the file off right after its last object: determines the end of the tail object with journal_file_tail_end_by_pread(), updates arena_size in the header to match, truncates the fd to that offset and finally returns the result of journal_file_fstat(). */ static int managed_journal_file_truncate(JournalFile *f) { + uint64_t p; + int r; + + /* truncate excess from the end of archives */ + r = journal_file_tail_end_by_pread(f, &p); + if (r < 0) + return log_debug_errno(r, "Failed to determine end of tail object: %m"); + + /* arena_size can't exceed the file size, ensure it's updated before truncating */ + f->header->arena_size = htole64(p - le64toh(f->header->header_size)); + + if (ftruncate(f->fd, p) < 0) + return log_debug_errno(errno, "Failed to truncate %s: %m", f->path); + + return journal_file_fstat(f); +} + 
+/* Releases the unused tail of an entry array chain. Walks the chain starting at offset p, summing the item capacity of every array and remembering the last array; n_entries is the number of items in use, and a value above the total capacity yields -EBADMSG. Nothing is done when n_entries is zero, when no item is unused, or when the unused byte range of the last array is smaller than MINIMUM_HOLE_SIZE. If the last array is the tail object of an unsealed file, the object is shrunk and the file truncated; otherwise the range is deallocated with fallocate() using FALLOC_FL_PUNCH_HOLE. Returns -EOPNOTSUPP when the file system does not support hole punching. */ static int managed_journal_file_entry_array_punch_hole(JournalFile *f, uint64_t p, uint64_t n_entries) { + Object o; + uint64_t offset, sz, n_items = 0, n_unused; + int r; + + if (n_entries == 0) + return 0; + + for (uint64_t q = p; q != 0; q = le64toh(o.entry_array.next_entry_array_offset)) { + r = journal_file_read_object_header(f, OBJECT_ENTRY_ARRAY, q, &o); + if (r < 0) + return r; + + n_items += journal_file_entry_array_n_items(f, &o); + p = q; + } + + if (p == 0) + return 0; + + if (n_entries > n_items) + return -EBADMSG; + + /* Amount of unused items in the final entry array. */ + n_unused = n_items - n_entries; + + if (n_unused == 0) + return 0; + + offset = p + offsetof(Object, entry_array.items) + + (journal_file_entry_array_n_items(f, &o) - n_unused) * journal_file_entry_array_item_size(f); + sz = p + le64toh(o.object.size) - offset; + + if (sz < MINIMUM_HOLE_SIZE) + return 0; + + if (p == le64toh(f->header->tail_object_offset) && !JOURNAL_HEADER_SEALED(f->header)) { + ssize_t n; + + o.object.size = htole64(offset - p); + + n = pwrite(f->fd, &o, sizeof(EntryArrayObject), p); + if (n < 0) + return log_debug_errno(errno, "Failed to modify entry array object size: %m"); + if ((size_t) n != sizeof(EntryArrayObject)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short pwrite() while modifying entry array object size."); + + f->header->arena_size = htole64(ALIGN64(offset) - le64toh(f->header->header_size)); + + if (ftruncate(f->fd, ALIGN64(offset)) < 0) + return log_debug_errno(errno, "Failed to truncate %s: %m", f->path); + + return 0; + } + + if (fallocate(f->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, sz) < 0) { + if (ERRNO_IS_NOT_SUPPORTED(errno)) + return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), /* Make recognizable */ + "Hole punching not supported by backing file system, skipping."); + + return log_debug_errno(errno, "Failed to punch hole in entry array of %s: %m", f->path); + } + + return 0; +} + +static int 
managed_journal_file_punch_holes(JournalFile *f) { + HashItem items[PAYLOAD_BUFFER_SIZE / sizeof(HashItem)]; + uint64_t p, sz; + ssize_t n = SSIZE_MAX; + int r; + + r = managed_journal_file_entry_array_punch_hole( + f, le64toh(f->header->entry_array_offset), le64toh(f->header->n_entries)); + if (r < 0) + return r; + + p = le64toh(f->header->data_hash_table_offset); + sz = le64toh(f->header->data_hash_table_size); + + for (uint64_t i = p; i < p + sz && n > 0; i += n) { + size_t m = MIN(sizeof(items), p + sz - i); + n = pread(f->fd, items, m, i); + if (n < 0) + return log_debug_errno(errno, "Failed to read hash table items: %m"); + + /* Let's ignore any partial hash items by rounding down to the nearest multiple of HashItem. */ + n -= n % sizeof(HashItem); + + for (size_t j = 0; j < (size_t) n / sizeof(HashItem); j++) { + Object o; + + for (uint64_t q = le64toh(items[j].head_hash_offset); q != 0; + q = le64toh(o.data.next_hash_offset)) { + + r = journal_file_read_object_header(f, OBJECT_DATA, q, &o); + if (r < 0) { + log_debug_errno(r, "Invalid data object: %m, ignoring"); + break; + } + + if (le64toh(o.data.n_entries) == 0) + continue; + + r = managed_journal_file_entry_array_punch_hole( + f, le64toh(o.data.entry_array_offset), le64toh(o.data.n_entries) - 1); + if (r == -EOPNOTSUPP) + return -EOPNOTSUPP; + + /* Ignore other errors */ + } + } + } + + return 0; +} + +/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync(). + * As a result we use atomic operations on f->offline_state for inter-thread communications with + * journal_file_set_offline() and journal_file_set_online(). 
*/ +static void managed_journal_file_set_offline_internal(ManagedJournalFile *f) { + int r; + + assert(f); + assert(f->file->fd >= 0); + assert(f->file->header); + + for (;;) { + switch (f->file->offline_state) { + case OFFLINE_CANCEL: { + OfflineState tmp_state = OFFLINE_CANCEL; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_DONE, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + return; + + case OFFLINE_AGAIN_FROM_SYNCING: { + OfflineState tmp_state = OFFLINE_AGAIN_FROM_SYNCING; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_SYNCING, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + break; + + case OFFLINE_AGAIN_FROM_OFFLINING: { + OfflineState tmp_state = OFFLINE_AGAIN_FROM_OFFLINING; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_SYNCING, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + break; + + case OFFLINE_SYNCING: + if (f->file->archive) { + (void) managed_journal_file_truncate(f->file); + (void) managed_journal_file_punch_holes(f->file); + } + + (void) fsync(f->file->fd); + + { + OfflineState tmp_state = OFFLINE_SYNCING; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_OFFLINING, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + + f->file->header->state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE; + (void) fsync(f->file->fd); + + /* If we've archived the journal file, first try to re-enable COW on the file. If the + * FS_NOCOW_FL flag was never set or we successfully removed it, continue. If we fail + * to remove the flag on the archived file, rewrite the file without the NOCOW flag. + * We need this fallback because on some filesystems (BTRFS), the NOCOW flag cannot + * be removed after data has been written to a file. The only way to remove it is to + * copy all data to a new file without the NOCOW flag set. 
*/ + + if (f->file->archive) { + r = chattr_fd(f->file->fd, 0, FS_NOCOW_FL, NULL); + if (r >= 0) + continue; + + log_debug_errno(r, "Failed to re-enable copy-on-write for %s: %m, rewriting file", f->file->path); + + r = copy_file_atomic(FORMAT_PROC_FD_PATH(f->file->fd), f->file->path, f->file->mode, + 0, + FS_NOCOW_FL, + COPY_REPLACE | COPY_FSYNC | COPY_HOLES | COPY_ALL_XATTRS); + if (r < 0) { + log_debug_errno(r, "Failed to rewrite %s: %m", f->file->path); + continue; + } + } + + break; + + case OFFLINE_OFFLINING: { + OfflineState tmp_state = OFFLINE_OFFLINING; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_DONE, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + _fallthrough_; + case OFFLINE_DONE: + return; + + case OFFLINE_JOINED: + log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()"); + return; + } + } +} + +static void * managed_journal_file_set_offline_thread(void *arg) { + ManagedJournalFile *f = arg; + + (void) pthread_setname_np(pthread_self(), "journal-offline"); + + managed_journal_file_set_offline_internal(f); + + return NULL; +} + +/* Trigger a restart if the offline thread is mid-flight in a restartable state. 
*/ +static bool managed_journal_file_set_offline_try_restart(ManagedJournalFile *f) { + for (;;) { + switch (f->file->offline_state) { + case OFFLINE_AGAIN_FROM_SYNCING: + case OFFLINE_AGAIN_FROM_OFFLINING: + return true; + + case OFFLINE_CANCEL: { + OfflineState tmp_state = OFFLINE_CANCEL; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_AGAIN_FROM_SYNCING, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + return true; + + case OFFLINE_SYNCING: { + OfflineState tmp_state = OFFLINE_SYNCING; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_AGAIN_FROM_SYNCING, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + return true; + + case OFFLINE_OFFLINING: { + OfflineState tmp_state = OFFLINE_OFFLINING; + if (!__atomic_compare_exchange_n(&f->file->offline_state, &tmp_state, OFFLINE_AGAIN_FROM_OFFLINING, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + continue; + } + return true; + + default: + return false; + } + } +} + +/* Sets a journal offline. + * + * If wait is false then an offline is dispatched in a separate thread for a + * subsequent journal_file_set_offline() or journal_file_set_online() of the + * same journal to synchronize with. + * + * If wait is true, then either an existing offline thread will be restarted + * and joined, or if none exists the offline is simply performed in this + * context without involving another thread. + */ +int managed_journal_file_set_offline(ManagedJournalFile *f, bool wait) { + int target_state; + bool restarted; + int r; + + assert(f); + + if (!journal_file_writable(f->file)) + return -EPERM; + + if (f->file->fd < 0 || !f->file->header) + return -EINVAL; + + target_state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE; + + /* An offlining journal is implicitly online and may modify f->header->state, + * we must also join any potentially lingering offline thread when already in + * the desired offline state. 
+ */ + if (!managed_journal_file_is_offlining(f) && f->file->header->state == target_state) + return journal_file_set_offline_thread_join(f->file); + + /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */ + restarted = managed_journal_file_set_offline_try_restart(f); + if ((restarted && wait) || !restarted) { + r = journal_file_set_offline_thread_join(f->file); + if (r < 0) + return r; + } + + if (restarted) + return 0; + + /* Initiate a new offline. */ + f->file->offline_state = OFFLINE_SYNCING; + + if (wait) /* Without using a thread if waiting. */ + managed_journal_file_set_offline_internal(f); + else { + sigset_t ss, saved_ss; + int k; + + assert_se(sigfillset(&ss) >= 0); + /* Don't block SIGBUS since the offlining thread accesses a memory mapped file. + * Asynchronous SIGBUS signals can safely be handled by either thread. */ + assert_se(sigdelset(&ss, SIGBUS) >= 0); + + r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss); + if (r > 0) + return -r; + + r = pthread_create(&f->file->offline_thread, NULL, managed_journal_file_set_offline_thread, f); + + k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL); + if (r > 0) { + f->file->offline_state = OFFLINE_JOINED; + return -r; + } + if (k > 0) + return -k; + } + + return 0; +} + +bool managed_journal_file_is_offlining(ManagedJournalFile *f) { + assert(f); + + __atomic_thread_fence(__ATOMIC_SEQ_CST); + + if (IN_SET(f->file->offline_state, OFFLINE_DONE, OFFLINE_JOINED)) + return false; + + return true; +} + +ManagedJournalFile* managed_journal_file_close(ManagedJournalFile *f) { + if (!f) + return NULL; + +#if HAVE_GCRYPT + /* Write the final tag */ + if (JOURNAL_HEADER_SEALED(f->file->header) && journal_file_writable(f->file)) { + int r; + + r = journal_file_append_tag(f->file); + if (r < 0) + log_error_errno(r, "Failed to append tag when closing journal: %m"); + } +#endif + + if (sd_event_source_get_enabled(f->file->post_change_timer, NULL) > 0) + journal_file_post_change(f->file); + 
sd_event_source_disable_unref(f->file->post_change_timer); + + managed_journal_file_set_offline(f, true); + + journal_file_close(f->file); + + return mfree(f); +} + +int managed_journal_file_open( + int fd, + const char *fname, + int open_flags, + JournalFileFlags file_flags, + mode_t mode, + uint64_t compress_threshold_bytes, + JournalMetrics *metrics, + MMapCache *mmap_cache, + Set *deferred_closes, + ManagedJournalFile *template, + ManagedJournalFile **ret) { + _cleanup_free_ ManagedJournalFile *f = NULL; + int r; + + set_clear_with_destructor(deferred_closes, managed_journal_file_close); + + f = new0(ManagedJournalFile, 1); + if (!f) + return -ENOMEM; + + r = journal_file_open(fd, fname, open_flags, file_flags, mode, compress_threshold_bytes, metrics, + mmap_cache, template ? template->file : NULL, &f->file); + if (r < 0) + return r; + + *ret = TAKE_PTR(f); + + return 0; +} + + +ManagedJournalFile* managed_journal_file_initiate_close(ManagedJournalFile *f, Set *deferred_closes) { + int r; + + assert(f); + + if (deferred_closes) { + r = set_put(deferred_closes, f); + if (r < 0) + log_debug_errno(r, "Failed to add file to deferred close set, closing immediately."); + else { + (void) managed_journal_file_set_offline(f, false); + return NULL; + } + } + + return managed_journal_file_close(f); +} + +int managed_journal_file_rotate( + ManagedJournalFile **f, + MMapCache *mmap_cache, + JournalFileFlags file_flags, + uint64_t compress_threshold_bytes, + Set *deferred_closes) { + + _cleanup_free_ char *path = NULL; + ManagedJournalFile *new_file = NULL; + int r; + + assert(f); + assert(*f); + + r = journal_file_archive((*f)->file, &path); + if (r < 0) + return r; + + r = managed_journal_file_open( + -1, + path, + (*f)->file->open_flags, + file_flags, + (*f)->file->mode, + compress_threshold_bytes, + NULL, /* metrics */ + mmap_cache, + deferred_closes, + *f, /* template */ + &new_file); + + managed_journal_file_initiate_close(*f, deferred_closes); + *f = new_file; + + 
return r; +} + +int managed_journal_file_open_reliably( + const char *fname, + int open_flags, + JournalFileFlags file_flags, + mode_t mode, + uint64_t compress_threshold_bytes, + JournalMetrics *metrics, + MMapCache *mmap_cache, + Set *deferred_closes, + ManagedJournalFile *template, + ManagedJournalFile **ret) { + + _cleanup_(managed_journal_file_closep) ManagedJournalFile *old_file = NULL; + int r; + + r = managed_journal_file_open(-1, fname, open_flags, file_flags, mode, compress_threshold_bytes, metrics, + mmap_cache, deferred_closes, template, ret); + if (!IN_SET(r, + -EBADMSG, /* Corrupted */ + -ENODATA, /* Truncated */ + -EHOSTDOWN, /* Other machine */ + -EPROTONOSUPPORT, /* Incompatible feature */ + -EBUSY, /* Unclean shutdown */ + -ESHUTDOWN, /* Already archived */ + -EIO, /* IO error, including SIGBUS on mmap */ + -EIDRM, /* File has been deleted */ + -ETXTBSY)) /* File is from the future */ + return r; + + if ((open_flags & O_ACCMODE) == O_RDONLY) + return r; + + if (!(open_flags & O_CREAT)) + return r; + + if (!endswith(fname, ".journal")) + return r; + + /* The file is corrupted. Rotate it away and try it again (but only once) */ + log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname); + + if (!template) { + /* The file is corrupted and no template is specified. Try opening it read-only as the + * template before rotating to inherit its sequence number and ID. 
*/ + r = managed_journal_file_open(-1, fname, + (open_flags & ~(O_ACCMODE|O_CREAT|O_EXCL)) | O_RDONLY, + file_flags, 0, compress_threshold_bytes, NULL, + mmap_cache, deferred_closes, NULL, &old_file); + if (r < 0) + log_debug_errno(r, "Failed to continue sequence from file %s, ignoring: %m", fname); + else + template = old_file; + } + + r = journal_file_dispose(AT_FDCWD, fname); + if (r < 0) + return r; + + return managed_journal_file_open(-1, fname, open_flags, file_flags, mode, compress_threshold_bytes, metrics, + mmap_cache, deferred_closes, template, ret); +} diff --git a/src/journal/managed-journal-file.h b/src/journal/managed-journal-file.h new file mode 100644 index 0000000..0ac69a7 --- /dev/null +++ b/src/journal/managed-journal-file.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +#include "journal-file.h" + +typedef struct { + JournalFile *file; +} ManagedJournalFile; + +int managed_journal_file_open( + int fd, + const char *fname, + int open_flags, + JournalFileFlags file_flags, + mode_t mode, + uint64_t compress_threshold_bytes, + JournalMetrics *metrics, + MMapCache *mmap_cache, + Set *deferred_closes, + ManagedJournalFile *template, + ManagedJournalFile **ret); + +int managed_journal_file_set_offline(ManagedJournalFile *f, bool wait); +bool managed_journal_file_is_offlining(ManagedJournalFile *f); +ManagedJournalFile* managed_journal_file_close(ManagedJournalFile *f); +DEFINE_TRIVIAL_CLEANUP_FUNC(ManagedJournalFile*, managed_journal_file_close); + +int managed_journal_file_open_reliably( + const char *fname, + int open_flags, + JournalFileFlags file_flags, + mode_t mode, + uint64_t compress_threshold_bytes, + JournalMetrics *metrics, + MMapCache *mmap_cache, + Set *deferred_closes, + ManagedJournalFile *template, + ManagedJournalFile **ret); + +ManagedJournalFile* managed_journal_file_initiate_close(ManagedJournalFile *f, Set *deferred_closes); +int managed_journal_file_rotate(ManagedJournalFile **f, MMapCache 
*mmap_cache, JournalFileFlags file_flags, uint64_t compress_threshold_bytes, Set *deferred_closes); diff --git a/src/journal/meson.build b/src/journal/meson.build new file mode 100644 index 0000000..1e41ea1 --- /dev/null +++ b/src/journal/meson.build @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +sources = files( + 'journald-audit.c', + 'journald-audit.h', + 'journald-console.c', + 'journald-console.h', + 'journald-context.c', + 'journald-context.h', + 'journald-kmsg.c', + 'journald-kmsg.h', + 'journald-native.c', + 'journald-native.h', + 'journald-rate-limit.c', + 'journald-rate-limit.h', + 'journald-server.c', + 'journald-server.h', + 'journald-stream.c', + 'journald-stream.h', + 'journald-syslog.c', + 'journald-syslog.h', + 'journald-wall.c', + 'journald-wall.h', + 'managed-journal-file.c', + 'managed-journal-file.h', +) + +sources += custom_target( + 'journald-gperf.c', + input : 'journald-gperf.gperf', + output : 'journald-gperf.c', + command : [gperf, '@INPUT@', '--output-file', '@OUTPUT@']) + +libjournal_core = static_library( + 'journal-core', + sources, + include_directories : includes, + dependencies: threads, + build_by_default : false) + +journal_includes = [includes, include_directories('.')] + +systemd_journald_sources = files( + 'journald.c', + 'journald-server.h', +) + +systemd_cat_sources = files('cat.c') + +journalctl_sources = files('journalctl.c') + +if install_sysconfdir_samples + install_data('journald.conf', + install_dir : pkgsysconfdir) +endif + +if get_option('create-log-dirs') + meson.add_install_script( + 'sh', '-c', + mkdir_p.format('/var/log/journal')) + meson.add_install_script( + 'sh', '-c', + '''chown 0:0 $DESTDIR/var/log/journal && + chmod 755 $DESTDIR/var/log/journal || :''') + if get_option('adm-group') + meson.add_install_script( + 'sh', '-c', + 'setfacl -nm g:adm:rx,d:g:adm:rx $DESTDIR/var/log/journal || :') + endif + if get_option('wheel-group') + meson.add_install_script( + 'sh', '-c', + 'setfacl -nm 
g:wheel:rx,d:g:wheel:rx $DESTDIR/var/log/journal || :') + endif +endif + +############################################################ + +tests += [ + [files('test-journal-syslog.c'), + [libjournal_core, + libshared], + [threads, + libxz, + liblz4, + libselinux]], + + [files('test-journal-config.c'), + [libjournal_core, + libshared], + [libxz, + liblz4, + libselinux]], + + [files('test-journal.c'), + [libjournal_core, + libshared]], + + [files('test-journal-stream.c'), + [libjournal_core, + libshared]], + + [files('test-journal-flush.c'), + [libjournal_core, + libshared]], + + [files('test-journal-verify.c'), + [libjournal_core, + libshared]], + + [files('test-journal-interleaving.c'), + [libjournal_core, + libshared]], +] + +fuzzers += [ + [files('fuzz-journald-audit.c', + 'fuzz-journald.c'), + [libjournal_core, + libshared], + [libselinux]], + + [files('fuzz-journald-kmsg.c', + 'fuzz-journald.c'), + [libjournal_core, + libshared], + [libselinux]], + + [files('fuzz-journald-native.c', + 'fuzz-journald.c'), + [libjournal_core, + libshared], + [libselinux]], + + [files('fuzz-journald-native-fd.c', + 'fuzz-journald.c'), + [libjournal_core, + libshared], + [libselinux]], + + [files('fuzz-journald-stream.c', + 'fuzz-journald.c'), + [libjournal_core, + libshared], + [libselinux]], + + [files('fuzz-journald-syslog.c', + 'fuzz-journald.c'), + [libjournal_core, + libshared], + [libselinux]], +] diff --git a/src/journal/test-journal-config.c b/src/journal/test-journal-config.c new file mode 100644 index 0000000..1a6c531 --- /dev/null +++ b/src/journal/test-journal-config.c @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <stdbool.h> + +#include "journald-server.h" +#include "tests.h" + +#define _COMPRESS_PARSE_CHECK(str, enab, thresh, varname) \ + do { \ + JournalCompressOptions varname = {true, 111}; \ + config_parse_compress("", "", 0, "", 0, "", 0, str, \ + &varname, NULL); \ + assert_se((enab) == varname.enabled); \ + if (varname.enabled) \ 
+ assert_se((thresh) == varname.threshold_bytes); \ + } while (0) + +#define COMPRESS_PARSE_CHECK(str, enabled, threshold) \ + _COMPRESS_PARSE_CHECK(str, enabled, threshold, conf##__COUNTER__) + +TEST(config_compress) { + COMPRESS_PARSE_CHECK("yes", true, 111); + COMPRESS_PARSE_CHECK("no", false, 111); + COMPRESS_PARSE_CHECK("y", true, 111); + COMPRESS_PARSE_CHECK("n", false, 111); + COMPRESS_PARSE_CHECK("true", true, 111); + COMPRESS_PARSE_CHECK("false", false, 111); + COMPRESS_PARSE_CHECK("t", true, 111); + COMPRESS_PARSE_CHECK("f", false, 111); + COMPRESS_PARSE_CHECK("on", true, 111); + COMPRESS_PARSE_CHECK("off", false, 111); + + /* Weird size/bool overlapping case. We preserve backward compatibility instead of assuming these are byte + * counts. */ + COMPRESS_PARSE_CHECK("1", true, 111); + COMPRESS_PARSE_CHECK("0", false, 111); + + /* IEC sizing */ + COMPRESS_PARSE_CHECK("1B", true, 1); + COMPRESS_PARSE_CHECK("1K", true, 1024); + COMPRESS_PARSE_CHECK("1M", true, 1024 * 1024); + COMPRESS_PARSE_CHECK("1G", true, 1024 * 1024 * 1024); + + /* Invalid Case */ + COMPRESS_PARSE_CHECK("-1", true, 111); + COMPRESS_PARSE_CHECK("blah blah", true, 111); + COMPRESS_PARSE_CHECK("", true, UINT64_MAX); +} + +DEFINE_TEST_MAIN(LOG_INFO); diff --git a/src/journal/test-journal-flush.c b/src/journal/test-journal-flush.c new file mode 100644 index 0000000..c734aa0 --- /dev/null +++ b/src/journal/test-journal-flush.c @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "journal-internal.h" +#include "macro.h" +#include "managed-journal-file.h" +#include "path-util.h" +#include "string-util.h" + +static void test_journal_flush(int argc, char *argv[]) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + _cleanup_free_ char *fn = NULL; + char dn[] = "/var/tmp/test-journal-flush.XXXXXX"; + ManagedJournalFile *new_journal = NULL; + 
sd_journal *j = NULL; + unsigned n = 0; + int r; + + m = mmap_cache_new(); + assert_se(m != NULL); + assert_se(mkdtemp(dn)); + (void) chattr_path(dn, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + fn = path_join(dn, "test.journal"); + + r = managed_journal_file_open(-1, fn, O_CREAT|O_RDWR, 0, 0644, 0, NULL, m, NULL, NULL, &new_journal); + assert_se(r >= 0); + + if (argc > 1) + r = sd_journal_open_files(&j, (const char **) strv_skip(argv, 1), 0); + else + r = sd_journal_open(&j, 0); + assert_se(r == 0); + + sd_journal_set_data_threshold(j, 0); + + SD_JOURNAL_FOREACH(j) { + Object *o; + JournalFile *f; + + f = j->current_file; + assert_se(f && f->current_offset > 0); + + r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o); + if (r < 0) + log_error_errno(r, "journal_file_move_to_object failed: %m"); + assert_se(r >= 0); + + r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset); + if (r < 0) + log_warning_errno(r, "journal_file_copy_entry failed: %m"); + assert_se(r >= 0 || + IN_SET(r, -EBADMSG, /* corrupted file */ + -EPROTONOSUPPORT, /* unsupported compression */ + -EIO)); /* file rotated */ + + if (++n >= 10000) + break; + } + + sd_journal_close(j); + + (void) managed_journal_file_close(new_journal); + + unlink(fn); + assert_se(rmdir(dn) == 0); +} + +int main(int argc, char *argv[]) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_journal_flush(argc, argv); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_journal_flush(argc, argv); + + return 0; +} diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c new file mode 100644 index 0000000..81e37b6 --- /dev/null +++ b/src/journal/test-journal-interleaving.c @@ -0,0 +1,312 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "io-util.h" +#include "journal-vacuum.h" +#include 
"log.h" +#include "managed-journal-file.h" +#include "parse-util.h" +#include "rm-rf.h" +#include "tests.h" +#include "util.h" + +/* This program tests skipping around in a multi-file journal. */ + +static bool arg_keep = false; + +_noreturn_ static void log_assert_errno(const char *text, int error, const char *file, unsigned line, const char *func) { + log_internal(LOG_CRIT, error, file, line, func, + "'%s' failed at %s:%u (%s): %m", text, file, line, func); + abort(); +} + +#define assert_ret(expr) \ + do { \ + int _r_ = (expr); \ + if (_unlikely_(_r_ < 0)) \ + log_assert_errno(#expr, -_r_, PROJECT_FILE, __LINE__, __PRETTY_FUNCTION__); \ + } while (false) + +static ManagedJournalFile *test_open(const char *name) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + ManagedJournalFile *f; + + m = mmap_cache_new(); + assert_se(m != NULL); + + assert_ret(managed_journal_file_open(-1, name, O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644, UINT64_MAX, NULL, m, NULL, NULL, &f)); + return f; +} + +static void test_close(ManagedJournalFile *f) { + (void) managed_journal_file_close(f); +} + +static void append_number(ManagedJournalFile *f, int n, uint64_t *seqnum) { + char *p; + dual_timestamp ts; + static dual_timestamp previous_ts = {}; + struct iovec iovec[1]; + + dual_timestamp_get(&ts); + + if (ts.monotonic <= previous_ts.monotonic) + ts.monotonic = previous_ts.monotonic + 1; + + if (ts.realtime <= previous_ts.realtime) + ts.realtime = previous_ts.realtime + 1; + + previous_ts = ts; + + assert_se(asprintf(&p, "NUMBER=%d", n) >= 0); + iovec[0] = IOVEC_MAKE_STRING(p); + assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL)); + free(p); +} + +static void test_check_number(sd_journal *j, int n) { + const void *d; + _cleanup_free_ char *k = NULL; + size_t l; + int x; + + assert_ret(sd_journal_get_data(j, "NUMBER", &d, &l)); + assert_se(k = strndup(d, l)); + printf("%s\n", k); + + assert_se(safe_atoi(k + 7, &x) >= 0); + assert_se(n == x); +} + 
+static void test_check_numbers_down(sd_journal *j, int count) { + int i; + + for (i = 1; i <= count; i++) { + int r; + test_check_number(j, i); + assert_ret(r = sd_journal_next(j)); + if (i == count) + assert_se(r == 0); + else + assert_se(r == 1); + } + +} + +static void test_check_numbers_up(sd_journal *j, int count) { + for (int i = count; i >= 1; i--) { + int r; + test_check_number(j, i); + assert_ret(r = sd_journal_previous(j)); + if (i == 1) + assert_se(r == 0); + else + assert_se(r == 1); + } + +} + +static void setup_sequential(void) { + ManagedJournalFile *one, *two; + one = test_open("one.journal"); + two = test_open("two.journal"); + append_number(one, 1, NULL); + append_number(one, 2, NULL); + append_number(two, 3, NULL); + append_number(two, 4, NULL); + test_close(one); + test_close(two); +} + +static void setup_interleaved(void) { + ManagedJournalFile *one, *two; + one = test_open("one.journal"); + two = test_open("two.journal"); + append_number(one, 1, NULL); + append_number(two, 2, NULL); + append_number(one, 3, NULL); + append_number(two, 4, NULL); + test_close(one); + test_close(two); +} + +static void mkdtemp_chdir_chattr(char *path) { + assert_se(mkdtemp(path)); + assert_se(chdir(path) >= 0); + + /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in our + * directory during the test run */ + (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL); +} + +static void test_skip_one(void (*setup)(void)) { + char t[] = "/var/tmp/journal-skip-XXXXXX"; + sd_journal *j; + int r; + + mkdtemp_chdir_chattr(t); + + setup(); + + /* Seek to head, iterate down. + */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_ret(sd_journal_next(j)); + test_check_numbers_down(j, 4); + sd_journal_close(j); + + /* Seek to tail, iterate up. 
+ */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_ret(sd_journal_previous(j)); + test_check_numbers_up(j, 4); + sd_journal_close(j); + + /* Seek to tail, skip to head, iterate down. + */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_tail(j)); + assert_ret(r = sd_journal_previous_skip(j, 4)); + assert_se(r == 4); + test_check_numbers_down(j, 4); + sd_journal_close(j); + + /* Seek to head, skip to tail, iterate up. + */ + assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_seek_head(j)); + assert_ret(r = sd_journal_next_skip(j, 4)); + assert_se(r == 4); + test_check_numbers_up(j, 4); + sd_journal_close(j); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + puts("------------------------------------------------------------"); +} + +TEST(skip) { + test_skip_one(setup_sequential); + test_skip_one(setup_interleaved); +} + +static void test_sequence_numbers_one(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + char t[] = "/var/tmp/journal-seq-XXXXXX"; + ManagedJournalFile *one, *two; + uint64_t seqnum = 0; + sd_id128_t seqnum_id; + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(managed_journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644, + UINT64_MAX, NULL, m, NULL, NULL, &one) == 0); + + append_number(one, 1, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 1); + append_number(one, 2, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 2); + + assert_se(one->file->header->state == STATE_ONLINE); + assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->machine_id)); + assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->boot_id)); + 
assert_se(sd_id128_equal(one->file->header->file_id, one->file->header->seqnum_id)); + + memcpy(&seqnum_id, &one->file->header->seqnum_id, sizeof(sd_id128_t)); + + assert_se(managed_journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0644, + UINT64_MAX, NULL, m, NULL, one, &two) == 0); + + assert_se(two->file->header->state == STATE_ONLINE); + assert_se(!sd_id128_equal(two->file->header->file_id, one->file->header->file_id)); + assert_se(sd_id128_equal(one->file->header->machine_id, one->file->header->machine_id)); + assert_se(sd_id128_equal(one->file->header->boot_id, one->file->header->boot_id)); + assert_se(sd_id128_equal(one->file->header->seqnum_id, one->file->header->seqnum_id)); + + append_number(two, 3, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 3); + append_number(two, 4, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 4); + + test_close(two); + + append_number(one, 5, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 5); + + append_number(one, 6, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 6); + + test_close(one); + + /* restart server */ + seqnum = 0; + + assert_se(managed_journal_file_open(-1, "two.journal", O_RDWR, JOURNAL_COMPRESS, 0, + UINT64_MAX, NULL, m, NULL, NULL, &two) == 0); + + assert_se(sd_id128_equal(two->file->header->seqnum_id, seqnum_id)); + + append_number(two, 7, &seqnum); + printf("seqnum=%"PRIu64"\n", seqnum); + assert_se(seqnum == 5); + + /* So..., here we have the same seqnum in two files with the + * same seqnum_id. 
*/ + + test_close(two); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } +} + +TEST(sequence_numbers) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_sequence_numbers_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_sequence_numbers_one(); +} + +static int intro(void) { + /* managed_journal_file_open requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + arg_keep = saved_argc > 1; + + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c new file mode 100644 index 0000000..ac5b7f0 --- /dev/null +++ b/src/journal/test-journal-stream.c @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "sd-journal.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "io-util.h" +#include "journal-internal.h" +#include "log.h" +#include "macro.h" +#include "managed-journal-file.h" +#include "parse-util.h" +#include "rm-rf.h" +#include "tests.h" +#include "util.h" + +#define N_ENTRIES 200 + +static void verify_contents(sd_journal *j, unsigned skip) { + unsigned i; + + assert_se(j); + + i = 0; + SD_JOURNAL_FOREACH(j) { + const void *d; + char *k, *c; + size_t l; + unsigned u = 0; + + assert_se(sd_journal_get_cursor(j, &k) >= 0); + printf("cursor: %s\n", k); + free(k); + + assert_se(sd_journal_get_data(j, "MAGIC", &d, &l) >= 0); + printf("\t%.*s\n", (int) l, (const char*) d); + + assert_se(sd_journal_get_data(j, "NUMBER", &d, &l) >= 0); + assert_se(k = strndup(d, l)); + printf("\t%s\n", k); + + if (skip > 0) { + assert_se(safe_atou(k + 7, &u) >= 0); + assert_se(i == u); + i += skip; + } + + free(k); + + 
assert_se(sd_journal_get_cursor(j, &c) >= 0); + assert_se(sd_journal_test_cursor(j, c) > 0); + free(c); + } + + if (skip > 0) + assert_se(i == N_ENTRIES); +} + +static void run_test(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + ManagedJournalFile *one, *two, *three; + char t[] = "/var/tmp/journal-stream-XXXXXX"; + unsigned i; + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + char *z; + const void *data; + size_t l; + dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL; + + m = mmap_cache_new(); + assert_se(m != NULL); + + assert_se(mkdtemp(t)); + assert_se(chdir(t) >= 0); + (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + assert_se(managed_journal_file_open(-1, "one.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, NULL, &one) == 0); + assert_se(managed_journal_file_open(-1, "two.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, NULL, &two) == 0); + assert_se(managed_journal_file_open(-1, "three.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, NULL, &three) == 0); + + for (i = 0; i < N_ENTRIES; i++) { + char *p, *q; + dual_timestamp ts; + struct iovec iovec[2]; + + dual_timestamp_get(&ts); + + if (ts.monotonic <= previous_ts.monotonic) + ts.monotonic = previous_ts.monotonic + 1; + + if (ts.realtime <= previous_ts.realtime) + ts.realtime = previous_ts.realtime + 1; + + previous_ts = ts; + + assert_se(asprintf(&p, "NUMBER=%u", i) >= 0); + iovec[0] = IOVEC_MAKE(p, strlen(p)); + + assert_se(asprintf(&q, "MAGIC=%s", i % 5 == 0 ? 
"quux" : "waldo") >= 0); + + iovec[1] = IOVEC_MAKE(q, strlen(q)); + + if (i % 10 == 0) + assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0); + else { + if (i % 3 == 0) + assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0); + + assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0); + } + + free(p); + free(q); + } + + (void) managed_journal_file_close(one); + (void) managed_journal_file_close(two); + (void) managed_journal_file_close(three); + + assert_se(sd_journal_open_directory(&j, t, 0) >= 0); + + assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0); + SD_JOURNAL_FOREACH_BACKWARDS(j) { + _cleanup_free_ char *c; + + assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0); + printf("\t%.*s\n", (int) l, (const char*) data); + + assert_se(sd_journal_get_cursor(j, &c) >= 0); + assert_se(sd_journal_test_cursor(j, c) > 0); + } + + SD_JOURNAL_FOREACH(j) { + _cleanup_free_ char *c; + + assert_se(sd_journal_get_data(j, "NUMBER", &data, &l) >= 0); + printf("\t%.*s\n", (int) l, (const char*) data); + + assert_se(sd_journal_get_cursor(j, &c) >= 0); + assert_se(sd_journal_test_cursor(j, c) > 0); + } + + sd_journal_flush_matches(j); + + verify_contents(j, 1); + + printf("NEXT TEST\n"); + assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0); + + assert_se(z = journal_make_match_string(j)); + printf("resulting match expression is: %s\n", z); + free(z); + + verify_contents(j, 5); + + printf("NEXT TEST\n"); + sd_journal_flush_matches(j); + assert_se(sd_journal_add_match(j, "MAGIC=waldo", 0) >= 0); + assert_se(sd_journal_add_match(j, "NUMBER=10", 0) >= 0); + assert_se(sd_journal_add_match(j, "NUMBER=11", 0) >= 0); + assert_se(sd_journal_add_match(j, "NUMBER=12", 0) >= 0); + + assert_se(z = journal_make_match_string(j)); + printf("resulting match expression is: %s\n", z); + free(z); + + verify_contents(j, 0); + + assert_se(sd_journal_query_unique(j, 
/* Table-driven check of syslog_parse_identifier(). The arguments of each row are: input string,
 * expected identifier, expected PID, expected remainder of the input after parsing, and expected
 * return value.
 *
 * In every row the expected return value equals the length of the input minus the length of the
 * expected remainder. Three inputs had lost runs of spaces (collapsed to a single blank) and
 * thereby contradicted their own expectations; their spacing is restored so that each row is
 * self-consistent again:
 *   "pidu:  xxx"  6 bytes accounted for, " xxx" left over
 *   ":  "         2 bytes accounted for, " " left over
 *   "   pidu:"    8 bytes accounted for, nothing left over */
TEST(syslog_parse_identifier) {
        /* Identifier with and without a PID in brackets */
        test_syslog_parse_identifier_one("pidu[111]: xxx", "pidu", "111", "xxx", 11);
        test_syslog_parse_identifier_one("pidu: xxx", "pidu", NULL, "xxx", 6);

        /* Only one blank after the colon is swallowed, the second one stays in the remainder */
        test_syslog_parse_identifier_one("pidu:  xxx", "pidu", NULL, " xxx", 6);

        /* No colon: nothing is parsed and the input is left untouched */
        test_syslog_parse_identifier_one("pidu xxx", NULL, NULL, "pidu xxx", 0);
        test_syslog_parse_identifier_one(" pidu xxx", NULL, NULL, " pidu xxx", 0);
        test_syslog_parse_identifier_one("", NULL, NULL, "", 0);
        test_syslog_parse_identifier_one(" ", NULL, NULL, " ", 0);

        /* Empty identifier */
        test_syslog_parse_identifier_one(":", "", NULL, "", 1);
        test_syslog_parse_identifier_one(":  ", "", NULL, " ", 2);
        test_syslog_parse_identifier_one(" :", "", NULL, "", 2);

        /* Leading blanks count towards the return value but are not part of the identifier */
        test_syslog_parse_identifier_one("   pidu:", "pidu", NULL, "", 8);
        test_syslog_parse_identifier_one("pidu:", "pidu", NULL, "", 5);
        test_syslog_parse_identifier_one("pidu: ", "pidu", NULL, "", 6);

        /* A blank between identifier and colon makes the input unparsable */
        test_syslog_parse_identifier_one("pidu : ", NULL, NULL, "pidu : ", 0);
}
file mode 100644 index 0000000..e36ea8c --- /dev/null +++ b/src/journal/test-journal-verify.c @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include "chattr-util.h" +#include "fd-util.h" +#include "io-util.h" +#include "journal-verify.h" +#include "log.h" +#include "managed-journal-file.h" +#include "mmap-cache.h" +#include "rm-rf.h" +#include "terminal-util.h" +#include "tests.h" +#include "util.h" + +#define N_ENTRIES 6000 +#define RANDOM_RANGE 77 + +static void bit_toggle(const char *fn, uint64_t p) { + uint8_t b; + ssize_t r; + int fd; + + fd = open(fn, O_RDWR|O_CLOEXEC); + assert_se(fd >= 0); + + r = pread(fd, &b, 1, p/8); + assert_se(r == 1); + + b ^= 1 << (p % 8); + + r = pwrite(fd, &b, 1, p/8); + assert_se(r == 1); + + safe_close(fd); +} + +static int raw_verify(const char *fn, const char *verification_key) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + JournalFile *f; + int r; + + m = mmap_cache_new(); + assert_se(m != NULL); + + r = journal_file_open(-1, fn, O_RDONLY, JOURNAL_COMPRESS|(verification_key ? 
JOURNAL_SEAL : 0), 0666, UINT64_MAX, NULL, m, NULL, &f); + if (r < 0) + return r; + + r = journal_file_verify(f, verification_key, NULL, NULL, NULL, false); + (void) journal_file_close(f); + + return r; +} + +static int run_test(int argc, char *argv[]) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + char t[] = "/var/tmp/journal-XXXXXX"; + unsigned n; + JournalFile *f; + ManagedJournalFile *df; + const char *verification_key = argv[1]; + usec_t from = 0, to = 0, total = 0; + struct stat st; + uint64_t p; + + m = mmap_cache_new(); + assert_se(m != NULL); + + /* managed_journal_file_open requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + test_setup_logging(LOG_DEBUG); + + assert_se(mkdtemp(t)); + assert_se(chdir(t) >= 0); + (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); + + log_info("Generating..."); + + assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|(verification_key ? JOURNAL_SEAL : 0), 0666, UINT64_MAX, NULL, m, NULL, NULL, &df) == 0); + + for (n = 0; n < N_ENTRIES; n++) { + struct iovec iovec; + struct dual_timestamp ts; + char *test; + + dual_timestamp_get(&ts); + + assert_se(asprintf(&test, "RANDOM=%li", random() % RANDOM_RANGE)); + + iovec = IOVEC_MAKE_STRING(test); + + assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0); + + free(test); + } + + (void) managed_journal_file_close(df); + + log_info("Verifying..."); + + assert_se(journal_file_open(-1, "test.journal", O_RDONLY, JOURNAL_COMPRESS|(verification_key ? 
JOURNAL_SEAL: 0), 0666, UINT64_MAX, NULL, m, NULL, &f) == 0); + /* journal_file_print_header(f); */ + journal_file_dump(f); + + assert_se(journal_file_verify(f, verification_key, &from, &to, &total, true) >= 0); + + if (verification_key && JOURNAL_HEADER_SEALED(f->header)) + log_info("=> Validated from %s to %s, %s missing", + FORMAT_TIMESTAMP(from), + FORMAT_TIMESTAMP(to), + FORMAT_TIMESPAN(total > to ? total - to : 0, 0)); + + (void) journal_file_close(f); + + if (verification_key) { + log_info("Toggling bits..."); + + assert_se(stat("test.journal", &st) >= 0); + + for (p = 38448*8+0; p < ((uint64_t) st.st_size * 8); p ++) { + bit_toggle("test.journal", p); + + log_info("[ %"PRIu64"+%"PRIu64"]", p / 8, p % 8); + + if (raw_verify("test.journal", verification_key) >= 0) + log_notice(ANSI_HIGHLIGHT_RED ">>>> %"PRIu64" (bit %"PRIu64") can be toggled without detection." ANSI_NORMAL, p / 8, p % 8); + + bit_toggle("test.journal", p); + } + } + + log_info("Exiting..."); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + + return 0; +} + +int main(int argc, char *argv[]) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + run_test(argc, argv); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + run_test(argc, argv); + + return 0; +} diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c new file mode 100644 index 0000000..889673c --- /dev/null +++ b/src/journal/test-journal.c @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fcntl.h> +#include <unistd.h> + +#include "chattr-util.h" +#include "io-util.h" +#include "journal-authenticate.h" +#include "journal-vacuum.h" +#include "log.h" +#include "managed-journal-file.h" +#include "rm-rf.h" +#include "tests.h" + +static bool arg_keep = false; + +static void mkdtemp_chdir_chattr(char *path) { + assert_se(mkdtemp(path)); + assert_se(chdir(path) >= 0); + + /* Speed up things a bit on btrfs, ensuring that CoW is turned off for all files created in 
our + * directory during the test run */ + (void) chattr_path(path, FS_NOCOW_FL, FS_NOCOW_FL, NULL); +} + +static void test_non_empty_one(void) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + dual_timestamp ts; + ManagedJournalFile *f; + struct iovec iovec; + static const char test[] = "TEST1=1", test2[] = "TEST2=2"; + Object *o, *d; + uint64_t p; + sd_id128_t fake_boot_id; + char t[] = "/var/tmp/journal-XXXXXX"; + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, NULL, &f) == 0); + + assert_se(dual_timestamp_get(&ts)); + assert_se(sd_id128_randomize(&fake_boot_id) == 0); + + iovec = IOVEC_MAKE_STRING(test); + assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0); + + iovec = IOVEC_MAKE_STRING(test2); + assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0); + + iovec = IOVEC_MAKE_STRING(test); + assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL) == 0); + +#if HAVE_GCRYPT + journal_file_append_tag(f->file); +#endif + journal_file_dump(f->file); + + assert_se(journal_file_next_entry(f->file, 0, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_next_entry(f->file, p, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_next_entry(f->file, p, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + assert_se(sd_id128_equal(o->entry.boot_id, fake_boot_id)); + + assert_se(journal_file_next_entry(f->file, p, DIRECTION_DOWN, &o, &p) == 0); + + assert_se(journal_file_next_entry(f->file, 0, DIRECTION_DOWN, &o, &p) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_find_data_object(f->file, test, strlen(test), &d, NULL) == 1); + 
assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + + assert_se(journal_file_find_data_object(f->file, test2, strlen(test2), &d, NULL) == 1); + assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_find_data_object(f->file, "quux", 4, &d, NULL) == 0); + + assert_se(journal_file_move_to_entry_by_seqnum(f->file, 1, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 1); + + assert_se(journal_file_move_to_entry_by_seqnum(f->file, 3, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 3); + + assert_se(journal_file_move_to_entry_by_seqnum(f->file, 2, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(le64toh(o->entry.seqnum) == 2); + + assert_se(journal_file_move_to_entry_by_seqnum(f->file, 10, DIRECTION_DOWN, &o, NULL) == 0); + + managed_journal_file_rotate(&f, m, JOURNAL_SEAL|JOURNAL_COMPRESS, UINT64_MAX, NULL); + managed_journal_file_rotate(&f, m, JOURNAL_SEAL|JOURNAL_COMPRESS, UINT64_MAX, NULL); + + (void) managed_journal_file_close(f); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + puts("------------------------------------------------------------"); +} + +TEST(non_empty) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_non_empty_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_non_empty_one(); +} + +static void test_empty_one(void) { + _cleanup_(mmap_cache_unrefp) 
MMapCache *m = NULL; + ManagedJournalFile *f1, *f2, *f3, *f4; + char t[] = "/var/tmp/journal-XXXXXX"; + + m = mmap_cache_new(); + assert_se(m != NULL); + + mkdtemp_chdir_chattr(t); + + assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, 0, 0666, UINT64_MAX, NULL, m, NULL, NULL, &f1) == 0); + assert_se(managed_journal_file_open(-1, "test-compress.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS, 0666, UINT64_MAX, NULL, m, NULL, NULL, &f2) == 0); + assert_se(managed_journal_file_open(-1, "test-seal.journal", O_RDWR|O_CREAT, JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, NULL, &f3) == 0); + assert_se(managed_journal_file_open(-1, "test-seal-compress.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, UINT64_MAX, NULL, m, NULL, NULL, &f4) == 0); + + journal_file_print_header(f1->file); + puts(""); + journal_file_print_header(f2->file); + puts(""); + journal_file_print_header(f3->file); + puts(""); + journal_file_print_header(f4->file); + puts(""); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + (void) managed_journal_file_close(f1); + (void) managed_journal_file_close(f2); + (void) managed_journal_file_close(f3); + (void) managed_journal_file_close(f4); +} + +TEST(empty) { + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_empty_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_empty_one(); +} + +#if HAVE_COMPRESSION +static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) { + _cleanup_(mmap_cache_unrefp) MMapCache *m = NULL; + dual_timestamp ts; + ManagedJournalFile *f; + struct iovec iovec; + Object *o; + uint64_t p; + char t[] = "/var/tmp/journal-XXXXXX"; + char data[2048] = "FIELD="; + bool is_compressed; + int r; + + assert_se(data_size <= sizeof(data)); + + m = mmap_cache_new(); + assert_se(m != NULL); + + 
mkdtemp_chdir_chattr(t); + + assert_se(managed_journal_file_open(-1, "test.journal", O_RDWR|O_CREAT, JOURNAL_COMPRESS|JOURNAL_SEAL, 0666, compress_threshold, NULL, m, NULL, NULL, &f) == 0); + + dual_timestamp_get(&ts); + + iovec = IOVEC_MAKE(data, data_size); + assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0); + +#if HAVE_GCRYPT + journal_file_append_tag(f->file); +#endif + journal_file_dump(f->file); + + /* We have to partially reimplement some of the dump logic, because the normal next_entry does the + * decompression for us. */ + p = le64toh(f->file->header->header_size); + for (;;) { + r = journal_file_move_to_object(f->file, OBJECT_UNUSED, p, &o); + assert_se(r == 0); + if (o->object.type == OBJECT_DATA) + break; + + assert_se(p < le64toh(f->file->header->tail_object_offset)); + p = p + ALIGN64(le64toh(o->object.size)); + } + + is_compressed = COMPRESSION_FROM_OBJECT(o) != COMPRESSION_NONE; + + (void) managed_journal_file_close(f); + + log_info("Done..."); + + if (arg_keep) + log_info("Not removing %s", t); + else { + journal_directory_vacuum(".", 3000000, 0, 0, NULL, true); + + assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + } + + puts("------------------------------------------------------------"); + + return is_compressed; +} + +static void test_min_compress_size_one(void) { + /* Note that XZ will actually fail to compress anything under 80 bytes, so you have to choose the limits + * carefully */ + + /* DEFAULT_MIN_COMPRESS_SIZE is 512 */ + assert_se(!check_compressed(UINT64_MAX, 255)); + assert_se(check_compressed(UINT64_MAX, 513)); + + /* compress everything */ + assert_se(check_compressed(0, 96)); + assert_se(check_compressed(8, 96)); + + /* Ensure we don't try to compress less than 8 bytes */ + assert_se(!check_compressed(0, 7)); + + /* check boundary conditions */ + assert_se(check_compressed(256, 256)); + assert_se(!check_compressed(256, 255)); +} + +TEST(min_compress_size) { + 
assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "0", 1) >= 0); + test_min_compress_size_one(); + + assert_se(setenv("SYSTEMD_JOURNAL_COMPACT", "1", 1) >= 0); + test_min_compress_size_one(); +} +#endif + +static int intro(void) { + arg_keep = saved_argc > 1; + + /* managed_journal_file_open requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + return EXIT_SUCCESS; +} + +DEFINE_TEST_MAIN_WITH_INTRO(LOG_DEBUG, intro); |