diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 02:25:50 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 02:25:50 +0000 |
commit | 19f4f86bfed21c5326ed2acebe1163f3a83e832b (patch) | |
tree | d59b9989ce55ed23693e80974d94c856f1c2c8b1 /src/shared | |
parent | Initial commit. (diff) | |
download | systemd-upstream.tar.xz systemd-upstream.zip |
Adding upstream version 241.upstream/241upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
180 files changed, 44871 insertions, 0 deletions
diff --git a/src/shared/acl-util.c b/src/shared/acl-util.c new file mode 100644 index 0000000..9633514 --- /dev/null +++ b/src/shared/acl-util.c @@ -0,0 +1,406 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdbool.h> + +#include "acl-util.h" +#include "alloc-util.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" +#include "util.h" + +int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry) { + acl_entry_t i; + int r; + + assert(acl); + assert(entry); + + for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { + + acl_tag_t tag; + uid_t *u; + bool b; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag != ACL_USER) + continue; + + u = acl_get_qualifier(i); + if (!u) + return -errno; + + b = *u == uid; + acl_free(u); + + if (b) { + *entry = i; + return 1; + } + } + if (r < 0) + return -errno; + + return 0; +} + +int calc_acl_mask_if_needed(acl_t *acl_p) { + acl_entry_t i; + int r; + bool need = false; + + assert(acl_p); + + for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) { + acl_tag_t tag; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag == ACL_MASK) + return 0; + + if (IN_SET(tag, ACL_USER, ACL_GROUP)) + need = true; + } + if (r < 0) + return -errno; + + if (need && acl_calc_mask(acl_p) < 0) + return -errno; + + return need; +} + +int add_base_acls_if_needed(acl_t *acl_p, const char *path) { + acl_entry_t i; + int r; + bool have_user_obj = false, have_group_obj = false, have_other = false; + struct stat st; + _cleanup_(acl_freep) acl_t basic = NULL; + + assert(acl_p); + + for (r = acl_get_entry(*acl_p, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(*acl_p, ACL_NEXT_ENTRY, &i)) { + acl_tag_t tag; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if (tag == ACL_USER_OBJ) + have_user_obj = true; + else if (tag == ACL_GROUP_OBJ) + have_group_obj = true; + else if (tag == ACL_OTHER) + have_other = true; + if (have_user_obj && have_group_obj && have_other) + return 0; + } + if (r < 0) + return -errno; + + r = stat(path, &st); + if (r < 0) + return -errno; + + basic = acl_from_mode(st.st_mode); + if (!basic) + return -errno; + + for (r = acl_get_entry(basic, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(basic, ACL_NEXT_ENTRY, &i)) { + acl_tag_t tag; + acl_entry_t dst; + + if (acl_get_tag_type(i, &tag) < 0) + return -errno; + + if ((tag == ACL_USER_OBJ && have_user_obj) || + (tag == ACL_GROUP_OBJ && have_group_obj) || + (tag == ACL_OTHER && have_other)) + continue; + + r = acl_create_entry(acl_p, &dst); + if (r < 0) + return -errno; + + r = acl_copy_entry(dst, i); + if (r < 0) + return -errno; + } + if (r < 0) + return -errno; + return 0; +} + +int acl_search_groups(const char *path, char ***ret_groups) { + _cleanup_strv_free_ char **g = NULL; + _cleanup_(acl_freep) acl_t acl = NULL; + bool ret = false; + acl_entry_t entry; + int r; + + assert(path); + + acl = acl_get_file(path, ACL_TYPE_DEFAULT); + if (!acl) + return -errno; + + r = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry); + for (;;) { + _cleanup_(acl_free_gid_tpp) gid_t *gid = NULL; + acl_tag_t tag; + + if (r < 0) + return -errno; + if (r == 0) + break; + + if (acl_get_tag_type(entry, &tag) < 0) + return -errno; + + if (tag != ACL_GROUP) + goto next; + + gid = acl_get_qualifier(entry); + if (!gid) + return -errno; + + if (in_gid(*gid) > 0) { + if (!ret_groups) + return true; + + ret = true; + } + + if (ret_groups) { + char *name; + + name = gid_to_name(*gid); + if (!name) + return -ENOMEM; + + r = strv_consume(&g, name); + if (r < 0) + return r; + } + + next: + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry); + } + + if (ret_groups) + *ret_groups = TAKE_PTR(g); + + return ret; +} + +int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask) { + _cleanup_free_ char **a = NULL, **d = NULL; /* strings are not freed */ + _cleanup_strv_free_ char **split; + char **entry; + int r = -EINVAL; + _cleanup_(acl_freep) acl_t a_acl = NULL, d_acl = NULL; + + split = strv_split(text, ","); + if (!split) + return -ENOMEM; + + STRV_FOREACH(entry, split) { + char *p; + + p = STARTSWITH_SET(*entry, "default:", "d:"); + if (p) + r = strv_push(&d, p); + else + r = strv_push(&a, *entry); + if (r < 0) + return r; + } + + if (!strv_isempty(a)) { + _cleanup_free_ char *join; + + join = strv_join(a, ","); + if (!join) + return -ENOMEM; + + a_acl = acl_from_text(join); + if (!a_acl) + return -errno; + + if (want_mask) { + r = calc_acl_mask_if_needed(&a_acl); + if (r < 0) + return r; + } + } + + if (!strv_isempty(d)) { + _cleanup_free_ char *join; + + join = strv_join(d, ","); + if (!join) + return -ENOMEM; + + d_acl = acl_from_text(join); + if (!d_acl) + return -errno; + + if (want_mask) { + r = calc_acl_mask_if_needed(&d_acl); + if (r < 0) + return r; + } + } + + *acl_access = TAKE_PTR(a_acl); + *acl_default = TAKE_PTR(d_acl); + + return 0; +} + +static int acl_entry_equal(acl_entry_t a, acl_entry_t b) { + acl_tag_t tag_a, tag_b; + + if (acl_get_tag_type(a, &tag_a) < 0) + return -errno; + + if (acl_get_tag_type(b, &tag_b) < 0) + return -errno; + + if (tag_a != tag_b) + return false; + + switch (tag_a) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + /* can have only one of those */ + return true; + case ACL_USER: { + _cleanup_(acl_free_uid_tpp) uid_t *uid_a = NULL, *uid_b = NULL; + + uid_a = acl_get_qualifier(a); + if (!uid_a) + return -errno; + + uid_b = acl_get_qualifier(b); + if (!uid_b) + return -errno; + + return *uid_a == *uid_b; + } + case ACL_GROUP: { + _cleanup_(acl_free_gid_tpp) gid_t *gid_a = NULL, *gid_b = NULL; + + gid_a = acl_get_qualifier(a); + if (!gid_a) + return -errno; + + gid_b = acl_get_qualifier(b); + if (!gid_b) + return -errno; + + return *gid_a == *gid_b; + } + default: + assert_not_reached("Unknown acl tag type"); + } +} + +static int find_acl_entry(acl_t acl, acl_entry_t entry, acl_entry_t *out) { + acl_entry_t i; + int r; + + for (r = acl_get_entry(acl, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(acl, ACL_NEXT_ENTRY, &i)) { + + r = acl_entry_equal(i, entry); + if (r < 0) + return r; + if (r > 0) { + *out = i; + return 1; + } + } + if (r < 0) + return -errno; + return 0; +} + +int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl) { + _cleanup_(acl_freep) acl_t old; + acl_entry_t i; + int r; + + old = acl_get_file(path, type); + if (!old) + return -errno; + + for (r = acl_get_entry(new, ACL_FIRST_ENTRY, &i); + r > 0; + r = acl_get_entry(new, ACL_NEXT_ENTRY, &i)) { + + acl_entry_t j; + + r = find_acl_entry(old, i, &j); + if (r < 0) + return r; + if (r == 0) + if (acl_create_entry(&old, &j) < 0) + return -errno; + + if (acl_copy_entry(j, i) < 0) + return -errno; + } + if (r < 0) + return -errno; + + *acl = TAKE_PTR(old); + + return 0; +} + +int add_acls_for_user(int fd, uid_t uid) { + _cleanup_(acl_freep) acl_t acl = NULL; + acl_entry_t entry; + acl_permset_t permset; + int r; + + acl = acl_get_fd(fd); + if (!acl) + return -errno; + + r = acl_find_uid(acl, uid, &entry); + if (r <= 0) { + if (acl_create_entry(&acl, &entry) < 0 || + acl_set_tag_type(entry, ACL_USER) < 0 || + acl_set_qualifier(entry, &uid) < 0) + return -errno; + } + + /* We do not recalculate the mask unconditionally here, + * so that the fchmod() mask above stays intact. */ + if (acl_get_permset(entry, &permset) < 0 || + acl_add_perm(permset, ACL_READ) < 0) + return -errno; + + r = calc_acl_mask_if_needed(&acl); + if (r < 0) + return r; + + return acl_set_fd(fd, acl); +} diff --git a/src/shared/acl-util.h b/src/shared/acl-util.h new file mode 100644 index 0000000..10b2a3d --- /dev/null +++ b/src/shared/acl-util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if HAVE_ACL + +#include <acl/libacl.h> +#include <stdbool.h> +#include <sys/acl.h> + +#include "macro.h" + +int acl_find_uid(acl_t acl, uid_t uid, acl_entry_t *entry); +int calc_acl_mask_if_needed(acl_t *acl_p); +int add_base_acls_if_needed(acl_t *acl_p, const char *path); +int acl_search_groups(const char* path, char ***ret_groups); +int parse_acl(const char *text, acl_t *acl_access, acl_t *acl_default, bool want_mask); +int acls_for_file(const char *path, acl_type_t type, acl_t new, acl_t *acl); +int add_acls_for_user(int fd, uid_t uid); + +/* acl_free takes multiple argument types. + * Multiple cleanup functions are necessary. */ +DEFINE_TRIVIAL_CLEANUP_FUNC(acl_t, acl_free); +#define acl_free_charp acl_free +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, acl_free_charp); +#define acl_free_uid_tp acl_free +DEFINE_TRIVIAL_CLEANUP_FUNC(uid_t*, acl_free_uid_tp); +#define acl_free_gid_tp acl_free +DEFINE_TRIVIAL_CLEANUP_FUNC(gid_t*, acl_free_gid_tp); + +#endif diff --git a/src/shared/acpi-fpdt.c b/src/shared/acpi-fpdt.c new file mode 100644 index 0000000..d565ebd --- /dev/null +++ b/src/shared/acpi-fpdt.c @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +#include "acpi-fpdt.h" +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "time-util.h" + +struct acpi_table_header { + char signature[4]; + uint32_t length; + uint8_t revision; + uint8_t checksum; + char oem_id[6]; + char oem_table_id[8]; + uint32_t oem_revision; + char asl_compiler_id[4]; + uint32_t asl_compiler_revision; +}; + +enum { + ACPI_FPDT_TYPE_BOOT = 0, + ACPI_FPDT_TYPE_S3PERF = 1, +}; + +struct acpi_fpdt_header { + uint16_t type; + uint8_t length; + uint8_t revision; + uint8_t reserved[4]; + uint64_t ptr; +}; + +struct acpi_fpdt_boot_header { + char signature[4]; + uint32_t length; +}; + +enum { + ACPI_FPDT_S3PERF_RESUME_REC = 0, + ACPI_FPDT_S3PERF_SUSPEND_REC = 1, + ACPI_FPDT_BOOT_REC = 2, +}; + +struct acpi_fpdt_boot { + uint16_t type; + uint8_t length; + uint8_t revision; + uint8_t reserved[4]; + uint64_t reset_end; + uint64_t load_start; + uint64_t startup_start; + uint64_t exit_services_entry; + uint64_t exit_services_exit; +}; + +int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit) { + _cleanup_free_ char *buf = NULL; + struct acpi_table_header *tbl; + size_t l = 0; + struct acpi_fpdt_header *rec; + int r; + uint64_t ptr = 0; + _cleanup_close_ int fd = -1; + struct acpi_fpdt_boot_header hbrec; + struct acpi_fpdt_boot brec; + + r = read_full_file("/sys/firmware/acpi/tables/FPDT", &buf, &l); + if (r < 0) + return r; + + if (l < sizeof(struct acpi_table_header) + sizeof(struct acpi_fpdt_header)) + return -EINVAL; + + tbl = (struct acpi_table_header *)buf; + if (l != tbl->length) + return -EINVAL; + + if (memcmp(tbl->signature, "FPDT", 4) != 0) + return -EINVAL; + + /* find Firmware Basic Boot Performance Pointer Record */ + for (rec = (struct acpi_fpdt_header *)(buf + sizeof(struct acpi_table_header)); + (char *)rec < buf + l; + rec = (struct acpi_fpdt_header *)((char *)rec + rec->length)) { + if (rec->length <= 0) + break; + if (rec->type != ACPI_FPDT_TYPE_BOOT) + continue; + if (rec->length != sizeof(struct acpi_fpdt_header)) + continue; + + ptr = rec->ptr; + break; + } + + if (ptr == 0) + return -ENODATA; + + /* read Firmware Basic Boot Performance Data Record */ + fd = open("/dev/mem", O_CLOEXEC|O_RDONLY); + if (fd < 0) + return -errno; + + l = pread(fd, &hbrec, sizeof(struct acpi_fpdt_boot_header), ptr); + if (l != sizeof(struct acpi_fpdt_boot_header)) + return -EINVAL; + + if (memcmp(hbrec.signature, "FBPT", 4) != 0) + return -EINVAL; + + if (hbrec.length < sizeof(struct acpi_fpdt_boot_header) + sizeof(struct acpi_fpdt_boot)) + return -EINVAL; + + l = pread(fd, &brec, sizeof(struct acpi_fpdt_boot), ptr + sizeof(struct acpi_fpdt_boot_header)); + if (l != sizeof(struct acpi_fpdt_boot)) + return -EINVAL; + + if (brec.length != sizeof(struct acpi_fpdt_boot)) + return -EINVAL; + + if (brec.type != ACPI_FPDT_BOOT_REC) + return -EINVAL; + + if (brec.exit_services_exit == 0) + /* Non-UEFI compatible boot. */ + return -ENODATA; + + if (brec.startup_start == 0 || brec.exit_services_exit < brec.startup_start) + return -EINVAL; + if (brec.exit_services_exit > NSEC_PER_HOUR) + return -EINVAL; + + if (loader_start) + *loader_start = brec.startup_start / 1000; + if (loader_exit) + *loader_exit = brec.exit_services_exit / 1000; + + return 0; +} diff --git a/src/shared/acpi-fpdt.h b/src/shared/acpi-fpdt.h new file mode 100644 index 0000000..8d28893 --- /dev/null +++ b/src/shared/acpi-fpdt.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <time-util.h> + +int acpi_get_boot_usec(usec_t *loader_start, usec_t *loader_exit); diff --git a/src/shared/apparmor-util.c b/src/shared/apparmor-util.c new file mode 100644 index 0000000..c4a4b04 --- /dev/null +++ b/src/shared/apparmor-util.c @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stddef.h> + +#include "alloc-util.h" +#include "apparmor-util.h" +#include "fileio.h" +#include "parse-util.h" + +bool mac_apparmor_use(void) { + static int cached_use = -1; + + if (cached_use < 0) { + _cleanup_free_ char *p = NULL; + + cached_use = + read_one_line_file("/sys/module/apparmor/parameters/enabled", &p) >= 0 && + parse_boolean(p) > 0; + } + + return cached_use; +} diff --git a/src/shared/apparmor-util.h b/src/shared/apparmor-util.h new file mode 100644 index 0000000..7fbaf90 --- /dev/null +++ b/src/shared/apparmor-util.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +bool mac_apparmor_use(void); diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c new file mode 100644 index 0000000..072bf72 --- /dev/null +++ b/src/shared/ask-password-api.c @@ -0,0 +1,818 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/inotify.h> +#include <sys/signalfd.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/uio.h> +#include <sys/un.h> +#include <termios.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "ask-password-api.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "fs-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "mkdir.h" +#include "process-util.h" +#include "random-util.h" +#include "signal-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "time-util.h" +#include "tmpfile-util.h" +#include "umask-util.h" +#include "utf8.h" +#include "util.h" + +#define KEYRING_TIMEOUT_USEC ((5 * USEC_PER_MINUTE) / 2) + +static int lookup_key(const char *keyname, key_serial_t *ret) { + key_serial_t serial; + + assert(keyname); + assert(ret); + + serial = request_key("user", keyname, NULL, 0); + if (serial == -1) + return negative_errno(); + + *ret = serial; + return 0; +} + +static int retrieve_key(key_serial_t serial, char ***ret) { + _cleanup_free_ char *p = NULL; + long m = 100, n; + char **l; + + assert(ret); + + for (;;) { + p = new(char, m); + if (!p) + return -ENOMEM; + + n = keyctl(KEYCTL_READ, (unsigned long) serial, (unsigned long) p, (unsigned long) m, 0); + if (n < 0) + return -errno; + + if (n < m) + break; + + explicit_bzero_safe(p, n); + free(p); + m *= 2; + } + + l = strv_parse_nulstr(p, n); + if (!l) + return -ENOMEM; + + explicit_bzero_safe(p, n); + + *ret = l; + return 0; +} + +static int add_to_keyring(const char *keyname, AskPasswordFlags flags, char **passwords) { + _cleanup_strv_free_erase_ char **l = NULL; + _cleanup_free_ char *p = NULL; + key_serial_t serial; + size_t n; + int r; + + assert(keyname); + assert(passwords); + + if (!(flags & ASK_PASSWORD_PUSH_CACHE)) + return 0; + + r = lookup_key(keyname, &serial); + if (r >= 0) { + r = retrieve_key(serial, &l); + if (r < 0) + return r; + } else if (r != -ENOKEY) + return r; + + r = strv_extend_strv(&l, passwords, true); + if (r <= 0) + return r; + + r = strv_make_nulstr(l, &p, &n); + if (r < 0) + return r; + + serial = add_key("user", keyname, p, n, KEY_SPEC_USER_KEYRING); + explicit_bzero_safe(p, n); + if (serial == -1) + return -errno; + + if (keyctl(KEYCTL_SET_TIMEOUT, + (unsigned long) serial, + (unsigned long) DIV_ROUND_UP(KEYRING_TIMEOUT_USEC, USEC_PER_SEC), 0, 0) < 0) + log_debug_errno(errno, "Failed to adjust timeout: %m"); + + /* Tell everyone to check the keyring */ + (void) touch("/run/systemd/ask-password"); + + log_debug("Added key to keyring as %" PRIi32 ".", serial); + + return 1; +} + +static int add_to_keyring_and_log(const char *keyname, AskPasswordFlags flags, char **passwords) { + int r; + + assert(keyname); + assert(passwords); + + r = add_to_keyring(keyname, flags, passwords); + if (r < 0) + return log_debug_errno(r, "Failed to add password to keyring: %m"); + + return 0; +} + +static int ask_password_keyring(const char *keyname, AskPasswordFlags flags, char ***ret) { + + key_serial_t serial; + int r; + + assert(keyname); + assert(ret); + + if (!(flags & ASK_PASSWORD_ACCEPT_CACHED)) + return -EUNATCH; + + r = lookup_key(keyname, &serial); + if (r == -ENOSYS) /* when retrieving the distinction doesn't matter */ + return -ENOKEY; + if (r < 0) + return r; + + return retrieve_key(serial, ret); +} + +static void backspace_chars(int ttyfd, size_t p) { + + if (ttyfd < 0) + return; + + while (p > 0) { + p--; + + loop_write(ttyfd, "\b \b", 3, false); + } +} + +static void backspace_string(int ttyfd, const char *str) { + size_t m; + + assert(str); + + if (ttyfd < 0) + return; + + /* Backspaces through enough characters to entirely undo printing of the specified string. */ + + m = utf8_n_codepoints(str); + if (m == (size_t) -1) + m = strlen(str); /* Not a valid UTF-8 string? If so, let's backspace the number of bytes output. Most + * likely this happened because we are not in an UTF-8 locale, and in that case that + * is the correct thing to do. And even if it's not, terminals tend to stop + * backspacing at the leftmost column, hence backspacing too much should be mostly + * OK. */ + + backspace_chars(ttyfd, m); +} + +int ask_password_tty( + int ttyfd, + const char *message, + const char *keyname, + usec_t until, + AskPasswordFlags flags, + const char *flag_file, + char ***ret) { + + enum { + POLL_TTY, + POLL_INOTIFY, + _POLL_MAX, + }; + + bool reset_tty = false, dirty = false, use_color = false; + _cleanup_close_ int cttyfd = -1, notify = -1; + struct termios old_termios, new_termios; + char passphrase[LINE_MAX + 1] = {}, *x; + _cleanup_strv_free_erase_ char **l = NULL; + struct pollfd pollfd[_POLL_MAX]; + size_t p = 0, codepoint = 0; + int r; + + assert(ret); + + if (flags & ASK_PASSWORD_NO_TTY) + return -EUNATCH; + + if (!message) + message = "Password:"; + + if (flag_file || ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname)) { + notify = inotify_init1(IN_CLOEXEC|IN_NONBLOCK); + if (notify < 0) + return -errno; + } + if (flag_file) { + if (inotify_add_watch(notify, flag_file, IN_ATTRIB /* for the link count */) < 0) + return -errno; + } + if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) { + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) + return 0; + else if (r != -ENOKEY) + return r; + + if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) + return -errno; + } + + /* If the caller didn't specify a TTY, then use the controlling tty, if we can. */ + if (ttyfd < 0) + ttyfd = cttyfd = open("/dev/tty", O_RDWR|O_NOCTTY|O_CLOEXEC); + + if (ttyfd >= 0) { + if (tcgetattr(ttyfd, &old_termios) < 0) + return -errno; + + if (flags & ASK_PASSWORD_CONSOLE_COLOR) + use_color = dev_console_colors_enabled(); + else + use_color = colors_enabled(); + + if (use_color) + (void) loop_write(ttyfd, ANSI_HIGHLIGHT, STRLEN(ANSI_HIGHLIGHT), false); + + (void) loop_write(ttyfd, message, strlen(message), false); + (void) loop_write(ttyfd, " ", 1, false); + + if (use_color) + (void) loop_write(ttyfd, ANSI_NORMAL, STRLEN(ANSI_NORMAL), false); + + new_termios = old_termios; + new_termios.c_lflag &= ~(ICANON|ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + + if (tcsetattr(ttyfd, TCSADRAIN, &new_termios) < 0) { + r = -errno; + goto finish; + } + + reset_tty = true; + } + + pollfd[POLL_TTY] = (struct pollfd) { + .fd = ttyfd >= 0 ? ttyfd : STDIN_FILENO, + .events = POLLIN, + }; + pollfd[POLL_INOTIFY] = (struct pollfd) { + .fd = notify, + .events = POLLIN, + }; + + for (;;) { + int sleep_for = -1, k; + ssize_t n; + char c; + + if (until > 0) { + usec_t y; + + y = now(CLOCK_MONOTONIC); + + if (y > until) { + r = -ETIME; + goto finish; + } + + sleep_for = (int) DIV_ROUND_UP(until - y, USEC_PER_MSEC); + } + + if (flag_file) + if (access(flag_file, F_OK) < 0) { + r = -errno; + goto finish; + } + + k = poll(pollfd, notify >= 0 ? 2 : 1, sleep_for); + if (k < 0) { + if (errno == EINTR) + continue; + + r = -errno; + goto finish; + } else if (k == 0) { + r = -ETIME; + goto finish; + } + + if (notify >= 0 && pollfd[POLL_INOTIFY].revents != 0 && keyname) { + (void) flush_fd(notify); + + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) { + r = 0; + goto finish; + } else if (r != -ENOKEY) + goto finish; + } + + if (pollfd[POLL_TTY].revents == 0) + continue; + + n = read(ttyfd >= 0 ? ttyfd : STDIN_FILENO, &c, 1); + if (n < 0) { + if (IN_SET(errno, EINTR, EAGAIN)) + continue; + + r = -errno; + goto finish; + + } + + /* We treat EOF, newline and NUL byte all as valid end markers */ + if (n == 0 || c == '\n' || c == 0) + break; + + if (c == 21) { /* C-u */ + + if (!(flags & ASK_PASSWORD_SILENT)) + backspace_string(ttyfd, passphrase); + + explicit_bzero_safe(passphrase, sizeof(passphrase)); + p = codepoint = 0; + + } else if (IN_SET(c, '\b', 127)) { + + if (p > 0) { + size_t q; + + if (!(flags & ASK_PASSWORD_SILENT)) + backspace_chars(ttyfd, 1); + + /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one + * begins */ + q = 0; + for (;;) { + size_t z; + + z = utf8_encoded_valid_unichar(passphrase + q); + if (z == 0) { + q = (size_t) -1; /* Invalid UTF8! */ + break; + } + + if (q + z >= p) /* This one brings us over the edge */ + break; + + q += z; + } + + p = codepoint = q == (size_t) -1 ? p - 1 : q; + explicit_bzero_safe(passphrase + p, sizeof(passphrase) - p); + + } else if (!dirty && !(flags & ASK_PASSWORD_SILENT)) { + + flags |= ASK_PASSWORD_SILENT; + + /* There are two ways to enter silent mode. Either by pressing backspace as first key + * (and only as first key), or ... */ + + if (ttyfd >= 0) + (void) loop_write(ttyfd, "(no echo) ", 10, false); + + } else if (ttyfd >= 0) + (void) loop_write(ttyfd, "\a", 1, false); + + } else if (c == '\t' && !(flags & ASK_PASSWORD_SILENT)) { + + backspace_string(ttyfd, passphrase); + flags |= ASK_PASSWORD_SILENT; + + /* ... or by pressing TAB at any time. */ + + if (ttyfd >= 0) + (void) loop_write(ttyfd, "(no echo) ", 10, false); + + } else if (p >= sizeof(passphrase)-1) { + + /* Reached the size limit */ + if (ttyfd >= 0) + (void) loop_write(ttyfd, "\a", 1, false); + + } else { + passphrase[p++] = c; + + if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) { + /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */ + n = utf8_encoded_valid_unichar(passphrase + codepoint); + if (n >= 0) { + codepoint = p; + (void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false); + } + } + + dirty = true; + } + + /* Let's forget this char, just to not keep needlessly copies of key material around */ + c = 'x'; + } + + x = strndup(passphrase, p); + explicit_bzero_safe(passphrase, sizeof(passphrase)); + if (!x) { + r = -ENOMEM; + goto finish; + } + + r = strv_consume(&l, x); + if (r < 0) + goto finish; + + if (keyname) + (void) add_to_keyring_and_log(keyname, flags, l); + + *ret = TAKE_PTR(l); + r = 0; + +finish: + if (ttyfd >= 0 && reset_tty) { + (void) loop_write(ttyfd, "\n", 1, false); + (void) tcsetattr(ttyfd, TCSADRAIN, &old_termios); + } + + return r; +} + +static int create_socket(char **ret) { + _cleanup_free_ char *path = NULL; + union sockaddr_union sa = {}; + _cleanup_close_ int fd = -1; + int salen, r; + + assert(ret); + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return -errno; + + if (asprintf(&path, "/run/systemd/ask-password/sck.%" PRIx64, random_u64()) < 0) + return -ENOMEM; + + salen = sockaddr_un_set_path(&sa.un, path); + if (salen < 0) + return salen; + + RUN_WITH_UMASK(0177) { + if (bind(fd, &sa.sa, salen) < 0) + return -errno; + } + + r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return r; + + *ret = TAKE_PTR(path); + return TAKE_FD(fd); +} + +int ask_password_agent( + const char *message, + const char *icon, + const char *id, + const char *keyname, + usec_t until, + AskPasswordFlags flags, + char ***ret) { + + enum { + FD_SOCKET, + FD_SIGNAL, + FD_INOTIFY, + _FD_MAX + }; + + _cleanup_close_ int socket_fd = -1, signal_fd = -1, notify = -1, fd = -1; + char temp[] = "/run/systemd/ask-password/tmp.XXXXXX"; + char final[sizeof(temp)] = ""; + _cleanup_free_ char *socket_name = NULL; + _cleanup_strv_free_erase_ char **l = NULL; + _cleanup_fclose_ FILE *f = NULL; + struct pollfd pollfd[_FD_MAX]; + sigset_t mask, oldmask; + int r; + + assert(ret); + + if (flags & ASK_PASSWORD_NO_AGENT) + return -EUNATCH; + + assert_se(sigemptyset(&mask) >= 0); + assert_se(sigset_add_many(&mask, SIGINT, SIGTERM, -1) >= 0); + assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) >= 0); + + (void) mkdir_p_label("/run/systemd/ask-password", 0755); + + if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && keyname) { + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) { + r = 0; + goto finish; + } else if (r != -ENOKEY) + goto finish; + + notify = inotify_init1(IN_CLOEXEC | IN_NONBLOCK); + if (notify < 0) { + r = -errno; + goto finish; + } + if (inotify_add_watch(notify, "/run/systemd/ask-password", IN_ATTRIB /* for mtime */) < 0) { + r = -errno; + goto finish; + } + } + + fd = mkostemp_safe(temp); + if (fd < 0) { + r = fd; + goto finish; + } + + (void) fchmod(fd, 0644); + + f = fdopen(fd, "w"); + if (!f) { + r = -errno; + goto finish; + } + + fd = -1; + + signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC); + if (signal_fd < 0) { + r = -errno; + goto finish; + } + + socket_fd = create_socket(&socket_name); + if (socket_fd < 0) { + r = socket_fd; + goto finish; + } + + fprintf(f, + "[Ask]\n" + "PID="PID_FMT"\n" + "Socket=%s\n" + "AcceptCached=%i\n" + "Echo=%i\n" + "NotAfter="USEC_FMT"\n", + getpid_cached(), + socket_name, + (flags & ASK_PASSWORD_ACCEPT_CACHED) ? 1 : 0, + (flags & ASK_PASSWORD_ECHO) ? 1 : 0, + until); + + if (message) + fprintf(f, "Message=%s\n", message); + + if (icon) + fprintf(f, "Icon=%s\n", icon); + + if (id) + fprintf(f, "Id=%s\n", id); + + r = fflush_and_check(f); + if (r < 0) + goto finish; + + memcpy(final, temp, sizeof(temp)); + + final[sizeof(final)-11] = 'a'; + final[sizeof(final)-10] = 's'; + final[sizeof(final)-9] = 'k'; + + if (rename(temp, final) < 0) { + r = -errno; + goto finish; + } + + zero(pollfd); + pollfd[FD_SOCKET].fd = socket_fd; + pollfd[FD_SOCKET].events = POLLIN; + pollfd[FD_SIGNAL].fd = signal_fd; + pollfd[FD_SIGNAL].events = POLLIN; + pollfd[FD_INOTIFY].fd = notify; + pollfd[FD_INOTIFY].events = POLLIN; + + for (;;) { + char passphrase[LINE_MAX+1]; + struct msghdr msghdr; + struct iovec iovec; + struct ucred *ucred; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(struct ucred))]; + } control; + ssize_t n; + int k; + usec_t t; + + t = now(CLOCK_MONOTONIC); + + if (until > 0 && until <= t) { + r = -ETIME; + goto finish; + } + + k = poll(pollfd, notify >= 0 ? _FD_MAX : _FD_MAX - 1, until > 0 ? (int) ((until-t)/USEC_PER_MSEC) : -1); + if (k < 0) { + if (errno == EINTR) + continue; + + r = -errno; + goto finish; + } + + if (k <= 0) { + r = -ETIME; + goto finish; + } + + if (pollfd[FD_SIGNAL].revents & POLLIN) { + r = -EINTR; + goto finish; + } + + if (notify >= 0 && pollfd[FD_INOTIFY].revents != 0) { + (void) flush_fd(notify); + + r = ask_password_keyring(keyname, flags, ret); + if (r >= 0) { + r = 0; + goto finish; + } else if (r != -ENOKEY) + goto finish; + } + + if (pollfd[FD_SOCKET].revents == 0) + continue; + + if (pollfd[FD_SOCKET].revents != POLLIN) { + r = -EIO; + goto finish; + } + + iovec = IOVEC_MAKE(passphrase, sizeof(passphrase)); + + zero(control); + zero(msghdr); + msghdr.msg_iov = &iovec; + msghdr.msg_iovlen = 1; + msghdr.msg_control = &control; + msghdr.msg_controllen = sizeof(control); + + n = recvmsg(socket_fd, &msghdr, 0); + if (n < 0) { + if (IN_SET(errno, EAGAIN, EINTR)) + continue; + + r = -errno; + goto finish; + } + + cmsg_close_all(&msghdr); + + if (n <= 0) { + log_debug("Message too short"); + continue; + } + + if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) || + control.cmsghdr.cmsg_level != SOL_SOCKET || + control.cmsghdr.cmsg_type != SCM_CREDENTIALS || + control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) { + log_debug("Received message without credentials. Ignoring."); + continue; + } + + ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr); + if (ucred->uid != 0) { + log_debug("Got request from unprivileged user. Ignoring."); + continue; + } + + if (passphrase[0] == '+') { + /* An empty message refers to the empty password */ + if (n == 1) + l = strv_new(""); + else + l = strv_parse_nulstr(passphrase+1, n-1); + explicit_bzero_safe(passphrase, n); + if (!l) { + r = -ENOMEM; + goto finish; + } + + if (strv_isempty(l)) { + l = strv_free(l); + log_debug("Invalid packet"); + continue; + } + + break; + } + + if (passphrase[0] == '-') { + r = -ECANCELED; + goto finish; + } + + log_debug("Invalid packet"); + } + + if (keyname) + (void) add_to_keyring_and_log(keyname, flags, l); + + *ret = TAKE_PTR(l); + r = 0; + +finish: + if (socket_name) + (void) unlink(socket_name); + + (void) unlink(temp); + + if (final[0]) + (void) unlink(final); + + assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0); + return r; +} + +int ask_password_auto( + const char *message, + const char *icon, + const char *id, + const char *keyname, + usec_t until, + AskPasswordFlags flags, + char ***ret) { + + int r; + + assert(ret); + + if ((flags & ASK_PASSWORD_ACCEPT_CACHED) && + keyname && + ((flags & ASK_PASSWORD_NO_TTY) || !isatty(STDIN_FILENO)) && + (flags & ASK_PASSWORD_NO_AGENT)) { + r = ask_password_keyring(keyname, flags, ret); + if (r != -ENOKEY) + return r; + } + + if (!(flags & ASK_PASSWORD_NO_TTY) && isatty(STDIN_FILENO)) + return ask_password_tty(-1, message, keyname, until, flags, NULL, ret); + + if (!(flags & ASK_PASSWORD_NO_AGENT)) + return ask_password_agent(message, icon, id, keyname, until, flags, ret); + + return -EUNATCH; +} diff --git a/src/shared/ask-password-api.h b/src/shared/ask-password-api.h new file mode 100644 index 0000000..15762b9 --- /dev/null +++ b/src/shared/ask-password-api.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" + +typedef enum AskPasswordFlags { + ASK_PASSWORD_ACCEPT_CACHED = 1 << 0, + ASK_PASSWORD_PUSH_CACHE = 1 << 1, + ASK_PASSWORD_ECHO = 1 << 2, /* show the password literally while reading, instead of "*" */ + ASK_PASSWORD_SILENT = 1 << 3, /* do no show any password at all while reading */ + ASK_PASSWORD_NO_TTY = 1 << 4, + ASK_PASSWORD_NO_AGENT = 1 << 5, + ASK_PASSWORD_CONSOLE_COLOR = 1 << 6, /* Use color if /dev/console points to a console that supports color */ +} AskPasswordFlags; + +int ask_password_tty(int tty_fd, const char *message, const char *keyname, usec_t until, AskPasswordFlags flags, const char *flag_file, char ***ret); +int ask_password_agent(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret); +int ask_password_auto(const char *message, const char *icon, const char *id, const char *keyname, usec_t until, AskPasswordFlags flag, char ***ret); diff --git a/src/shared/barrier.c b/src/shared/barrier.c new file mode 100644 index 0000000..bb5869d --- /dev/null +++ b/src/shared/barrier.c @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/eventfd.h> +#include <sys/types.h> +#include <unistd.h> + +#include "barrier.h" +#include "fd-util.h" +#include "macro.h" + +/** + * Barriers + * This barrier implementation provides a simple synchronization method based + * on file-descriptors that can safely be used between threads and processes. A + * barrier object contains 2 shared counters based on eventfd. Both processes + * can now place barriers and wait for the other end to reach a random or + * specific barrier. + * Barriers are numbered, so you can either wait for the other end to reach any + * barrier or the last barrier that you placed. This way, you can use barriers + * for one-way *and* full synchronization. Note that even-though barriers are + * numbered, these numbers are internal and recycled once both sides reached the + * same barrier (implemented as a simple signed counter). It is thus not + * possible to address barriers by their ID. + * + * Barrier-API: Both ends can place as many barriers via barrier_place() as + * they want and each pair of barriers on both sides will be implicitly linked. + * Each side can use the barrier_wait/sync_*() family of calls to wait for the + * other side to place a specific barrier. barrier_wait_next() waits until the + * other side calls barrier_place(). No links between the barriers are + * considered and this simply serves as most basic asynchronous barrier. + * barrier_sync_next() is like barrier_wait_next() and waits for the other side + * to place their next barrier via barrier_place(). However, it only waits for + * barriers that are linked to a barrier we already placed. If the other side + * already placed more barriers than we did, barrier_sync_next() returns + * immediately. + * barrier_sync() extends barrier_sync_next() and waits until the other end + * placed as many barriers via barrier_place() as we did. If they already placed + * as many as we did (or more), it returns immediately. + * + * Additionally to basic barriers, an abortion event is available. + * barrier_abort() places an abortion event that cannot be undone. An abortion + * immediately cancels all placed barriers and replaces them. Any running and + * following wait/sync call besides barrier_wait_abortion() will immediately + * return false on both sides (otherwise, they always return true). + * barrier_abort() can be called multiple times on both ends and will be a + * no-op if already called on this side. + * barrier_wait_abortion() can be used to wait for the other side to call + * barrier_abort() and is the only wait/sync call that does not return + * immediately if we aborted outself. It only returns once the other side + * called barrier_abort(). + * + * Barriers can be used for in-process and inter-process synchronization. + * However, for in-process synchronization you could just use mutexes. + * Therefore, main target is IPC and we require both sides to *not* share the FD + * table. If that's given, barriers provide target tracking: If the remote side + * exit()s, an abortion event is implicitly queued on the other side. This way, + * a sync/wait call will be woken up if the remote side crashed or exited + * unexpectedly. However, note that these abortion events are only queued if the + * barrier-queue has been drained. Therefore, it is safe to place a barrier and + * exit. The other side can safely wait on the barrier even though the exit + * queued an abortion event. Usually, the abortion event would overwrite the + * barrier, however, that's not true for exit-abortion events. Those are only + * queued if the barrier-queue is drained (thus, the receiving side has placed + * more barriers than the remote side). + */ + +/** + * barrier_create() - Initialize a barrier object + * @obj: barrier to initialize + * + * This initializes a barrier object. The caller is responsible of allocating + * the memory and keeping it valid. The memory does not have to be zeroed + * beforehand. + * Two eventfd objects are allocated for each barrier. If allocation fails, an + * error is returned. + * + * If this function fails, the barrier is reset to an invalid state so it is + * safe to call barrier_destroy() on the object regardless whether the + * initialization succeeded or not. + * + * The caller is responsible to destroy the object via barrier_destroy() before + * releasing the underlying memory. + * + * Returns: 0 on success, negative error code on failure. + */ +int barrier_create(Barrier *b) { + _cleanup_(barrier_destroyp) Barrier *staging = b; + int r; + + assert(b); + + b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (b->me < 0) + return -errno; + + b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (b->them < 0) + return -errno; + + r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK); + if (r < 0) + return -errno; + + staging = NULL; + return 0; +} + +/** + * barrier_destroy() - Destroy a barrier object + * @b: barrier to destroy or NULL + * + * This destroys a barrier object that has previously been passed to + * barrier_create(). The object is released and reset to invalid + * state. Therefore, it is safe to call barrier_destroy() multiple + * times or even if barrier_create() failed. However, barrier must be + * always initialized with BARRIER_NULL. + * + * If @b is NULL, this is a no-op. + */ +void barrier_destroy(Barrier *b) { + if (!b) + return; + + b->me = safe_close(b->me); + b->them = safe_close(b->them); + safe_close_pair(b->pipe); + b->barriers = 0; +} + +/** + * barrier_set_role() - Set the local role of the barrier + * @b: barrier to operate on + * @role: role to set on the barrier + * + * This sets the roles on a barrier object. This is needed to know + * which side of the barrier you're on. Usually, the parent creates + * the barrier via barrier_create() and then calls fork() or clone(). + * Therefore, the FDs are duplicated and the child retains the same + * barrier object. + * + * Both sides need to call barrier_set_role() after fork() or clone() + * are done. If this is not done, barriers will not work correctly. + * + * Note that barriers could be supported without fork() or clone(). However, + * this is currently not needed so it hasn't been implemented. + */ +void barrier_set_role(Barrier *b, unsigned role) { + assert(b); + assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD)); + /* make sure this is only called once */ + assert(b->pipe[0] >= 0 && b->pipe[1] >= 0); + + if (role == BARRIER_PARENT) + b->pipe[1] = safe_close(b->pipe[1]); + else { + b->pipe[0] = safe_close(b->pipe[0]); + + /* swap me/them for children */ + SWAP_TWO(b->me, b->them); + } +} + +/* places barrier; returns false if we aborted, otherwise true */ +static bool barrier_write(Barrier *b, uint64_t buf) { + ssize_t len; + + /* prevent new sync-points if we already aborted */ + if (barrier_i_aborted(b)) + return false; + + assert(b->me >= 0); + do { + len = write(b->me, &buf, sizeof(buf)); + } while (len < 0 && IN_SET(errno, EAGAIN, EINTR)); + + if (len != sizeof(buf)) + goto error; + + /* lock if we aborted */ + if (buf >= (uint64_t)BARRIER_ABORTION) { + if (barrier_they_aborted(b)) + b->barriers = BARRIER_WE_ABORTED; + else + b->barriers = BARRIER_I_ABORTED; + } else if (!barrier_is_aborted(b)) + b->barriers += buf; + + return !barrier_i_aborted(b); + +error: + /* If there is an unexpected error, we have to make this fatal. There + * is no way we can recover from sync-errors. Therefore, we close the + * pipe-ends and treat this as abortion. The other end will notice the + * pipe-close and treat it as abortion, too. */ + + safe_close_pair(b->pipe); + b->barriers = BARRIER_WE_ABORTED; + return false; +} + +/* waits for barriers; returns false if they aborted, otherwise true */ +static bool barrier_read(Barrier *b, int64_t comp) { + if (barrier_they_aborted(b)) + return false; + + while (b->barriers > comp) { + struct pollfd pfd[2] = { + { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1], + .events = POLLHUP }, + { .fd = b->them, + .events = POLLIN }}; + uint64_t buf; + int r; + + r = poll(pfd, 2, -1); + if (r < 0 && IN_SET(errno, EAGAIN, EINTR)) + continue; + else if (r < 0) + goto error; + + if (pfd[1].revents) { + ssize_t len; + + /* events on @them signal new data for us */ + len = read(b->them, &buf, sizeof(buf)); + if (len < 0 && IN_SET(errno, EAGAIN, EINTR)) + continue; + + if (len != sizeof(buf)) + goto error; + } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL)) + /* POLLHUP on the pipe tells us the other side exited. + * We treat this as implicit abortion. But we only + * handle it if there's no event on the eventfd. This + * guarantees that exit-abortions do not overwrite real + * barriers. */ + buf = BARRIER_ABORTION; + else + continue; + + /* lock if they aborted */ + if (buf >= (uint64_t)BARRIER_ABORTION) { + if (barrier_i_aborted(b)) + b->barriers = BARRIER_WE_ABORTED; + else + b->barriers = BARRIER_THEY_ABORTED; + } else if (!barrier_is_aborted(b)) + b->barriers -= buf; + } + + return !barrier_they_aborted(b); + +error: + /* If there is an unexpected error, we have to make this fatal. There + * is no way we can recover from sync-errors. Therefore, we close the + * pipe-ends and treat this as abortion. The other end will notice the + * pipe-close and treat it as abortion, too. */ + + safe_close_pair(b->pipe); + b->barriers = BARRIER_WE_ABORTED; + return false; +} + +/** + * barrier_place() - Place a new barrier + * @b: barrier object + * + * This places a new barrier on the barrier object. If either side already + * aborted, this is a no-op and returns "false". Otherwise, the barrier is + * placed and this returns "true". + * + * Returns: true if barrier was placed, false if either side aborted. + */ +bool barrier_place(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_write(b, BARRIER_SINGLE); + return true; +} + +/** + * barrier_abort() - Abort the synchronization + * @b: barrier object to abort + * + * This aborts the barrier-synchronization. If barrier_abort() was already + * called on this side, this is a no-op. Otherwise, the barrier is put into the + * ABORT-state and will stay there. The other side is notified about the + * abortion. Any following attempt to place normal barriers or to wait on normal + * barriers will return immediately as "false". + * + * You can wait for the other side to call barrier_abort(), too. Use + * barrier_wait_abortion() for that. + * + * Returns: false if the other side already aborted, true otherwise. + */ +bool barrier_abort(Barrier *b) { + assert(b); + + barrier_write(b, BARRIER_ABORTION); + return !barrier_they_aborted(b); +} + +/** + * barrier_wait_next() - Wait for the next barrier of the other side + * @b: barrier to operate on + * + * This waits until the other side places its next barrier. This is independent + * of any barrier-links and just waits for any next barrier of the other side. + * + * If either side aborted, this returns false. + * + * Returns: false if either side aborted, true otherwise. + */ +bool barrier_wait_next(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_read(b, b->barriers - 1); + return !barrier_is_aborted(b); +} + +/** + * barrier_wait_abortion() - Wait for the other side to abort + * @b: barrier to operate on + * + * This waits until the other side called barrier_abort(). This can be called + * regardless whether the local side already called barrier_abort() or not. + * + * If the other side has already aborted, this returns immediately. + * + * Returns: false if the local side aborted, true otherwise. + */ +bool barrier_wait_abortion(Barrier *b) { + assert(b); + + barrier_read(b, BARRIER_THEY_ABORTED); + return !barrier_i_aborted(b); +} + +/** + * barrier_sync_next() - Wait for the other side to place a next linked barrier + * @b: barrier to operate on + * + * This is like barrier_wait_next() and waits for the other side to call + * barrier_place(). However, this only waits for linked barriers. That means, if + * the other side already placed more barriers than (or as much as) we did, this + * returns immediately instead of waiting. + * + * If either side aborted, this returns false. + * + * Returns: false if either side aborted, true otherwise. + */ +bool barrier_sync_next(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_read(b, MAX((int64_t)0, b->barriers - 1)); + return !barrier_is_aborted(b); +} + +/** + * barrier_sync() - Wait for the other side to place as many barriers as we did + * @b: barrier to operate on + * + * This is like barrier_sync_next() but waits for the other side to call + * barrier_place() as often as we did (in total). If they already placed as much + * as we did (or more), this returns immediately instead of waiting. + * + * If either side aborted, this returns false. + * + * Returns: false if either side aborted, true otherwise. + */ +bool barrier_sync(Barrier *b) { + assert(b); + + if (barrier_is_aborted(b)) + return false; + + barrier_read(b, 0); + return !barrier_is_aborted(b); +} diff --git a/src/shared/barrier.h b/src/shared/barrier.h new file mode 100644 index 0000000..0eb3d27 --- /dev/null +++ b/src/shared/barrier.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#include "macro.h" + +/* See source file for an API description. */ + +typedef struct Barrier Barrier; + +enum { + BARRIER_SINGLE = 1LL, + BARRIER_ABORTION = INT64_MAX, + + /* bias values to store state; keep @WE < @THEY < @I */ + BARRIER_BIAS = INT64_MIN, + BARRIER_WE_ABORTED = BARRIER_BIAS + 1LL, + BARRIER_THEY_ABORTED = BARRIER_BIAS + 2LL, + BARRIER_I_ABORTED = BARRIER_BIAS + 3LL, +}; + +enum { + BARRIER_PARENT, + BARRIER_CHILD, +}; + +struct Barrier { + int me; + int them; + int pipe[2]; + int64_t barriers; +}; + +#define BARRIER_NULL {-1, -1, {-1, -1}, 0} + +int barrier_create(Barrier *obj); +void barrier_destroy(Barrier *b); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Barrier*, barrier_destroy); + +void barrier_set_role(Barrier *b, unsigned role); + +bool barrier_place(Barrier *b); +bool barrier_abort(Barrier *b); + +bool barrier_wait_next(Barrier *b); +bool barrier_wait_abortion(Barrier *b); +bool barrier_sync_next(Barrier *b); +bool barrier_sync(Barrier *b); + +static inline bool barrier_i_aborted(Barrier *b) { + return IN_SET(b->barriers, BARRIER_I_ABORTED, BARRIER_WE_ABORTED); +} + +static inline bool barrier_they_aborted(Barrier *b) { + return IN_SET(b->barriers, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED); +} + +static inline bool barrier_we_aborted(Barrier *b) { + return b->barriers == BARRIER_WE_ABORTED; +} + +static inline bool barrier_is_aborted(Barrier *b) { + return IN_SET(b->barriers, + BARRIER_I_ABORTED, BARRIER_THEY_ABORTED, BARRIER_WE_ABORTED); +} + +static inline bool barrier_place_and_sync(Barrier *b) { + (void) barrier_place(b); + return barrier_sync(b); +} diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c new file mode 100644 index 0000000..89d7a7d --- /dev/null +++ b/src/shared/base-filesystem.c @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <syslog.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "base-filesystem.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "umask-util.h" +#include "user-util.h" +#include "util.h" + +typedef struct BaseFilesystem { + const char *dir; + mode_t mode; + const char *target; + const char *exists; + bool ignore_failure; +} BaseFilesystem; + +static const BaseFilesystem table[] = { + { "bin", 0, "usr/bin\0", NULL }, + { "lib", 0, "usr/lib\0", NULL }, + { "root", 0755, NULL, NULL, true }, + { "sbin", 0, "usr/sbin\0", NULL }, + { "usr", 0755, NULL, NULL }, + { "var", 0755, NULL, NULL }, + { "etc", 0755, NULL, NULL }, + { "proc", 0755, NULL, NULL, true }, + { "sys", 0755, NULL, NULL, true }, + { "dev", 0755, NULL, NULL, true }, +#if defined(__i386__) || defined(__x86_64__) + { "lib64", 0, "usr/lib/x86_64-linux-gnu\0" + "usr/lib64\0", "ld-linux-x86-64.so.2" }, +#endif +}; + +int base_filesystem_create(const char *root, uid_t uid, gid_t gid) { + _cleanup_close_ int fd = -1; + int r = 0; + size_t i; + + fd = open(root, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (fd < 0) + return log_error_errno(errno, "Failed to open root file system: %m"); + + for (i = 0; i < ELEMENTSOF(table); i ++) { + if (faccessat(fd, table[i].dir, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) + continue; + + if (table[i].target) { + const char *target = NULL, *s; + + /* check if one of the targets exists */ + NULSTR_FOREACH(s, table[i].target) { + if (faccessat(fd, s, F_OK, AT_SYMLINK_NOFOLLOW) < 0) + continue; + + /* check if a specific file exists at the target path */ + if (table[i].exists) { + _cleanup_free_ char *p = NULL; + + p = strjoin(s, "/", table[i].exists); + if (!p) + return log_oom(); + + if (faccessat(fd, p, F_OK, AT_SYMLINK_NOFOLLOW) < 0) + continue; + } + + target = s; + break; + } + + if (!target) + continue; + + r = symlinkat(target, fd, table[i].dir); + if (r < 0 && errno != EEXIST) + return log_error_errno(errno, "Failed to create symlink at %s/%s: %m", root, table[i].dir); + + if (uid_is_valid(uid) || gid_is_valid(gid)) { + if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(errno, "Failed to chown symlink at %s/%s: %m", root, table[i].dir); + } + + continue; + } + + RUN_WITH_UMASK(0000) + r = mkdirat(fd, table[i].dir, table[i].mode); + if (r < 0 && errno != EEXIST) { + log_full_errno(table[i].ignore_failure ? LOG_DEBUG : LOG_ERR, errno, + "Failed to create directory at %s/%s: %m", root, table[i].dir); + + if (!table[i].ignore_failure) + return -errno; + + continue; + } + + if (uid != UID_INVALID || gid != UID_INVALID) { + if (fchownat(fd, table[i].dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, table[i].dir); + } + } + + return 0; +} diff --git a/src/shared/base-filesystem.h b/src/shared/base-filesystem.h new file mode 100644 index 0000000..39d9708 --- /dev/null +++ b/src/shared/base-filesystem.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/types.h> + +int base_filesystem_create(const char *root, uid_t uid, gid_t gid); diff --git a/src/shared/bitmap.c b/src/shared/bitmap.c new file mode 100644 index 0000000..a4cd645 --- /dev/null +++ b/src/shared/bitmap.c @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "bitmap.h" +#include "hashmap.h" +#include "macro.h" + +struct Bitmap { + uint64_t *bitmaps; + size_t n_bitmaps; + size_t bitmaps_allocated; +}; + +/* Bitmaps are only meant to store relatively small numbers + * (corresponding to, say, an enum), so it is ok to limit + * the max entry. 64k should be plenty. */ +#define BITMAPS_MAX_ENTRY 0xffff + +/* This indicates that we reached the end of the bitmap */ +#define BITMAP_END ((unsigned) -1) + +#define BITMAP_NUM_TO_OFFSET(n) ((n) / (sizeof(uint64_t) * 8)) +#define BITMAP_NUM_TO_REM(n) ((n) % (sizeof(uint64_t) * 8)) +#define BITMAP_OFFSET_TO_NUM(offset, rem) ((offset) * sizeof(uint64_t) * 8 + (rem)) + +Bitmap *bitmap_new(void) { + return new0(Bitmap, 1); +} + +Bitmap *bitmap_copy(Bitmap *b) { + Bitmap *ret; + + ret = bitmap_new(); + if (!ret) + return NULL; + + ret->bitmaps = newdup(uint64_t, b->bitmaps, b->n_bitmaps); + if (!ret->bitmaps) + return mfree(ret); + + ret->n_bitmaps = ret->bitmaps_allocated = b->n_bitmaps; + return ret; +} + +void bitmap_free(Bitmap *b) { + if (!b) + return; + + free(b->bitmaps); + free(b); +} + +int bitmap_ensure_allocated(Bitmap **b) { + Bitmap *a; + + assert(b); + + if (*b) + return 0; + + a = bitmap_new(); + if (!a) + return -ENOMEM; + + *b = a; + + return 0; +} + +int bitmap_set(Bitmap *b, unsigned n) { + uint64_t bitmask; + unsigned offset; + + assert(b); + + /* we refuse to allocate huge bitmaps */ + if (n > BITMAPS_MAX_ENTRY) + return -ERANGE; + + offset = BITMAP_NUM_TO_OFFSET(n); + + if (offset >= b->n_bitmaps) { + if (!GREEDY_REALLOC0(b->bitmaps, b->bitmaps_allocated, offset + 1)) + return -ENOMEM; + + b->n_bitmaps = offset + 1; + } + + bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n); + + b->bitmaps[offset] |= bitmask; + + return 0; +} + +void bitmap_unset(Bitmap *b, unsigned n) { + uint64_t bitmask; + unsigned offset; + + if (!b) + return; + + offset = BITMAP_NUM_TO_OFFSET(n); + + if (offset >= b->n_bitmaps) + return; + + bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n); + + b->bitmaps[offset] &= ~bitmask; +} + +bool bitmap_isset(Bitmap *b, unsigned n) { + uint64_t bitmask; + unsigned offset; + + if (!b) + return false; + + offset = BITMAP_NUM_TO_OFFSET(n); + + if (offset >= b->n_bitmaps) + return false; + + bitmask = UINT64_C(1) << BITMAP_NUM_TO_REM(n); + + return !!(b->bitmaps[offset] & bitmask); +} + +bool bitmap_isclear(Bitmap *b) { + unsigned i; + + if (!b) + return true; + + for (i = 0; i < b->n_bitmaps; i++) + if (b->bitmaps[i] != 0) + return false; + + return true; +} + +void bitmap_clear(Bitmap *b) { + + if (!b) + return; + + b->bitmaps = mfree(b->bitmaps); + b->n_bitmaps = 0; + b->bitmaps_allocated = 0; +} + +bool bitmap_iterate(Bitmap *b, Iterator *i, unsigned *n) { + uint64_t bitmask; + unsigned offset, rem; + + assert(i); + assert(n); + + if (!b || i->idx == BITMAP_END) + return false; + + offset = BITMAP_NUM_TO_OFFSET(i->idx); + rem = BITMAP_NUM_TO_REM(i->idx); + bitmask = UINT64_C(1) << rem; + + for (; offset < b->n_bitmaps; offset ++) { + if (b->bitmaps[offset]) { + for (; bitmask; bitmask <<= 1, rem ++) { + if (b->bitmaps[offset] & bitmask) { + *n = BITMAP_OFFSET_TO_NUM(offset, rem); + i->idx = *n + 1; + + return true; + } + } + } + + rem = 0; + bitmask = 1; + } + + i->idx = BITMAP_END; + + return false; +} + +bool bitmap_equal(Bitmap *a, Bitmap *b) { + size_t common_n_bitmaps; + Bitmap *c; + unsigned i; + + if (a == b) + return true; + + if (!a != !b) + return false; + + if (!a) + return true; + + common_n_bitmaps = MIN(a->n_bitmaps, b->n_bitmaps); + if (memcmp_safe(a->bitmaps, b->bitmaps, sizeof(uint64_t) * common_n_bitmaps) != 0) + return false; + + c = a->n_bitmaps > b->n_bitmaps ? a : b; + for (i = common_n_bitmaps; i < c->n_bitmaps; i++) + if (c->bitmaps[i] != 0) + return false; + + return true; +} diff --git a/src/shared/bitmap.h b/src/shared/bitmap.h new file mode 100644 index 0000000..843d27d --- /dev/null +++ b/src/shared/bitmap.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" + +typedef struct Bitmap Bitmap; + +Bitmap *bitmap_new(void); +Bitmap *bitmap_copy(Bitmap *b); +int bitmap_ensure_allocated(Bitmap **b); +void bitmap_free(Bitmap *b); + +int bitmap_set(Bitmap *b, unsigned n); +void bitmap_unset(Bitmap *b, unsigned n); +bool bitmap_isset(Bitmap *b, unsigned n); +bool bitmap_isclear(Bitmap *b); +void bitmap_clear(Bitmap *b); + +bool bitmap_iterate(Bitmap *b, Iterator *i, unsigned *n); + +bool bitmap_equal(Bitmap *a, Bitmap *b); + +#define BITMAP_FOREACH(n, b, i) \ + for ((i).idx = 0; bitmap_iterate((b), &(i), (unsigned*)&(n)); ) + +DEFINE_TRIVIAL_CLEANUP_FUNC(Bitmap*, bitmap_free); + +#define _cleanup_bitmap_free_ _cleanup_(bitmap_freep) diff --git a/src/shared/blkid-util.h b/src/shared/blkid-util.h new file mode 100644 index 0000000..eb07a88 --- /dev/null +++ b/src/shared/blkid-util.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if HAVE_BLKID +# include <blkid.h> + +# include "macro.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(blkid_probe, blkid_free_probe); +#endif diff --git a/src/shared/boot-timestamps.c b/src/shared/boot-timestamps.c new file mode 100644 index 0000000..bcbb86d --- /dev/null +++ b/src/shared/boot-timestamps.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "acpi-fpdt.h" +#include "boot-timestamps.h" +#include "efivars.h" +#include "macro.h" +#include "time-util.h" + +int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader) { + usec_t x = 0, y = 0, a; + int r; + dual_timestamp _n; + + assert(firmware); + assert(loader); + + if (!n) { + dual_timestamp_get(&_n); + n = &_n; + } + + r = acpi_get_boot_usec(&x, &y); + if (r < 0) { + r = efi_loader_get_boot_usec(&x, &y); + if (r < 0) + return r; + } + + /* Let's convert this to timestamps where the firmware + * began/loader began working. To make this more confusing: + * since usec_t is unsigned and the kernel's monotonic clock + * begins at kernel initialization we'll actually initialize + * the monotonic timestamps here as negative of the actual + * value. */ + + firmware->monotonic = y; + loader->monotonic = y - x; + + a = n->monotonic + firmware->monotonic; + firmware->realtime = n->realtime > a ? n->realtime - a : 0; + + a = n->monotonic + loader->monotonic; + loader->realtime = n->realtime > a ? n->realtime - a : 0; + + return 0; +} diff --git a/src/shared/boot-timestamps.h b/src/shared/boot-timestamps.h new file mode 100644 index 0000000..4e648f1 --- /dev/null +++ b/src/shared/boot-timestamps.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <time-util.h> + +int boot_timestamps(const dual_timestamp *n, dual_timestamp *firmware, dual_timestamp *loader); diff --git a/src/shared/bootspec.c b/src/shared/bootspec.c new file mode 100644 index 0000000..7e276f1 --- /dev/null +++ b/src/shared/bootspec.c @@ -0,0 +1,665 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdio.h> +#include <linux/magic.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "blkid-util.h" +#include "bootspec.h" +#include "conf-files.h" +#include "def.h" +#include "device-nodes.h" +#include "efivars.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "parse-util.h" +#include "path-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "virt.h" + +static void boot_entry_free(BootEntry *entry) { + assert(entry); + + free(entry->id); + free(entry->path); + free(entry->title); + free(entry->show_title); + free(entry->version); + free(entry->machine_id); + free(entry->architecture); + strv_free(entry->options); + free(entry->kernel); + free(entry->efi); + strv_free(entry->initrd); + free(entry->device_tree); +} + +static int boot_entry_load(const char *path, BootEntry *entry) { + _cleanup_(boot_entry_free) BootEntry tmp = {}; + _cleanup_fclose_ FILE *f = NULL; + unsigned line = 1; + char *b, *c; + int r; + + assert(path); + assert(entry); + + c = endswith_no_case(path, ".conf"); + if (!c) { + log_error("Invalid loader entry filename: %s", path); + return -EINVAL; + } + + b = basename(path); + tmp.id = strndup(b, c - b); + if (!tmp.id) + return log_oom(); + + tmp.path = strdup(path); + if (!tmp.path) + return log_oom(); + + f = fopen(path, "re"); + if (!f) + return log_error_errno(errno, "Failed to open \"%s\": %m", path); + + for (;;) { + _cleanup_free_ char *buf = NULL, *field = NULL; + const char *p; + + r = read_line(f, LONG_LINE_MAX, &buf); + if (r == 0) + break; + if (r == -ENOBUFS) + return log_error_errno(r, "%s:%u: Line too long", path, line); + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + + line++; + + if (IN_SET(*strstrip(buf), '#', '\0')) + continue; + + p = buf; + r = extract_first_word(&p, &field, " \t", 0); + if (r < 0) { + log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line); + continue; + } + if (r == 0) { + log_warning("%s:%u: Bad syntax", path, line); + continue; + } + + if (streq(field, "title")) + r = free_and_strdup(&tmp.title, p); + else if (streq(field, "version")) + r = free_and_strdup(&tmp.version, p); + else if (streq(field, "machine-id")) + r = free_and_strdup(&tmp.machine_id, p); + else if (streq(field, "architecture")) + r = free_and_strdup(&tmp.architecture, p); + else if (streq(field, "options")) + r = strv_extend(&tmp.options, p); + else if (streq(field, "linux")) + r = free_and_strdup(&tmp.kernel, p); + else if (streq(field, "efi")) + r = free_and_strdup(&tmp.efi, p); + else if (streq(field, "initrd")) + r = strv_extend(&tmp.initrd, p); + else if (streq(field, "devicetree")) + r = free_and_strdup(&tmp.device_tree, p); + else { + log_notice("%s:%u: Unknown line \"%s\"", path, line, field); + continue; + } + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + } + + *entry = tmp; + tmp = (BootEntry) {}; + return 0; +} + +void boot_config_free(BootConfig *config) { + size_t i; + + assert(config); + + free(config->default_pattern); + free(config->timeout); + free(config->editor); + free(config->auto_entries); + free(config->auto_firmware); + + free(config->entry_oneshot); + free(config->entry_default); + + for (i = 0; i < config->n_entries; i++) + boot_entry_free(config->entries + i); + free(config->entries); +} + +static int boot_loader_read_conf(const char *path, BootConfig *config) { + _cleanup_fclose_ FILE *f = NULL; + unsigned line = 1; + int r; + + assert(path); + assert(config); + + f = fopen(path, "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_error_errno(errno, "Failed to open \"%s\": %m", path); + } + + for (;;) { + _cleanup_free_ char *buf = NULL, *field = NULL; + const char *p; + + r = read_line(f, LONG_LINE_MAX, &buf); + if (r == 0) + break; + if (r == -ENOBUFS) + return log_error_errno(r, "%s:%u: Line too long", path, line); + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + + line++; + + if (IN_SET(*strstrip(buf), '#', '\0')) + continue; + + p = buf; + r = extract_first_word(&p, &field, " \t", 0); + if (r < 0) { + log_error_errno(r, "Failed to parse config file %s line %u: %m", path, line); + continue; + } + if (r == 0) { + log_warning("%s:%u: Bad syntax", path, line); + continue; + } + + if (streq(field, "default")) + r = free_and_strdup(&config->default_pattern, p); + else if (streq(field, "timeout")) + r = free_and_strdup(&config->timeout, p); + else if (streq(field, "editor")) + r = free_and_strdup(&config->editor, p); + else if (streq(field, "auto-entries")) + r = free_and_strdup(&config->auto_entries, p); + else if (streq(field, "auto-firmware")) + r = free_and_strdup(&config->auto_firmware, p); + else if (streq(field, "console-mode")) + r = free_and_strdup(&config->console_mode, p); + else { + log_notice("%s:%u: Unknown line \"%s\"", path, line, field); + continue; + } + if (r < 0) + return log_error_errno(r, "%s:%u: Error while reading: %m", path, line); + } + + return 1; +} + +static int boot_entry_compare(const BootEntry *a, const BootEntry *b) { + return str_verscmp(a->id, b->id); +} + +static int boot_entries_find(const char *dir, BootEntry **ret_entries, size_t *ret_n_entries) { + _cleanup_strv_free_ char **files = NULL; + char **f; + int r; + BootEntry *array = NULL; + size_t n_allocated = 0, n = 0; + + assert(dir); + assert(ret_entries); + assert(ret_n_entries); + + r = conf_files_list(&files, ".conf", NULL, 0, dir, NULL); + if (r < 0) + return log_error_errno(r, "Failed to list files in \"%s\": %m", dir); + + STRV_FOREACH(f, files) { + if (!GREEDY_REALLOC0(array, n_allocated, n + 1)) + return log_oom(); + + r = boot_entry_load(*f, array + n); + if (r < 0) + continue; + + n++; + } + + typesafe_qsort(array, n, boot_entry_compare); + + *ret_entries = array; + *ret_n_entries = n; + + return 0; +} + +static bool find_nonunique(BootEntry *entries, size_t n_entries, bool *arr) { + size_t i, j; + bool non_unique = false; + + assert(entries || n_entries == 0); + assert(arr || n_entries == 0); + + for (i = 0; i < n_entries; i++) + arr[i] = false; + + for (i = 0; i < n_entries; i++) + for (j = 0; j < n_entries; j++) + if (i != j && streq(boot_entry_title(entries + i), + boot_entry_title(entries + j))) + non_unique = arr[i] = arr[j] = true; + + return non_unique; +} + +static int boot_entries_uniquify(BootEntry *entries, size_t n_entries) { + char *s; + size_t i; + int r; + bool arr[n_entries]; + + assert(entries || n_entries == 0); + + /* Find _all_ non-unique titles */ + if (!find_nonunique(entries, n_entries, arr)) + return 0; + + /* Add version to non-unique titles */ + for (i = 0; i < n_entries; i++) + if (arr[i] && entries[i].version) { + r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].version); + if (r < 0) + return -ENOMEM; + + free_and_replace(entries[i].show_title, s); + } + + if (!find_nonunique(entries, n_entries, arr)) + return 0; + + /* Add machine-id to non-unique titles */ + for (i = 0; i < n_entries; i++) + if (arr[i] && entries[i].machine_id) { + r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].machine_id); + if (r < 0) + return -ENOMEM; + + free_and_replace(entries[i].show_title, s); + } + + if (!find_nonunique(entries, n_entries, arr)) + return 0; + + /* Add file name to non-unique titles */ + for (i = 0; i < n_entries; i++) + if (arr[i]) { + r = asprintf(&s, "%s (%s)", boot_entry_title(entries + i), entries[i].id); + if (r < 0) + return -ENOMEM; + + free_and_replace(entries[i].show_title, s); + } + + return 0; +} + +static int boot_entries_select_default(const BootConfig *config) { + int i; + + assert(config); + + if (config->entry_oneshot) + for (i = config->n_entries - 1; i >= 0; i--) + if (streq(config->entry_oneshot, config->entries[i].id)) { + log_debug("Found default: id \"%s\" is matched by LoaderEntryOneShot", + config->entries[i].id); + return i; + } + + if (config->entry_default) + for (i = config->n_entries - 1; i >= 0; i--) + if (streq(config->entry_default, config->entries[i].id)) { + log_debug("Found default: id \"%s\" is matched by LoaderEntryDefault", + config->entries[i].id); + return i; + } + + if (config->default_pattern) + for (i = config->n_entries - 1; i >= 0; i--) + if (fnmatch(config->default_pattern, config->entries[i].id, FNM_CASEFOLD) == 0) { + log_debug("Found default: id \"%s\" is matched by pattern \"%s\"", + config->entries[i].id, config->default_pattern); + return i; + } + + if (config->n_entries > 0) + log_debug("Found default: last entry \"%s\"", config->entries[config->n_entries - 1].id); + else + log_debug("Found no default boot entry :("); + + return config->n_entries - 1; /* -1 means "no default" */ +} + +int boot_entries_load_config(const char *esp_path, BootConfig *config) { + const char *p; + int r; + + assert(esp_path); + assert(config); + + p = strjoina(esp_path, "/loader/loader.conf"); + r = boot_loader_read_conf(p, config); + if (r < 0) + return r; + + p = strjoina(esp_path, "/loader/entries"); + r = boot_entries_find(p, &config->entries, &config->n_entries); + if (r < 0) + return r; + + r = boot_entries_uniquify(config->entries, config->n_entries); + if (r < 0) + return log_error_errno(r, "Failed to uniquify boot entries: %m"); + + if (is_efi_boot()) { + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryOneShot", &config->entry_oneshot); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read EFI var \"LoaderEntryOneShot\": %m"); + + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderEntryDefault", &config->entry_default); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to read EFI var \"LoaderEntryDefault\": %m"); + } + + config->default_entry = boot_entries_select_default(config); + return 0; +} + +/********************************************************************************/ + +static int verify_esp( + const char *p, + bool searching, + bool unprivileged_mode, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { +#if HAVE_BLKID + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + _cleanup_free_ char *node = NULL; + const char *v; +#endif + uint64_t pstart = 0, psize = 0; + struct stat st, st2; + const char *t2; + struct statfs sfs; + sd_id128_t uuid = SD_ID128_NULL; + uint32_t part = 0; + bool relax_checks; + int r; + + assert(p); + + relax_checks = getenv_bool("SYSTEMD_RELAX_ESP_CHECKS") > 0; + + /* Non-root user can only check the status, so if an error occured in the following, it does not cause any + * issues. Let's also, silence the error messages. */ + + if (!relax_checks) { + if (statfs(p, &sfs) < 0) { + /* If we are searching for the mount point, don't generate a log message if we can't find the path */ + if (errno == ENOENT && searching) + return -ENOENT; + + return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno, + "Failed to check file system type of \"%s\": %m", p); + } + + if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC)) { + if (searching) + return -EADDRNOTAVAIL; + + log_error("File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p); + return -ENODEV; + } + } + + if (stat(p, &st) < 0) + return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno, + "Failed to determine block device node of \"%s\": %m", p); + + if (major(st.st_dev) == 0) { + log_error("Block device node of %p is invalid.", p); + return -ENODEV; + } + + t2 = strjoina(p, "/.."); + r = stat(t2, &st2); + if (r < 0) + return log_full_errno(unprivileged_mode && errno == EACCES ? LOG_DEBUG : LOG_ERR, errno, + "Failed to determine block device node of parent of \"%s\": %m", p); + + if (st.st_dev == st2.st_dev) { + log_error("Directory \"%s\" is not the root of the EFI System Partition (ESP) file system.", p); + return -ENODEV; + } + + /* In a container we don't have access to block devices, skip this part of the verification, we trust the + * container manager set everything up correctly on its own. Also skip the following verification for non-root user. */ + if (detect_container() > 0 || unprivileged_mode || relax_checks) + goto finish; + +#if HAVE_BLKID + r = device_path_make_major_minor(S_IFBLK, st.st_dev, &node); + if (r < 0) + return log_error_errno(r, "Failed to format major/minor device path: %m"); + errno = 0; + b = blkid_new_probe_from_filename(node); + if (!b) + return log_error_errno(errno ?: ENOMEM, "Failed to open file system \"%s\": %m", p); + + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); + blkid_probe_enable_partitions(b, 1); + blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); + + errno = 0; + r = blkid_do_safeprobe(b); + if (r == -2) { + log_error("File system \"%s\" is ambiguous.", p); + return -ENODEV; + } else if (r == 1) { + log_error("File system \"%s\" does not contain a label.", p); + return -ENODEV; + } else if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe file system \"%s\": %m", p); + + errno = 0; + r = blkid_probe_lookup_value(b, "TYPE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe file system type \"%s\": %m", p); + if (!streq(v, "vfat")) { + log_error("File system \"%s\" is not FAT.", p); + return -ENODEV; + } + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_SCHEME", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition scheme \"%s\": %m", p); + if (!streq(v, "gpt")) { + log_error("File system \"%s\" is not on a GPT partition table.", p); + return -ENODEV; + } + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_TYPE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition type UUID \"%s\": %m", p); + if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b")) { + log_error("File system \"%s\" has wrong type for an EFI System Partition (ESP).", p); + return -ENODEV; + } + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition entry UUID \"%s\": %m", p); + r = sd_id128_from_string(v, &uuid); + if (r < 0) { + log_error("Partition \"%s\" has invalid UUID \"%s\".", p, v); + return -EIO; + } + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_NUMBER", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition number \"%s\": m", p); + r = safe_atou32(v, &part); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field."); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition offset \"%s\": %m", p); + r = safe_atou64(v, &pstart); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field."); + + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL); + if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe partition size \"%s\": %m", p); + r = safe_atou64(v, &psize); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field."); +#endif + +finish: + if (ret_part) + *ret_part = part; + if (ret_pstart) + *ret_pstart = pstart; + if (ret_psize) + *ret_psize = psize; + if (ret_uuid) + *ret_uuid = uuid; + + return 0; +} + +int find_esp_and_warn( + const char *path, + bool unprivileged_mode, + char **ret_path, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { + + int r; + + /* This logs about all errors except: + * + * -ENOKEY → when we can't find the partition + * -EACCESS → when unprivileged_mode is true, and we can't access something + */ + + if (path) { + r = verify_esp(path, false, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid); + if (r < 0) + return r; + + goto found; + } + + path = getenv("SYSTEMD_ESP_PATH"); + if (path) { + if (!path_is_valid(path) || !path_is_absolute(path)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "$SYSTEMD_ESP_PATH does not refer to absolute path, refusing to use it: %s", + path); + + /* Note: when the user explicitly configured things with an env var we won't validate the mount + * point. After all we want this to be useful for testing. */ + goto found; + } + + FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") { + + r = verify_esp(path, true, unprivileged_mode, ret_part, ret_pstart, ret_psize, ret_uuid); + if (r >= 0) + goto found; + if (!IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */ + return r; + } + + /* No logging here */ + return -ENOKEY; + +found: + if (ret_path) { + char *c; + + c = strdup(path); + if (!c) + return log_oom(); + + *ret_path = c; + } + + return 0; +} + +int find_default_boot_entry( + const char *esp_path, + char **esp_where, + BootConfig *config, + const BootEntry **e) { + + _cleanup_free_ char *where = NULL; + int r; + + assert(config); + assert(e); + + r = find_esp_and_warn(esp_path, false, &where, NULL, NULL, NULL, NULL); + if (r < 0) + return r; + + r = boot_entries_load_config(where, config); + if (r < 0) + return log_error_errno(r, "Failed to load bootspec config from \"%s/loader\": %m", where); + + if (config->default_entry < 0) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), + "No entry suitable as default, refusing to guess."); + + *e = &config->entries[config->default_entry]; + log_debug("Found default boot entry in file \"%s\"", (*e)->path); + + if (esp_where) + *esp_where = TAKE_PTR(where); + + return 0; +} diff --git a/src/shared/bootspec.h b/src/shared/bootspec.h new file mode 100644 index 0000000..ed57621 --- /dev/null +++ b/src/shared/bootspec.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-id128.h" + +typedef struct BootEntry { + char *id; /* This is the file basename without extension */ + char *path; /* This is the full path to the file */ + char *title; + char *show_title; + char *version; + char *machine_id; + char *architecture; + char **options; + char *kernel; /* linux is #defined to 1, yikes! */ + char *efi; + char **initrd; + char *device_tree; +} BootEntry; + +typedef struct BootConfig { + char *default_pattern; + char *timeout; + char *editor; + char *auto_entries; + char *auto_firmware; + char *console_mode; + + char *entry_oneshot; + char *entry_default; + + BootEntry *entries; + size_t n_entries; + ssize_t default_entry; +} BootConfig; + +void boot_config_free(BootConfig *config); +int boot_entries_load_config(const char *esp_path, BootConfig *config); + +static inline const char* boot_entry_title(const BootEntry *entry) { + return entry->show_title ?: entry->title ?: entry->id; +} + +int find_esp_and_warn(const char *path, bool unprivileged_mode, char **ret_path, uint32_t *ret_part, uint64_t *ret_pstart, uint64_t *ret_psize, sd_id128_t *ret_uuid); + +int find_default_boot_entry(const char *esp_path, char **esp_where, BootConfig *config, const BootEntry **e); diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c new file mode 100644 index 0000000..2c61e04 --- /dev/null +++ b/src/shared/bpf-program.c @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "bpf-program.h" +#include "fd-util.h" +#include "log.h" +#include "missing.h" +#include "path-util.h" +#include "util.h" + +int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { + _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; + + p = new0(BPFProgram, 1); + if (!p) + return log_oom(); + + p->n_ref = 1; + p->prog_type = prog_type; + p->kernel_fd = -1; + + *ret = TAKE_PTR(p); + + return 0; +} + +static BPFProgram *bpf_program_free(BPFProgram *p) { + assert(p); + + /* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last + * fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated + * programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with + * zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in + * question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during + * operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To + * counter this, we track closely to which cgroup a program was attached to and will detach it on our own + * whenever we close the BPF fd. */ + (void) bpf_program_cgroup_detach(p); + + safe_close(p->kernel_fd); + free(p->instructions); + free(p->attached_path); + + return mfree(p); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free); + +int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) { + + assert(p); + + if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */ + return -EBUSY; + + if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count)) + return -ENOMEM; + + memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count); + p->n_instructions += count; + + return 0; +} + +int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { + union bpf_attr attr; + + assert(p); + + if (p->kernel_fd >= 0) { /* make this idempotent */ + memzero(log_buf, log_size); + return 0; + } + + attr = (union bpf_attr) { + .prog_type = p->prog_type, + .insns = PTR_TO_UINT64(p->instructions), + .insn_cnt = p->n_instructions, + .license = PTR_TO_UINT64("GPL"), + .log_buf = PTR_TO_UINT64(log_buf), + .log_level = !!log_buf, + .log_size = log_size, + }; + + p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + if (p->kernel_fd < 0) + return -errno; + + return 0; +} + +int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) { + _cleanup_free_ char *copy = NULL; + _cleanup_close_ int fd = -1; + union bpf_attr attr; + int r; + + assert(p); + assert(type >= 0); + assert(path); + + if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI)) + return -EINVAL; + + /* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's + * refuse this early. */ + if (p->attached_path) { + if (!path_equal(p->attached_path, path)) + return -EBUSY; + if (p->attached_type != type) + return -EBUSY; + if (p->attached_flags != flags) + return -EBUSY; + + /* Here's a shortcut: if we previously attached this program already, then we don't have to do so + * again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have + * replaced our program since the last time, hence let's reattach it again, just to be safe. In flags + * == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags + * == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours + * would remain in effect. */ + if (flags != BPF_F_ALLOW_OVERRIDE) + return 0; + } + + /* Ensure we have a kernel object for this. */ + r = bpf_program_load_kernel(p, NULL, 0); + if (r < 0) + return r; + + copy = strdup(path); + if (!copy) + return -ENOMEM; + + fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + attr = (union bpf_attr) { + .attach_type = type, + .target_fd = fd, + .attach_bpf_fd = p->kernel_fd, + .attach_flags = flags, + }; + + if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) + return -errno; + + free_and_replace(p->attached_path, copy); + p->attached_type = type; + p->attached_flags = flags; + + return 0; +} + +int bpf_program_cgroup_detach(BPFProgram *p) { + _cleanup_close_ int fd = -1; + + assert(p); + + if (!p->attached_path) + return -EUNATCH; + + fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return -errno; + + /* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached + * implicitly by the removal, hence don't complain */ + + } else { + union bpf_attr attr; + + attr = (union bpf_attr) { + .attach_type = p->attached_type, + .target_fd = fd, + .attach_bpf_fd = p->kernel_fd, + }; + + if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) + return -errno; + } + + p->attached_path = mfree(p->attached_path); + + return 0; +} + +int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) { + union bpf_attr attr = { + .map_type = type, + .key_size = key_size, + .value_size = value_size, + .max_entries = max_entries, + .map_flags = flags, + }; + int fd; + + fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + if (fd < 0) + return -errno; + + return fd; +} + +int bpf_map_update_element(int fd, const void *key, void *value) { + + union bpf_attr attr = { + .map_fd = fd, + .key = PTR_TO_UINT64(key), + .value = PTR_TO_UINT64(value), + }; + + if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0) + return -errno; + + return 0; +} + +int bpf_map_lookup_element(int fd, const void *key, void *value) { + + union bpf_attr attr = { + .map_fd = fd, + .key = PTR_TO_UINT64(key), + .value = PTR_TO_UINT64(value), + }; + + if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0) + return -errno; + + return 0; +} diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h new file mode 100644 index 0000000..c21eb2f --- /dev/null +++ b/src/shared/bpf-program.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/bpf.h> +#include <stdint.h> +#include <sys/syscall.h> + +#include "list.h" +#include "macro.h" + +typedef struct BPFProgram BPFProgram; + +struct BPFProgram { + unsigned n_ref; + + int kernel_fd; + uint32_t prog_type; + + size_t n_instructions; + size_t allocated; + struct bpf_insn *instructions; + + char *attached_path; + int attached_type; + uint32_t attached_flags; +}; + +int bpf_program_new(uint32_t prog_type, BPFProgram **ret); +BPFProgram *bpf_program_unref(BPFProgram *p); +BPFProgram *bpf_program_ref(BPFProgram *p); + +int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count); +int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size); + +int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags); +int bpf_program_cgroup_detach(BPFProgram *p); + +int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags); +int bpf_map_update_element(int fd, const void *key, void *value); +int bpf_map_lookup_element(int fd, const void *key, void *value); + +DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c new file mode 100644 index 0000000..9a8051d --- /dev/null +++ b/src/shared/bus-unit-util.c @@ -0,0 +1,2547 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-unit-util.h" +#include "bus-util.h" +#include "cap-list.h" +#include "cgroup-util.h" +#include "condition.h" +#include "cpu-set-util.h" +#include "env-util.h" +#include "errno-list.h" +#include "escape.h" +#include "hashmap.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "in-addr-util.h" +#include "ip-protocol-list.h" +#include "list.h" +#include "locale-util.h" +#include "missing_fs.h" +#include "mountpoint-util.h" +#include "nsflags.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "securebits-util.h" +#include "signal-util.h" +#include "string-util.h" +#include "syslog-util.h" +#include "terminal-util.h" +#include "unit-def.h" +#include "user-util.h" +#include "utf8.h" +#include "util.h" + +int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) { + assert(message); + assert(u); + + u->machine = NULL; + + return sd_bus_message_read( + message, + "(ssssssouso)", + &u->id, + &u->description, + &u->load_state, + &u->active_state, + &u->sub_state, + &u->following, + &u->unit_path, + &u->job_id, + &u->job_type, + &u->job_path); +} + +#define DEFINE_BUS_APPEND_PARSE_PTR(bus_type, cast_type, type, parse_func) \ + static int bus_append_##parse_func( \ + sd_bus_message *m, \ + const char *field, \ + const char *eq) { \ + type val; \ + int r; \ + \ + r = parse_func(eq, &val); \ + if (r < 0) \ + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); \ + \ + r = sd_bus_message_append(m, "(sv)", field, \ + bus_type, (cast_type) val); \ + if (r < 0) \ + return bus_log_create_error(r); \ + \ + return 1; \ + } + +#define DEFINE_BUS_APPEND_PARSE(bus_type, parse_func) \ + static int bus_append_##parse_func( \ + sd_bus_message *m, \ + const char *field, \ + const char *eq) { \ + int r; \ + \ + r = parse_func(eq); \ + if (r < 0) \ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to parse %s: %s", field, eq); \ + \ + r = sd_bus_message_append(m, "(sv)", field, \ + bus_type, (int32_t) r); \ + if (r < 0) \ + return bus_log_create_error(r); \ + \ + return 1; \ + } + +DEFINE_BUS_APPEND_PARSE("b", parse_boolean); +DEFINE_BUS_APPEND_PARSE("i", ioprio_class_from_string); +DEFINE_BUS_APPEND_PARSE("i", ip_tos_from_string); +DEFINE_BUS_APPEND_PARSE("i", log_facility_unshifted_from_string); +DEFINE_BUS_APPEND_PARSE("i", log_level_from_string); +DEFINE_BUS_APPEND_PARSE("i", parse_errno); +DEFINE_BUS_APPEND_PARSE("i", sched_policy_from_string); +DEFINE_BUS_APPEND_PARSE("i", secure_bits_from_string); +DEFINE_BUS_APPEND_PARSE("i", signal_from_string); +DEFINE_BUS_APPEND_PARSE("i", parse_ip_protocol); +DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, ioprio_parse_priority); +DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, parse_nice); +DEFINE_BUS_APPEND_PARSE_PTR("i", int32_t, int, safe_atoi); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, nsec_t, parse_nsec); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_blkio_weight_parse); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_cpu_shares_parse); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, cg_weight_parse); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, unsigned long, mount_propagation_flags_from_string); +DEFINE_BUS_APPEND_PARSE_PTR("t", uint64_t, uint64_t, safe_atou64); +DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, mode_t, parse_mode); +DEFINE_BUS_APPEND_PARSE_PTR("u", uint32_t, unsigned, safe_atou); +DEFINE_BUS_APPEND_PARSE_PTR("x", int64_t, int64_t, safe_atoi64); + +static int bus_append_string(sd_bus_message *m, const char *field, const char *eq) { + int r; + + r = sd_bus_message_append(m, "(sv)", field, "s", eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_strv(sd_bus_message *m, const char *field, const char *eq, ExtractFlags flags) { + const char *p; + int r; + + r = sd_bus_message_open_container(m, 'r', "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 's', field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "as"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return bus_log_create_error(r); + + for (p = eq;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, flags); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Invalid syntax: %s", eq); + + r = sd_bus_message_append_basic(m, 's', word); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_byte_array(sd_bus_message *m, const char *field, const void *buf, size_t n) { + int r; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "ay"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'y', buf, n); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_parse_sec_rename(sd_bus_message *m, const char *field, const char *eq) { + char *n; + usec_t t; + size_t l; + int r; + + r = parse_sec(eq, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); + + l = strlen(field); + n = newa(char, l + 2); + /* Change suffix Sec → USec */ + strcpy(mempcpy(n, field, l - 3), "USec"); + + r = sd_bus_message_append(m, "(sv)", n, "t", t); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_parse_size(sd_bus_message *m, const char *field, const char *eq, uint64_t base) { + uint64_t v; + int r; + + r = parse_size(eq, base, &v); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); + + r = sd_bus_message_append(m, "(sv)", field, "t", v); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_exec_command(sd_bus_message *m, const char *field, const char *eq) { + bool ignore_failure = false, explicit_path = false, done = false; + _cleanup_strv_free_ char **l = NULL; + _cleanup_free_ char *path = NULL; + int r; + + do { + switch (*eq) { + + case '-': + if (ignore_failure) + done = true; + else { + ignore_failure = true; + eq++; + } + break; + + case '@': + if (explicit_path) + done = true; + else { + explicit_path = true; + eq++; + } + break; + + case '+': + case '!': + /* The bus API doesn't support +, ! and !! currently, unfortunately. :-( */ + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), + "Sorry, but +, ! and !! are currently not supported for transient services."); + + default: + done = true; + break; + } + } while (!done); + + if (explicit_path) { + r = extract_first_word(&eq, &path, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE); + if (r < 0) + return log_error_errno(r, "Failed to parse path: %m"); + } + + r = strv_split_extract(&l, eq, NULL, EXTRACT_QUOTES|EXTRACT_CUNESCAPE); + if (r < 0) + return log_error_errno(r, "Failed to parse command line: %m"); + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(sasb)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(sasb)"); + if (r < 0) + return bus_log_create_error(r); + + if (!strv_isempty(l)) { + + r = sd_bus_message_open_container(m, 'r', "sasb"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "s", path ?: l[0]); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_strv(m, l); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "b", ignore_failure); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; +} + +static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) { + int r; + + assert(m); + assert(prefix); + + r = sd_bus_message_open_container(m, 'r', "iayu"); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "i", family); + if (r < 0) + return r; + + r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family)); + if (r < 0) + return r; + + r = sd_bus_message_append(m, "u", prefixlen); + if (r < 0) + return r; + + return sd_bus_message_close_container(m); +} + +static int bus_append_cgroup_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, "DevicePolicy", "Slice")) + + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, + "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", + "TasksAccounting", "IPAccounting")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight")) + + return bus_append_cg_weight_parse(m, field, eq); + + if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) + + return bus_append_cg_cpu_shares_parse(m, field, eq); + + if (STR_IN_SET(field, "BlockIOWeight", "StartupBlockIOWeight")) + + return bus_append_cg_blkio_weight_parse(m, field, eq); + + if (streq(field, "Delegate")) { + + r = parse_boolean(eq); + if (r < 0) + return bus_append_strv(m, "DelegateControllers", eq, EXTRACT_QUOTES); + + r = sd_bus_message_append(m, "(sv)", "Delegate", "b", r); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "MemoryMin", "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit", "TasksMax")) { + + if (isempty(eq) || streq(eq, "infinity")) { + r = sd_bus_message_append(m, "(sv)", field, "t", CGROUP_LIMIT_MAX); + if (r < 0) + return bus_log_create_error(r); + return 1; + } + + r = parse_permille(eq); + if (r >= 0) { + char *n; + + /* When this is a percentage we'll convert this into a relative value in the range 0…UINT32_MAX + * and pass it in the MemoryLowScale property (and related ones). This way the physical memory + * size can be determined server-side. */ + + n = strjoina(field, "Scale"); + r = sd_bus_message_append(m, "(sv)", n, "u", (uint32_t) (((uint64_t) r * UINT32_MAX) / 1000U)); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "TasksMax")) + return bus_append_safe_atou64(m, field, eq); + + return bus_append_parse_size(m, field, eq, 1024); + } + + if (streq(field, "CPUQuota")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", USEC_INFINITY); + else { + r = parse_permille_unbounded(eq); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(ERANGE), + "CPU quota too small."); + if (r < 0) + return log_error_errno(r, "CPU quota '%s' invalid.", eq); + + r = sd_bus_message_append(m, "(sv)", "CPUQuotaPerSecUSec", "t", (((uint64_t) r * USEC_PER_SEC) / 1000U)); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "DeviceAllow")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 0); + else { + const char *path = eq, *rwm = NULL, *e; + + e = strchr(eq, ' '); + if (e) { + path = strndupa(eq, e - eq); + rwm = e+1; + } + + r = sd_bus_message_append(m, "(sv)", field, "a(ss)", 1, path, strempty(rwm)); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (cgroup_io_limit_type_from_string(field) >= 0 || STR_IN_SET(field, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0); + else { + const char *path, *bandwidth, *e; + uint64_t bytes; + + e = strchr(eq, ' '); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse %s value %s.", + field, eq); + + path = strndupa(eq, e - eq); + bandwidth = e+1; + + if (streq(bandwidth, "infinity")) + bytes = CGROUP_LIMIT_MAX; + else { + r = parse_size(bandwidth, 1000, &bytes); + if (r < 0) + return log_error_errno(r, "Failed to parse byte value %s: %m", bandwidth); + } + + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, bytes); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "IODeviceWeight", "BlockIODeviceWeight")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 0); + else { + const char *path, *weight, *e; + uint64_t u; + + e = strchr(eq, ' '); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse %s value %s.", + field, eq); + + path = strndupa(eq, e - eq); + weight = e+1; + + r = safe_atou64(weight, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, weight); + + r = sd_bus_message_append(m, "(sv)", field, "a(st)", 1, path, u); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "IODeviceLatencyTargetSec")) { + const char *field_usec = "IODeviceLatencyTargetUSec"; + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY); + else { + const char *path, *target, *e; + usec_t usec; + + e = strchr(eq, ' '); + if (!e) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse %s value %s.", + field, eq); + + path = strndupa(eq, e - eq); + target = e+1; + + r = parse_sec(target, &usec); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, target); + + r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec); + } + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) { + unsigned char prefixlen; + union in_addr_union prefix = {}; + int family; + + if (isempty(eq)) { + r = sd_bus_message_append(m, "(sv)", field, "a(iayu)", 0); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(iayu)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(iayu)"); + if (r < 0) + return bus_log_create_error(r); + + if (streq(eq, "any")) { + /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */ + + r = bus_append_ip_address_access(m, AF_INET, &prefix, 0); + if (r < 0) + return bus_log_create_error(r); + + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0); + if (r < 0) + return bus_log_create_error(r); + + } else if (is_localhost(eq)) { + /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */ + + prefix.in.s_addr = htobe32(0x7f000000); + r = bus_append_ip_address_access(m, AF_INET, &prefix, 8); + if (r < 0) + return bus_log_create_error(r); + + prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT; + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128); + if (r < 0) + return r; + + } else if (streq(eq, "link-local")) { + /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */ + + prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16)); + r = bus_append_ip_address_access(m, AF_INET, &prefix, 16); + if (r < 0) + return bus_log_create_error(r); + + prefix.in6 = (struct in6_addr) { + .s6_addr32[0] = htobe32(0xfe800000) + }; + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64); + if (r < 0) + return bus_log_create_error(r); + + } else if (streq(eq, "multicast")) { + /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */ + + prefix.in.s_addr = htobe32((UINT32_C(224) << 24)); + r = bus_append_ip_address_access(m, AF_INET, &prefix, 4); + if (r < 0) + return bus_log_create_error(r); + + prefix.in6 = (struct in6_addr) { + .s6_addr32[0] = htobe32(0xff000000) + }; + r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8); + if (r < 0) + return bus_log_create_error(r); + + } else { + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&eq, &word, NULL, 0); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Failed to parse %s: %s", field, eq); + + r = in_addr_prefix_from_string_auto(word, &family, &prefix, &prefixlen); + if (r < 0) + return log_error_errno(r, "Failed to parse IP address prefix: %s", word); + + r = bus_append_ip_address_access(m, family, &prefix, prefixlen); + if (r < 0) + return bus_log_create_error(r); + } + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_automount_property(sd_bus_message *m, const char *field, const char *eq) { + + if (streq(field, "Where")) + + return bus_append_string(m, field, eq); + + if (streq(field, "DirectoryMode")) + + return bus_append_parse_mode(m, field, eq); + + if (streq(field, "TimeoutIdleSec")) + + return bus_append_parse_sec_rename(m, field, eq); + + return 0; +} + +static int bus_append_execute_property(sd_bus_message *m, const char *field, const char *eq) { + const char *suffix; + int r; + + if (STR_IN_SET(field, + "User", "Group", + "UtmpIdentifier", "UtmpMode", "PAMName", "TTYPath", + "WorkingDirectory", "RootDirectory", "SyslogIdentifier", + "ProtectSystem", "ProtectHome", "SELinuxContext", "RootImage", + "RuntimeDirectoryPreserve", "Personality", "KeyringMode")) + + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, + "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", + "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", + "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix", + "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC", + "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", + "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, + "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories", + "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths", + "RuntimeDirectory", "StateDirectory", "CacheDirectory", "LogsDirectory", "ConfigurationDirectory", + "SupplementaryGroups", "SystemCallArchitectures")) + + return bus_append_strv(m, field, eq, EXTRACT_QUOTES); + + if (STR_IN_SET(field, "SyslogLevel", "LogLevelMax")) + + return bus_append_log_level_from_string(m, field, eq); + + if (streq(field, "SyslogFacility")) + + return bus_append_log_facility_unshifted_from_string(m, field, eq); + + if (streq(field, "SecureBits")) + + return bus_append_secure_bits_from_string(m, field, eq); + + if (streq(field, "CPUSchedulingPolicy")) + + return bus_append_sched_policy_from_string(m, field, eq); + + if (STR_IN_SET(field, "CPUSchedulingPriority", "OOMScoreAdjust")) + + return bus_append_safe_atoi(m, field, eq); + + if (streq(field, "Nice")) + + return bus_append_parse_nice(m, field, eq); + + if (streq(field, "SystemCallErrorNumber")) + + return bus_append_parse_errno(m, field, eq); + + if (streq(field, "IOSchedulingClass")) + + return bus_append_ioprio_class_from_string(m, field, eq); + + if (streq(field, "IOSchedulingPriority")) + + return bus_append_ioprio_parse_priority(m, field, eq); + + if (STR_IN_SET(field, + "RuntimeDirectoryMode", "StateDirectoryMode", "CacheDirectoryMode", + "LogsDirectoryMode", "ConfigurationDirectoryMode", "UMask")) + + return bus_append_parse_mode(m, field, eq); + + if (streq(field, "TimerSlackNSec")) + + return bus_append_parse_nsec(m, field, eq); + + if (streq(field, "LogRateLimitIntervalSec")) + + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "LogRateLimitBurst")) + + return bus_append_safe_atou(m, field, eq); + + if (streq(field, "MountFlags")) + + return bus_append_mount_propagation_flags_from_string(m, field, eq); + + if (STR_IN_SET(field, "Environment", "UnsetEnvironment", "PassEnvironment")) + + return bus_append_strv(m, field, eq, EXTRACT_QUOTES|EXTRACT_CUNESCAPE); + + if (streq(field, "EnvironmentFile")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 0); + else + r = sd_bus_message_append(m, "(sv)", "EnvironmentFiles", "a(sb)", 1, + eq[0] == '-' ? eq + 1 : eq, + eq[0] == '-'); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "LogExtraFields")) { + + r = sd_bus_message_open_container(m, 'r', "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 's', "LogExtraFields"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "aay"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "ay"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'y', eq, strlen(eq)); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "StandardInput", "StandardOutput", "StandardError")) { + const char *n, *appended; + + if ((n = startswith(eq, "fd:"))) { + appended = strjoina(field, "FileDescriptorName"); + r = sd_bus_message_append(m, "(sv)", appended, "s", n); + } else if ((n = startswith(eq, "file:"))) { + appended = strjoina(field, "File"); + r = sd_bus_message_append(m, "(sv)", appended, "s", n); + } else if ((n = startswith(eq, "append:"))) { + appended = strjoina(field, "FileToAppend"); + r = sd_bus_message_append(m, "(sv)", appended, "s", n); + } else + r = sd_bus_message_append(m, "(sv)", field, "s", eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "StandardInputText")) { + _cleanup_free_ char *unescaped = NULL; + + r = cunescape(eq, 0, &unescaped); + if (r < 0) + return log_error_errno(r, "Failed to unescape text '%s': %m", eq); + + if (!strextend(&unescaped, "\n", NULL)) + return log_oom(); + + /* Note that we don't expand specifiers here, but that should be OK, as this is a programmatic + * interface anyway */ + + return bus_append_byte_array(m, field, unescaped, strlen(unescaped)); + } + + if (streq(field, "StandardInputData")) { + _cleanup_free_ void *decoded = NULL; + size_t sz; + + r = unbase64mem(eq, (size_t) -1, &decoded, &sz); + if (r < 0) + return log_error_errno(r, "Failed to decode base64 data '%s': %m", eq); + + return bus_append_byte_array(m, field, decoded, sz); + } + + if ((suffix = startswith(field, "Limit"))) { + int rl; + + rl = rlimit_from_string(suffix); + if (rl >= 0) { + const char *sn; + struct rlimit l; + + r = rlimit_parse(rl, eq, &l); + if (r < 0) + return log_error_errno(r, "Failed to parse resource limit: %s", eq); + + r = sd_bus_message_append(m, "(sv)", field, "t", l.rlim_max); + if (r < 0) + return bus_log_create_error(r); + + sn = strjoina(field, "Soft"); + r = sd_bus_message_append(m, "(sv)", sn, "t", l.rlim_cur); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + } + + if (STR_IN_SET(field, "AppArmorProfile", "SmackProcessLabel")) { + int ignore = 0; + const char *s = eq; + + if (eq[0] == '-') { + ignore = 1; + s = eq + 1; + } + + r = sd_bus_message_append(m, "(sv)", field, "(bs)", ignore, s); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "CapabilityBoundingSet", "AmbientCapabilities")) { + uint64_t sum = 0; + bool invert = false; + const char *p = eq; + + if (*p == '~') { + invert = true; + p++; + } + + r = capability_set_from_string(p, &sum); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s: %m", field, eq); + + sum = invert ? ~sum : sum; + + r = sd_bus_message_append(m, "(sv)", field, "t", sum); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "CPUAffinity")) { + _cleanup_cpu_free_ cpu_set_t *cpuset = NULL; + + r = parse_cpu_set(eq, &cpuset); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value: %s", field, eq); + + return bus_append_byte_array(m, field, cpuset, CPU_ALLOC_SIZE(r)); + } + + if (STR_IN_SET(field, "RestrictAddressFamilies", "SystemCallFilter")) { + int whitelist = 1; + const char *p = eq; + + if (*p == '~') { + whitelist = 0; + p++; + } + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "(bas)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'r', "bas"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, 'b', &whitelist); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "s"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Invalid syntax: %s", eq); + + r = sd_bus_message_append_basic(m, 's', word); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "RestrictNamespaces")) { + bool invert = false; + unsigned long flags; + + r = parse_boolean(eq); + if (r > 0) + flags = 0; + else if (r == 0) + flags = NAMESPACE_FLAGS_ALL; + else { + if (eq[0] == '~') { + invert = true; + eq++; + } + + r = namespace_flags_from_string(eq, &flags); + if (r < 0) + return log_error_errno(r, "Failed to parse %s value %s.", field, eq); + } + + if (invert) + flags = (~flags) & NAMESPACE_FLAGS_ALL; + + r = sd_bus_message_append(m, "(sv)", field, "t", (uint64_t) flags); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (STR_IN_SET(field, "BindPaths", "BindReadOnlyPaths")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ssbt)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ssbt)"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *source = NULL, *destination = NULL; + char *s = NULL, *d = NULL; + bool ignore_enoent = false; + uint64_t flags = MS_REC; + + r = extract_first_word(&p, &source, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + break; + + s = source; + if (s[0] == '-') { + ignore_enoent = true; + s++; + } + + if (p && p[-1] == ':') { + r = extract_first_word(&p, &destination, ":" WHITESPACE, EXTRACT_QUOTES|EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Missing argument after ':': %s", + eq); + + d = destination; + + if (p && p[-1] == ':') { + _cleanup_free_ char *options = NULL; + + r = extract_first_word(&p, &options, NULL, EXTRACT_QUOTES); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + + if (isempty(options) || streq(options, "rbind")) + flags = MS_REC; + else if (streq(options, "norbind")) + flags = 0; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown options: %s", + eq); + } + } else + d = s; + + r = sd_bus_message_append(m, "(ssbt)", s, d, ignore_enoent, flags); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "TemporaryFileSystem")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(ss)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ss)"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *word = NULL, *path = NULL; + const char *w; + + r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + break; + + w = word; + r = extract_first_word(&w, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS); + if (r < 0) + return log_error_errno(r, "Failed to parse argument: %m"); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse argument: %s", + p); + + r = sd_bus_message_append(m, "(ss)", path, w); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_kill_property(sd_bus_message *m, const char *field, const char *eq) { + + if (streq(field, "KillMode")) + + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "SendSIGHUP", "SendSIGKILL")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "KillSignal", "FinalKillSignal", "WatchdogSignal")) + + return bus_append_signal_from_string(m, field, eq); + + return 0; +} + +static int bus_append_mount_property(sd_bus_message *m, const char *field, const char *eq) { + + if (STR_IN_SET(field, "What", "Where", "Options", "Type")) + + return bus_append_string(m, field, eq); + + if (streq(field, "TimeoutSec")) + + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "DirectoryMode")) + + return bus_append_parse_mode(m, field, eq); + + if (STR_IN_SET(field, "SloppyOptions", "LazyUnmount", "ForceUnmount")) + + return bus_append_parse_boolean(m, field, eq); + + return 0; +} + +static int bus_append_path_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (streq(field, "MakeDirectory")) + + return bus_append_parse_boolean(m, field, eq); + + if (streq(field, "DirectoryMode")) + + return bus_append_parse_mode(m, field, eq); + + if (STR_IN_SET(field, + "PathExists", "PathExistsGlob", "PathChanged", + "PathModified", "DirectoryNotEmpty")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 0); + else + r = sd_bus_message_append(m, "(sv)", "Paths", "a(ss)", 1, field, eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_service_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, + "PIDFile", "Type", "Restart", "BusName", "NotifyAccess", + "USBFunctionDescriptors", "USBFunctionStrings")) + + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, "PermissionsStartOnly", "RootDirectoryStartOnly", "RemainAfterExit", "GuessMainPID")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "RestartSec", "TimeoutStartSec", "TimeoutStopSec", "RuntimeMaxSec", "WatchdogSec")) + + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "TimeoutSec")) { + + r = bus_append_parse_sec_rename(m, "TimeoutStartSec", eq); + if (r < 0) + return r; + + return bus_append_parse_sec_rename(m, "TimeoutStopSec", eq); + } + + if (streq(field, "FileDescriptorStoreMax")) + + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, + "ExecStartPre", "ExecStart", "ExecStartPost", + "ExecReload", "ExecStop", "ExecStopPost")) + + return bus_append_exec_command(m, field, eq); + + if (STR_IN_SET(field, "RestartPreventExitStatus", "RestartForceExitStatus", "SuccessExitStatus")) { + _cleanup_free_ int *status = NULL, *signal = NULL; + size_t sz_status = 0, sz_signal = 0; + const char *p; + + for (p = eq;;) { + _cleanup_free_ char *word = NULL; + int val; + + r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) + return log_error_errno(r, "Invalid syntax in %s: %s", field, eq); + + r = safe_atoi(word, &val); + if (r < 0) { + val = signal_from_string(word); + if (val < 0) + return log_error_errno(r, "Invalid status or signal %s in %s: %m", word, field); + + signal = reallocarray(signal, sz_signal + 1, sizeof(int)); + if (!signal) + return log_oom(); + + signal[sz_signal++] = val; + } else { + status = reallocarray(status, sz_status + 1, sizeof(int)); + if (!status) + return log_oom(); + + status[sz_status++] = val; + } + } + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "(aiai)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'r', "aiai"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'i', status, sz_status); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_array(m, 'i', signal, sz_signal); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_socket_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, + "Accept", "Writable", "KeepAlive", "NoDelay", "FreeBind", "Transparent", "Broadcast", + "PassCredentials", "PassSecurity", "ReusePort", "RemoveOnStop", "SELinuxContextFromNet")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "Priority", "IPTTL", "Mark")) + + return bus_append_safe_atoi(m, field, eq); + + if (streq(field, "IPTOS")) + + return bus_append_ip_tos_from_string(m, field, eq); + + if (STR_IN_SET(field, "Backlog", "MaxConnections", "MaxConnectionsPerSource", "KeepAliveProbes", "TriggerLimitBurst")) + + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, "SocketMode", "DirectoryMode")) + + return bus_append_parse_mode(m, field, eq); + + if (STR_IN_SET(field, "MessageQueueMaxMessages", "MessageQueueMessageSize")) + + return bus_append_safe_atoi64(m, field, eq); + + if (STR_IN_SET(field, "TimeoutSec", "KeepAliveTimeSec", "KeepAliveIntervalSec", "DeferAcceptSec", "TriggerLimitIntervalSec")) + + return bus_append_parse_sec_rename(m, field, eq); + + if (STR_IN_SET(field, "ReceiveBuffer", "SendBuffer", "PipeSize")) + + return bus_append_parse_size(m, field, eq, 1024); + + if (STR_IN_SET(field, "ExecStartPre", "ExecStartPost", "ExecReload", "ExecStopPost")) + + return bus_append_exec_command(m, field, eq); + + if (STR_IN_SET(field, + "SmackLabel", "SmackLabelIPIn", "SmackLabelIPOut", "TCPCongestion", + "BindToDevice", "BindIPv6Only", "FileDescriptorName", + "SocketUser", "SocketGroup")) + + return bus_append_string(m, field, eq); + + if (streq(field, "Symlinks")) + + return bus_append_strv(m, field, eq, EXTRACT_QUOTES); + + if (streq(field, "SocketProtocol")) + + return bus_append_parse_ip_protocol(m, field, eq); + + if (STR_IN_SET(field, + "ListenStream", "ListenDatagram", "ListenSequentialPacket", "ListenNetlink", + "ListenSpecial", "ListenMessageQueue", "ListenFIFO", "ListenUSBFunction")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 0); + else + r = sd_bus_message_append(m, "(sv)", "Listen", "a(ss)", 1, field + STRLEN("Listen"), eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} +static int bus_append_timer_property(sd_bus_message *m, const char *field, const char *eq) { + int r; + + if (STR_IN_SET(field, "WakeSystem", "RemainAfterElapse", "Persistent")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "AccuracySec", "RandomizedDelaySec")) + + return bus_append_parse_sec_rename(m, field, eq); + + if (STR_IN_SET(field, + "OnActiveSec", "OnBootSec", "OnStartupSec", + "OnUnitActiveSec","OnUnitInactiveSec")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 0); + else { + usec_t t; + r = parse_sec(eq, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s: %m", field, eq); + + r = sd_bus_message_append(m, "(sv)", "TimersMonotonic", "a(st)", 1, field, t); + } + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (streq(field, "OnCalendar")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 0); + else + r = sd_bus_message_append(m, "(sv)", "TimersCalendar", "a(ss)", 1, field, eq); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +static int bus_append_unit_property(sd_bus_message *m, const char *field, const char *eq) { + ConditionType t = _CONDITION_TYPE_INVALID; + bool is_condition = false; + int r; + + if (STR_IN_SET(field, + "Description", "SourcePath", "OnFailureJobMode", + "JobTimeoutAction", "JobTimeoutRebootArgument", + "StartLimitAction", "FailureAction", "SuccessAction", + "RebootArgument", "CollectMode")) + + return bus_append_string(m, field, eq); + + if (STR_IN_SET(field, + "StopWhenUnneeded", "RefuseManualStart", "RefuseManualStop", + "AllowIsolate", "IgnoreOnIsolate", "DefaultDependencies")) + + return bus_append_parse_boolean(m, field, eq); + + if (STR_IN_SET(field, "JobTimeoutSec", "JobRunningTimeoutSec", "StartLimitIntervalSec")) + + return bus_append_parse_sec_rename(m, field, eq); + + if (streq(field, "StartLimitBurst")) + + return bus_append_safe_atou(m, field, eq); + + if (STR_IN_SET(field, "SuccessActionExitStatus", "FailureActionExitStatus")) { + + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "i", -1); + else { + uint8_t u; + + r = safe_atou8(eq, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse %s=%s", field, eq); + + r = sd_bus_message_append(m, "(sv)", field, "i", (int) u); + } + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + if (unit_dependency_from_string(field) >= 0 || + STR_IN_SET(field, "Documentation", "RequiresMountsFor")) + + return bus_append_strv(m, field, eq, EXTRACT_QUOTES); + + t = condition_type_from_string(field); + if (t >= 0) + is_condition = true; + else + t = assert_type_from_string(field); + if (t >= 0) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 0); + else { + const char *p = eq; + int trigger, negate; + + trigger = *p == '|'; + if (trigger) + p++; + + negate = *p == '!'; + if (negate) + p++; + + r = sd_bus_message_append(m, "(sv)", is_condition ? "Conditions" : "Asserts", "a(sbbs)", 1, + field, trigger, negate, p); + } + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + + return 0; +} + +int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment) { + const char *eq, *field; + int r; + + assert(m); + assert(assignment); + + eq = strchr(assignment, '='); + if (!eq) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Not an assignment: %s", assignment); + + field = strndupa(assignment, eq - assignment); + eq++; + + switch (t) { + case UNIT_SERVICE: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_execute_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_service_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_SOCKET: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_execute_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_socket_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_TIMER: + r = bus_append_timer_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_PATH: + r = bus_append_path_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_SLICE: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_SCOPE: + + if (streq(field, "TimeoutStopSec")) + return bus_append_parse_sec_rename(m, field, eq); + + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + break; + + case UNIT_MOUNT: + r = bus_append_cgroup_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_execute_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_kill_property(m, field, eq); + if (r != 0) + return r; + + r = bus_append_mount_property(m, field, eq); + if (r != 0) + return r; + + break; + + case UNIT_AUTOMOUNT: + r = bus_append_automount_property(m, field, eq); + if (r != 0) + return r; + + break; + + case UNIT_TARGET: + case UNIT_DEVICE: + case UNIT_SWAP: + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Not supported unit type"); + + default: + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid unit type"); + } + + r = bus_append_unit_property(m, field, eq); + if (r != 0) + return r; + + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unknown assignment: %s", assignment); +} + +int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l) { + char **i; + int r; + + assert(m); + + STRV_FOREACH(i, l) { + r = bus_append_unit_property_assignment(m, t, *i); + if (r < 0) + return r; + } + + return 0; +} + +typedef struct BusWaitForJobs { + sd_bus *bus; + Set *jobs; + + char *name; + char *result; + + sd_bus_slot *slot_job_removed; + sd_bus_slot *slot_disconnected; +} BusWaitForJobs; + +static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *error) { + assert(m); + + log_error("Warning! D-Bus connection terminated."); + sd_bus_close(sd_bus_message_get_bus(m)); + + return 0; +} + +static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) { + const char *path, *unit, *result; + BusWaitForJobs *d = userdata; + uint32_t id; + char *found; + int r; + + assert(m); + assert(d); + + r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result); + if (r < 0) { + bus_log_parse_error(r); + return 0; + } + + found = set_remove(d->jobs, (char*) path); + if (!found) + return 0; + + free(found); + + if (!isempty(result)) + d->result = strdup(result); + + if (!isempty(unit)) + d->name = strdup(unit); + + return 0; +} + +void bus_wait_for_jobs_free(BusWaitForJobs *d) { + if (!d) + return; + + set_free_free(d->jobs); + + sd_bus_slot_unref(d->slot_disconnected); + sd_bus_slot_unref(d->slot_job_removed); + + sd_bus_unref(d->bus); + + free(d->name); + free(d->result); + + free(d); +} + +int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) { + _cleanup_(bus_wait_for_jobs_freep) BusWaitForJobs *d = NULL; + int r; + + assert(bus); + assert(ret); + + d = new0(BusWaitForJobs, 1); + if (!d) + return -ENOMEM; + + d->bus = sd_bus_ref(bus); + + /* When we are a bus client we match by sender. Direct + * connections OTOH have no initialized sender field, and + * hence we ignore the sender then */ + r = sd_bus_match_signal_async( + bus, + &d->slot_job_removed, + bus->bus_client ? "org.freedesktop.systemd1" : NULL, + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "JobRemoved", + match_job_removed, NULL, d); + if (r < 0) + return r; + + r = sd_bus_match_signal_async( + bus, + &d->slot_disconnected, + "org.freedesktop.DBus.Local", + NULL, + "org.freedesktop.DBus.Local", + "Disconnected", + match_disconnected, NULL, d); + if (r < 0) + return r; + + *ret = TAKE_PTR(d); + + return 0; +} + +static int bus_process_wait(sd_bus *bus) { + int r; + + for (;;) { + r = sd_bus_process(bus, NULL); + if (r < 0) + return r; + if (r > 0) + return 0; + + r = sd_bus_wait(bus, (uint64_t) -1); + if (r < 0) + return r; + } +} + +static int bus_job_get_service_result(BusWaitForJobs *d, char **result) { + _cleanup_free_ char *dbus_path = NULL; + + assert(d); + assert(d->name); + assert(result); + + if (!endswith(d->name, ".service")) + return -EINVAL; + + dbus_path = unit_dbus_path_from_name(d->name); + if (!dbus_path) + return -ENOMEM; + + return sd_bus_get_property_string(d->bus, + "org.freedesktop.systemd1", + dbus_path, + "org.freedesktop.systemd1.Service", + "Result", + NULL, + result); +} + +static const struct { + const char *result, *explanation; +} explanations [] = { + { "resources", "of unavailable resources or another system error" }, + { "protocol", "the service did not take the steps required by its unit configuration" }, + { "timeout", "a timeout was exceeded" }, + { "exit-code", "the control process exited with error code" }, + { "signal", "a fatal signal was delivered to the control process" }, + { "core-dump", "a fatal signal was delivered causing the control process to dump core" }, + { "watchdog", "the service failed to send watchdog ping" }, + { "start-limit", "start of the service was attempted too often" } +}; + +static void log_job_error_with_service_result(const char* service, const char *result, const char* const* extra_args) { + _cleanup_free_ char *service_shell_quoted = NULL; + const char *systemctl = "systemctl", *journalctl = "journalctl"; + + assert(service); + + service_shell_quoted = shell_maybe_quote(service, ESCAPE_BACKSLASH); + + if (!strv_isempty((char**) extra_args)) { + _cleanup_free_ char *t; + + t = strv_join((char**) extra_args, " "); + systemctl = strjoina("systemctl ", t ? : "<args>"); + journalctl = strjoina("journalctl ", t ? : "<args>"); + } + + if (!isempty(result)) { + unsigned i; + + for (i = 0; i < ELEMENTSOF(explanations); ++i) + if (streq(result, explanations[i].result)) + break; + + if (i < ELEMENTSOF(explanations)) { + log_error("Job for %s failed because %s.\n" + "See \"%s status %s\" and \"%s -xe\" for details.\n", + service, + explanations[i].explanation, + systemctl, + service_shell_quoted ?: "<service>", + journalctl); + goto finish; + } + } + + log_error("Job for %s failed.\n" + "See \"%s status %s\" and \"%s -xe\" for details.\n", + service, + systemctl, + service_shell_quoted ?: "<service>", + journalctl); + +finish: + /* For some results maybe additional explanation is required */ + if (streq_ptr(result, "start-limit")) + log_info("To force a start use \"%1$s reset-failed %2$s\"\n" + "followed by \"%1$s start %2$s\" again.", + systemctl, + service_shell_quoted ?: "<service>"); +} + +static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const* extra_args) { + assert(d->result); + + if (!quiet) { + if (streq(d->result, "canceled")) + log_error("Job for %s canceled.", strna(d->name)); + else if (streq(d->result, "timeout")) + log_error("Job for %s timed out.", strna(d->name)); + else if (streq(d->result, "dependency")) + log_error("A dependency job for %s failed. See 'journalctl -xe' for details.", strna(d->name)); + else if (streq(d->result, "invalid")) + log_error("%s is not active, cannot reload.", strna(d->name)); + else if (streq(d->result, "assert")) + log_error("Assertion failed on job for %s.", strna(d->name)); + else if (streq(d->result, "unsupported")) + log_error("Operation on or unit type of %s not supported on this system.", strna(d->name)); + else if (streq(d->result, "collected")) + log_error("Queued job for %s was garbage collected.", strna(d->name)); + else if (streq(d->result, "once")) + log_error("Unit %s was started already once and can't be started again.", strna(d->name)); + else if (!STR_IN_SET(d->result, "done", "skipped")) { + if (d->name) { + _cleanup_free_ char *result = NULL; + int q; + + q = bus_job_get_service_result(d, &result); + if (q < 0) + log_debug_errno(q, "Failed to get Result property of unit %s: %m", d->name); + + log_job_error_with_service_result(d->name, result, extra_args); + } else + log_error("Job failed. See \"journalctl -xe\" for details."); + } + } + + if (STR_IN_SET(d->result, "canceled", "collected")) + return -ECANCELED; + else if (streq(d->result, "timeout")) + return -ETIME; + else if (streq(d->result, "dependency")) + return -EIO; + else if (streq(d->result, "invalid")) + return -ENOEXEC; + else if (streq(d->result, "assert")) + return -EPROTO; + else if (streq(d->result, "unsupported")) + return -EOPNOTSUPP; + else if (streq(d->result, "once")) + return -ESTALE; + else if (STR_IN_SET(d->result, "done", "skipped")) + return 0; + + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Unexpected job result, assuming server side newer than us: %s", d->result); +} + +int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args) { + int r = 0; + + assert(d); + + while (!set_isempty(d->jobs)) { + int q; + + q = bus_process_wait(d->bus); + if (q < 0) + return log_error_errno(q, "Failed to wait for response: %m"); + + if (d->result) { + q = check_wait_response(d, quiet, extra_args); + /* Return the first error as it is most likely to be + * meaningful. */ + if (q < 0 && r == 0) + r = q; + + log_debug_errno(q, "Got result %s/%m for job %s", strna(d->result), strna(d->name)); + } + + d->name = mfree(d->name); + d->result = mfree(d->result); + } + + return r; +} + +int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path) { + int r; + + assert(d); + + r = set_ensure_allocated(&d->jobs, &string_hash_ops); + if (r < 0) + return r; + + return set_put_strdup(d->jobs, path); +} + +int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet) { + int r; + + r = bus_wait_for_jobs_add(d, path); + if (r < 0) + return log_oom(); + + return bus_wait_for_jobs(d, quiet, NULL); +} + +int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes) { + const char *type, *path, *source; + int r; + + /* changes is dereferenced when calling unit_file_dump_changes() later, + * so we have to make sure this is not NULL. */ + assert(changes); + assert(n_changes); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(sss)"); + if (r < 0) + return bus_log_parse_error(r); + + while ((r = sd_bus_message_read(m, "(sss)", &type, &path, &source)) > 0) { + /* We expect only "success" changes to be sent over the bus. + Hence, reject anything negative. */ + UnitFileChangeType ch = unit_file_change_type_from_string(type); + + if (ch < 0) { + log_notice("Manager reported unknown change type \"%s\" for path \"%s\", ignoring.", type, path); + continue; + } + + r = unit_file_changes_add(changes, n_changes, ch, path, source); + if (r < 0) + return r; + } + if (r < 0) + return bus_log_parse_error(r); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return bus_log_parse_error(r); + + unit_file_dump_changes(0, NULL, *changes, *n_changes, quiet); + return 0; +} + +struct CGroupInfo { + char *cgroup_path; + bool is_const; /* If false, cgroup_path should be free()'d */ + + Hashmap *pids; /* PID → process name */ + bool done; + + struct CGroupInfo *parent; + LIST_FIELDS(struct CGroupInfo, siblings); + LIST_HEAD(struct CGroupInfo, children); + size_t n_children; +}; + +static bool IS_ROOT(const char *p) { + return isempty(p) || streq(p, "/"); +} + +static int add_cgroup(Hashmap *cgroups, const char *path, bool is_const, struct CGroupInfo **ret) { + struct CGroupInfo *parent = NULL, *cg; + int r; + + assert(cgroups); + assert(ret); + + if (IS_ROOT(path)) + path = "/"; + + cg = hashmap_get(cgroups, path); + if (cg) { + *ret = cg; + return 0; + } + + if (!IS_ROOT(path)) { + const char *e, *pp; + + e = strrchr(path, '/'); + if (!e) + return -EINVAL; + + pp = strndupa(path, e - path); + if (!pp) + return -ENOMEM; + + r = add_cgroup(cgroups, pp, false, &parent); + if (r < 0) + return r; + } + + cg = new0(struct CGroupInfo, 1); + if (!cg) + return -ENOMEM; + + if (is_const) + cg->cgroup_path = (char*) path; + else { + cg->cgroup_path = strdup(path); + if (!cg->cgroup_path) { + free(cg); + return -ENOMEM; + } + } + + cg->is_const = is_const; + cg->parent = parent; + + r = hashmap_put(cgroups, cg->cgroup_path, cg); + if (r < 0) { + if (!is_const) + free(cg->cgroup_path); + free(cg); + return r; + } + + if (parent) { + LIST_PREPEND(siblings, parent->children, cg); + parent->n_children++; + } + + *ret = cg; + return 1; +} + +static int add_process( + Hashmap *cgroups, + const char *path, + pid_t pid, + const char *name) { + + struct CGroupInfo *cg; + int r; + + assert(cgroups); + assert(name); + assert(pid > 0); + + r = add_cgroup(cgroups, path, true, &cg); + if (r < 0) + return r; + + r = hashmap_ensure_allocated(&cg->pids, &trivial_hash_ops); + if (r < 0) + return r; + + return hashmap_put(cg->pids, PID_TO_PTR(pid), (void*) name); +} + +static void remove_cgroup(Hashmap *cgroups, struct CGroupInfo *cg) { + assert(cgroups); + assert(cg); + + while (cg->children) + remove_cgroup(cgroups, cg->children); + + hashmap_remove(cgroups, cg->cgroup_path); + + if (!cg->is_const) + free(cg->cgroup_path); + + hashmap_free(cg->pids); + + if (cg->parent) + LIST_REMOVE(siblings, cg->parent->children, cg); + + free(cg); +} + +static int cgroup_info_compare_func(struct CGroupInfo * const *a, struct CGroupInfo * const *b) { + return strcmp((*a)->cgroup_path, (*b)->cgroup_path); +} + +static int dump_processes( + Hashmap *cgroups, + const char *cgroup_path, + const char *prefix, + unsigned n_columns, + OutputFlags flags) { + + struct CGroupInfo *cg; + int r; + + assert(prefix); + + if (IS_ROOT(cgroup_path)) + cgroup_path = "/"; + + cg = hashmap_get(cgroups, cgroup_path); + if (!cg) + return 0; + + if (!hashmap_isempty(cg->pids)) { + const char *name; + size_t n = 0, i; + pid_t *pids; + void *pidp; + Iterator j; + int width; + + /* Order processes by their PID */ + pids = newa(pid_t, hashmap_size(cg->pids)); + + HASHMAP_FOREACH_KEY(name, pidp, cg->pids, j) + pids[n++] = PTR_TO_PID(pidp); + + assert(n == hashmap_size(cg->pids)); + typesafe_qsort(pids, n, pid_compare_func); + + width = DECIMAL_STR_WIDTH(pids[n-1]); + + for (i = 0; i < n; i++) { + _cleanup_free_ char *e = NULL; + const char *special; + bool more; + + name = hashmap_get(cg->pids, PID_TO_PTR(pids[i])); + assert(name); + + if (n_columns != 0) { + unsigned k; + + k = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U); + + e = ellipsize(name, k, 100); + if (e) + name = e; + } + + more = i+1 < n || cg->children; + special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT); + + fprintf(stdout, "%s%s%*"PID_PRI" %s\n", + prefix, + special, + width, pids[i], + name); + } + } + + if (cg->children) { + struct CGroupInfo **children, *child; + size_t n = 0, i; + + /* Order subcgroups by their name */ + children = newa(struct CGroupInfo*, cg->n_children); + LIST_FOREACH(siblings, child, cg->children) + children[n++] = child; + assert(n == cg->n_children); + typesafe_qsort(children, n, cgroup_info_compare_func); + + if (n_columns != 0) + n_columns = MAX(LESS_BY(n_columns, 2U), 20U); + + for (i = 0; i < n; i++) { + _cleanup_free_ char *pp = NULL; + const char *name, *special; + bool more; + + child = children[i]; + + name = strrchr(child->cgroup_path, '/'); + if (!name) + return -EINVAL; + name++; + + more = i+1 < n; + special = special_glyph(more ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT); + + fputs(prefix, stdout); + fputs(special, stdout); + fputs(name, stdout); + fputc('\n', stdout); + + special = special_glyph(more ? SPECIAL_GLYPH_TREE_VERTICAL : SPECIAL_GLYPH_TREE_SPACE); + + pp = strappend(prefix, special); + if (!pp) + return -ENOMEM; + + r = dump_processes(cgroups, child->cgroup_path, pp, n_columns, flags); + if (r < 0) + return r; + } + } + + cg->done = true; + return 0; +} + +static int dump_extra_processes( + Hashmap *cgroups, + const char *prefix, + unsigned n_columns, + OutputFlags flags) { + + _cleanup_free_ pid_t *pids = NULL; + _cleanup_hashmap_free_ Hashmap *names = NULL; + struct CGroupInfo *cg; + size_t n_allocated = 0, n = 0, k; + Iterator i; + int width, r; + + /* Prints the extra processes, i.e. those that are in cgroups we haven't displayed yet. We show them as + * combined, sorted, linear list. */ + + HASHMAP_FOREACH(cg, cgroups, i) { + const char *name; + void *pidp; + Iterator j; + + if (cg->done) + continue; + + if (hashmap_isempty(cg->pids)) + continue; + + r = hashmap_ensure_allocated(&names, &trivial_hash_ops); + if (r < 0) + return r; + + if (!GREEDY_REALLOC(pids, n_allocated, n + hashmap_size(cg->pids))) + return -ENOMEM; + + HASHMAP_FOREACH_KEY(name, pidp, cg->pids, j) { + pids[n++] = PTR_TO_PID(pidp); + + r = hashmap_put(names, pidp, (void*) name); + if (r < 0) + return r; + } + } + + if (n == 0) + return 0; + + typesafe_qsort(pids, n, pid_compare_func); + width = DECIMAL_STR_WIDTH(pids[n-1]); + + for (k = 0; k < n; k++) { + _cleanup_free_ char *e = NULL; + const char *name; + + name = hashmap_get(names, PID_TO_PTR(pids[k])); + assert(name); + + if (n_columns != 0) { + unsigned z; + + z = MAX(LESS_BY(n_columns, 2U + width + 1U), 20U); + + e = ellipsize(name, z, 100); + if (e) + name = e; + } + + fprintf(stdout, "%s%s %*" PID_PRI " %s\n", + prefix, + special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET), + width, pids[k], + name); + } + + return 0; +} + +int unit_show_processes( + sd_bus *bus, + const char *unit, + const char *cgroup_path, + const char *prefix, + unsigned n_columns, + OutputFlags flags, + sd_bus_error *error) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + Hashmap *cgroups = NULL; + struct CGroupInfo *cg; + int r; + + assert(bus); + assert(unit); + + if (flags & OUTPUT_FULL_WIDTH) + n_columns = 0; + else if (n_columns <= 0) + n_columns = columns(); + + prefix = strempty(prefix); + + r = sd_bus_call_method( + bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "GetUnitProcesses", + error, + &reply, + "s", + unit); + if (r < 0) + return r; + + cgroups = hashmap_new(&path_hash_ops); + if (!cgroups) + return -ENOMEM; + + r = sd_bus_message_enter_container(reply, 'a', "(sus)"); + if (r < 0) + goto finish; + + for (;;) { + const char *path = NULL, *name = NULL; + uint32_t pid; + + r = sd_bus_message_read(reply, "(sus)", &path, &pid, &name); + if (r < 0) + goto finish; + if (r == 0) + break; + + r = add_process(cgroups, path, pid, name); + if (r < 0) + goto finish; + } + + r = sd_bus_message_exit_container(reply); + if (r < 0) + goto finish; + + r = dump_processes(cgroups, cgroup_path, prefix, n_columns, flags); + if (r < 0) + goto finish; + + r = dump_extra_processes(cgroups, prefix, n_columns, flags); + +finish: + while ((cg = hashmap_first(cgroups))) + remove_cgroup(cgroups, cg); + + hashmap_free(cgroups); + + return r; +} + +int unit_load_state(sd_bus *bus, const char *name, char **load_state) { + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *path = NULL; + int r; + + path = unit_dbus_path_from_name(name); + if (!path) + return log_oom(); + + /* This function warns on it's own, because otherwise it'd be awkward to pass + * the dbus error message around. */ + + r = sd_bus_get_property_string( + bus, + "org.freedesktop.systemd1", + path, + "org.freedesktop.systemd1.Unit", + "LoadState", + &error, + load_state); + if (r < 0) + return log_error_errno(r, "Failed to get load state of %s: %s", name, bus_error_message(&error, r)); + + return 0; +} diff --git a/src/shared/bus-unit-util.h b/src/shared/bus-unit-util.h new file mode 100644 index 0000000..4fc94b0 --- /dev/null +++ b/src/shared/bus-unit-util.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "install.h" +#include "output-mode.h" +#include "sd-bus.h" +#include "unit-def.h" + +typedef struct UnitInfo { + const char *machine; + const char *id; + const char *description; + const char *load_state; + const char *active_state; + const char *sub_state; + const char *following; + const char *unit_path; + uint32_t job_id; + const char *job_type; + const char *job_path; +} UnitInfo; + +int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u); + +int bus_append_unit_property_assignment(sd_bus_message *m, UnitType t, const char *assignment); +int bus_append_unit_property_assignment_many(sd_bus_message *m, UnitType t, char **l); + +typedef struct BusWaitForJobs BusWaitForJobs; + +int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret); +void bus_wait_for_jobs_free(BusWaitForJobs *d); +int bus_wait_for_jobs_add(BusWaitForJobs *d, const char *path); +int bus_wait_for_jobs(BusWaitForJobs *d, bool quiet, const char* const* extra_args); +int bus_wait_for_jobs_one(BusWaitForJobs *d, const char *path, bool quiet); + +DEFINE_TRIVIAL_CLEANUP_FUNC(BusWaitForJobs*, bus_wait_for_jobs_free); + +int bus_deserialize_and_dump_unit_file_changes(sd_bus_message *m, bool quiet, UnitFileChange **changes, size_t *n_changes); + +int unit_show_processes(sd_bus *bus, const char *unit, const char *cgroup_path, const char *prefix, unsigned n_columns, OutputFlags flags, sd_bus_error *error); + +int unit_load_state(sd_bus *bus, const char *name, char **load_state); diff --git a/src/shared/bus-util.c b/src/shared/bus-util.c new file mode 100644 index 0000000..cbcf698 --- /dev/null +++ b/src/shared/bus-util.c @@ -0,0 +1,1753 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <unistd.h> + +#include "sd-bus-protocol.h" +#include "sd-bus.h" +#include "sd-daemon.h" +#include "sd-event.h" +#include "sd-id128.h" + +#include "alloc-util.h" +#include "bus-internal.h" +#include "bus-label.h" +#include "bus-message.h" +#include "bus-util.h" +#include "cap-list.h" +#include "cgroup-util.h" +#include "def.h" +#include "escape.h" +#include "fd-util.h" +#include "missing.h" +#include "mountpoint-util.h" +#include "nsflags.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "rlimit-util.h" +#include "stdio-util.h" +#include "strv.h" +#include "user-util.h" + +static int name_owner_change_callback(sd_bus_message *m, void *userdata, sd_bus_error *ret_error) { + sd_event *e = userdata; + + assert(m); + assert(e); + + sd_bus_close(sd_bus_message_get_bus(m)); + sd_event_exit(e, 0); + + return 1; +} + +int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name) { + const char *match; + const char *unique; + int r; + + assert(e); + assert(bus); + assert(name); + + /* We unregister the name here and then wait for the + * NameOwnerChanged signal for this event to arrive before we + * quit. We do this in order to make sure that any queued + * requests are still processed before we really exit. */ + + r = sd_bus_get_unique_name(bus, &unique); + if (r < 0) + return r; + + match = strjoina( + "sender='org.freedesktop.DBus'," + "type='signal'," + "interface='org.freedesktop.DBus'," + "member='NameOwnerChanged'," + "path='/org/freedesktop/DBus'," + "arg0='", name, "',", + "arg1='", unique, "',", + "arg2=''"); + + r = sd_bus_add_match_async(bus, NULL, match, name_owner_change_callback, NULL, e); + if (r < 0) + return r; + + r = sd_bus_release_name_async(bus, NULL, name, NULL, NULL); + if (r < 0) + return r; + + return 0; +} + +int bus_event_loop_with_idle( + sd_event *e, + sd_bus *bus, + const char *name, + usec_t timeout, + check_idle_t check_idle, + void *userdata) { + bool exiting = false; + int r, code; + + assert(e); + assert(bus); + assert(name); + + for (;;) { + bool idle; + + r = sd_event_get_state(e); + if (r < 0) + return r; + if (r == SD_EVENT_FINISHED) + break; + + if (check_idle) + idle = check_idle(userdata); + else + idle = true; + + r = sd_event_run(e, exiting || !idle ? (uint64_t) -1 : timeout); + if (r < 0) + return r; + + if (r == 0 && !exiting && idle) { + + r = sd_bus_try_close(bus); + if (r == -EBUSY) + continue; + + /* Fallback for dbus1 connections: we + * unregister the name and wait for the + * response to come through for it */ + if (r == -EOPNOTSUPP) { + + /* Inform the service manager that we + * are going down, so that it will + * queue all further start requests, + * instead of assuming we are already + * running. */ + sd_notify(false, "STOPPING=1"); + + r = bus_async_unregister_and_exit(e, bus, name); + if (r < 0) + return r; + + exiting = true; + continue; + } + + if (r < 0) + return r; + + sd_event_exit(e, 0); + break; + } + } + + r = sd_event_get_exit_code(e, &code); + if (r < 0) + return r; + + return code; +} + +int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *rep = NULL; + int r, has_owner = 0; + + assert(c); + assert(name); + + r = sd_bus_call_method(c, + "org.freedesktop.DBus", + "/org/freedesktop/dbus", + "org.freedesktop.DBus", + "NameHasOwner", + error, + &rep, + "s", + name); + if (r < 0) + return r; + + r = sd_bus_message_read_basic(rep, 'b', &has_owner); + if (r < 0) + return sd_bus_error_set_errno(error, r); + + return has_owner; +} + +static int check_good_user(sd_bus_message *m, uid_t good_user) { + _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; + uid_t sender_uid; + int r; + + assert(m); + + if (good_user == UID_INVALID) + return 0; + + r = sd_bus_query_sender_creds(m, SD_BUS_CREDS_EUID, &creds); + if (r < 0) + return r; + + /* Don't trust augmented credentials for authorization */ + assert_return((sd_bus_creds_get_augmented_mask(creds) & SD_BUS_CREDS_EUID) == 0, -EPERM); + + r = sd_bus_creds_get_euid(creds, &sender_uid); + if (r < 0) + return r; + + return sender_uid == good_user; +} + +int bus_test_polkit( + sd_bus_message *call, + int capability, + const char *action, + const char **details, + uid_t good_user, + bool *_challenge, + sd_bus_error *e) { + + int r; + + assert(call); + assert(action); + + /* Tests non-interactively! */ + + r = check_good_user(call, good_user); + if (r != 0) + return r; + + r = sd_bus_query_sender_privilege(call, capability); + if (r < 0) + return r; + else if (r > 0) + return 1; +#if ENABLE_POLKIT + else { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *request = NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + int authorized = false, challenge = false; + const char *sender, **k, **v; + + sender = sd_bus_message_get_sender(call); + if (!sender) + return -EBADMSG; + + r = sd_bus_message_new_method_call( + call->bus, + &request, + "org.freedesktop.PolicyKit1", + "/org/freedesktop/PolicyKit1/Authority", + "org.freedesktop.PolicyKit1.Authority", + "CheckAuthorization"); + if (r < 0) + return r; + + r = sd_bus_message_append( + request, + "(sa{sv})s", + "system-bus-name", 1, "name", "s", sender, + action); + if (r < 0) + return r; + + r = sd_bus_message_open_container(request, 'a', "{ss}"); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(k, v, details) { + r = sd_bus_message_append(request, "{ss}", *k, *v); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(request); + if (r < 0) + return r; + + r = sd_bus_message_append(request, "us", 0, NULL); + if (r < 0) + return r; + + r = sd_bus_call(call->bus, request, 0, e, &reply); + if (r < 0) { + /* Treat no PK available as access denied */ + if (sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN)) { + sd_bus_error_free(e); + return -EACCES; + } + + return r; + } + + r = sd_bus_message_enter_container(reply, 'r', "bba{ss}"); + if (r < 0) + return r; + + r = sd_bus_message_read(reply, "bb", &authorized, &challenge); + if (r < 0) + return r; + + if (authorized) + return 1; + + if (_challenge) { + *_challenge = challenge; + return 0; + } + } +#endif + + return -EACCES; +} + +#if ENABLE_POLKIT + +typedef struct AsyncPolkitQuery { + sd_bus_message *request, *reply; + sd_bus_message_handler_t callback; + void *userdata; + sd_bus_slot *slot; + Hashmap *registry; +} AsyncPolkitQuery; + +static void async_polkit_query_free(AsyncPolkitQuery *q) { + + if (!q) + return; + + sd_bus_slot_unref(q->slot); + + if (q->registry && q->request) + hashmap_remove(q->registry, q->request); + + sd_bus_message_unref(q->request); + sd_bus_message_unref(q->reply); + + free(q); +} + +static int async_polkit_callback(sd_bus_message *reply, void *userdata, sd_bus_error *error) { + _cleanup_(sd_bus_error_free) sd_bus_error error_buffer = SD_BUS_ERROR_NULL; + AsyncPolkitQuery *q = userdata; + int r; + + assert(reply); + assert(q); + + q->slot = sd_bus_slot_unref(q->slot); + q->reply = sd_bus_message_ref(reply); + + r = sd_bus_message_rewind(q->request, true); + if (r < 0) { + r = sd_bus_reply_method_errno(q->request, r, NULL); + goto finish; + } + + r = q->callback(q->request, q->userdata, &error_buffer); + r = bus_maybe_reply_error(q->request, r, &error_buffer); + +finish: + async_polkit_query_free(q); + + return r; +} + +#endif + +int bus_verify_polkit_async( + sd_bus_message *call, + int capability, + const char *action, + const char **details, + bool interactive, + uid_t good_user, + Hashmap **registry, + sd_bus_error *error) { + +#if ENABLE_POLKIT + _cleanup_(sd_bus_message_unrefp) sd_bus_message *pk = NULL; + AsyncPolkitQuery *q; + const char *sender, **k, **v; + sd_bus_message_handler_t callback; + void *userdata; + int c; +#endif + int r; + + assert(call); + assert(action); + assert(registry); + + r = check_good_user(call, good_user); + if (r != 0) + return r; + +#if ENABLE_POLKIT + q = hashmap_get(*registry, call); + if (q) { + int authorized, challenge; + + /* This is the second invocation of this function, and + * there's already a response from polkit, let's + * process it */ + assert(q->reply); + + if (sd_bus_message_is_method_error(q->reply, NULL)) { + const sd_bus_error *e; + + /* Copy error from polkit reply */ + e = sd_bus_message_get_error(q->reply); + sd_bus_error_copy(error, e); + + /* Treat no PK available as access denied */ + if (sd_bus_error_has_name(e, SD_BUS_ERROR_SERVICE_UNKNOWN)) + return -EACCES; + + return -sd_bus_error_get_errno(e); + } + + r = sd_bus_message_enter_container(q->reply, 'r', "bba{ss}"); + if (r >= 0) + r = sd_bus_message_read(q->reply, "bb", &authorized, &challenge); + + if (r < 0) + return r; + + if (authorized) + return 1; + + if (challenge) + return sd_bus_error_set(error, SD_BUS_ERROR_INTERACTIVE_AUTHORIZATION_REQUIRED, "Interactive authentication required."); + + return -EACCES; + } +#endif + + r = sd_bus_query_sender_privilege(call, capability); + if (r < 0) + return r; + else if (r > 0) + return 1; + +#if ENABLE_POLKIT + if (sd_bus_get_current_message(call->bus) != call) + return -EINVAL; + + callback = sd_bus_get_current_handler(call->bus); + if (!callback) + return -EINVAL; + + userdata = sd_bus_get_current_userdata(call->bus); + + sender = sd_bus_message_get_sender(call); + if (!sender) + return -EBADMSG; + + c = sd_bus_message_get_allow_interactive_authorization(call); + if (c < 0) + return c; + if (c > 0) + interactive = true; + + r = hashmap_ensure_allocated(registry, NULL); + if (r < 0) + return r; + + r = sd_bus_message_new_method_call( + call->bus, + &pk, + "org.freedesktop.PolicyKit1", + "/org/freedesktop/PolicyKit1/Authority", + "org.freedesktop.PolicyKit1.Authority", + "CheckAuthorization"); + if (r < 0) + return r; + + r = sd_bus_message_append( + pk, + "(sa{sv})s", + "system-bus-name", 1, "name", "s", sender, + action); + if (r < 0) + return r; + + r = sd_bus_message_open_container(pk, 'a', "{ss}"); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(k, v, details) { + r = sd_bus_message_append(pk, "{ss}", *k, *v); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(pk); + if (r < 0) + return r; + + r = sd_bus_message_append(pk, "us", interactive, NULL); + if (r < 0) + return r; + + q = new0(AsyncPolkitQuery, 1); + if (!q) + return -ENOMEM; + + q->request = sd_bus_message_ref(call); + q->callback = callback; + q->userdata = userdata; + + r = hashmap_put(*registry, call, q); + if (r < 0) { + async_polkit_query_free(q); + return r; + } + + q->registry = *registry; + + r = sd_bus_call_async(call->bus, &q->slot, pk, async_polkit_callback, q, 0); + if (r < 0) { + async_polkit_query_free(q); + return r; + } + + return 0; +#endif + + return -EACCES; +} + +void bus_verify_polkit_async_registry_free(Hashmap *registry) { +#if ENABLE_POLKIT + hashmap_free_with_destructor(registry, async_polkit_query_free); +#endif +} + +int bus_check_peercred(sd_bus *c) { + struct ucred ucred; + int fd, r; + + assert(c); + + fd = sd_bus_get_fd(c); + if (fd < 0) + return fd; + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + if (ucred.uid != 0 && ucred.uid != geteuid()) + return -EPERM; + + return 1; +} + +int bus_connect_system_systemd(sd_bus **_bus) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + int r; + + assert(_bus); + + if (geteuid() != 0) + return sd_bus_default_system(_bus); + + /* If we are root then let's talk directly to the system + * instance, instead of going via the bus */ + + r = sd_bus_new(&bus); + if (r < 0) + return r; + + r = sd_bus_set_address(bus, "unix:path=/run/systemd/private"); + if (r < 0) + return r; + + r = sd_bus_start(bus); + if (r < 0) + return sd_bus_default_system(_bus); + + r = bus_check_peercred(bus); + if (r < 0) + return r; + + *_bus = TAKE_PTR(bus); + + return 0; +} + +int bus_connect_user_systemd(sd_bus **_bus) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + _cleanup_free_ char *ee = NULL; + const char *e; + int r; + + assert(_bus); + + e = secure_getenv("XDG_RUNTIME_DIR"); + if (!e) + return sd_bus_default_user(_bus); + + ee = bus_address_escape(e); + if (!ee) + return -ENOMEM; + + r = sd_bus_new(&bus); + if (r < 0) + return r; + + bus->address = strjoin("unix:path=", ee, "/systemd/private"); + if (!bus->address) + return -ENOMEM; + + r = sd_bus_start(bus); + if (r < 0) + return sd_bus_default_user(_bus); + + r = bus_check_peercred(bus); + if (r < 0) + return r; + + *_bus = TAKE_PTR(bus); + + return 0; +} + +int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) { + va_list ap; + int r; + + assert(name); + assert(fmt); + + if (expected_value) { + _cleanup_free_ char *s = NULL; + + va_start(ap, fmt); + r = vasprintf(&s, fmt, ap); + va_end(ap); + if (r < 0) + return -ENOMEM; + + if (streq_ptr(expected_value, s)) { + if (only_value) + puts(s); + else + printf("%s=%s\n", name, s); + } + + return 0; + } + + if (!only_value) + printf("%s=", name); + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + puts(""); + + return 0; +} + +static int bus_print_property(const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all) { + char type; + const char *contents; + int r; + + assert(name); + assert(m); + + r = sd_bus_message_peek_type(m, &type, &contents); + if (r < 0) + return r; + + switch (type) { + + case SD_BUS_TYPE_STRING: { + const char *s; + + r = sd_bus_message_read_basic(m, type, &s); + if (r < 0) + return r; + + if (all || !isempty(s)) { + bool good; + + /* This property has a single value, so we need to take + * care not to print a new line, everything else is OK. */ + good = !strchr(s, '\n'); + bus_print_property_value(name, expected_value, value, "%s", good ? s : "[unprintable]"); + } + + return 1; + } + + case SD_BUS_TYPE_BOOLEAN: { + int b; + + r = sd_bus_message_read_basic(m, type, &b); + if (r < 0) + return r; + + if (expected_value && parse_boolean(expected_value) != b) + return 1; + + bus_print_property_value(name, NULL, value, "%s", yes_no(b)); + return 1; + } + + case SD_BUS_TYPE_UINT64: { + uint64_t u; + + r = sd_bus_message_read_basic(m, type, &u); + if (r < 0) + return r; + + /* Yes, heuristics! But we can change this check + * should it turn out to not be sufficient */ + + if (endswith(name, "Timestamp") || + STR_IN_SET(name, "NextElapseUSecRealtime", "LastTriggerUSec", "TimeUSec", "RTCTimeUSec")) { + char timestamp[FORMAT_TIMESTAMP_MAX]; + const char *t; + + t = format_timestamp(timestamp, sizeof(timestamp), u); + if (t || all) + bus_print_property_value(name, expected_value, value, "%s", strempty(t)); + + } else if (strstr(name, "USec")) { + char timespan[FORMAT_TIMESPAN_MAX]; + + (void) format_timespan(timespan, sizeof(timespan), u, 0); + bus_print_property_value(name, expected_value, value, "%s", timespan); + + } else if (streq(name, "RestrictNamespaces")) { + _cleanup_free_ char *s = NULL; + const char *result; + + if ((u & NAMESPACE_FLAGS_ALL) == 0) + result = "yes"; + else if ((u & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL) + result = "no"; + else { + r = namespace_flags_to_string(u, &s); + if (r < 0) + return r; + + result = s; + } + + bus_print_property_value(name, expected_value, value, "%s", result); + + } else if (streq(name, "MountFlags")) { + const char *result; + + result = mount_propagation_flags_to_string(u); + if (!result) + return -EINVAL; + + bus_print_property_value(name, expected_value, value, "%s", result); + + } else if (STR_IN_SET(name, "CapabilityBoundingSet", "AmbientCapabilities")) { + _cleanup_free_ char *s = NULL; + + r = capability_set_to_string_alloc(u, &s); + if (r < 0) + return r; + + bus_print_property_value(name, expected_value, value, "%s", s); + + } else if ((STR_IN_SET(name, "CPUWeight", "StartupCPUWeight", "IOWeight", "StartupIOWeight") && u == CGROUP_WEIGHT_INVALID) || + (STR_IN_SET(name, "CPUShares", "StartupCPUShares") && u == CGROUP_CPU_SHARES_INVALID) || + (STR_IN_SET(name, "BlockIOWeight", "StartupBlockIOWeight") && u == CGROUP_BLKIO_WEIGHT_INVALID) || + (STR_IN_SET(name, "MemoryCurrent", "TasksCurrent") && u == (uint64_t) -1) || + (endswith(name, "NSec") && u == (uint64_t) -1)) + + bus_print_property_value(name, expected_value, value, "%s", "[not set]"); + + else if ((STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax", "MemorySwapMax", "MemoryLimit") && u == CGROUP_LIMIT_MAX) || + (STR_IN_SET(name, "TasksMax", "DefaultTasksMax") && u == (uint64_t) -1) || + (startswith(name, "Limit") && u == (uint64_t) -1) || + (startswith(name, "DefaultLimit") && u == (uint64_t) -1)) + + bus_print_property_value(name, expected_value, value, "%s", "infinity"); + else + bus_print_property_value(name, expected_value, value, "%"PRIu64, u); + + return 1; + } + + case SD_BUS_TYPE_INT64: { + int64_t i; + + r = sd_bus_message_read_basic(m, type, &i); + if (r < 0) + return r; + + bus_print_property_value(name, expected_value, value, "%"PRIi64, i); + return 1; + } + + case SD_BUS_TYPE_UINT32: { + uint32_t u; + + r = sd_bus_message_read_basic(m, type, &u); + if (r < 0) + return r; + + if (strstr(name, "UMask") || strstr(name, "Mode")) + bus_print_property_value(name, expected_value, value, "%04o", u); + + else if (streq(name, "UID")) { + if (u == UID_INVALID) + bus_print_property_value(name, expected_value, value, "%s", "[not set]"); + else + bus_print_property_value(name, expected_value, value, "%"PRIu32, u); + } else if (streq(name, "GID")) { + if (u == GID_INVALID) + bus_print_property_value(name, expected_value, value, "%s", "[not set]"); + else + bus_print_property_value(name, expected_value, value, "%"PRIu32, u); + } else + bus_print_property_value(name, expected_value, value, "%"PRIu32, u); + + return 1; + } + + case SD_BUS_TYPE_INT32: { + int32_t i; + + r = sd_bus_message_read_basic(m, type, &i); + if (r < 0) + return r; + + bus_print_property_value(name, expected_value, value, "%"PRIi32, i); + return 1; + } + + case SD_BUS_TYPE_DOUBLE: { + double d; + + r = sd_bus_message_read_basic(m, type, &d); + if (r < 0) + return r; + + bus_print_property_value(name, expected_value, value, "%g", d); + return 1; + } + + case SD_BUS_TYPE_ARRAY: + if (streq(contents, "s")) { + bool first = true; + const char *str; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, contents); + if (r < 0) + return r; + + while ((r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &str)) > 0) { + bool good; + + if (first && !value) + printf("%s=", name); + + /* This property has multiple space-separated values, so + * neither spaces nor newlines can be allowed in a value. */ + good = str[strcspn(str, " \n")] == '\0'; + + printf("%s%s", first ? "" : " ", good ? str : "[unprintable]"); + + first = false; + } + if (r < 0) + return r; + + if (first && all && !value) + printf("%s=", name); + if (!first || all) + puts(""); + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + return 1; + + } else if (streq(contents, "y")) { + const uint8_t *u; + size_t n; + + r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, (const void**) &u, &n); + if (r < 0) + return r; + + if (all || n > 0) { + unsigned i; + + if (!value) + printf("%s=", name); + + for (i = 0; i < n; i++) + printf("%02x", u[i]); + + puts(""); + } + + return 1; + + } else if (streq(contents, "u")) { + uint32_t *u; + size_t n; + + r = sd_bus_message_read_array(m, SD_BUS_TYPE_UINT32, (const void**) &u, &n); + if (r < 0) + return r; + + if (all || n > 0) { + unsigned i; + + if (!value) + printf("%s=", name); + + for (i = 0; i < n; i++) + printf("%08x", u[i]); + + puts(""); + } + + return 1; + } + + break; + } + + return 0; +} + +int bus_message_print_all_properties( + sd_bus_message *m, + bus_message_print_t func, + char **filter, + bool value, + bool all, + Set **found_properties) { + + int r; + + assert(m); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}"); + if (r < 0) + return r; + + while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) { + _cleanup_free_ char *name_with_equal = NULL; + const char *name, *contents, *expected_value = NULL; + + r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &name); + if (r < 0) + return r; + + if (found_properties) { + r = set_ensure_allocated(found_properties, &string_hash_ops); + if (r < 0) + return log_oom(); + + r = set_put(*found_properties, name); + if (r < 0 && r != -EEXIST) + return log_oom(); + } + + name_with_equal = strappend(name, "="); + if (!name_with_equal) + return log_oom(); + + if (!filter || strv_find(filter, name) || + (expected_value = strv_find_startswith(filter, name_with_equal))) { + r = sd_bus_message_peek_type(m, NULL, &contents); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents); + if (r < 0) + return r; + + if (func) + r = func(name, expected_value, m, value, all); + if (!func || r == 0) + r = bus_print_property(name, expected_value, m, value, all); + if (r < 0) + return r; + if (r == 0) { + if (all && !expected_value) + printf("%s=[unprintable]\n", name); + /* skip what we didn't read */ + r = sd_bus_message_skip(m, contents); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } else { + r = sd_bus_message_skip(m, "v"); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + + return 0; +} + +int bus_print_all_properties( + sd_bus *bus, + const char *dest, + const char *path, + bus_message_print_t func, + char **filter, + bool value, + bool all, + Set **found_properties) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + int r; + + assert(bus); + assert(path); + + r = sd_bus_call_method(bus, + dest, + path, + "org.freedesktop.DBus.Properties", + "GetAll", + &error, + &reply, + "s", ""); + if (r < 0) + return r; + + return bus_message_print_all_properties(reply, func, filter, value, all, found_properties); +} + +int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) { + sd_id128_t *p = userdata; + const void *v; + size_t n; + int r; + + r = sd_bus_message_read_array(m, SD_BUS_TYPE_BYTE, &v, &n); + if (r < 0) + return r; + + if (n == 0) + *p = SD_ID128_NULL; + else if (n == 16) + memcpy((*p).bytes, v, n); + else + return -EINVAL; + + return 0; +} + +static int map_basic(sd_bus *bus, const char *member, sd_bus_message *m, unsigned flags, sd_bus_error *error, void *userdata) { + char type; + int r; + + r = sd_bus_message_peek_type(m, &type, NULL); + if (r < 0) + return r; + + switch (type) { + + case SD_BUS_TYPE_STRING: { + const char **p = userdata; + const char *s; + + r = sd_bus_message_read_basic(m, type, &s); + if (r < 0) + return r; + + if (isempty(s)) + s = NULL; + + if (flags & BUS_MAP_STRDUP) + return free_and_strdup((char **) userdata, s); + + *p = s; + return 0; + } + + case SD_BUS_TYPE_ARRAY: { + _cleanup_strv_free_ char **l = NULL; + char ***p = userdata; + + r = bus_message_read_strv_extend(m, &l); + if (r < 0) + return r; + + return strv_free_and_replace(*p, l); + } + + case SD_BUS_TYPE_BOOLEAN: { + int b; + + r = sd_bus_message_read_basic(m, type, &b); + if (r < 0) + return r; + + if (flags & BUS_MAP_BOOLEAN_AS_BOOL) + *(bool*) userdata = b; + else + *(int*) userdata = b; + + return 0; + } + + case SD_BUS_TYPE_INT32: + case SD_BUS_TYPE_UINT32: { + uint32_t u, *p = userdata; + + r = sd_bus_message_read_basic(m, type, &u); + if (r < 0) + return r; + + *p = u; + return 0; + } + + case SD_BUS_TYPE_INT64: + case SD_BUS_TYPE_UINT64: { + uint64_t t, *p = userdata; + + r = sd_bus_message_read_basic(m, type, &t); + if (r < 0) + return r; + + *p = t; + return 0; + } + + case SD_BUS_TYPE_DOUBLE: { + double d, *p = userdata; + + r = sd_bus_message_read_basic(m, type, &d); + if (r < 0) + return r; + + *p = d; + return 0; + }} + + return -EOPNOTSUPP; +} + +int bus_message_map_all_properties( + sd_bus_message *m, + const struct bus_properties_map *map, + unsigned flags, + sd_bus_error *error, + void *userdata) { + + int r; + + assert(m); + assert(map); + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}"); + if (r < 0) + return r; + + while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) { + const struct bus_properties_map *prop; + const char *member; + const char *contents; + void *v; + unsigned i; + + r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member); + if (r < 0) + return r; + + for (i = 0, prop = NULL; map[i].member; i++) + if (streq(map[i].member, member)) { + prop = &map[i]; + break; + } + + if (prop) { + r = sd_bus_message_peek_type(m, NULL, &contents); + if (r < 0) + return r; + + r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents); + if (r < 0) + return r; + + v = (uint8_t *)userdata + prop->offset; + if (map[i].set) + r = prop->set(sd_bus_message_get_bus(m), member, m, error, v); + else + r = map_basic(sd_bus_message_get_bus(m), member, m, flags, error, v); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } else { + r = sd_bus_message_skip(m, "v"); + if (r < 0) + return r; + } + + r = sd_bus_message_exit_container(m); + if (r < 0) + return r; + } + if (r < 0) + return r; + + return sd_bus_message_exit_container(m); +} + +int bus_map_all_properties( + sd_bus *bus, + const char *destination, + const char *path, + const struct bus_properties_map *map, + unsigned flags, + sd_bus_error *error, + sd_bus_message **reply, + void *userdata) { + + _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL; + int r; + + assert(bus); + assert(destination); + assert(path); + assert(map); + assert(reply || (flags & BUS_MAP_STRDUP)); + + r = sd_bus_call_method( + bus, + destination, + path, + "org.freedesktop.DBus.Properties", + "GetAll", + error, + &m, + "s", ""); + if (r < 0) + return r; + + r = bus_message_map_all_properties(m, map, flags, error, userdata); + if (r < 0) + return r; + + if (reply) + *reply = sd_bus_message_ref(m); + + return r; +} + +int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **ret) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + int r; + + assert(transport >= 0); + assert(transport < _BUS_TRANSPORT_MAX); + assert(ret); + + assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL); + assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP); + + switch (transport) { + + case BUS_TRANSPORT_LOCAL: + if (user) + r = sd_bus_default_user(&bus); + else { + if (sd_booted() <= 0) { + /* Print a friendly message when the local system is actually not running systemd as PID 1. */ + log_error("System has not been booted with systemd as init system (PID 1). Can't operate."); + + return -EHOSTDOWN; + } + r = sd_bus_default_system(&bus); + } + break; + + case BUS_TRANSPORT_REMOTE: + r = sd_bus_open_system_remote(&bus, host); + break; + + case BUS_TRANSPORT_MACHINE: + r = sd_bus_open_system_machine(&bus, host); + break; + + default: + assert_not_reached("Hmm, unknown transport type."); + } + if (r < 0) + return r; + + r = sd_bus_set_exit_on_disconnect(bus, true); + if (r < 0) + return r; + + *ret = TAKE_PTR(bus); + + return 0; +} + +int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus) { + int r; + + assert(transport >= 0); + assert(transport < _BUS_TRANSPORT_MAX); + assert(bus); + + assert_return((transport == BUS_TRANSPORT_LOCAL) == !host, -EINVAL); + assert_return(transport == BUS_TRANSPORT_LOCAL || !user, -EOPNOTSUPP); + + switch (transport) { + + case BUS_TRANSPORT_LOCAL: + if (user) + r = bus_connect_user_systemd(bus); + else { + if (sd_booted() <= 0) + /* Print a friendly message when the local system is actually not running systemd as PID 1. */ + return log_error_errno(SYNTHETIC_ERRNO(EHOSTDOWN), + "System has not been booted with systemd as init system (PID 1). Can't operate."); + r = bus_connect_system_systemd(bus); + } + break; + + case BUS_TRANSPORT_REMOTE: + r = sd_bus_open_system_remote(bus, host); + break; + + case BUS_TRANSPORT_MACHINE: + r = sd_bus_open_system_machine(bus, host); + break; + + default: + assert_not_reached("Hmm, unknown transport type."); + } + + return r; +} + +int bus_property_get_bool( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + int b = *(bool*) userdata; + + return sd_bus_message_append_basic(reply, 'b', &b); +} + +int bus_property_set_bool( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *value, + void *userdata, + sd_bus_error *error) { + + int b, r; + + r = sd_bus_message_read(value, "b", &b); + if (r < 0) + return r; + + *(bool*) userdata = b; + return 0; +} + +int bus_property_get_id128( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + sd_id128_t *id = userdata; + + if (sd_id128_is_null(*id)) /* Add an empty array if the ID is zero */ + return sd_bus_message_append(reply, "ay", 0); + else + return sd_bus_message_append_array(reply, 'y', id->bytes, 16); +} + +#if __SIZEOF_SIZE_T__ != 8 +int bus_property_get_size( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t sz = *(size_t*) userdata; + + return sd_bus_message_append_basic(reply, 't', &sz); +} +#endif + +#if __SIZEOF_LONG__ != 8 +int bus_property_get_long( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + int64_t l = *(long*) userdata; + + return sd_bus_message_append_basic(reply, 'x', &l); +} + +int bus_property_get_ulong( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t ul = *(unsigned long*) userdata; + + return sd_bus_message_append_basic(reply, 't', &ul); +} +#endif + +int bus_log_parse_error(int r) { + return log_error_errno(r, "Failed to parse bus message: %m"); +} + +int bus_log_create_error(int r) { + return log_error_errno(r, "Failed to create bus message: %m"); +} + +/** + * bus_path_encode_unique() - encode unique object path + * @b: bus connection or NULL + * @prefix: object path prefix + * @sender_id: unique-name of client, or NULL + * @external_id: external ID to be chosen by client, or NULL + * @ret_path: storage for encoded object path pointer + * + * Whenever we provide a bus API that allows clients to create and manage + * server-side objects, we need to provide a unique name for these objects. If + * we let the server choose the name, we suffer from a race condition: If a + * client creates an object asynchronously, it cannot destroy that object until + * it received the method reply. It cannot know the name of the new object, + * thus, it cannot destroy it. Furthermore, it enforces a round-trip. + * + * Therefore, many APIs allow the client to choose the unique name for newly + * created objects. There're two problems to solve, though: + * 1) Object names are usually defined via dbus object paths, which are + * usually globally namespaced. Therefore, multiple clients must be able + * to choose unique object names without interference. + * 2) If multiple libraries share the same bus connection, they must be + * able to choose unique object names without interference. + * The first problem is solved easily by prefixing a name with the + * unique-bus-name of a connection. The server side must enforce this and + * reject any other name. The second problem is solved by providing unique + * suffixes from within sd-bus. + * + * This helper allows clients to create unique object-paths. It uses the + * template '/prefix/sender_id/external_id' and returns the new path in + * @ret_path (must be freed by the caller). + * If @sender_id is NULL, the unique-name of @b is used. If @external_id is + * NULL, this function allocates a unique suffix via @b (by requesting a new + * cookie). If both @sender_id and @external_id are given, @b can be passed as + * NULL. + * + * Returns: 0 on success, negative error code on failure. + */ +int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path) { + _cleanup_free_ char *sender_label = NULL, *external_label = NULL; + char external_buf[DECIMAL_STR_MAX(uint64_t)], *p; + int r; + + assert_return(b || (sender_id && external_id), -EINVAL); + assert_return(object_path_is_valid(prefix), -EINVAL); + assert_return(ret_path, -EINVAL); + + if (!sender_id) { + r = sd_bus_get_unique_name(b, &sender_id); + if (r < 0) + return r; + } + + if (!external_id) { + xsprintf(external_buf, "%"PRIu64, ++b->cookie); + external_id = external_buf; + } + + sender_label = bus_label_escape(sender_id); + if (!sender_label) + return -ENOMEM; + + external_label = bus_label_escape(external_id); + if (!external_label) + return -ENOMEM; + + p = strjoin(prefix, "/", sender_label, "/", external_label); + if (!p) + return -ENOMEM; + + *ret_path = p; + return 0; +} + +/** + * bus_path_decode_unique() - decode unique object path + * @path: object path to decode + * @prefix: object path prefix + * @ret_sender: output parameter for sender-id label + * @ret_external: output parameter for external-id label + * + * This does the reverse of bus_path_encode_unique() (see its description for + * details). Both trailing labels, sender-id and external-id, are unescaped and + * returned in the given output parameters (the caller must free them). + * + * Note that this function returns 0 if the path does not match the template + * (see bus_path_encode_unique()), 1 if it matched. + * + * Returns: Negative error code on failure, 0 if the given object path does not + * match the template (return parameters are set to NULL), 1 if it was + * parsed successfully (return parameters contain allocated labels). + */ +int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external) { + const char *p, *q; + char *sender, *external; + + assert(object_path_is_valid(path)); + assert(object_path_is_valid(prefix)); + assert(ret_sender); + assert(ret_external); + + p = object_path_startswith(path, prefix); + if (!p) { + *ret_sender = NULL; + *ret_external = NULL; + return 0; + } + + q = strchr(p, '/'); + if (!q) { + *ret_sender = NULL; + *ret_external = NULL; + return 0; + } + + sender = bus_label_unescape_n(p, q - p); + external = bus_label_unescape(q + 1); + if (!sender || !external) { + free(sender); + free(external); + return -ENOMEM; + } + + *ret_sender = sender; + *ret_external = external; + return 1; +} + +int bus_property_get_rlimit( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + const char *is_soft; + struct rlimit *rl; + uint64_t u; + rlim_t x; + + assert(bus); + assert(reply); + assert(userdata); + + is_soft = endswith(property, "Soft"); + + rl = *(struct rlimit**) userdata; + if (rl) + x = is_soft ? rl->rlim_cur : rl->rlim_max; + else { + struct rlimit buf = {}; + const char *s, *p; + int z; + + /* Chop off "Soft" suffix */ + s = is_soft ? strndupa(property, is_soft - property) : property; + + /* Skip over any prefix, such as "Default" */ + assert_se(p = strstr(s, "Limit")); + + z = rlimit_from_string(p + 5); + assert(z >= 0); + + (void) getrlimit(z, &buf); + x = is_soft ? buf.rlim_cur : buf.rlim_max; + } + + /* rlim_t might have different sizes, let's map RLIMIT_INFINITY to (uint64_t) -1, so that it is the same on all + * archs */ + u = x == RLIM_INFINITY ? (uint64_t) -1 : (uint64_t) x; + + return sd_bus_message_append(reply, "t", u); +} + +int bus_track_add_name_many(sd_bus_track *t, char **l) { + int r = 0; + char **i; + + assert(t); + + /* Continues adding after failure, and returns the first failure. */ + + STRV_FOREACH(i, l) { + int k; + + k = sd_bus_track_add_name(t, *i); + if (k < 0 && r >= 0) + r = k; + } + + return r; +} + +int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description) { + _cleanup_(sd_bus_close_unrefp) sd_bus *bus = NULL; + const char *e; + int r; + + assert(ret); + + /* Match like sd_bus_open_system(), but with the "watch_bind" feature and the Connected() signal turned on. */ + + r = sd_bus_new(&bus); + if (r < 0) + return r; + + if (description) { + r = sd_bus_set_description(bus, description); + if (r < 0) + return r; + } + + e = secure_getenv("DBUS_SYSTEM_BUS_ADDRESS"); + if (!e) + e = DEFAULT_SYSTEM_BUS_ADDRESS; + + r = sd_bus_set_address(bus, e); + if (r < 0) + return r; + + r = sd_bus_set_bus_client(bus, true); + if (r < 0) + return r; + + r = sd_bus_set_trusted(bus, true); + if (r < 0) + return r; + + r = sd_bus_negotiate_creds(bus, true, SD_BUS_CREDS_UID|SD_BUS_CREDS_EUID|SD_BUS_CREDS_EFFECTIVE_CAPS); + if (r < 0) + return r; + + r = sd_bus_set_watch_bind(bus, true); + if (r < 0) + return r; + + r = sd_bus_set_connected_signal(bus, true); + if (r < 0) + return r; + + r = sd_bus_start(bus); + if (r < 0) + return r; + + *ret = TAKE_PTR(bus); + + return 0; +} + +int bus_reply_pair_array(sd_bus_message *m, char **l) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; + char **k, **v; + int r; + + assert(m); + + /* Reply to the specified message with a message containing a dictionary put together from the specified + * strv */ + + r = sd_bus_message_new_method_return(m, &reply); + if (r < 0) + return r; + + r = sd_bus_message_open_container(reply, 'a', "{ss}"); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(k, v, l) { + r = sd_bus_message_append(reply, "{ss}", *k, *v); + if (r < 0) + return r; + } + + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + + return sd_bus_send(NULL, reply, NULL); +} diff --git a/src/shared/bus-util.h b/src/shared/bus-util.h new file mode 100644 index 0000000..71c248f --- /dev/null +++ b/src/shared/bus-util.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "sd-bus.h" +#include "sd-event.h" + +#include "hashmap.h" +#include "macro.h" +#include "string-util.h" + +typedef enum BusTransport { + BUS_TRANSPORT_LOCAL, + BUS_TRANSPORT_REMOTE, + BUS_TRANSPORT_MACHINE, + _BUS_TRANSPORT_MAX, + _BUS_TRANSPORT_INVALID = -1 +} BusTransport; + +typedef int (*bus_property_set_t) (sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata); + +struct bus_properties_map { + const char *member; + const char *signature; + bus_property_set_t set; + size_t offset; +}; + +enum { + BUS_MAP_STRDUP = 1 << 0, /* If set, each "s" message is duplicated. Thus, each pointer needs to be freed. */ + BUS_MAP_BOOLEAN_AS_BOOL = 1 << 1, /* If set, each "b" message is written to a bool pointer. If not set, "b" is written to a int pointer. */ +}; + +int bus_map_id128(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata); + +int bus_message_map_all_properties(sd_bus_message *m, const struct bus_properties_map *map, unsigned flags, sd_bus_error *error, void *userdata); +int bus_map_all_properties(sd_bus *bus, const char *destination, const char *path, const struct bus_properties_map *map, + unsigned flags, sd_bus_error *error, sd_bus_message **reply, void *userdata); + +int bus_async_unregister_and_exit(sd_event *e, sd_bus *bus, const char *name); + +typedef bool (*check_idle_t)(void *userdata); + +int bus_event_loop_with_idle(sd_event *e, sd_bus *bus, const char *name, usec_t timeout, check_idle_t check_idle, void *userdata); + +int bus_name_has_owner(sd_bus *c, const char *name, sd_bus_error *error); + +int bus_check_peercred(sd_bus *c); + +int bus_test_polkit(sd_bus_message *call, int capability, const char *action, const char **details, uid_t good_user, bool *_challenge, sd_bus_error *e); + +int bus_verify_polkit_async(sd_bus_message *call, int capability, const char *action, const char **details, bool interactive, uid_t good_user, Hashmap **registry, sd_bus_error *error); +void bus_verify_polkit_async_registry_free(Hashmap *registry); + +int bus_connect_system_systemd(sd_bus **_bus); +int bus_connect_user_systemd(sd_bus **_bus); + +int bus_connect_transport(BusTransport transport, const char *host, bool user, sd_bus **bus); +int bus_connect_transport_systemd(BusTransport transport, const char *host, bool user, sd_bus **bus); + +typedef int (*bus_message_print_t) (const char *name, const char *expected_value, sd_bus_message *m, bool value, bool all); + +int bus_print_property_value(const char *name, const char *expected_value, bool only_value, const char *fmt, ...) _printf_(4,5); +int bus_message_print_all_properties(sd_bus_message *m, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties); +int bus_print_all_properties(sd_bus *bus, const char *dest, const char *path, bus_message_print_t func, char **filter, bool value, bool all, Set **found_properties); + +int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error); +int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); + +#define bus_property_get_usec ((sd_bus_property_get_t) NULL) +#define bus_property_set_usec ((sd_bus_property_set_t) NULL) + +assert_cc(sizeof(int) == sizeof(int32_t)); +#define bus_property_get_int ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(unsigned) == sizeof(uint32_t)); +#define bus_property_get_unsigned ((sd_bus_property_get_t) NULL) + +/* On 64bit machines we can use the default serializer for size_t and + * friends, otherwise we need to cast this manually */ +#if __SIZEOF_SIZE_T__ == 8 +#define bus_property_get_size ((sd_bus_property_get_t) NULL) +#else +int bus_property_get_size(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +#endif + +#if __SIZEOF_LONG__ == 8 +#define bus_property_get_long ((sd_bus_property_get_t) NULL) +#define bus_property_get_ulong ((sd_bus_property_get_t) NULL) +#else +int bus_property_get_long(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +int bus_property_get_ulong(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); +#endif + +/* uid_t and friends on Linux 32 bit. This means we can just use the + * default serializer for 32bit unsigned, for serializing it, and map + * it to NULL here */ +assert_cc(sizeof(uid_t) == sizeof(uint32_t)); +#define bus_property_get_uid ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(gid_t) == sizeof(uint32_t)); +#define bus_property_get_gid ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(pid_t) == sizeof(uint32_t)); +#define bus_property_get_pid ((sd_bus_property_get_t) NULL) + +assert_cc(sizeof(mode_t) == sizeof(uint32_t)); +#define bus_property_get_mode ((sd_bus_property_get_t) NULL) + +int bus_log_parse_error(int r); +int bus_log_create_error(int r); + +#define BUS_DEFINE_PROPERTY_GET_GLOBAL(function, bus_type, val) \ + int function(sd_bus *bus, \ + const char *path, \ + const char *interface, \ + const char *property, \ + sd_bus_message *reply, \ + void *userdata, \ + sd_bus_error *error) { \ + \ + assert(bus); \ + assert(reply); \ + \ + return sd_bus_message_append(reply, bus_type, val); \ + } + +#define BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, get2) \ + int function(sd_bus *bus, \ + const char *path, \ + const char *interface, \ + const char *property, \ + sd_bus_message *reply, \ + void *userdata, \ + sd_bus_error *error) { \ + \ + data_type *data = userdata; \ + \ + assert(bus); \ + assert(reply); \ + assert(data); \ + \ + return sd_bus_message_append(reply, bus_type, \ + get2(get1(data))); \ + } + +#define ident(x) (x) +#define BUS_DEFINE_PROPERTY_GET(function, bus_type, data_type, get1) \ + BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, get1, ident) + +#define ref(x) (*(x)) +#define BUS_DEFINE_PROPERTY_GET_REF(function, bus_type, data_type, get) \ + BUS_DEFINE_PROPERTY_GET2(function, bus_type, data_type, ref, get) + +#define BUS_DEFINE_PROPERTY_GET_ENUM(function, name, type) \ + BUS_DEFINE_PROPERTY_GET_REF(function, "s", type, name##_to_string) + +#define BUS_PROPERTY_DUAL_TIMESTAMP(name, offset, flags) \ + SD_BUS_PROPERTY(name, "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, realtime), (flags)), \ + SD_BUS_PROPERTY(name "Monotonic", "t", bus_property_get_usec, (offset) + offsetof(struct dual_timestamp, monotonic), (flags)) + +int bus_path_encode_unique(sd_bus *b, const char *prefix, const char *sender_id, const char *external_id, char **ret_path); +int bus_path_decode_unique(const char *path, const char *prefix, char **ret_sender, char **ret_external); + +int bus_property_get_rlimit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); + +int bus_track_add_name_many(sd_bus_track *t, char **l); + +int bus_open_system_watch_bind_with_description(sd_bus **ret, const char *description); +static inline int bus_open_system_watch_bind(sd_bus **ret) { + return bus_open_system_watch_bind_with_description(ret, NULL); +} + +int bus_reply_pair_array(sd_bus_message *m, char **l); diff --git a/src/shared/calendarspec.c b/src/shared/calendarspec.c new file mode 100644 index 0000000..dafc09e --- /dev/null +++ b/src/shared/calendarspec.c @@ -0,0 +1,1370 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <alloca.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <time.h> + +#include "alloc-util.h" +#include "calendarspec.h" +#include "fileio.h" +#include "macro.h" +#include "parse-util.h" +#include "process-util.h" +#include "string-util.h" +#include "time-util.h" + +#define BITS_WEEKDAYS 127 +#define MIN_YEAR 1970 +#define MAX_YEAR 2199 + +/* An arbitrary limit on the length of the chains of components. We don't want to + * build a very long linked list, which would be slow to iterate over and might cause + * our stack to overflow. It's unlikely that legitimate uses require more than a few + * linked compenents anyway. */ +#define CALENDARSPEC_COMPONENTS_MAX 240 + +static void free_chain(CalendarComponent *c) { + CalendarComponent *n; + + while (c) { + n = c->next; + free(c); + c = n; + } +} + +CalendarSpec* calendar_spec_free(CalendarSpec *c) { + + if (!c) + return NULL; + + free_chain(c->year); + free_chain(c->month); + free_chain(c->day); + free_chain(c->hour); + free_chain(c->minute); + free_chain(c->microsecond); + free(c->timezone); + + return mfree(c); +} + +static int component_compare(CalendarComponent * const *a, CalendarComponent * const *b) { + int r; + + r = CMP((*a)->start, (*b)->start); + if (r != 0) + return r; + + r = CMP((*a)->stop, (*b)->stop); + if (r != 0) + return r; + + return CMP((*a)->repeat, (*b)->repeat); +} + +static void normalize_chain(CalendarComponent **c) { + CalendarComponent **b, *i, **j, *next; + size_t n = 0, k; + + assert(c); + + for (i = *c; i; i = i->next) { + n++; + + /* + * While we're counting the chain, also normalize `stop` + * so the length of the range is a multiple of `repeat` + */ + if (i->stop > i->start && i->repeat > 0) + i->stop -= (i->stop - i->start) % i->repeat; + + } + + if (n <= 1) + return; + + j = b = newa(CalendarComponent*, n); + for (i = *c; i; i = i->next) + *(j++) = i; + + typesafe_qsort(b, n, component_compare); + + b[n-1]->next = NULL; + next = b[n-1]; + + /* Drop non-unique entries */ + for (k = n-1; k > 0; k--) { + if (component_compare(&b[k-1], &next) == 0) { + free(b[k-1]); + continue; + } + + b[k-1]->next = next; + next = b[k-1]; + } + + *c = next; +} + +static void fix_year(CalendarComponent *c) { + /* Turns 12 → 2012, 89 → 1989 */ + + while (c) { + if (c->start >= 0 && c->start < 70) + c->start += 2000; + + if (c->stop >= 0 && c->stop < 70) + c->stop += 2000; + + if (c->start >= 70 && c->start < 100) + c->start += 1900; + + if (c->stop >= 70 && c->stop < 100) + c->stop += 1900; + + c = c->next; + } +} + +int calendar_spec_normalize(CalendarSpec *c) { + assert(c); + + if (streq_ptr(c->timezone, "UTC")) { + c->utc = true; + c->timezone = mfree(c->timezone); + } + + if (c->weekdays_bits <= 0 || c->weekdays_bits >= BITS_WEEKDAYS) + c->weekdays_bits = -1; + + if (c->end_of_month && !c->day) + c->end_of_month = false; + + fix_year(c->year); + + normalize_chain(&c->year); + normalize_chain(&c->month); + normalize_chain(&c->day); + normalize_chain(&c->hour); + normalize_chain(&c->minute); + normalize_chain(&c->microsecond); + + return 0; +} + +_pure_ static bool chain_valid(CalendarComponent *c, int from, int to, bool end_of_month) { + assert(to >= from); + + if (!c) + return true; + + /* Forbid dates more than 28 days from the end of the month */ + if (end_of_month) + to -= 3; + + if (c->start < from || c->start > to) + return false; + + /* Avoid overly large values that could cause overflow */ + if (c->repeat > to - from) + return false; + + /* + * c->repeat must be short enough so at least one repetition may + * occur before the end of the interval. For dates scheduled + * relative to the end of the month, c->start and c->stop + * correspond to the Nth last day of the month. + */ + if (c->stop >= 0) { + if (c->stop < from || c ->stop > to) + return false; + + if (c->start + c->repeat > c->stop) + return false; + } else { + if (end_of_month && c->start - c->repeat < from) + return false; + + if (!end_of_month && c->start + c->repeat > to) + return false; + } + + if (c->next) + return chain_valid(c->next, from, to, end_of_month); + + return true; +} + +_pure_ bool calendar_spec_valid(CalendarSpec *c) { + assert(c); + + if (c->weekdays_bits > BITS_WEEKDAYS) + return false; + + if (!chain_valid(c->year, MIN_YEAR, MAX_YEAR, false)) + return false; + + if (!chain_valid(c->month, 1, 12, false)) + return false; + + if (!chain_valid(c->day, 1, 31, c->end_of_month)) + return false; + + if (!chain_valid(c->hour, 0, 23, false)) + return false; + + if (!chain_valid(c->minute, 0, 59, false)) + return false; + + if (!chain_valid(c->microsecond, 0, 60*USEC_PER_SEC-1, false)) + return false; + + return true; +} + +static void format_weekdays(FILE *f, const CalendarSpec *c) { + static const char *const days[] = { + "Mon", + "Tue", + "Wed", + "Thu", + "Fri", + "Sat", + "Sun" + }; + + int l, x; + bool need_comma = false; + + assert(f); + assert(c); + assert(c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS); + + for (x = 0, l = -1; x < (int) ELEMENTSOF(days); x++) { + + if (c->weekdays_bits & (1 << x)) { + + if (l < 0) { + if (need_comma) + fputc(',', f); + else + need_comma = true; + + fputs(days[x], f); + l = x; + } + + } else if (l >= 0) { + + if (x > l + 1) { + fputs(x > l + 2 ? ".." : ",", f); + fputs(days[x-1], f); + } + + l = -1; + } + } + + if (l >= 0 && x > l + 1) { + fputs(x > l + 2 ? ".." : ",", f); + fputs(days[x-1], f); + } +} + +static void format_chain(FILE *f, int space, const CalendarComponent *c, bool usec) { + int d = usec ? (int) USEC_PER_SEC : 1; + + assert(f); + + if (!c) { + fputc('*', f); + return; + } + + if (usec && c->start == 0 && c->repeat == USEC_PER_SEC && !c->next) { + fputc('*', f); + return; + } + + assert(c->start >= 0); + + fprintf(f, "%0*i", space, c->start / d); + if (c->start % d > 0) + fprintf(f, ".%06i", c->start % d); + + if (c->stop > 0) + fprintf(f, "..%0*i", space, c->stop / d); + if (c->stop % d > 0) + fprintf(f, ".%06i", c->stop % d); + + if (c->repeat > 0 && !(c->stop > 0 && c->repeat == d)) + fprintf(f, "/%i", c->repeat / d); + if (c->repeat % d > 0) + fprintf(f, ".%06i", c->repeat % d); + + if (c->next) { + fputc(',', f); + format_chain(f, space, c->next, usec); + } +} + +int calendar_spec_to_string(const CalendarSpec *c, char **p) { + char *buf = NULL; + size_t sz = 0; + FILE *f; + int r; + + assert(c); + assert(p); + + f = open_memstream(&buf, &sz); + if (!f) + return -ENOMEM; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + if (c->weekdays_bits > 0 && c->weekdays_bits <= BITS_WEEKDAYS) { + format_weekdays(f, c); + fputc(' ', f); + } + + format_chain(f, 4, c->year, false); + fputc('-', f); + format_chain(f, 2, c->month, false); + fputc(c->end_of_month ? '~' : '-', f); + format_chain(f, 2, c->day, false); + fputc(' ', f); + format_chain(f, 2, c->hour, false); + fputc(':', f); + format_chain(f, 2, c->minute, false); + fputc(':', f); + format_chain(f, 2, c->microsecond, true); + + if (c->utc) + fputs(" UTC", f); + else if (c->timezone != NULL) { + fputc(' ', f); + fputs(c->timezone, f); + } else if (IN_SET(c->dst, 0, 1)) { + + /* If daylight saving is explicitly on or off, let's show the used timezone. */ + + tzset(); + + if (!isempty(tzname[c->dst])) { + fputc(' ', f); + fputs(tzname[c->dst], f); + } + } + + r = fflush_and_check(f); + if (r < 0) { + free(buf); + fclose(f); + return r; + } + + fclose(f); + + *p = buf; + return 0; +} + +static int parse_weekdays(const char **p, CalendarSpec *c) { + static const struct { + const char *name; + const int nr; + } day_nr[] = { + { "Monday", 0 }, + { "Mon", 0 }, + { "Tuesday", 1 }, + { "Tue", 1 }, + { "Wednesday", 2 }, + { "Wed", 2 }, + { "Thursday", 3 }, + { "Thu", 3 }, + { "Friday", 4 }, + { "Fri", 4 }, + { "Saturday", 5 }, + { "Sat", 5 }, + { "Sunday", 6 }, + { "Sun", 6 } + }; + + int l = -1; + bool first = true; + + assert(p); + assert(*p); + assert(c); + + for (;;) { + size_t i; + + for (i = 0; i < ELEMENTSOF(day_nr); i++) { + size_t skip; + + if (!startswith_no_case(*p, day_nr[i].name)) + continue; + + skip = strlen(day_nr[i].name); + + if (!IN_SET((*p)[skip], 0, '-', '.', ',', ' ')) + return -EINVAL; + + c->weekdays_bits |= 1 << day_nr[i].nr; + + if (l >= 0) { + int j; + + if (l > day_nr[i].nr) + return -EINVAL; + + for (j = l + 1; j < day_nr[i].nr; j++) + c->weekdays_bits |= 1 << j; + } + + *p += skip; + break; + } + + /* Couldn't find this prefix, so let's assume the + weekday was not specified and let's continue with + the date */ + if (i >= ELEMENTSOF(day_nr)) + return first ? 0 : -EINVAL; + + /* We reached the end of the string */ + if (**p == 0) + return 0; + + /* We reached the end of the weekday spec part */ + if (**p == ' ') { + *p += strspn(*p, " "); + return 0; + } + + if (**p == '.') { + if (l >= 0) + return -EINVAL; + + if ((*p)[1] != '.') + return -EINVAL; + + l = day_nr[i].nr; + *p += 2; + + /* Support ranges with "-" for backwards compatibility */ + } else if (**p == '-') { + if (l >= 0) + return -EINVAL; + + l = day_nr[i].nr; + *p += 1; + + } else if (**p == ',') { + l = -1; + *p += 1; + } + + /* Allow a trailing comma but not an open range */ + if (IN_SET(**p, 0, ' ')) { + *p += strspn(*p, " "); + return l < 0 ? 0 : -EINVAL; + } + + first = false; + } +} + +static int parse_one_number(const char *p, const char **e, unsigned long *ret) { + char *ee = NULL; + unsigned long value; + + errno = 0; + value = strtoul(p, &ee, 10); + if (errno > 0) + return -errno; + if (ee == p) + return -EINVAL; + + *ret = value; + *e = ee; + return 0; +} + +static int parse_component_decimal(const char **p, bool usec, int *res) { + unsigned long value; + const char *e = NULL; + int r; + + if (!isdigit(**p)) + return -EINVAL; + + r = parse_one_number(*p, &e, &value); + if (r < 0) + return r; + + if (usec) { + if (value * USEC_PER_SEC / USEC_PER_SEC != value) + return -ERANGE; + + value *= USEC_PER_SEC; + + /* One "." is a decimal point, but ".." is a range separator */ + if (e[0] == '.' && e[1] != '.') { + unsigned add; + + e++; + r = parse_fractional_part_u(&e, 6, &add); + if (r < 0) + return r; + + if (add + value < value) + return -ERANGE; + value += add; + } + } + + if (value > INT_MAX) + return -ERANGE; + + *p = e; + *res = value; + + return 0; +} + +static int const_chain(int value, CalendarComponent **c) { + CalendarComponent *cc = NULL; + + assert(c); + + cc = new0(CalendarComponent, 1); + if (!cc) + return -ENOMEM; + + cc->start = value; + cc->stop = -1; + cc->repeat = 0; + cc->next = *c; + + *c = cc; + + return 0; +} + +static int calendarspec_from_time_t(CalendarSpec *c, time_t time) { + struct tm tm; + CalendarComponent *year = NULL, *month = NULL, *day = NULL, *hour = NULL, *minute = NULL, *us = NULL; + int r; + + if (!gmtime_r(&time, &tm)) + return -ERANGE; + + r = const_chain(tm.tm_year + 1900, &year); + if (r < 0) + return r; + + r = const_chain(tm.tm_mon + 1, &month); + if (r < 0) + return r; + + r = const_chain(tm.tm_mday, &day); + if (r < 0) + return r; + + r = const_chain(tm.tm_hour, &hour); + if (r < 0) + return r; + + r = const_chain(tm.tm_min, &minute); + if (r < 0) + return r; + + r = const_chain(tm.tm_sec * USEC_PER_SEC, &us); + if (r < 0) + return r; + + c->utc = true; + c->year = year; + c->month = month; + c->day = day; + c->hour = hour; + c->minute = minute; + c->microsecond = us; + return 0; +} + +static int prepend_component(const char **p, bool usec, unsigned nesting, CalendarComponent **c) { + int r, start, stop = -1, repeat = 0; + CalendarComponent *cc; + const char *e = *p; + + assert(p); + assert(c); + + if (nesting > CALENDARSPEC_COMPONENTS_MAX) + return -ENOBUFS; + + r = parse_component_decimal(&e, usec, &start); + if (r < 0) + return r; + + if (e[0] == '.' && e[1] == '.') { + e += 2; + r = parse_component_decimal(&e, usec, &stop); + if (r < 0) + return r; + + repeat = usec ? USEC_PER_SEC : 1; + } + + if (*e == '/') { + e++; + r = parse_component_decimal(&e, usec, &repeat); + if (r < 0) + return r; + + if (repeat == 0) + return -ERANGE; + } + + if (!IN_SET(*e, 0, ' ', ',', '-', '~', ':')) + return -EINVAL; + + cc = new0(CalendarComponent, 1); + if (!cc) + return -ENOMEM; + + cc->start = start; + cc->stop = stop; + cc->repeat = repeat; + cc->next = *c; + + *p = e; + *c = cc; + + if (*e ==',') { + *p += 1; + return prepend_component(p, usec, nesting + 1, c); + } + + return 0; +} + +static int parse_chain(const char **p, bool usec, CalendarComponent **c) { + const char *t; + CalendarComponent *cc = NULL; + int r; + + assert(p); + assert(c); + + t = *p; + + if (t[0] == '*') { + if (usec) { + r = const_chain(0, c); + if (r < 0) + return r; + (*c)->repeat = USEC_PER_SEC; + } else + *c = NULL; + + *p = t + 1; + return 0; + } + + r = prepend_component(&t, usec, 0, &cc); + if (r < 0) { + free_chain(cc); + return r; + } + + *p = t; + *c = cc; + return 0; +} + +static int parse_date(const char **p, CalendarSpec *c) { + const char *t; + int r; + CalendarComponent *first, *second, *third; + + assert(p); + assert(*p); + assert(c); + + t = *p; + + if (*t == 0) + return 0; + + /* @TIMESTAMP — UNIX time in seconds since the epoch */ + if (*t == '@') { + unsigned long value; + time_t time; + + r = parse_one_number(t + 1, &t, &value); + if (r < 0) + return r; + + time = value; + if ((unsigned long) time != value) + return -ERANGE; + + r = calendarspec_from_time_t(c, time); + if (r < 0) + return r; + + *p = t; + return 1; /* finito, don't parse H:M:S after that */ + } + + r = parse_chain(&t, false, &first); + if (r < 0) + return r; + + /* Already the end? A ':' as separator? In that case this was a time, not a date */ + if (IN_SET(*t, 0, ':')) { + free_chain(first); + return 0; + } + + if (*t == '~') + c->end_of_month = true; + else if (*t != '-') { + free_chain(first); + return -EINVAL; + } + + t++; + r = parse_chain(&t, false, &second); + if (r < 0) { + free_chain(first); + return r; + } + + /* Got two parts, hence it's month and day */ + if (IN_SET(*t, 0, ' ')) { + *p = t + strspn(t, " "); + c->month = first; + c->day = second; + return 0; + } else if (c->end_of_month) { + free_chain(first); + free_chain(second); + return -EINVAL; + } + + if (*t == '~') + c->end_of_month = true; + else if (*t != '-') { + free_chain(first); + free_chain(second); + return -EINVAL; + } + + t++; + r = parse_chain(&t, false, &third); + if (r < 0) { + free_chain(first); + free_chain(second); + return r; + } + + /* Got three parts, hence it is year, month and day */ + if (IN_SET(*t, 0, ' ')) { + *p = t + strspn(t, " "); + c->year = first; + c->month = second; + c->day = third; + return 0; + } + + free_chain(first); + free_chain(second); + free_chain(third); + return -EINVAL; +} + +static int parse_calendar_time(const char **p, CalendarSpec *c) { + CalendarComponent *h = NULL, *m = NULL, *s = NULL; + const char *t; + int r; + + assert(p); + assert(*p); + assert(c); + + t = *p; + + /* If no time is specified at all, then this means 00:00:00 */ + if (*t == 0) + goto null_hour; + + r = parse_chain(&t, false, &h); + if (r < 0) + goto fail; + + if (*t != ':') { + r = -EINVAL; + goto fail; + } + + t++; + r = parse_chain(&t, false, &m); + if (r < 0) + goto fail; + + /* Already at the end? Then it's hours and minutes, and seconds are 0 */ + if (*t == 0) + goto null_second; + + if (*t != ':') { + r = -EINVAL; + goto fail; + } + + t++; + r = parse_chain(&t, true, &s); + if (r < 0) + goto fail; + + /* At the end? Then it's hours, minutes and seconds */ + if (*t == 0) + goto finish; + + r = -EINVAL; + goto fail; + +null_hour: + r = const_chain(0, &h); + if (r < 0) + goto fail; + + r = const_chain(0, &m); + if (r < 0) + goto fail; + +null_second: + r = const_chain(0, &s); + if (r < 0) + goto fail; + +finish: + *p = t; + c->hour = h; + c->minute = m; + c->microsecond = s; + + return 0; + +fail: + free_chain(h); + free_chain(m); + free_chain(s); + return r; +} + +int calendar_spec_from_string(const char *p, CalendarSpec **spec) { + const char *utc; + _cleanup_(calendar_spec_freep) CalendarSpec *c = NULL; + int r; + + assert(p); + assert(spec); + + c = new0(CalendarSpec, 1); + if (!c) + return -ENOMEM; + c->dst = -1; + c->timezone = NULL; + + utc = endswith_no_case(p, " UTC"); + if (utc) { + c->utc = true; + p = strndupa(p, utc - p); + } else { + const char *e = NULL; + int j; + + tzset(); + + /* Check if the local timezone was specified? */ + for (j = 0; j <= 1; j++) { + if (isempty(tzname[j])) + continue; + + e = endswith_no_case(p, tzname[j]); + if (!e) + continue; + if (e == p) + continue; + if (e[-1] != ' ') + continue; + + break; + } + + /* Found one of the two timezones specified? */ + if (IN_SET(j, 0, 1)) { + p = strndupa(p, e - p - 1); + c->dst = j; + } else { + const char *last_space; + + last_space = strrchr(p, ' '); + if (last_space != NULL && timezone_is_valid(last_space + 1, LOG_DEBUG)) { + c->timezone = strdup(last_space + 1); + if (!c->timezone) + return -ENOMEM; + + p = strndupa(p, last_space - p); + } + } + } + + if (isempty(p)) + return -EINVAL; + + if (strcaseeq(p, "minutely")) { + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "hourly")) { + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "daily")) { + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "monthly")) { + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "annually") || + strcaseeq(p, "yearly") || + strcaseeq(p, "anually") /* backwards compatibility */ ) { + + r = const_chain(1, &c->month); + if (r < 0) + return r; + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "weekly")) { + + c->weekdays_bits = 1; + + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "quarterly")) { + + r = const_chain(1, &c->month); + if (r < 0) + return r; + r = const_chain(4, &c->month); + if (r < 0) + return r; + r = const_chain(7, &c->month); + if (r < 0) + return r; + r = const_chain(10, &c->month); + if (r < 0) + return r; + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else if (strcaseeq(p, "biannually") || + strcaseeq(p, "bi-annually") || + strcaseeq(p, "semiannually") || + strcaseeq(p, "semi-annually")) { + + r = const_chain(1, &c->month); + if (r < 0) + return r; + r = const_chain(7, &c->month); + if (r < 0) + return r; + r = const_chain(1, &c->day); + if (r < 0) + return r; + r = const_chain(0, &c->hour); + if (r < 0) + return r; + r = const_chain(0, &c->minute); + if (r < 0) + return r; + r = const_chain(0, &c->microsecond); + if (r < 0) + return r; + + } else { + r = parse_weekdays(&p, c); + if (r < 0) + return r; + + r = parse_date(&p, c); + if (r < 0) + return r; + + if (r == 0) { + r = parse_calendar_time(&p, c); + if (r < 0) + return r; + } + + if (*p != 0) + return -EINVAL; + } + + r = calendar_spec_normalize(c); + if (r < 0) + return r; + + if (!calendar_spec_valid(c)) + return -EINVAL; + + *spec = TAKE_PTR(c); + return 0; +} + +static int find_end_of_month(struct tm *tm, bool utc, int day) { + struct tm t = *tm; + + t.tm_mon++; + t.tm_mday = 1 - day; + + if (mktime_or_timegm(&t, utc) < 0 || + t.tm_mon != tm->tm_mon) + return -1; + + return t.tm_mday; +} + +static int find_matching_component(const CalendarSpec *spec, const CalendarComponent *c, + struct tm *tm, int *val) { + const CalendarComponent *p = c; + int start, stop, d = -1; + bool d_set = false; + int r; + + assert(val); + + if (!c) + return 0; + + while (c) { + start = c->start; + stop = c->stop; + + if (spec->end_of_month && p == spec->day) { + start = find_end_of_month(tm, spec->utc, start); + stop = find_end_of_month(tm, spec->utc, stop); + + if (stop > 0) + SWAP_TWO(start, stop); + } + + if (start >= *val) { + + if (!d_set || start < d) { + d = start; + d_set = true; + } + + } else if (c->repeat > 0) { + int k; + + k = start + c->repeat * DIV_ROUND_UP(*val - start, c->repeat); + + if ((!d_set || k < d) && (stop < 0 || k <= stop)) { + d = k; + d_set = true; + } + } + + c = c->next; + } + + if (!d_set) + return -ENOENT; + + r = *val != d; + *val = d; + return r; +} + +static bool tm_out_of_bounds(const struct tm *tm, bool utc) { + struct tm t; + assert(tm); + + t = *tm; + + if (mktime_or_timegm(&t, utc) < 0) + return true; + + /* + * Set an upper bound on the year so impossible dates like "*-02-31" + * don't cause find_next() to loop forever. tm_year contains years + * since 1900, so adjust it accordingly. + */ + if (tm->tm_year + 1900 > MAX_YEAR) + return true; + + /* Did any normalization take place? If so, it was out of bounds before */ + return + t.tm_year != tm->tm_year || + t.tm_mon != tm->tm_mon || + t.tm_mday != tm->tm_mday || + t.tm_hour != tm->tm_hour || + t.tm_min != tm->tm_min || + t.tm_sec != tm->tm_sec; +} + +static bool matches_weekday(int weekdays_bits, const struct tm *tm, bool utc) { + struct tm t; + int k; + + if (weekdays_bits < 0 || weekdays_bits >= BITS_WEEKDAYS) + return true; + + t = *tm; + if (mktime_or_timegm(&t, utc) < 0) + return false; + + k = t.tm_wday == 0 ? 6 : t.tm_wday - 1; + return (weekdays_bits & (1 << k)); +} + +static int find_next(const CalendarSpec *spec, struct tm *tm, usec_t *usec) { + struct tm c; + int tm_usec; + int r; + + assert(spec); + assert(tm); + + c = *tm; + tm_usec = *usec; + + for (;;) { + /* Normalize the current date */ + (void) mktime_or_timegm(&c, spec->utc); + c.tm_isdst = spec->dst; + + c.tm_year += 1900; + r = find_matching_component(spec, spec->year, &c, &c.tm_year); + c.tm_year -= 1900; + + if (r > 0) { + c.tm_mon = 0; + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + } + if (r < 0) + return r; + if (tm_out_of_bounds(&c, spec->utc)) + return -ENOENT; + + c.tm_mon += 1; + r = find_matching_component(spec, spec->month, &c, &c.tm_mon); + c.tm_mon -= 1; + + if (r > 0) { + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + } + if (r < 0 || tm_out_of_bounds(&c, spec->utc)) { + c.tm_year++; + c.tm_mon = 0; + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + + r = find_matching_component(spec, spec->day, &c, &c.tm_mday); + if (r > 0) + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + if (r < 0 || tm_out_of_bounds(&c, spec->utc)) { + c.tm_mon++; + c.tm_mday = 1; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + + if (!matches_weekday(spec->weekdays_bits, &c, spec->utc)) { + c.tm_mday++; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + + r = find_matching_component(spec, spec->hour, &c, &c.tm_hour); + if (r > 0) + c.tm_min = c.tm_sec = tm_usec = 0; + if (r < 0 || tm_out_of_bounds(&c, spec->utc)) { + c.tm_mday++; + c.tm_hour = c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + + r = find_matching_component(spec, spec->minute, &c, &c.tm_min); + if (r > 0) + c.tm_sec = tm_usec = 0; + if (r < 0 || tm_out_of_bounds(&c, spec->utc)) { + c.tm_hour++; + c.tm_min = c.tm_sec = tm_usec = 0; + continue; + } + + c.tm_sec = c.tm_sec * USEC_PER_SEC + tm_usec; + r = find_matching_component(spec, spec->microsecond, &c, &c.tm_sec); + tm_usec = c.tm_sec % USEC_PER_SEC; + c.tm_sec /= USEC_PER_SEC; + + if (r < 0 || tm_out_of_bounds(&c, spec->utc)) { + c.tm_min++; + c.tm_sec = tm_usec = 0; + continue; + } + + *tm = c; + *usec = tm_usec; + return 0; + } +} + +static int calendar_spec_next_usec_impl(const CalendarSpec *spec, usec_t usec, usec_t *next) { + struct tm tm; + time_t t; + int r; + usec_t tm_usec; + + assert(spec); + assert(next); + + if (usec > USEC_TIMESTAMP_FORMATTABLE_MAX) + return -EINVAL; + + usec++; + t = (time_t) (usec / USEC_PER_SEC); + assert_se(localtime_or_gmtime_r(&t, &tm, spec->utc)); + tm_usec = usec % USEC_PER_SEC; + + r = find_next(spec, &tm, &tm_usec); + if (r < 0) + return r; + + t = mktime_or_timegm(&tm, spec->utc); + if (t < 0) + return -EINVAL; + + *next = (usec_t) t * USEC_PER_SEC + tm_usec; + return 0; +} + +typedef struct SpecNextResult { + usec_t next; + int return_value; +} SpecNextResult; + +int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next) { + SpecNextResult *shared, tmp; + int r; + + if (isempty(spec->timezone)) + return calendar_spec_next_usec_impl(spec, usec, next); + + shared = mmap(NULL, sizeof *shared, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if (shared == MAP_FAILED) + return negative_errno(); + + r = safe_fork("(sd-calendar)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_WAIT, NULL); + if (r < 0) { + (void) munmap(shared, sizeof *shared); + return r; + } + if (r == 0) { + if (setenv("TZ", spec->timezone, 1) != 0) { + shared->return_value = negative_errno(); + _exit(EXIT_FAILURE); + } + + tzset(); + + shared->return_value = calendar_spec_next_usec_impl(spec, usec, &shared->next); + + _exit(EXIT_SUCCESS); + } + + tmp = *shared; + if (munmap(shared, sizeof *shared) < 0) + return negative_errno(); + + if (tmp.return_value == 0) + *next = tmp.next; + + return tmp.return_value; +} diff --git a/src/shared/calendarspec.h b/src/shared/calendarspec.h new file mode 100644 index 0000000..3bf8a39 --- /dev/null +++ b/src/shared/calendarspec.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* A structure for specifying (possibly repetitive) points in calendar + * time, a la cron */ + +#include <stdbool.h> + +#include "time-util.h" +#include "util.h" + +typedef struct CalendarComponent { + int start; + int stop; + int repeat; + + struct CalendarComponent *next; +} CalendarComponent; + +typedef struct CalendarSpec { + int weekdays_bits; + bool end_of_month; + bool utc; + int dst; + char *timezone; + + CalendarComponent *year; + CalendarComponent *month; + CalendarComponent *day; + + CalendarComponent *hour; + CalendarComponent *minute; + CalendarComponent *microsecond; +} CalendarSpec; + +CalendarSpec* calendar_spec_free(CalendarSpec *c); + +int calendar_spec_normalize(CalendarSpec *spec); +bool calendar_spec_valid(CalendarSpec *spec); + +int calendar_spec_to_string(const CalendarSpec *spec, char **p); +int calendar_spec_from_string(const char *p, CalendarSpec **spec); + +int calendar_spec_next_usec(const CalendarSpec *spec, usec_t usec, usec_t *next); + +DEFINE_TRIVIAL_CLEANUP_FUNC(CalendarSpec*, calendar_spec_free); diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c new file mode 100644 index 0000000..61df751 --- /dev/null +++ b/src/shared/cgroup-show.c @@ -0,0 +1,354 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "bus-error.h" +#include "bus-util.h" +#include "cgroup-show.h" +#include "cgroup-util.h" +#include "env-file.h" +#include "fd-util.h" +#include "format-util.h" +#include "locale-util.h" +#include "macro.h" +#include "output-mode.h" +#include "path-util.h" +#include "process-util.h" +#include "string-util.h" +#include "terminal-util.h" +#include "unit-name.h" + +static void show_pid_array( + pid_t pids[], + unsigned n_pids, + const char *prefix, + unsigned n_columns, + bool extra, + bool more, + OutputFlags flags) { + + unsigned i, j, pid_width; + + if (n_pids == 0) + return; + + typesafe_qsort(pids, n_pids, pid_compare_func); + + /* Filter duplicates */ + for (j = 0, i = 1; i < n_pids; i++) { + if (pids[i] == pids[j]) + continue; + pids[++j] = pids[i]; + } + n_pids = j + 1; + pid_width = DECIMAL_STR_WIDTH(pids[j]); + + if (flags & OUTPUT_FULL_WIDTH) + n_columns = 0; + else { + if (n_columns > pid_width+2) + n_columns -= pid_width+2; + else + n_columns = 20; + } + for (i = 0; i < n_pids; i++) { + _cleanup_free_ char *t = NULL; + + (void) get_process_cmdline(pids[i], n_columns, true, &t); + + if (extra) + printf("%s%s ", prefix, special_glyph(SPECIAL_GLYPH_TRIANGULAR_BULLET)); + else + printf("%s%s", prefix, special_glyph(((more || i < n_pids-1) ? SPECIAL_GLYPH_TREE_BRANCH : SPECIAL_GLYPH_TREE_RIGHT))); + + printf("%*"PID_PRI" %s\n", pid_width, pids[i], strna(t)); + } +} + +static int show_cgroup_one_by_path( + const char *path, + const char *prefix, + unsigned n_columns, + bool more, + OutputFlags flags) { + + char *fn; + _cleanup_fclose_ FILE *f = NULL; + size_t n = 0, n_allocated = 0; + _cleanup_free_ pid_t *pids = NULL; + _cleanup_free_ char *p = NULL; + pid_t pid; + int r; + + r = cg_mangle_path(path, &p); + if (r < 0) + return r; + + fn = strjoina(p, "/cgroup.procs"); + f = fopen(fn, "re"); + if (!f) + return -errno; + + while ((r = cg_read_pid(f, &pid)) > 0) { + + if (!(flags & OUTPUT_KERNEL_THREADS) && is_kernel_thread(pid) > 0) + continue; + + if (!GREEDY_REALLOC(pids, n_allocated, n + 1)) + return -ENOMEM; + + assert(n < n_allocated); + pids[n++] = pid; + } + + if (r < 0) + return r; + + show_pid_array(pids, n, prefix, n_columns, false, more, flags); + + return 0; +} + +int show_cgroup_by_path( + const char *path, + const char *prefix, + unsigned n_columns, + OutputFlags flags) { + + _cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL; + _cleanup_closedir_ DIR *d = NULL; + char *gn = NULL; + bool shown_pids = false; + int r; + + assert(path); + + if (n_columns <= 0) + n_columns = columns(); + + prefix = strempty(prefix); + + r = cg_mangle_path(path, &fn); + if (r < 0) + return r; + + d = opendir(fn); + if (!d) + return -errno; + + while ((r = cg_read_subgroup(d, &gn)) > 0) { + _cleanup_free_ char *k = NULL; + + k = strjoin(fn, "/", gn); + free(gn); + if (!k) + return -ENOMEM; + + if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k) > 0) + continue; + + if (!shown_pids) { + show_cgroup_one_by_path(path, prefix, n_columns, true, flags); + shown_pids = true; + } + + if (last) { + printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_BRANCH), cg_unescape(basename(last))); + + if (!p1) { + p1 = strappend(prefix, special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)); + if (!p1) + return -ENOMEM; + } + + show_cgroup_by_path(last, p1, n_columns-2, flags); + free(last); + } + + last = TAKE_PTR(k); + } + + if (r < 0) + return r; + + if (!shown_pids) + show_cgroup_one_by_path(path, prefix, n_columns, !!last, flags); + + if (last) { + printf("%s%s%s\n", prefix, special_glyph(SPECIAL_GLYPH_TREE_RIGHT), cg_unescape(basename(last))); + + if (!p2) { + p2 = strappend(prefix, " "); + if (!p2) + return -ENOMEM; + } + + show_cgroup_by_path(last, p2, n_columns-2, flags); + } + + return 0; +} + +int show_cgroup(const char *controller, + const char *path, + const char *prefix, + unsigned n_columns, + OutputFlags flags) { + _cleanup_free_ char *p = NULL; + int r; + + assert(path); + + r = cg_get_path(controller, path, NULL, &p); + if (r < 0) + return r; + + return show_cgroup_by_path(p, prefix, n_columns, flags); +} + +static int show_extra_pids( + const char *controller, + const char *path, + const char *prefix, + unsigned n_columns, + const pid_t pids[], + unsigned n_pids, + OutputFlags flags) { + + _cleanup_free_ pid_t *copy = NULL; + unsigned i, j; + int r; + + assert(path); + + if (n_pids <= 0) + return 0; + + if (n_columns <= 0) + n_columns = columns(); + + prefix = strempty(prefix); + + copy = new(pid_t, n_pids); + if (!copy) + return -ENOMEM; + + for (i = 0, j = 0; i < n_pids; i++) { + _cleanup_free_ char *k = NULL; + + r = cg_pid_get_path(controller, pids[i], &k); + if (r < 0) + return r; + + if (path_startswith(k, path)) + continue; + + copy[j++] = pids[i]; + } + + show_pid_array(copy, j, prefix, n_columns, true, false, flags); + + return 0; +} + +int show_cgroup_and_extra( + const char *controller, + const char *path, + const char *prefix, + unsigned n_columns, + const pid_t extra_pids[], + unsigned n_extra_pids, + OutputFlags flags) { + + int r; + + assert(path); + + r = show_cgroup(controller, path, prefix, n_columns, flags); + if (r < 0) + return r; + + return show_extra_pids(controller, path, prefix, n_columns, extra_pids, n_extra_pids, flags); +} + +int show_cgroup_get_unit_path_and_warn( + sd_bus *bus, + const char *unit, + char **ret) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_free_ char *path = NULL; + int r; + + path = unit_dbus_path_from_name(unit); + if (!path) + return log_oom(); + + r = sd_bus_get_property_string( + bus, + "org.freedesktop.systemd1", + path, + unit_dbus_interface_from_name(unit), + "ControlGroup", + &error, + ret); + if (r < 0) + return log_error_errno(r, "Failed to query unit control group path: %s", + bus_error_message(&error, r)); + + return 0; +} + +int show_cgroup_get_path_and_warn( + const char *machine, + const char *prefix, + char **ret) { + + int r; + _cleanup_free_ char *root = NULL; + + if (machine) { + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + _cleanup_free_ char *unit = NULL; + const char *m; + + m = strjoina("/run/systemd/machines/", machine); + r = parse_env_file(NULL, m, "SCOPE", &unit); + if (r < 0) + return log_error_errno(r, "Failed to load machine data: %m"); + + r = bus_connect_transport_systemd(BUS_TRANSPORT_LOCAL, NULL, false, &bus); + if (r < 0) + return log_error_errno(r, "Failed to create bus connection: %m"); + + r = show_cgroup_get_unit_path_and_warn(bus, unit, &root); + if (r < 0) + return r; + } else { + r = cg_get_root_path(&root); + if (r == -ENOMEDIUM) + return log_error_errno(r, "Failed to get root control group path.\n" + "No cgroup filesystem mounted on /sys/fs/cgroup"); + else if (r < 0) + return log_error_errno(r, "Failed to get root control group path: %m"); + } + + if (prefix) { + char *t; + + t = strjoin(root, prefix); + if (!t) + return log_oom(); + + *ret = t; + } else + *ret = TAKE_PTR(root); + + return 0; +} diff --git a/src/shared/cgroup-show.h b/src/shared/cgroup-show.h new file mode 100644 index 0000000..3593e9d --- /dev/null +++ b/src/shared/cgroup-show.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-bus.h" + +#include "logs-show.h" +#include "output-mode.h" + +int show_cgroup_by_path(const char *path, const char *prefix, unsigned columns, OutputFlags flags); +int show_cgroup(const char *controller, const char *path, const char *prefix, unsigned columns, OutputFlags flags); + +int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, unsigned n_columns, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags); + +int show_cgroup_get_unit_path_and_warn( + sd_bus *bus, + const char *unit, + char **ret); +int show_cgroup_get_path_and_warn( + const char *machine, + const char *prefix, + char **ret); diff --git a/src/shared/clean-ipc.c b/src/shared/clean-ipc.c new file mode 100644 index 0000000..46fa680 --- /dev/null +++ b/src/shared/clean-ipc.c @@ -0,0 +1,453 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <mqueue.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <sys/ipc.h> +#include <sys/msg.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "clean-ipc.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-util.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +static bool match_uid_gid(uid_t subject_uid, gid_t subject_gid, uid_t delete_uid, gid_t delete_gid) { + + if (uid_is_valid(delete_uid) && subject_uid == delete_uid) + return true; + + if (gid_is_valid(delete_gid) && subject_gid == delete_gid) + return true; + + return false; +} + +static int clean_sysvipc_shm(uid_t delete_uid, gid_t delete_gid, bool rm) { + _cleanup_fclose_ FILE *f = NULL; + bool first = true; + int ret = 0, r; + + f = fopen("/proc/sysvipc/shm", "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /proc/sysvipc/shm: %m"); + } + + for (;;) { + _cleanup_free_ char *line = NULL; + unsigned n_attached; + pid_t cpid, lpid; + uid_t uid, cuid; + gid_t gid, cgid; + int shmid; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(errno, "Failed to read /proc/sysvipc/shm: %m"); + if (r == 0) + break; + + if (first) { + first = false; + continue; + } + + if (sscanf(line, "%*i %i %*o %*u " PID_FMT " " PID_FMT " %u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT, + &shmid, &cpid, &lpid, &n_attached, &uid, &gid, &cuid, &cgid) != 8) + continue; + + if (n_attached > 0) + continue; + + if (!match_uid_gid(uid, gid, delete_uid, delete_gid)) + continue; + + if (!rm) + return 1; + + if (shmctl(shmid, IPC_RMID, NULL) < 0) { + + /* Ignore entries that are already deleted */ + if (IN_SET(errno, EIDRM, EINVAL)) + continue; + + ret = log_warning_errno(errno, + "Failed to remove SysV shared memory segment %i: %m", + shmid); + } else { + log_debug("Removed SysV shared memory segment %i.", shmid); + if (ret == 0) + ret = 1; + } + } + + return ret; +} + +static int clean_sysvipc_sem(uid_t delete_uid, gid_t delete_gid, bool rm) { + _cleanup_fclose_ FILE *f = NULL; + bool first = true; + int ret = 0, r; + + f = fopen("/proc/sysvipc/sem", "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /proc/sysvipc/sem: %m"); + } + + for (;;) { + _cleanup_free_ char *line = NULL; + uid_t uid, cuid; + gid_t gid, cgid; + int semid; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(r, "Failed to read /proc/sysvipc/sem: %m"); + if (r == 0) + break; + + if (first) { + first = false; + continue; + } + + if (sscanf(line, "%*i %i %*o %*u " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT, + &semid, &uid, &gid, &cuid, &cgid) != 5) + continue; + + if (!match_uid_gid(uid, gid, delete_uid, delete_gid)) + continue; + + if (!rm) + return 1; + + if (semctl(semid, 0, IPC_RMID) < 0) { + + /* Ignore entries that are already deleted */ + if (IN_SET(errno, EIDRM, EINVAL)) + continue; + + ret = log_warning_errno(errno, + "Failed to remove SysV semaphores object %i: %m", + semid); + } else { + log_debug("Removed SysV semaphore %i.", semid); + if (ret == 0) + ret = 1; + } + } + + return ret; +} + +static int clean_sysvipc_msg(uid_t delete_uid, gid_t delete_gid, bool rm) { + _cleanup_fclose_ FILE *f = NULL; + bool first = true; + int ret = 0, r; + + f = fopen("/proc/sysvipc/msg", "re"); + if (!f) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /proc/sysvipc/msg: %m"); + } + + for (;;) { + _cleanup_free_ char *line = NULL; + uid_t uid, cuid; + gid_t gid, cgid; + pid_t cpid, lpid; + int msgid; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(r, "Failed to read /proc/sysvipc/msg: %m"); + if (r == 0) + break; + + if (first) { + first = false; + continue; + } + + if (sscanf(line, "%*i %i %*o %*u %*u " PID_FMT " " PID_FMT " " UID_FMT " " GID_FMT " " UID_FMT " " GID_FMT, + &msgid, &cpid, &lpid, &uid, &gid, &cuid, &cgid) != 7) + continue; + + if (!match_uid_gid(uid, gid, delete_uid, delete_gid)) + continue; + + if (!rm) + return 1; + + if (msgctl(msgid, IPC_RMID, NULL) < 0) { + + /* Ignore entries that are already deleted */ + if (IN_SET(errno, EIDRM, EINVAL)) + continue; + + ret = log_warning_errno(errno, + "Failed to remove SysV message queue %i: %m", + msgid); + } else { + log_debug("Removed SysV message queue %i.", msgid); + if (ret == 0) + ret = 1; + } + } + + return ret; +} + +static int clean_posix_shm_internal(DIR *dir, uid_t uid, gid_t gid, bool rm) { + struct dirent *de; + int ret = 0, r; + + assert(dir); + + FOREACH_DIRENT_ALL(de, dir, goto fail) { + struct stat st; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, "Failed to stat() POSIX shared memory segment %s: %m", de->d_name); + continue; + } + + if (S_ISDIR(st.st_mode)) { + _cleanup_closedir_ DIR *kid; + + kid = xopendirat(dirfd(dir), de->d_name, O_NOFOLLOW|O_NOATIME); + if (!kid) { + if (errno != ENOENT) + ret = log_warning_errno(errno, "Failed to enter shared memory directory %s: %m", de->d_name); + } else { + r = clean_posix_shm_internal(kid, uid, gid, rm); + if (r < 0) + ret = r; + } + + if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid)) + continue; + + if (!rm) + return 1; + + if (unlinkat(dirfd(dir), de->d_name, AT_REMOVEDIR) < 0) { + + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, "Failed to remove POSIX shared memory directory %s: %m", de->d_name); + } else { + log_debug("Removed POSIX shared memory directory %s", de->d_name); + if (ret == 0) + ret = 1; + } + } else { + + if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid)) + continue; + + if (!rm) + return 1; + + if (unlinkat(dirfd(dir), de->d_name, 0) < 0) { + + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, "Failed to remove POSIX shared memory segment %s: %m", de->d_name); + } else { + log_debug("Removed POSIX shared memory segment %s", de->d_name); + if (ret == 0) + ret = 1; + } + } + } + + return ret; + +fail: + return log_warning_errno(errno, "Failed to read /dev/shm: %m"); +} + +static int clean_posix_shm(uid_t uid, gid_t gid, bool rm) { + _cleanup_closedir_ DIR *dir = NULL; + + dir = opendir("/dev/shm"); + if (!dir) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /dev/shm: %m"); + } + + return clean_posix_shm_internal(dir, uid, gid, rm); +} + +static int clean_posix_mq(uid_t uid, gid_t gid, bool rm) { + _cleanup_closedir_ DIR *dir = NULL; + struct dirent *de; + int ret = 0; + + dir = opendir("/dev/mqueue"); + if (!dir) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to open /dev/mqueue: %m"); + } + + FOREACH_DIRENT_ALL(de, dir, goto fail) { + struct stat st; + char fn[1+strlen(de->d_name)+1]; + + if (dot_or_dot_dot(de->d_name)) + continue; + + if (fstatat(dirfd(dir), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) { + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, + "Failed to stat() MQ segment %s: %m", + de->d_name); + continue; + } + + if (!match_uid_gid(st.st_uid, st.st_gid, uid, gid)) + continue; + + if (!rm) + return 1; + + fn[0] = '/'; + strcpy(fn+1, de->d_name); + + if (mq_unlink(fn) < 0) { + if (errno == ENOENT) + continue; + + ret = log_warning_errno(errno, + "Failed to unlink POSIX message queue %s: %m", + fn); + } else { + log_debug("Removed POSIX message queue %s", fn); + if (ret == 0) + ret = 1; + } + } + + return ret; + +fail: + return log_warning_errno(errno, "Failed to read /dev/mqueue: %m"); +} + +int clean_ipc_internal(uid_t uid, gid_t gid, bool rm) { + int ret = 0, r; + + /* If 'rm' is true, clean all IPC objects owned by either the specified UID or the specified GID. Return the + * last error encountered or == 0 if no matching IPC objects have been found or > 0 if matching IPC objects + * have been found and have been removed. + * + * If 'rm' is false, just search for IPC objects owned by either the specified UID or the specified GID. In + * this case we return < 0 on error, > 0 if we found a matching object, == 0 if we didn't. + * + * As special rule: if UID/GID is specified as root we'll silently not clean up things, and always claim that + * there are IPC objects for it. */ + + if (uid == 0) { + if (!rm) + return 1; + + uid = UID_INVALID; + } + if (gid == 0) { + if (!rm) + return 1; + + gid = GID_INVALID; + } + + /* Anything to do? */ + if (!uid_is_valid(uid) && !gid_is_valid(gid)) + return 0; + + r = clean_sysvipc_shm(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_sysvipc_sem(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_sysvipc_msg(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_posix_shm(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + r = clean_posix_mq(uid, gid, rm); + if (r != 0) { + if (!rm) + return r; + if (ret == 0) + ret = r; + } + + return ret; +} + +int clean_ipc_by_uid(uid_t uid) { + return clean_ipc_internal(uid, GID_INVALID, true); +} + +int clean_ipc_by_gid(gid_t gid) { + return clean_ipc_internal(UID_INVALID, gid, true); +} diff --git a/src/shared/clean-ipc.h b/src/shared/clean-ipc.h new file mode 100644 index 0000000..eaff47d --- /dev/null +++ b/src/shared/clean-ipc.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/types.h> + +#include "user-util.h" + +int clean_ipc_internal(uid_t uid, gid_t gid, bool rm); + +/* Remove all IPC objects owned by the specified UID or GID */ +int clean_ipc_by_uid(uid_t uid); +int clean_ipc_by_gid(gid_t gid); + +/* Check if any IPC object owned by the specified UID or GID exists, returns > 0 if so, == 0 if not */ +static inline int search_ipc(uid_t uid, gid_t gid) { + return clean_ipc_internal(uid, gid, false); +} diff --git a/src/shared/clock-util.c b/src/shared/clock-util.c new file mode 100644 index 0000000..1877a81 --- /dev/null +++ b/src/shared/clock-util.c @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <time.h> +#include <linux/rtc.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/time.h> + +#include "alloc-util.h" +#include "clock-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "macro.h" +#include "string-util.h" +#include "util.h" + +int clock_get_hwclock(struct tm *tm) { + _cleanup_close_ int fd = -1; + + assert(tm); + + fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + /* This leaves the timezone fields of struct tm + * uninitialized! */ + if (ioctl(fd, RTC_RD_TIME, tm) < 0) + return -errno; + + /* We don't know daylight saving, so we reset this in order not + * to confuse mktime(). */ + tm->tm_isdst = -1; + + return 0; +} + +int clock_set_hwclock(const struct tm *tm) { + _cleanup_close_ int fd = -1; + + assert(tm); + + fd = open("/dev/rtc", O_RDONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (ioctl(fd, RTC_SET_TIME, tm) < 0) + return -errno; + + return 0; +} + +int clock_is_localtime(const char* adjtime_path) { + _cleanup_fclose_ FILE *f; + int r; + + if (!adjtime_path) + adjtime_path = "/etc/adjtime"; + + /* + * The third line of adjtime is "UTC" or "LOCAL" or nothing. + * # /etc/adjtime + * 0.0 0 0 + * 0 + * UTC + */ + f = fopen(adjtime_path, "re"); + if (f) { + _cleanup_free_ char *line = NULL; + unsigned i; + + for (i = 0; i < 2; i++) { /* skip the first two lines */ + r = read_line(f, LONG_LINE_MAX, NULL); + if (r < 0) + return r; + if (r == 0) + return false; /* less than three lines → default to UTC */ + } + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + return false; /* less than three lines → default to UTC */ + + return streq(line, "LOCAL"); + + } else if (errno != ENOENT) + return -errno; + + /* adjtime not present → default to UTC */ + return false; +} + +int clock_set_timezone(int *min) { + const struct timeval *tv_null = NULL; + struct timespec ts; + struct tm tm; + int minutesdelta; + struct timezone tz; + + assert_se(clock_gettime(CLOCK_REALTIME, &ts) == 0); + assert_se(localtime_r(&ts.tv_sec, &tm)); + minutesdelta = tm.tm_gmtoff / 60; + + tz.tz_minuteswest = -minutesdelta; + tz.tz_dsttime = 0; /* DST_NONE */ + + /* + * If the RTC does not run in UTC but in local time, the very first + * call to settimeofday() will set the kernel's timezone and will warp the + * system clock, so that it runs in UTC instead of the local time we + * have read from the RTC. + */ + if (settimeofday(tv_null, &tz) < 0) + return negative_errno(); + + if (min) + *min = minutesdelta; + return 0; +} + +int clock_reset_timewarp(void) { + const struct timeval *tv_null = NULL; + struct timezone tz; + + tz.tz_minuteswest = 0; + tz.tz_dsttime = 0; /* DST_NONE */ + + /* + * The very first call to settimeofday() does time warp magic. Do a + * dummy call here, so the time warping is sealed and all later calls + * behave as expected. + */ + if (settimeofday(tv_null, &tz) < 0) + return -errno; + + return 0; +} + +#define TIME_EPOCH_USEC ((usec_t) TIME_EPOCH * USEC_PER_SEC) + +int clock_apply_epoch(void) { + struct timespec ts; + + if (now(CLOCK_REALTIME) >= TIME_EPOCH_USEC) + return 0; + + if (clock_settime(CLOCK_REALTIME, timespec_store(&ts, TIME_EPOCH_USEC)) < 0) + return -errno; + + return 1; +} diff --git a/src/shared/clock-util.h b/src/shared/clock-util.h new file mode 100644 index 0000000..b9db54e --- /dev/null +++ b/src/shared/clock-util.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <time.h> + +int clock_is_localtime(const char* adjtime_path); +int clock_set_timezone(int *min); +int clock_reset_timewarp(void); +int clock_get_hwclock(struct tm *tm); +int clock_set_hwclock(const struct tm *tm); +int clock_apply_epoch(void); diff --git a/src/shared/condition.c b/src/shared/condition.c new file mode 100644 index 0000000..fb77966 --- /dev/null +++ b/src/shared/condition.c @@ -0,0 +1,733 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <time.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "apparmor-util.h" +#include "architecture.h" +#include "audit-util.h" +#include "cap-list.h" +#include "cgroup-util.h" +#include "condition.h" +#include "efivars.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "glob-util.h" +#include "hostname-util.h" +#include "ima-util.h" +#include "list.h" +#include "macro.h" +#include "mountpoint-util.h" +#include "env-file.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "process-util.h" +#include "selinux-util.h" +#include "smack-util.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "tomoyo-util.h" +#include "user-util.h" +#include "util.h" +#include "virt.h" + +Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate) { + Condition *c; + int r; + + assert(type >= 0); + assert(type < _CONDITION_TYPE_MAX); + assert((!parameter) == (type == CONDITION_NULL)); + + c = new0(Condition, 1); + if (!c) + return NULL; + + c->type = type; + c->trigger = trigger; + c->negate = negate; + + r = free_and_strdup(&c->parameter, parameter); + if (r < 0) { + return mfree(c); + } + + return c; +} + +void condition_free(Condition *c) { + assert(c); + + free(c->parameter); + free(c); +} + +Condition* condition_free_list(Condition *first) { + Condition *c, *n; + + LIST_FOREACH_SAFE(conditions, c, n, first) + condition_free(c); + + return NULL; +} + +static int condition_test_kernel_command_line(Condition *c) { + _cleanup_free_ char *line = NULL; + const char *p; + bool equal; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_KERNEL_COMMAND_LINE); + + r = proc_cmdline(&line); + if (r < 0) + return r; + + equal = strchr(c->parameter, '='); + + for (p = line;;) { + _cleanup_free_ char *word = NULL; + bool found; + + r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES|EXTRACT_RELAX); + if (r < 0) + return r; + if (r == 0) + break; + + if (equal) + found = streq(word, c->parameter); + else { + const char *f; + + f = startswith(word, c->parameter); + found = f && IN_SET(*f, 0, '='); + } + + if (found) + return true; + } + + return false; +} + +static int condition_test_kernel_version(Condition *c) { + enum { + /* Listed in order of checking. Note that some comparators are prefixes of others, hence the longest + * should be listed first. */ + LOWER_OR_EQUAL, + GREATER_OR_EQUAL, + LOWER, + GREATER, + EQUAL, + _ORDER_MAX, + }; + + static const char *const prefix[_ORDER_MAX] = { + [LOWER_OR_EQUAL] = "<=", + [GREATER_OR_EQUAL] = ">=", + [LOWER] = "<", + [GREATER] = ">", + [EQUAL] = "=", + }; + const char *p = NULL; + struct utsname u; + size_t i; + int k; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_KERNEL_VERSION); + + assert_se(uname(&u) >= 0); + + for (i = 0; i < _ORDER_MAX; i++) { + p = startswith(c->parameter, prefix[i]); + if (p) + break; + } + + /* No prefix? Then treat as glob string */ + if (!p) + return fnmatch(skip_leading_chars(c->parameter, NULL), u.release, 0) == 0; + + k = str_verscmp(u.release, skip_leading_chars(p, NULL)); + + switch (i) { + + case LOWER: + return k < 0; + + case LOWER_OR_EQUAL: + return k <= 0; + + case EQUAL: + return k == 0; + + case GREATER_OR_EQUAL: + return k >= 0; + + case GREATER: + return k > 0; + + default: + assert_not_reached("Can't compare"); + } +} + +static int condition_test_user(Condition *c) { + uid_t id; + int r; + _cleanup_free_ char *username = NULL; + const char *u; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_USER); + + r = parse_uid(c->parameter, &id); + if (r >= 0) + return id == getuid() || id == geteuid(); + + if (streq("@system", c->parameter)) + return uid_is_system(getuid()) || uid_is_system(geteuid()); + + username = getusername_malloc(); + if (!username) + return -ENOMEM; + + if (streq(username, c->parameter)) + return 1; + + if (getpid_cached() == 1) + return streq(c->parameter, "root"); + + u = c->parameter; + r = get_user_creds(&u, &id, NULL, NULL, NULL, USER_CREDS_ALLOW_MISSING); + if (r < 0) + return 0; + + return id == getuid() || id == geteuid(); +} + +static int condition_test_control_group_controller(Condition *c) { + int r; + CGroupMask system_mask, wanted_mask = 0; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_CONTROL_GROUP_CONTROLLER); + + r = cg_mask_supported(&system_mask); + if (r < 0) + return log_debug_errno(r, "Failed to determine supported controllers: %m"); + + r = cg_mask_from_string(c->parameter, &wanted_mask); + if (r < 0 || wanted_mask <= 0) { + /* This won't catch the case that we have an unknown controller + * mixed in with valid ones -- these are only assessed on the + * validity of the valid controllers found. */ + log_debug("Failed to parse cgroup string: %s", c->parameter); + return 1; + } + + return FLAGS_SET(system_mask, wanted_mask); +} + +static int condition_test_group(Condition *c) { + gid_t id; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_GROUP); + + r = parse_gid(c->parameter, &id); + if (r >= 0) + return in_gid(id); + + /* Avoid any NSS lookups if we are PID1 */ + if (getpid_cached() == 1) + return streq(c->parameter, "root"); + + return in_group(c->parameter) > 0; +} + +static int condition_test_virtualization(Condition *c) { + int b, v; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_VIRTUALIZATION); + + if (streq(c->parameter, "private-users")) + return running_in_userns(); + + v = detect_virtualization(); + if (v < 0) + return v; + + /* First, compare with yes/no */ + b = parse_boolean(c->parameter); + if (b >= 0) + return b == !!v; + + /* Then, compare categorization */ + if (streq(c->parameter, "vm")) + return VIRTUALIZATION_IS_VM(v); + + if (streq(c->parameter, "container")) + return VIRTUALIZATION_IS_CONTAINER(v); + + /* Finally compare id */ + return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v)); +} + +static int condition_test_architecture(Condition *c) { + int a, b; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_ARCHITECTURE); + + a = uname_architecture(); + if (a < 0) + return a; + + if (streq(c->parameter, "native")) + b = native_architecture(); + else { + b = architecture_from_string(c->parameter); + if (b < 0) /* unknown architecture? Then it's definitely not ours */ + return false; + } + + return a == b; +} + +static int condition_test_host(Condition *c) { + _cleanup_free_ char *h = NULL; + sd_id128_t x, y; + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_HOST); + + if (sd_id128_from_string(c->parameter, &x) >= 0) { + + r = sd_id128_get_machine(&y); + if (r < 0) + return r; + + return sd_id128_equal(x, y); + } + + h = gethostname_malloc(); + if (!h) + return -ENOMEM; + + return fnmatch(c->parameter, h, FNM_CASEFOLD) == 0; +} + +static int condition_test_ac_power(Condition *c) { + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_AC_POWER); + + r = parse_boolean(c->parameter); + if (r < 0) + return r; + + return (on_ac_power() != 0) == !!r; +} + +static int condition_test_security(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_SECURITY); + + if (streq(c->parameter, "selinux")) + return mac_selinux_use(); + if (streq(c->parameter, "smack")) + return mac_smack_use(); + if (streq(c->parameter, "apparmor")) + return mac_apparmor_use(); + if (streq(c->parameter, "audit")) + return use_audit(); + if (streq(c->parameter, "ima")) + return use_ima(); + if (streq(c->parameter, "tomoyo")) + return mac_tomoyo_use(); + if (streq(c->parameter, "uefi-secureboot")) + return is_efi_secure_boot(); + + return false; +} + +static int condition_test_capability(Condition *c) { + unsigned long long capabilities = (unsigned long long) -1; + _cleanup_fclose_ FILE *f = NULL; + int value, r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_CAPABILITY); + + /* If it's an invalid capability, we don't have it */ + value = capability_from_name(c->parameter); + if (value < 0) + return -EINVAL; + + /* If it's a valid capability we default to assume + * that we have it */ + + f = fopen("/proc/self/status", "re"); + if (!f) + return -errno; + + for (;;) { + _cleanup_free_ char *line = NULL; + const char *p; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + p = startswith(line, "CapBnd:"); + if (p) { + if (sscanf(line+7, "%llx", &capabilities) != 1) + return -EIO; + + break; + } + } + + return !!(capabilities & (1ULL << value)); +} + +static int condition_test_needs_update(Condition *c) { + const char *p; + struct stat usr, other; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_NEEDS_UPDATE); + + /* If the file system is read-only we shouldn't suggest an update */ + if (path_is_read_only_fs(c->parameter) > 0) + return false; + + /* Any other failure means we should allow the condition to be true, + * so that we rather invoke too many update tools than too + * few. */ + + if (!path_is_absolute(c->parameter)) + return true; + + p = strjoina(c->parameter, "/.updated"); + if (lstat(p, &other) < 0) + return true; + + if (lstat("/usr/", &usr) < 0) + return true; + + /* + * First, compare seconds as they are always accurate... + */ + if (usr.st_mtim.tv_sec != other.st_mtim.tv_sec) + return usr.st_mtim.tv_sec > other.st_mtim.tv_sec; + + /* + * ...then compare nanoseconds. + * + * A false positive is only possible when /usr's nanoseconds > 0 + * (otherwise /usr cannot be strictly newer than the target file) + * AND the target file's nanoseconds == 0 + * (otherwise the filesystem supports nsec timestamps, see stat(2)). + */ + if (usr.st_mtim.tv_nsec > 0 && other.st_mtim.tv_nsec == 0) { + _cleanup_free_ char *timestamp_str = NULL; + uint64_t timestamp; + int r; + + r = parse_env_file(NULL, p, "TIMESTAMP_NSEC", ×tamp_str); + if (r < 0) { + log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p); + return true; + } else if (r == 0) { + log_debug("No data in timestamp file '%s', using mtime", p); + return true; + } + + r = safe_atou64(timestamp_str, ×tamp); + if (r < 0) { + log_error_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p); + return true; + } + + timespec_store(&other.st_mtim, timestamp); + } + + return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec; +} + +static int condition_test_first_boot(Condition *c) { + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_FIRST_BOOT); + + r = parse_boolean(c->parameter); + if (r < 0) + return r; + + return (access("/run/systemd/first-boot", F_OK) >= 0) == !!r; +} + +static int condition_test_path_exists(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_EXISTS); + + return access(c->parameter, F_OK) >= 0; +} + +static int condition_test_path_exists_glob(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_EXISTS_GLOB); + + return glob_exists(c->parameter) > 0; +} + +static int condition_test_path_is_directory(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_DIRECTORY); + + return is_dir(c->parameter, true) > 0; +} + +static int condition_test_path_is_symbolic_link(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_SYMBOLIC_LINK); + + return is_symlink(c->parameter) > 0; +} + +static int condition_test_path_is_mount_point(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_MOUNT_POINT); + + return path_is_mount_point(c->parameter, NULL, AT_SYMLINK_FOLLOW) > 0; +} + +static int condition_test_path_is_read_write(Condition *c) { + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_PATH_IS_READ_WRITE); + + return path_is_read_only_fs(c->parameter) <= 0; +} + +static int condition_test_directory_not_empty(Condition *c) { + int r; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_DIRECTORY_NOT_EMPTY); + + r = dir_is_empty(c->parameter); + return r <= 0 && r != -ENOENT; +} + +static int condition_test_file_not_empty(Condition *c) { + struct stat st; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_FILE_NOT_EMPTY); + + return (stat(c->parameter, &st) >= 0 && + S_ISREG(st.st_mode) && + st.st_size > 0); +} + +static int condition_test_file_is_executable(Condition *c) { + struct stat st; + + assert(c); + assert(c->parameter); + assert(c->type == CONDITION_FILE_IS_EXECUTABLE); + + return (stat(c->parameter, &st) >= 0 && + S_ISREG(st.st_mode) && + (st.st_mode & 0111)); +} + +static int condition_test_null(Condition *c) { + assert(c); + assert(c->type == CONDITION_NULL); + + /* Note that during parsing we already evaluate the string and + * store it in c->negate */ + return true; +} + +int condition_test(Condition *c) { + + static int (*const condition_tests[_CONDITION_TYPE_MAX])(Condition *c) = { + [CONDITION_PATH_EXISTS] = condition_test_path_exists, + [CONDITION_PATH_EXISTS_GLOB] = condition_test_path_exists_glob, + [CONDITION_PATH_IS_DIRECTORY] = condition_test_path_is_directory, + [CONDITION_PATH_IS_SYMBOLIC_LINK] = condition_test_path_is_symbolic_link, + [CONDITION_PATH_IS_MOUNT_POINT] = condition_test_path_is_mount_point, + [CONDITION_PATH_IS_READ_WRITE] = condition_test_path_is_read_write, + [CONDITION_DIRECTORY_NOT_EMPTY] = condition_test_directory_not_empty, + [CONDITION_FILE_NOT_EMPTY] = condition_test_file_not_empty, + [CONDITION_FILE_IS_EXECUTABLE] = condition_test_file_is_executable, + [CONDITION_KERNEL_COMMAND_LINE] = condition_test_kernel_command_line, + [CONDITION_KERNEL_VERSION] = condition_test_kernel_version, + [CONDITION_VIRTUALIZATION] = condition_test_virtualization, + [CONDITION_SECURITY] = condition_test_security, + [CONDITION_CAPABILITY] = condition_test_capability, + [CONDITION_HOST] = condition_test_host, + [CONDITION_AC_POWER] = condition_test_ac_power, + [CONDITION_ARCHITECTURE] = condition_test_architecture, + [CONDITION_NEEDS_UPDATE] = condition_test_needs_update, + [CONDITION_FIRST_BOOT] = condition_test_first_boot, + [CONDITION_USER] = condition_test_user, + [CONDITION_GROUP] = condition_test_group, + [CONDITION_CONTROL_GROUP_CONTROLLER] = condition_test_control_group_controller, + [CONDITION_NULL] = condition_test_null, + }; + + int r, b; + + assert(c); + assert(c->type >= 0); + assert(c->type < _CONDITION_TYPE_MAX); + + r = condition_tests[c->type](c); + if (r < 0) { + c->result = CONDITION_ERROR; + return r; + } + + b = (r > 0) == !c->negate; + c->result = b ? CONDITION_SUCCEEDED : CONDITION_FAILED; + return b; +} + +void condition_dump(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)) { + assert(c); + assert(f); + + prefix = strempty(prefix); + + fprintf(f, + "%s\t%s: %s%s%s %s\n", + prefix, + to_string(c->type), + c->trigger ? "|" : "", + c->negate ? "!" : "", + c->parameter, + condition_result_to_string(c->result)); +} + +void condition_dump_list(Condition *first, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)) { + Condition *c; + + LIST_FOREACH(conditions, c, first) + condition_dump(c, f, prefix, to_string); +} + +static const char* const condition_type_table[_CONDITION_TYPE_MAX] = { + [CONDITION_ARCHITECTURE] = "ConditionArchitecture", + [CONDITION_VIRTUALIZATION] = "ConditionVirtualization", + [CONDITION_HOST] = "ConditionHost", + [CONDITION_KERNEL_COMMAND_LINE] = "ConditionKernelCommandLine", + [CONDITION_KERNEL_VERSION] = "ConditionKernelVersion", + [CONDITION_SECURITY] = "ConditionSecurity", + [CONDITION_CAPABILITY] = "ConditionCapability", + [CONDITION_AC_POWER] = "ConditionACPower", + [CONDITION_NEEDS_UPDATE] = "ConditionNeedsUpdate", + [CONDITION_FIRST_BOOT] = "ConditionFirstBoot", + [CONDITION_PATH_EXISTS] = "ConditionPathExists", + [CONDITION_PATH_EXISTS_GLOB] = "ConditionPathExistsGlob", + [CONDITION_PATH_IS_DIRECTORY] = "ConditionPathIsDirectory", + [CONDITION_PATH_IS_SYMBOLIC_LINK] = "ConditionPathIsSymbolicLink", + [CONDITION_PATH_IS_MOUNT_POINT] = "ConditionPathIsMountPoint", + [CONDITION_PATH_IS_READ_WRITE] = "ConditionPathIsReadWrite", + [CONDITION_DIRECTORY_NOT_EMPTY] = "ConditionDirectoryNotEmpty", + [CONDITION_FILE_NOT_EMPTY] = "ConditionFileNotEmpty", + [CONDITION_FILE_IS_EXECUTABLE] = "ConditionFileIsExecutable", + [CONDITION_USER] = "ConditionUser", + [CONDITION_GROUP] = "ConditionGroup", + [CONDITION_CONTROL_GROUP_CONTROLLER] = "ConditionControlGroupController", + [CONDITION_NULL] = "ConditionNull" +}; + +DEFINE_STRING_TABLE_LOOKUP(condition_type, ConditionType); + +static const char* const assert_type_table[_CONDITION_TYPE_MAX] = { + [CONDITION_ARCHITECTURE] = "AssertArchitecture", + [CONDITION_VIRTUALIZATION] = "AssertVirtualization", + [CONDITION_HOST] = "AssertHost", + [CONDITION_KERNEL_COMMAND_LINE] = "AssertKernelCommandLine", + [CONDITION_KERNEL_VERSION] = "AssertKernelVersion", + [CONDITION_SECURITY] = "AssertSecurity", + [CONDITION_CAPABILITY] = "AssertCapability", + [CONDITION_AC_POWER] = "AssertACPower", + [CONDITION_NEEDS_UPDATE] = "AssertNeedsUpdate", + [CONDITION_FIRST_BOOT] = "AssertFirstBoot", + [CONDITION_PATH_EXISTS] = "AssertPathExists", + [CONDITION_PATH_EXISTS_GLOB] = "AssertPathExistsGlob", + [CONDITION_PATH_IS_DIRECTORY] = "AssertPathIsDirectory", + [CONDITION_PATH_IS_SYMBOLIC_LINK] = "AssertPathIsSymbolicLink", + [CONDITION_PATH_IS_MOUNT_POINT] = "AssertPathIsMountPoint", + [CONDITION_PATH_IS_READ_WRITE] = "AssertPathIsReadWrite", + [CONDITION_DIRECTORY_NOT_EMPTY] = "AssertDirectoryNotEmpty", + [CONDITION_FILE_NOT_EMPTY] = "AssertFileNotEmpty", + [CONDITION_FILE_IS_EXECUTABLE] = "AssertFileIsExecutable", + [CONDITION_USER] = "AssertUser", + [CONDITION_GROUP] = "AssertGroup", + [CONDITION_CONTROL_GROUP_CONTROLLER] = "AssertControlGroupController", + [CONDITION_NULL] = "AssertNull" +}; + +DEFINE_STRING_TABLE_LOOKUP(assert_type, ConditionType); + +static const char* const condition_result_table[_CONDITION_RESULT_MAX] = { + [CONDITION_UNTESTED] = "untested", + [CONDITION_SUCCEEDED] = "succeeded", + [CONDITION_FAILED] = "failed", + [CONDITION_ERROR] = "error", +}; + +DEFINE_STRING_TABLE_LOOKUP(condition_result, ConditionResult); diff --git a/src/shared/condition.h b/src/shared/condition.h new file mode 100644 index 0000000..e69fc36 --- /dev/null +++ b/src/shared/condition.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> + +#include "list.h" +#include "macro.h" + +typedef enum ConditionType { + CONDITION_ARCHITECTURE, + CONDITION_VIRTUALIZATION, + CONDITION_HOST, + CONDITION_KERNEL_COMMAND_LINE, + CONDITION_KERNEL_VERSION, + CONDITION_SECURITY, + CONDITION_CAPABILITY, + CONDITION_AC_POWER, + + CONDITION_NEEDS_UPDATE, + CONDITION_FIRST_BOOT, + + CONDITION_PATH_EXISTS, + CONDITION_PATH_EXISTS_GLOB, + CONDITION_PATH_IS_DIRECTORY, + CONDITION_PATH_IS_SYMBOLIC_LINK, + CONDITION_PATH_IS_MOUNT_POINT, + CONDITION_PATH_IS_READ_WRITE, + CONDITION_DIRECTORY_NOT_EMPTY, + CONDITION_FILE_NOT_EMPTY, + CONDITION_FILE_IS_EXECUTABLE, + + CONDITION_NULL, + + CONDITION_USER, + CONDITION_GROUP, + + CONDITION_CONTROL_GROUP_CONTROLLER, + + _CONDITION_TYPE_MAX, + _CONDITION_TYPE_INVALID = -1 +} ConditionType; + +typedef enum ConditionResult { + CONDITION_UNTESTED, + CONDITION_SUCCEEDED, + CONDITION_FAILED, + CONDITION_ERROR, + _CONDITION_RESULT_MAX, + _CONDITION_RESULT_INVALID = -1 +} ConditionResult; + +typedef struct Condition { + ConditionType type:8; + + bool trigger:1; + bool negate:1; + + ConditionResult result:6; + + char *parameter; + + LIST_FIELDS(struct Condition, conditions); +} Condition; + +Condition* condition_new(ConditionType type, const char *parameter, bool trigger, bool negate); +void condition_free(Condition *c); +Condition* condition_free_list(Condition *c); + +int condition_test(Condition *c); + +void condition_dump(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)); +void condition_dump_list(Condition *c, FILE *f, const char *prefix, const char *(*to_string)(ConditionType t)); + +const char* condition_type_to_string(ConditionType t) _const_; +ConditionType condition_type_from_string(const char *s) _pure_; + +const char* assert_type_to_string(ConditionType t) _const_; +ConditionType assert_type_from_string(const char *s) _pure_; + +const char* condition_result_to_string(ConditionResult r) _const_; +ConditionResult condition_result_from_string(const char *s) _pure_; + +static inline bool condition_takes_path(ConditionType t) { + return IN_SET(t, + CONDITION_PATH_EXISTS, + CONDITION_PATH_EXISTS_GLOB, + CONDITION_PATH_IS_DIRECTORY, + CONDITION_PATH_IS_SYMBOLIC_LINK, + CONDITION_PATH_IS_MOUNT_POINT, + CONDITION_PATH_IS_READ_WRITE, + CONDITION_DIRECTORY_NOT_EMPTY, + CONDITION_FILE_NOT_EMPTY, + CONDITION_FILE_IS_EXECUTABLE, + CONDITION_NEEDS_UPDATE); +} diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c new file mode 100644 index 0000000..b80c147 --- /dev/null +++ b/src/shared/conf-parser.c @@ -0,0 +1,1113 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "conf-parser.h" +#include "def.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "log.h" +#include "macro.h" +#include "missing.h" +#include "parse-util.h" +#include "path-util.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "signal-util.h" +#include "socket-util.h" +#include "string-util.h" +#include "strv.h" +#include "syslog-util.h" +#include "time-util.h" +#include "utf8.h" + +int config_item_table_lookup( + const void *table, + const char *section, + const char *lvalue, + ConfigParserCallback *func, + int *ltype, + void **data, + void *userdata) { + + const ConfigTableItem *t; + + assert(table); + assert(lvalue); + assert(func); + assert(ltype); + assert(data); + + for (t = table; t->lvalue; t++) { + + if (!streq(lvalue, t->lvalue)) + continue; + + if (!streq_ptr(section, t->section)) + continue; + + *func = t->parse; + *ltype = t->ltype; + *data = t->data; + return 1; + } + + return 0; +} + +int config_item_perf_lookup( + const void *table, + const char *section, + const char *lvalue, + ConfigParserCallback *func, + int *ltype, + void **data, + void *userdata) { + + ConfigPerfItemLookup lookup = (ConfigPerfItemLookup) table; + const ConfigPerfItem *p; + + assert(table); + assert(lvalue); + assert(func); + assert(ltype); + assert(data); + + if (section) { + const char *key; + + key = strjoina(section, ".", lvalue); + p = lookup(key, strlen(key)); + } else + p = lookup(lvalue, strlen(lvalue)); + if (!p) + return 0; + + *func = p->parse; + *ltype = p->ltype; + *data = (uint8_t*) userdata + p->offset; + return 1; +} + +/* Run the user supplied parser for an assignment */ +static int next_assignment( + const char *unit, + const char *filename, + unsigned line, + ConfigItemLookup lookup, + const void *table, + const char *section, + unsigned section_line, + const char *lvalue, + const char *rvalue, + ConfigParseFlags flags, + void *userdata) { + + ConfigParserCallback func = NULL; + int ltype = 0; + void *data = NULL; + int r; + + assert(filename); + assert(line > 0); + assert(lookup); + assert(lvalue); + assert(rvalue); + + r = lookup(table, section, lvalue, &func, <ype, &data, userdata); + if (r < 0) + return r; + if (r > 0) { + if (func) + return func(unit, filename, line, section, section_line, + lvalue, ltype, rvalue, data, userdata); + + return 0; + } + + /* Warn about unknown non-extension fields. */ + if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(lvalue, "X-")) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown lvalue '%s' in section '%s', ignoring", lvalue, section); + + return 0; +} + +/* Parse a single logical line */ +static int parse_line( + const char* unit, + const char *filename, + unsigned line, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + char **section, + unsigned *section_line, + bool *section_ignored, + char *l, + void *userdata) { + + char *e, *include; + + assert(filename); + assert(line > 0); + assert(lookup); + assert(l); + + l = strstrip(l); + if (!*l) + return 0; + + if (*l == '\n') + return 0; + + include = first_word(l, ".include"); + if (include) { + _cleanup_free_ char *fn = NULL; + + /* .includes are a bad idea, we only support them here + * for historical reasons. They create cyclic include + * problems and make it difficult to detect + * configuration file changes with an easy + * stat(). Better approaches, such as .d/ drop-in + * snippets exist. + * + * Support for them should be eventually removed. */ + + if (!(flags & CONFIG_PARSE_ALLOW_INCLUDE)) { + log_syntax(unit, LOG_ERR, filename, line, 0, ".include not allowed here. Ignoring."); + return 0; + } + + log_syntax(unit, LOG_WARNING, filename, line, 0, + ".include directives are deprecated, and support for them will be removed in a future version of systemd. " + "Please use drop-in files instead."); + + fn = file_in_same_dir(filename, strstrip(include)); + if (!fn) + return -ENOMEM; + + return config_parse(unit, fn, NULL, sections, lookup, table, flags, userdata); + } + + if (!utf8_is_valid(l)) + return log_syntax_invalid_utf8(unit, LOG_WARNING, filename, line, l); + + if (*l == '[') { + size_t k; + char *n; + + k = strlen(l); + assert(k > 0); + + if (l[k-1] != ']') { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid section header '%s'", l); + return -EBADMSG; + } + + n = strndup(l+1, k-2); + if (!n) + return -ENOMEM; + + if (sections && !nulstr_contains(sections, n)) { + + if (!(flags & CONFIG_PARSE_RELAXED) && !startswith(n, "X-")) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Unknown section '%s'. Ignoring.", n); + + free(n); + *section = mfree(*section); + *section_line = 0; + *section_ignored = true; + } else { + free_and_replace(*section, n); + *section_line = line; + *section_ignored = false; + } + + return 0; + } + + if (sections && !*section) { + + if (!(flags & CONFIG_PARSE_RELAXED) && !*section_ignored) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Assignment outside of section. Ignoring."); + + return 0; + } + + e = strchr(l, '='); + if (!e) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Missing '='."); + return -EINVAL; + } + + *e = 0; + e++; + + return next_assignment(unit, + filename, + line, + lookup, + table, + *section, + *section_line, + strstrip(l), + strstrip(e), + flags, + userdata); +} + +/* Go through the file and parse each line */ +int config_parse(const char *unit, + const char *filename, + FILE *f, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata) { + + _cleanup_free_ char *section = NULL, *continuation = NULL; + _cleanup_fclose_ FILE *ours = NULL; + unsigned line = 0, section_line = 0; + bool section_ignored = false; + int r; + + assert(filename); + assert(lookup); + + if (!f) { + f = ours = fopen(filename, "re"); + if (!f) { + /* Only log on request, except for ENOENT, + * since we return 0 to the caller. */ + if ((flags & CONFIG_PARSE_WARN) || errno == ENOENT) + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, + "Failed to open configuration file '%s': %m", filename); + return errno == ENOENT ? 0 : -errno; + } + } + + fd_warn_permissions(filename, fileno(f)); + + for (;;) { + _cleanup_free_ char *buf = NULL; + bool escaped = false; + char *l, *p, *e; + + r = read_line(f, LONG_LINE_MAX, &buf); + if (r == 0) + break; + if (r == -ENOBUFS) { + if (flags & CONFIG_PARSE_WARN) + log_error_errno(r, "%s:%u: Line too long", filename, line); + + return r; + } + if (r < 0) { + if (CONFIG_PARSE_WARN) + log_error_errno(r, "%s:%u: Error while reading configuration file: %m", filename, line); + + return r; + } + + if (strchr(COMMENTS, *skip_leading_chars(buf, WHITESPACE))) + continue; + + l = buf; + if (!(flags & CONFIG_PARSE_REFUSE_BOM)) { + char *q; + + q = startswith(buf, UTF8_BYTE_ORDER_MARK); + if (q) { + l = q; + flags |= CONFIG_PARSE_REFUSE_BOM; + } + } + + if (continuation) { + if (strlen(continuation) + strlen(l) > LONG_LINE_MAX) { + if (flags & CONFIG_PARSE_WARN) + log_error("%s:%u: Continuation line too long", filename, line); + return -ENOBUFS; + } + + if (!strextend(&continuation, l, NULL)) { + if (flags & CONFIG_PARSE_WARN) + log_oom(); + return -ENOMEM; + } + + p = continuation; + } else + p = l; + + for (e = p; *e; e++) { + if (escaped) + escaped = false; + else if (*e == '\\') + escaped = true; + } + + if (escaped) { + *(e-1) = ' '; + + if (!continuation) { + continuation = strdup(l); + if (!continuation) { + if (flags & CONFIG_PARSE_WARN) + log_oom(); + return -ENOMEM; + } + } + + continue; + } + + r = parse_line(unit, + filename, + ++line, + sections, + lookup, + table, + flags, + §ion, + §ion_line, + §ion_ignored, + p, + userdata); + if (r < 0) { + if (flags & CONFIG_PARSE_WARN) + log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line); + return r; + } + + continuation = mfree(continuation); + } + + if (continuation) { + r = parse_line(unit, + filename, + ++line, + sections, + lookup, + table, + flags, + §ion, + §ion_line, + §ion_ignored, + continuation, + userdata); + if (r < 0) { + if (flags & CONFIG_PARSE_WARN) + log_warning_errno(r, "%s:%u: Failed to parse file: %m", filename, line); + return r; + } + } + + return 0; +} + +static int config_parse_many_files( + const char *conf_file, + char **files, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata) { + + char **fn; + int r; + + if (conf_file) { + r = config_parse(NULL, conf_file, NULL, sections, lookup, table, flags, userdata); + if (r < 0) + return r; + } + + STRV_FOREACH(fn, files) { + r = config_parse(NULL, *fn, NULL, sections, lookup, table, flags, userdata); + if (r < 0) + return r; + } + + return 0; +} + +/* Parse each config file in the directories specified as nulstr. */ +int config_parse_many_nulstr( + const char *conf_file, + const char *conf_file_dirs, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata) { + + _cleanup_strv_free_ char **files = NULL; + int r; + + r = conf_files_list_nulstr(&files, ".conf", NULL, 0, conf_file_dirs); + if (r < 0) + return r; + + return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata); +} + +/* Parse each config file in the directories specified as strv. */ +int config_parse_many( + const char *conf_file, + const char* const* conf_file_dirs, + const char *dropin_dirname, + const char *sections, + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata) { + + _cleanup_strv_free_ char **dropin_dirs = NULL; + _cleanup_strv_free_ char **files = NULL; + const char *suffix; + int r; + + suffix = strjoina("/", dropin_dirname); + r = strv_extend_strv_concat(&dropin_dirs, (char**) conf_file_dirs, suffix); + if (r < 0) + return r; + + r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char* const*) dropin_dirs); + if (r < 0) + return r; + + return config_parse_many_files(conf_file, files, sections, lookup, table, flags, userdata); +} + +#define DEFINE_PARSER(type, vartype, conv_func) \ + DEFINE_CONFIG_PARSE_PTR(config_parse_##type, conv_func, vartype, "Failed to parse " #type " value") + +DEFINE_PARSER(int, int, safe_atoi); +DEFINE_PARSER(long, long, safe_atoli); +DEFINE_PARSER(uint8, uint8_t, safe_atou8); +DEFINE_PARSER(uint16, uint16_t, safe_atou16); +DEFINE_PARSER(uint32, uint32_t, safe_atou32); +DEFINE_PARSER(uint64, uint64_t, safe_atou64); +DEFINE_PARSER(unsigned, unsigned, safe_atou); +DEFINE_PARSER(double, double, safe_atod); +DEFINE_PARSER(nsec, nsec_t, parse_nsec); +DEFINE_PARSER(sec, usec_t, parse_sec); +DEFINE_PARSER(mode, mode_t, parse_mode); + +int config_parse_iec_size(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + size_t *sz = data; + uint64_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1024, &v); + if (r >= 0 && (uint64_t) (size_t) v != v) + r = -ERANGE; + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue); + return 0; + } + + *sz = (size_t) v; + return 0; +} + +int config_parse_si_size( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + size_t *sz = data; + uint64_t v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1000, &v); + if (r >= 0 && (uint64_t) (size_t) v != v) + r = -ERANGE; + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value '%s', ignoring: %m", rvalue); + return 0; + } + + *sz = (size_t) v; + return 0; +} + +int config_parse_iec_uint64( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *bytes = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1024, bytes); + if (r < 0) + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse size value, ignoring: %s", rvalue); + + return 0; +} + +int config_parse_bool(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int k; + bool *b = data; + bool fatal = ltype; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + k = parse_boolean(rvalue); + if (k < 0) { + log_syntax(unit, LOG_ERR, filename, line, k, + "Failed to parse boolean value%s: %s", + fatal ? "" : ", ignoring", rvalue); + return fatal ? -ENOEXEC : 0; + } + + *b = k; + return 0; +} + +int config_parse_tristate( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int k, *t = data; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + /* A tristate is pretty much a boolean, except that it can + * also take the special value -1, indicating "uninitialized", + * much like NULL is for a pointer type. */ + + k = parse_boolean(rvalue); + if (k < 0) { + log_syntax(unit, LOG_ERR, filename, line, k, "Failed to parse boolean value, ignoring: %s", rvalue); + return 0; + } + + *t = !!k; + return 0; +} + +int config_parse_string( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char **s = data; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (free_and_strdup(s, empty_to_null(rvalue)) < 0) + return log_oom(); + + return 0; +} + +int config_parse_path( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *n = NULL; + bool fatal = ltype; + char **s = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) + goto finalize; + + n = strdup(rvalue); + if (!n) + return log_oom(); + + r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE | (fatal ? PATH_CHECK_FATAL : 0), unit, filename, line, lvalue); + if (r < 0) + return fatal ? -ENOEXEC : 0; + +finalize: + return free_and_replace(*s, n); +} + +int config_parse_strv( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char ***sv = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *sv = strv_free(*sv); + return 0; + } + + for (;;) { + char *word = NULL; + + r = extract_first_word(&rvalue, &word, NULL, EXTRACT_QUOTES|EXTRACT_RETAIN_ESCAPE); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } + + r = strv_consume(sv, word); + if (r < 0) + return log_oom(); + } + + return 0; +} + +int config_parse_warn_compat( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + Disabled reason = ltype; + + switch(reason) { + + case DISABLED_CONFIGURATION: + log_syntax(unit, LOG_DEBUG, filename, line, 0, + "Support for option %s= has been disabled at compile time and it is ignored", lvalue); + break; + + case DISABLED_LEGACY: + log_syntax(unit, LOG_INFO, filename, line, 0, + "Support for option %s= has been removed and it is ignored", lvalue); + break; + + case DISABLED_EXPERIMENTAL: + log_syntax(unit, LOG_INFO, filename, line, 0, + "Support for option %s= has not yet been enabled and it is ignored", lvalue); + break; + } + + return 0; +} + +int config_parse_log_facility( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *o = data, x; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + x = log_facility_unshifted_from_string(rvalue); + if (x < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse log facility, ignoring: %s", rvalue); + return 0; + } + + *o = (x << 3) | LOG_PRI(*o); + + return 0; +} + +int config_parse_log_level( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *o = data, x; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + x = log_level_from_string(rvalue); + if (x < 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse log level, ignoring: %s", rvalue); + return 0; + } + + if (*o < 0) /* if it wasn't initialized so far, assume zero facility */ + *o = x; + else + *o = (*o & LOG_FACMASK) | x; + + return 0; +} + +int config_parse_signal( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + int *sig = data, r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(sig); + + r = signal_from_string(rvalue); + if (r <= 0) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse signal name, ignoring: %s", rvalue); + return 0; + } + + *sig = r; + return 0; +} + +int config_parse_personality( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + unsigned long *personality = data, p; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(personality); + + if (isempty(rvalue)) + p = PERSONALITY_INVALID; + else { + p = personality_from_string(rvalue); + if (p == PERSONALITY_INVALID) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse personality, ignoring: %s", rvalue); + return 0; + } + } + + *personality = p; + return 0; +} + +int config_parse_ifname( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char **s = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *s = mfree(*s); + return 0; + } + + if (!ifname_valid(rvalue)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Interface name is not valid or too long, ignoring assignment: %s", rvalue); + return 0; + } + + r = free_and_strdup(s, rvalue); + if (r < 0) + return log_oom(); + + return 0; +} + +int config_parse_ip_port( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint16_t *s = data; + uint16_t port; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + *s = 0; + return 0; + } + + r = parse_ip_port(rvalue, &port); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse port '%s'.", rvalue); + return 0; + } + + *s = port; + + return 0; +} + +int config_parse_mtu( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint32_t *mtu = data; + int r; + + assert(rvalue); + assert(mtu); + + r = parse_mtu(ltype, rvalue, mtu); + if (r == -ERANGE) { + log_syntax(unit, LOG_ERR, filename, line, r, + "Maximum transfer unit (MTU) value out of range. Permitted range is %" PRIu32 "…%" PRIu32 ", ignoring: %s", + (uint32_t) (ltype == AF_INET6 ? IPV6_MIN_MTU : IPV4_MIN_MTU), (uint32_t) UINT32_MAX, + rvalue); + return 0; + } + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, + "Failed to parse MTU value '%s', ignoring: %m", rvalue); + return 0; + } + + return 0; +} + +int config_parse_rlimit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + struct rlimit **rl = data, d = {}; + int r; + + assert(rvalue); + assert(rl); + + r = rlimit_parse(ltype, rvalue, &d); + if (r == -EILSEQ) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Soft resource limit chosen higher than hard limit, ignoring: %s", rvalue); + return 0; + } + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse resource value, ignoring: %s", rvalue); + return 0; + } + + if (rl[ltype]) + *rl[ltype] = d; + else { + rl[ltype] = newdup(struct rlimit, &d, 1); + if (!rl[ltype]) + return log_oom(); + } + + return 0; +} + +int config_parse_permille(const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + unsigned *permille = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(permille); + + r = parse_permille(rvalue); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, + "Failed to parse permille value, ignoring: %s", rvalue); + return 0; + } + + *permille = (unsigned) r; + + return 0; +} diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h new file mode 100644 index 0000000..865db42 --- /dev/null +++ b/src/shared/conf-parser.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <syslog.h> + +#include "alloc-util.h" +#include "log.h" +#include "macro.h" + +/* An abstract parser for simple, line based, shallow configuration files consisting of variable assignments only. */ + +typedef enum ConfigParseFlags { + CONFIG_PARSE_RELAXED = 1 << 0, + CONFIG_PARSE_ALLOW_INCLUDE = 1 << 1, + CONFIG_PARSE_WARN = 1 << 2, + CONFIG_PARSE_REFUSE_BOM = 1 << 3, +} ConfigParseFlags; + +/* Argument list for parsers of specific configuration settings. */ +#define CONFIG_PARSER_ARGUMENTS \ + const char *unit, \ + const char *filename, \ + unsigned line, \ + const char *section, \ + unsigned section_line, \ + const char *lvalue, \ + int ltype, \ + const char *rvalue, \ + void *data, \ + void *userdata + +/* Prototype for a parser for a specific configuration setting */ +typedef int (*ConfigParserCallback)(CONFIG_PARSER_ARGUMENTS); + +/* A macro declaring the a function prototype, following the typedef above, simply because it's so cumbersomely long + * otherwise. (And current emacs gets irritatingly slow when editing files that contain lots of very long function + * prototypes on the same screen…) */ +#define CONFIG_PARSER_PROTOTYPE(name) int name(CONFIG_PARSER_ARGUMENTS) + +/* Wraps information for parsing a specific configuration variable, to + * be stored in a simple array */ +typedef struct ConfigTableItem { + const char *section; /* Section */ + const char *lvalue; /* Name of the variable */ + ConfigParserCallback parse; /* Function that is called to parse the variable's value */ + int ltype; /* Distinguish different variables passed to the same callback */ + void *data; /* Where to store the variable's data */ +} ConfigTableItem; + +/* Wraps information for parsing a specific configuration variable, to + * be stored in a gperf perfect hashtable */ +typedef struct ConfigPerfItem { + const char *section_and_lvalue; /* Section + "." + name of the variable */ + ConfigParserCallback parse; /* Function that is called to parse the variable's value */ + int ltype; /* Distinguish different variables passed to the same callback */ + size_t offset; /* Offset where to store data, from the beginning of userdata */ +} ConfigPerfItem; + +/* Prototype for a low-level gperf lookup function */ +typedef const ConfigPerfItem* (*ConfigPerfItemLookup)(const char *section_and_lvalue, unsigned length); + +/* Prototype for a generic high-level lookup function */ +typedef int (*ConfigItemLookup)( + const void *table, + const char *section, + const char *lvalue, + ConfigParserCallback *func, + int *ltype, + void **data, + void *userdata); + +/* Linear table search implementation of ConfigItemLookup, based on + * ConfigTableItem arrays */ +int config_item_table_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata); + +/* gperf implementation of ConfigItemLookup, based on gperf + * ConfigPerfItem tables */ +int config_item_perf_lookup(const void *table, const char *section, const char *lvalue, ConfigParserCallback *func, int *ltype, void **data, void *userdata); + +int config_parse( + const char *unit, + const char *filename, + FILE *f, + const char *sections, /* nulstr */ + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata); + +int config_parse_many_nulstr( + const char *conf_file, /* possibly NULL */ + const char *conf_file_dirs, /* nulstr */ + const char *sections, /* nulstr */ + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata); + +int config_parse_many( + const char *conf_file, /* possibly NULL */ + const char* const* conf_file_dirs, + const char *dropin_dirname, + const char *sections, /* nulstr */ + ConfigItemLookup lookup, + const void *table, + ConfigParseFlags flags, + void *userdata); + +CONFIG_PARSER_PROTOTYPE(config_parse_int); +CONFIG_PARSER_PROTOTYPE(config_parse_unsigned); +CONFIG_PARSER_PROTOTYPE(config_parse_long); +CONFIG_PARSER_PROTOTYPE(config_parse_uint8); +CONFIG_PARSER_PROTOTYPE(config_parse_uint16); +CONFIG_PARSER_PROTOTYPE(config_parse_uint32); +CONFIG_PARSER_PROTOTYPE(config_parse_uint64); +CONFIG_PARSER_PROTOTYPE(config_parse_double); +CONFIG_PARSER_PROTOTYPE(config_parse_iec_size); +CONFIG_PARSER_PROTOTYPE(config_parse_si_size); +CONFIG_PARSER_PROTOTYPE(config_parse_iec_uint64); +CONFIG_PARSER_PROTOTYPE(config_parse_bool); +CONFIG_PARSER_PROTOTYPE(config_parse_tristate); +CONFIG_PARSER_PROTOTYPE(config_parse_string); +CONFIG_PARSER_PROTOTYPE(config_parse_path); +CONFIG_PARSER_PROTOTYPE(config_parse_strv); +CONFIG_PARSER_PROTOTYPE(config_parse_sec); +CONFIG_PARSER_PROTOTYPE(config_parse_nsec); +CONFIG_PARSER_PROTOTYPE(config_parse_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_warn_compat); +CONFIG_PARSER_PROTOTYPE(config_parse_log_facility); +CONFIG_PARSER_PROTOTYPE(config_parse_log_level); +CONFIG_PARSER_PROTOTYPE(config_parse_signal); +CONFIG_PARSER_PROTOTYPE(config_parse_personality); +CONFIG_PARSER_PROTOTYPE(config_parse_permille); +CONFIG_PARSER_PROTOTYPE(config_parse_ifname); +CONFIG_PARSER_PROTOTYPE(config_parse_ip_port); +CONFIG_PARSER_PROTOTYPE(config_parse_mtu); +CONFIG_PARSER_PROTOTYPE(config_parse_rlimit); + +typedef enum Disabled { + DISABLED_CONFIGURATION, + DISABLED_LEGACY, + DISABLED_EXPERIMENTAL, +} Disabled; + +#define DEFINE_CONFIG_PARSE(function, parser, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + int *i = data, r; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + r = parser(rvalue); \ + if (r < 0) { \ + log_syntax(unit, LOG_ERR, filename, line, r, \ + msg ", ignoring: %s", rvalue); \ + return 0; \ + } \ + \ + *i = r; \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_PTR(function, parser, type, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type *i = data; \ + int r; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + r = parser(rvalue, i); \ + if (r < 0) \ + log_syntax(unit, LOG_ERR, filename, line, r, \ + msg ", ignoring: %s", rvalue); \ + \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_ENUM(function, name, type, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type *i = data, x; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + x = name##_from_string(rvalue); \ + if (x < 0) { \ + log_syntax(unit, LOG_ERR, filename, line, 0, \ + msg ", ignoring: %s", rvalue); \ + return 0; \ + } \ + \ + *i = x; \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(function, name, type, default_value, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type *i = data, x; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + if (isempty(rvalue)) { \ + *i = default_value; \ + return 0; \ + } \ + \ + x = name##_from_string(rvalue); \ + if (x < 0) { \ + log_syntax(unit, LOG_ERR, filename, line, 0, \ + msg ", ignoring: %s", rvalue); \ + return 0; \ + } \ + \ + *i = x; \ + return 0; \ + } + +#define DEFINE_CONFIG_PARSE_ENUMV(function, name, type, invalid, msg) \ + CONFIG_PARSER_PROTOTYPE(function) { \ + type **enums = data, x, *ys; \ + _cleanup_free_ type *xs = NULL; \ + const char *word, *state; \ + size_t l, i = 0; \ + \ + assert(filename); \ + assert(lvalue); \ + assert(rvalue); \ + assert(data); \ + \ + xs = new0(type, 1); \ + if (!xs) \ + return -ENOMEM; \ + \ + *xs = invalid; \ + \ + FOREACH_WORD(word, l, rvalue, state) { \ + _cleanup_free_ char *en = NULL; \ + type *new_xs; \ + \ + en = strndup(word, l); \ + if (!en) \ + return -ENOMEM; \ + \ + if ((x = name##_from_string(en)) < 0) { \ + log_syntax(unit, LOG_ERR, filename, line, 0, \ + msg ", ignoring: %s", en); \ + continue; \ + } \ + \ + for (ys = xs; x != invalid && *ys != invalid; ys++) { \ + if (*ys == x) { \ + log_syntax(unit, LOG_NOTICE, filename, \ + line, 0, \ + "Duplicate entry, ignoring: %s", \ + en); \ + x = invalid; \ + } \ + } \ + \ + if (x == invalid) \ + continue; \ + \ + *(xs + i) = x; \ + new_xs = realloc(xs, (++i + 1) * sizeof(type)); \ + if (new_xs) \ + xs = new_xs; \ + else \ + return -ENOMEM; \ + \ + *(xs + i) = invalid; \ + } \ + \ + free_and_replace(*enums, xs); \ + return 0; \ + } diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c new file mode 100644 index 0000000..9a789ae --- /dev/null +++ b/src/shared/cpu-set-util.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stddef.h> +#include <syslog.h> + +#include "alloc-util.h" +#include "cpu-set-util.h" +#include "extract-word.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" + +cpu_set_t* cpu_set_malloc(unsigned *ncpus) { + cpu_set_t *c; + unsigned n = 1024; + + /* Allocates the cpuset in the right size */ + + for (;;) { + c = CPU_ALLOC(n); + if (!c) + return NULL; + + if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) { + CPU_ZERO_S(CPU_ALLOC_SIZE(n), c); + + if (ncpus) + *ncpus = n; + + return c; + } + + CPU_FREE(c); + + if (errno != EINVAL) + return NULL; + + n *= 2; + } +} + +int parse_cpu_set_internal( + const char *rvalue, + cpu_set_t **cpu_set, + bool warn, + const char *unit, + const char *filename, + unsigned line, + const char *lvalue) { + + _cleanup_cpu_free_ cpu_set_t *c = NULL; + const char *p = rvalue; + unsigned ncpus = 0; + + assert(rvalue); + + for (;;) { + _cleanup_free_ char *word = NULL; + unsigned cpu, cpu_lower, cpu_upper; + int r; + + r = extract_first_word(&p, &word, WHITESPACE ",", EXTRACT_QUOTES); + if (r == -ENOMEM) + return warn ? log_oom() : -ENOMEM; + if (r < 0) + return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Invalid value for %s: %s", lvalue, rvalue) : r; + if (r == 0) + break; + + if (!c) { + c = cpu_set_malloc(&ncpus); + if (!c) + return warn ? log_oom() : -ENOMEM; + } + + r = parse_range(word, &cpu_lower, &cpu_upper); + if (r < 0) + return warn ? log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse CPU affinity '%s'", word) : r; + if (cpu_lower >= ncpus || cpu_upper >= ncpus) + return warn ? log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU out of range '%s' ncpus is %u", word, ncpus) : -EINVAL; + + if (cpu_lower > cpu_upper) { + if (warn) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Range '%s' is invalid, %u > %u, ignoring", word, cpu_lower, cpu_upper); + continue; + } + + for (cpu = cpu_lower; cpu <= cpu_upper; cpu++) + CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c); + } + + /* On success, sets *cpu_set and returns ncpus for the system. */ + if (c) + *cpu_set = TAKE_PTR(c); + + return (int) ncpus; +} diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h new file mode 100644 index 0000000..1b6bd35 --- /dev/null +++ b/src/shared/cpu-set-util.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sched.h> + +#include "macro.h" + +#ifdef __NCPUBITS +#define CPU_SIZE_TO_NUM(n) ((n) * __NCPUBITS) +#else +#define CPU_SIZE_TO_NUM(n) ((n) * sizeof(cpu_set_t) * 8) +#endif + +DEFINE_TRIVIAL_CLEANUP_FUNC(cpu_set_t*, CPU_FREE); +#define _cleanup_cpu_free_ _cleanup_(CPU_FREEp) + +static inline cpu_set_t* cpu_set_mfree(cpu_set_t *p) { + if (p) + CPU_FREE(p); + return NULL; +} + +cpu_set_t* cpu_set_malloc(unsigned *ncpus); + +int parse_cpu_set_internal(const char *rvalue, cpu_set_t **cpu_set, bool warn, const char *unit, const char *filename, unsigned line, const char *lvalue); + +static inline int parse_cpu_set_and_warn(const char *rvalue, cpu_set_t **cpu_set, const char *unit, const char *filename, unsigned line, const char *lvalue) { + assert(lvalue); + + return parse_cpu_set_internal(rvalue, cpu_set, true, unit, filename, line, lvalue); +} + +static inline int parse_cpu_set(const char *rvalue, cpu_set_t **cpu_set){ + return parse_cpu_set_internal(rvalue, cpu_set, false, NULL, NULL, 0, NULL); +} diff --git a/src/shared/crypt-util.c b/src/shared/crypt-util.c new file mode 100644 index 0000000..20bdc54 --- /dev/null +++ b/src/shared/crypt-util.c @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#if HAVE_LIBCRYPTSETUP +#include "crypt-util.h" +#include "log.h" + +void cryptsetup_log_glue(int level, const char *msg, void *usrptr) { + switch (level) { + case CRYPT_LOG_NORMAL: + level = LOG_NOTICE; + break; + case CRYPT_LOG_ERROR: + level = LOG_ERR; + break; + case CRYPT_LOG_VERBOSE: + level = LOG_INFO; + break; + case CRYPT_LOG_DEBUG: + level = LOG_DEBUG; + break; + default: + log_error("Unknown libcryptsetup log level: %d", level); + level = LOG_ERR; + } + + log_full(level, "%s", msg); +} +#endif diff --git a/src/shared/crypt-util.h b/src/shared/crypt-util.h new file mode 100644 index 0000000..8c86714 --- /dev/null +++ b/src/shared/crypt-util.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if HAVE_LIBCRYPTSETUP +#include <libcryptsetup.h> + +#include "macro.h" + +/* libcryptsetup define for any LUKS version, compatible with libcryptsetup 1.x */ +#ifndef CRYPT_LUKS +#define CRYPT_LUKS NULL +#endif + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct crypt_device *, crypt_free); + +void cryptsetup_log_glue(int level, const char *msg, void *usrptr); +#endif diff --git a/src/shared/daemon-util.h b/src/shared/daemon-util.h new file mode 100644 index 0000000..5e9eca1 --- /dev/null +++ b/src/shared/daemon-util.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "sd-daemon.h" + +#define NOTIFY_READY "READY=1\n" "STATUS=Processing requests..." +#define NOTIFY_STOPPING "STOPPING=1\n" "STATUS=Shutting down..." + +static inline const char *notify_start(const char *start, const char *stop) { + if (start) + (void) sd_notify(false, start); + + return stop; +} + +/* This is intended to be used with _cleanup_ attribute. */ +static inline void notify_on_cleanup(const char **p) { + if (p) + (void) sd_notify(false, *p); +} diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c new file mode 100644 index 0000000..b545c2a --- /dev/null +++ b/src/shared/dev-setup.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dev-setup.h" +#include "label.h" +#include "log.h" +#include "path-util.h" +#include "umask-util.h" +#include "user-util.h" +#include "util.h" + +int dev_setup(const char *prefix, uid_t uid, gid_t gid) { + static const char symlinks[] = + "-/proc/kcore\0" "/dev/core\0" + "/proc/self/fd\0" "/dev/fd\0" + "/proc/self/fd/0\0" "/dev/stdin\0" + "/proc/self/fd/1\0" "/dev/stdout\0" + "/proc/self/fd/2\0" "/dev/stderr\0"; + + const char *j, *k; + int r; + + NULSTR_FOREACH_PAIR(j, k, symlinks) { + _cleanup_free_ char *link_name = NULL; + const char *n; + + if (j[0] == '-') { + j++; + + if (access(j, F_OK) < 0) + continue; + } + + if (prefix) { + link_name = prefix_root(prefix, k); + if (!link_name) + return -ENOMEM; + + n = link_name; + } else + n = k; + + r = symlink_label(j, n); + if (r < 0) + log_debug_errno(r, "Failed to symlink %s to %s: %m", j, n); + + if (uid != UID_INVALID || gid != GID_INVALID) + if (lchown(n, uid, gid) < 0) + log_debug_errno(errno, "Failed to chown %s: %m", n); + } + + return 0; +} + +int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid) { + static const struct { + const char *name; + mode_t mode; + } table[] = { + { "/run/systemd", S_IFDIR | 0755 }, + { "/run/systemd/inaccessible", S_IFDIR | 0000 }, + { "/run/systemd/inaccessible/reg", S_IFREG | 0000 }, + { "/run/systemd/inaccessible/dir", S_IFDIR | 0000 }, + { "/run/systemd/inaccessible/fifo", S_IFIFO | 0000 }, + { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 }, + + /* The following two are likely to fail if we lack the privs for it (for example in an userns + * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 + * device nodes to be created). But that's entirely fine. Consumers of these files should carry + * fallback to use a different node then, for example /run/systemd/inaccessible/sock, which is close + * enough in behaviour and semantics for most uses. */ + { "/run/systemd/inaccessible/chr", S_IFCHR | 0000 }, + { "/run/systemd/inaccessible/blk", S_IFBLK | 0000 }, + }; + + _cleanup_umask_ mode_t u; + size_t i; + int r; + + u = umask(0000); + + /* Set up inaccessible (and empty) file nodes of all types. This are used to as mount sources for over-mounting + * ("masking") file nodes that shall become inaccessible and empty for specific containers or services. We try + * to lock down these nodes as much as we can, but otherwise try to match them as closely as possible with the + * underlying file, i.e. in the best case we offer the same node type as the underlying node. */ + + for (i = 0; i < ELEMENTSOF(table); i++) { + _cleanup_free_ char *path = NULL; + + path = prefix_root(root, table[i].name); + if (!path) + return log_oom(); + + if (S_ISDIR(table[i].mode)) + r = mkdir(path, table[i].mode & 07777); + else + r = mknod(path, table[i].mode, makedev(0, 0)); + if (r < 0) { + if (errno != EEXIST) + log_debug_errno(errno, "Failed to create '%s', ignoring: %m", path); + continue; + } + + if (uid != UID_INVALID || gid != GID_INVALID) { + if (lchown(path, uid, gid) < 0) + log_debug_errno(errno, "Failed to chown '%s': %m", path); + } + } + + return 0; +} diff --git a/src/shared/dev-setup.h b/src/shared/dev-setup.h new file mode 100644 index 0000000..72b90ec --- /dev/null +++ b/src/shared/dev-setup.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <sys/types.h> + +int dev_setup(const char *prefix, uid_t uid, gid_t gid); + +int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid); diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c new file mode 100644 index 0000000..d340487 --- /dev/null +++ b/src/shared/dissect-image.c @@ -0,0 +1,1507 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/mount.h> +#include <sys/prctl.h> +#include <sys/wait.h> + +#include "sd-device.h" +#include "sd-id128.h" + +#include "architecture.h" +#include "ask-password-api.h" +#include "blkid-util.h" +#include "blockdev-util.h" +#include "copy.h" +#include "crypt-util.h" +#include "def.h" +#include "device-nodes.h" +#include "device-util.h" +#include "dissect-image.h" +#include "env-file.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "gpt.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "id128-util.h" +#include "linux-3.13/dm-ioctl.h" +#include "missing.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "os-util.h" +#include "path-util.h" +#include "process-util.h" +#include "raw-clone.h" +#include "signal-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "udev-util.h" +#include "user-util.h" +#include "xattr-util.h" + +int probe_filesystem(const char *node, char **ret_fstype) { + /* Try to find device content type and return it in *ret_fstype. If nothing is found, + * 0/NULL will be returned. -EUCLEAN will be returned for ambigous results, and an + * different error otherwise. */ + +#if HAVE_BLKID + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + const char *fstype; + int r; + + errno = 0; + b = blkid_new_probe_from_filename(node); + if (!b) + return -errno ?: -ENOMEM; + + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); + + errno = 0; + r = blkid_do_safeprobe(b); + if (r == 1) { + log_debug("No type detected on partition %s", node); + goto not_found; + } + if (r == -2) { + log_debug("Results ambiguous for partition %s", node); + return -EUCLEAN; + } + if (r != 0) + return -errno ?: -EIO; + + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + + if (fstype) { + char *t; + + t = strdup(fstype); + if (!t) + return -ENOMEM; + + *ret_fstype = t; + return 1; + } + +not_found: + *ret_fstype = NULL; + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +#if HAVE_BLKID +/* Detect RPMB and Boot partitions, which are not listed by blkid. + * See https://github.com/systemd/systemd/issues/5806. */ +static bool device_is_mmc_special_partition(sd_device *d) { + const char *sysname; + + assert(d); + + if (sd_device_get_sysname(d, &sysname) < 0) + return false; + + return startswith(sysname, "mmcblk") && + (endswith(sysname, "rpmb") || endswith(sysname, "boot0") || endswith(sysname, "boot1")); +} + +static bool device_is_block(sd_device *d) { + const char *ss; + + assert(d); + + if (sd_device_get_subsystem(d, &ss) < 0) + return false; + + return streq(ss, "block"); +} + +static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) { + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + int r; + + assert(d); + assert(ret); + + r = sd_device_enumerator_new(&e); + if (r < 0) + return r; + + r = sd_device_enumerator_allow_uninitialized(e); + if (r < 0) + return r; + + r = sd_device_enumerator_add_match_parent(e, d); + if (r < 0) + return r; + + *ret = TAKE_PTR(e); + return 0; +} + +/* how many times to wait for the device nodes to appear */ +#define N_DEVICE_NODE_LIST_ATTEMPTS 10 + +static int wait_for_partitions_to_appear( + int fd, + sd_device *d, + unsigned num_partitions, + DissectImageFlags flags, + sd_device_enumerator **ret_enumerator) { + + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + sd_device *q; + unsigned n; + int r; + + assert(fd >= 0); + assert(d); + assert(ret_enumerator); + + r = enumerator_for_parent(d, &e); + if (r < 0) + return r; + + /* Count the partitions enumerated by the kernel */ + n = 0; + FOREACH_DEVICE(e, q) { + if (sd_device_get_devnum(q, NULL) < 0) + continue; + if (!device_is_block(q)) + continue; + if (device_is_mmc_special_partition(q)) + continue; + + if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) { + r = device_wait_for_initialization(q, "block", NULL); + if (r < 0) + return r; + } + + n++; + } + + if (n == num_partitions + 1) { + *ret_enumerator = TAKE_PTR(e); + return 0; /* success! */ + } + if (n > num_partitions + 1) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "blkid and kernel partition lists do not match."); + + /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running or it + * got EBUSY because udev already opened the device. Let's reprobe the device, which is a synchronous + * call that waits until probing is complete. */ + + for (unsigned j = 0; ; j++) { + if (j++ > 20) + return -EBUSY; + + if (ioctl(fd, BLKRRPART, 0) >= 0) + break; + r = -errno; + if (r == -EINVAL) { + struct loop_info64 info; + + /* If we are running on a loop device that has partition scanning off, return + * an explicit recognizable error about this, so that callers can generate a + * proper message explaining the situation. */ + + if (ioctl(fd, LOOP_GET_STATUS64, &info) >= 0 && (info.lo_flags & LO_FLAGS_PARTSCAN) == 0) { + log_debug("Device is a loop device and partition scanning is off!"); + return -EPROTONOSUPPORT; + } + } + if (r != -EBUSY) + return r; + + /* If something else has the device open, such as an udev rule, the ioctl will return + * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a bit, + * and try again. + * + * This is really something they should fix in the kernel! */ + (void) usleep(50 * USEC_PER_MSEC); + + } + + return -EAGAIN; /* no success yet, try again */ +} + +static int loop_wait_for_partitions_to_appear( + int fd, + sd_device *d, + unsigned num_partitions, + DissectImageFlags flags, + sd_device_enumerator **ret_enumerator) { + _cleanup_(sd_device_unrefp) sd_device *device = NULL; + int r; + + assert(fd >= 0); + assert(d); + assert(ret_enumerator); + + log_debug("Waiting for device (parent + %d partitions) to appear...", num_partitions); + + if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) { + r = device_wait_for_initialization(d, "block", &device); + if (r < 0) + return r; + } else + device = sd_device_ref(d); + + for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) { + r = wait_for_partitions_to_appear(fd, device, num_partitions, flags, ret_enumerator); + if (r != -EAGAIN) + return r; + } + + return log_debug_errno(SYNTHETIC_ERRNO(ENXIO), + "Kernel partitions dit not appear within %d attempts", + N_DEVICE_NODE_LIST_ATTEMPTS); +} + +#endif + +int dissect_image( + int fd, + const void *root_hash, + size_t root_hash_size, + DissectImageFlags flags, + DissectedImage **ret) { + +#if HAVE_BLKID + sd_id128_t root_uuid = SD_ID128_NULL, verity_uuid = SD_ID128_NULL; + _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL; + bool is_gpt, is_mbr, generic_rw, multiple_generic = false; + _cleanup_(sd_device_unrefp) sd_device *d = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; + _cleanup_(blkid_free_probep) blkid_probe b = NULL; + _cleanup_free_ char *generic_node = NULL; + sd_id128_t generic_uuid = SD_ID128_NULL; + const char *pttype = NULL; + blkid_partlist pl; + int r, generic_nr; + struct stat st; + sd_device *q; + unsigned i; + + assert(fd >= 0); + assert(ret); + assert(root_hash || root_hash_size == 0); + + /* Probes a disk image, and returns information about what it found in *ret. + * + * Returns -ENOPKG if no suitable partition table or file system could be found. + * Returns -EADDRNOTAVAIL if a root hash was specified but no matching root/verity partitions found. */ + + if (root_hash) { + /* If a root hash is supplied, then we use the root partition that has a UUID that match the first + * 128bit of the root hash. And we use the verity partition that has a UUID that match the final + * 128bit. */ + + if (root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + + memcpy(&root_uuid, root_hash, sizeof(sd_id128_t)); + memcpy(&verity_uuid, (const uint8_t*) root_hash + root_hash_size - sizeof(sd_id128_t), sizeof(sd_id128_t)); + + if (sd_id128_is_null(root_uuid)) + return -EINVAL; + if (sd_id128_is_null(verity_uuid)) + return -EINVAL; + } + + if (fstat(fd, &st) < 0) + return -errno; + + if (!S_ISBLK(st.st_mode)) + return -ENOTBLK; + + b = blkid_new_probe(); + if (!b) + return -ENOMEM; + + errno = 0; + r = blkid_probe_set_device(b, fd, 0, 0); + if (r != 0) + return -errno ?: -ENOMEM; + + if ((flags & DISSECT_IMAGE_GPT_ONLY) == 0) { + /* Look for file system superblocks, unless we only shall look for GPT partition tables */ + blkid_probe_enable_superblocks(b, 1); + blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE); + } + + blkid_probe_enable_partitions(b, 1); + blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); + + errno = 0; + r = blkid_do_safeprobe(b); + if (IN_SET(r, -2, 1)) { + log_debug("Failed to identify any partition table."); + return -ENOPKG; + } + if (r != 0) + return -errno ?: -EIO; + + m = new0(DissectedImage, 1); + if (!m) + return -ENOMEM; + + r = sd_device_new_from_devnum(&d, 'b', st.st_rdev); + if (r < 0) + return r; + + if (!(flags & DISSECT_IMAGE_GPT_ONLY) && + (flags & DISSECT_IMAGE_REQUIRE_ROOT)) { + const char *usage = NULL; + + (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); + if (STRPTR_IN_SET(usage, "filesystem", "crypto")) { + _cleanup_free_ char *t = NULL, *n = NULL; + const char *fstype = NULL; + + /* OK, we have found a file system, that's our root partition then. */ + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + + if (fstype) { + t = strdup(fstype); + if (!t) + return -ENOMEM; + } + + r = device_path_make_major_minor(st.st_mode, st.st_rdev, &n); + if (r < 0) + return r; + + m->partitions[PARTITION_ROOT] = (DissectedPartition) { + .found = true, + .rw = true, + .partno = -1, + .architecture = _ARCHITECTURE_INVALID, + .fstype = TAKE_PTR(t), + .node = TAKE_PTR(n), + }; + + m->encrypted = streq_ptr(fstype, "crypto_LUKS"); + + r = loop_wait_for_partitions_to_appear(fd, d, 0, flags, &e); + if (r < 0) + return r; + + *ret = TAKE_PTR(m); + + return 0; + } + } + + (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); + if (!pttype) + return -ENOPKG; + + is_gpt = streq_ptr(pttype, "gpt"); + is_mbr = streq_ptr(pttype, "dos"); + + if (!is_gpt && ((flags & DISSECT_IMAGE_GPT_ONLY) || !is_mbr)) + return -ENOPKG; + + errno = 0; + pl = blkid_probe_get_partitions(b); + if (!pl) + return -errno ?: -ENOMEM; + + r = loop_wait_for_partitions_to_appear(fd, d, blkid_partlist_numof_partitions(pl), flags, &e); + if (r < 0) + return r; + + FOREACH_DEVICE(e, q) { + unsigned long long pflags; + blkid_partition pp; + const char *node; + dev_t qn; + int nr; + + r = sd_device_get_devnum(q, &qn); + if (r < 0) + continue; + + if (st.st_rdev == qn) + continue; + + if (!device_is_block(q)) + continue; + + if (device_is_mmc_special_partition(q)) + continue; + + r = sd_device_get_devname(q, &node); + if (r < 0) + continue; + + pp = blkid_partlist_devno_to_partition(pl, qn); + if (!pp) + continue; + + pflags = blkid_partition_get_flags(pp); + + nr = blkid_partition_get_partno(pp); + if (nr < 0) + continue; + + if (is_gpt) { + int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID; + const char *stype, *sid, *fstype = NULL; + sd_id128_t type_id, id; + bool rw = true; + + sid = blkid_partition_get_uuid(pp); + if (!sid) + continue; + if (sd_id128_from_string(sid, &id) < 0) + continue; + + stype = blkid_partition_get_type_string(pp); + if (!stype) + continue; + if (sd_id128_from_string(stype, &type_id) < 0) + continue; + + if (sd_id128_equal(type_id, GPT_HOME)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_HOME; + rw = !(pflags & GPT_FLAG_READ_ONLY); + } else if (sd_id128_equal(type_id, GPT_SRV)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_SRV; + rw = !(pflags & GPT_FLAG_READ_ONLY); + } else if (sd_id128_equal(type_id, GPT_ESP)) { + + /* Note that we don't check the GPT_FLAG_NO_AUTO flag for the ESP, as it is not defined + * there. We instead check the GPT_FLAG_NO_BLOCK_IO_PROTOCOL, as recommended by the + * UEFI spec (See "12.3.3 Number and Location of System Partitions"). */ + + if (pflags & GPT_FLAG_NO_BLOCK_IO_PROTOCOL) + continue; + + designator = PARTITION_ESP; + fstype = "vfat"; + } +#ifdef GPT_ROOT_NATIVE + else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + /* If a root ID is specified, ignore everything but the root id */ + if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) + continue; + + designator = PARTITION_ROOT; + architecture = native_architecture(); + rw = !(pflags & GPT_FLAG_READ_ONLY); + } else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE_VERITY)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + m->can_verity = true; + + /* Ignore verity unless a root hash is specified */ + if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id)) + continue; + + designator = PARTITION_ROOT_VERITY; + fstype = "DM_verity_hash"; + architecture = native_architecture(); + rw = false; + } +#endif +#ifdef GPT_ROOT_SECONDARY + else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + /* If a root ID is specified, ignore everything but the root id */ + if (!sd_id128_is_null(root_uuid) && !sd_id128_equal(root_uuid, id)) + continue; + + designator = PARTITION_ROOT_SECONDARY; + architecture = SECONDARY_ARCHITECTURE; + rw = !(pflags & GPT_FLAG_READ_ONLY); + } else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY_VERITY)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + m->can_verity = true; + + /* Ignore verity unless root has is specified */ + if (sd_id128_is_null(verity_uuid) || !sd_id128_equal(verity_uuid, id)) + continue; + + designator = PARTITION_ROOT_SECONDARY_VERITY; + fstype = "DM_verity_hash"; + architecture = SECONDARY_ARCHITECTURE; + rw = false; + } +#endif + else if (sd_id128_equal(type_id, GPT_SWAP)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + designator = PARTITION_SWAP; + fstype = "swap"; + } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) { + + if (pflags & GPT_FLAG_NO_AUTO) + continue; + + if (generic_node) + multiple_generic = true; + else { + generic_nr = nr; + generic_rw = !(pflags & GPT_FLAG_READ_ONLY); + generic_uuid = id; + generic_node = strdup(node); + if (!generic_node) + return -ENOMEM; + } + } + + if (designator != _PARTITION_DESIGNATOR_INVALID) { + _cleanup_free_ char *t = NULL, *n = NULL; + + /* First one wins */ + if (m->partitions[designator].found) + continue; + + if (fstype) { + t = strdup(fstype); + if (!t) + return -ENOMEM; + } + + n = strdup(node); + if (!n) + return -ENOMEM; + + m->partitions[designator] = (DissectedPartition) { + .found = true, + .partno = nr, + .rw = rw, + .architecture = architecture, + .node = TAKE_PTR(n), + .fstype = TAKE_PTR(t), + .uuid = id, + }; + } + + } else if (is_mbr) { + + if (pflags != 0x80) /* Bootable flag */ + continue; + + if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */ + continue; + + if (generic_node) + multiple_generic = true; + else { + generic_nr = nr; + generic_rw = true; + generic_node = strdup(node); + if (!generic_node) + return -ENOMEM; + } + } + } + + if (!m->partitions[PARTITION_ROOT].found) { + /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not + * either, then check if there's a single generic one, and use that. */ + + if (m->partitions[PARTITION_ROOT_VERITY].found) + return -EADDRNOTAVAIL; + + if (m->partitions[PARTITION_ROOT_SECONDARY].found) { + m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY]; + zero(m->partitions[PARTITION_ROOT_SECONDARY]); + + m->partitions[PARTITION_ROOT_VERITY] = m->partitions[PARTITION_ROOT_SECONDARY_VERITY]; + zero(m->partitions[PARTITION_ROOT_SECONDARY_VERITY]); + + } else if (flags & DISSECT_IMAGE_REQUIRE_ROOT) { + + /* If the root has was set, then we won't fallback to a generic node, because the root hash + * decides */ + if (root_hash) + return -EADDRNOTAVAIL; + + /* If we didn't find a generic node, then we can't fix this up either */ + if (!generic_node) + return -ENXIO; + + /* If we didn't find a properly marked root partition, but we did find a single suitable + * generic Linux partition, then use this as root partition, if the caller asked for it. */ + if (multiple_generic) + return -ENOTUNIQ; + + m->partitions[PARTITION_ROOT] = (DissectedPartition) { + .found = true, + .rw = generic_rw, + .partno = generic_nr, + .architecture = _ARCHITECTURE_INVALID, + .node = TAKE_PTR(generic_node), + .uuid = generic_uuid, + }; + } + } + + if (root_hash) { + if (!m->partitions[PARTITION_ROOT_VERITY].found || !m->partitions[PARTITION_ROOT].found) + return -EADDRNOTAVAIL; + + /* If we found the primary root with the hash, then we definitely want to suppress any secondary root + * (which would be weird, after all the root hash should only be assigned to one pair of + * partitions... */ + m->partitions[PARTITION_ROOT_SECONDARY].found = false; + m->partitions[PARTITION_ROOT_SECONDARY_VERITY].found = false; + + /* If we found a verity setup, then the root partition is necessarily read-only. */ + m->partitions[PARTITION_ROOT].rw = false; + + m->verity = true; + } + + blkid_free_probe(b); + b = NULL; + + /* Fill in file system types if we don't know them yet. */ + for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + DissectedPartition *p = m->partitions + i; + + if (!p->found) + continue; + + if (!p->fstype && p->node) { + r = probe_filesystem(p->node, &p->fstype); + if (r < 0 && r != -EUCLEAN) + return r; + } + + if (streq_ptr(p->fstype, "crypto_LUKS")) + m->encrypted = true; + + if (p->fstype && fstype_is_ro(p->fstype)) + p->rw = false; + } + + *ret = TAKE_PTR(m); + + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +DissectedImage* dissected_image_unref(DissectedImage *m) { + unsigned i; + + if (!m) + return NULL; + + for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + free(m->partitions[i].fstype); + free(m->partitions[i].node); + free(m->partitions[i].decrypted_fstype); + free(m->partitions[i].decrypted_node); + } + + free(m->hostname); + strv_free(m->machine_info); + strv_free(m->os_release); + + return mfree(m); +} + +static int is_loop_device(const char *path) { + char s[SYS_BLOCK_PATH_MAX("/../loop/")]; + struct stat st; + + assert(path); + + if (stat(path, &st) < 0) + return -errno; + + if (!S_ISBLK(st.st_mode)) + return -ENOTBLK; + + xsprintf_sys_block_path(s, "/loop/", st.st_dev); + if (access(s, F_OK) < 0) { + if (errno != ENOENT) + return -errno; + + /* The device itself isn't a loop device, but maybe it's a partition and its parent is? */ + xsprintf_sys_block_path(s, "/../loop/", st.st_dev); + if (access(s, F_OK) < 0) + return errno == ENOENT ? false : -errno; + } + + return true; +} + +static int mount_partition( + DissectedPartition *m, + const char *where, + const char *directory, + uid_t uid_shift, + DissectImageFlags flags) { + + _cleanup_free_ char *chased = NULL, *options = NULL; + const char *p, *node, *fstype; + bool rw; + int r; + + assert(m); + assert(where); + + node = m->decrypted_node ?: m->node; + fstype = m->decrypted_fstype ?: m->fstype; + + if (!m->found || !node || !fstype) + return 0; + + /* Stacked encryption? Yuck */ + if (streq_ptr(fstype, "crypto_LUKS")) + return -ELOOP; + + rw = m->rw && !(flags & DISSECT_IMAGE_READ_ONLY); + + if (directory) { + r = chase_symlinks(directory, where, CHASE_PREFIX_ROOT, &chased); + if (r < 0) + return r; + + p = chased; + } else + p = where; + + /* If requested, turn on discard support. */ + if (fstype_can_discard(fstype) && + ((flags & DISSECT_IMAGE_DISCARD) || + ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node)))) { + options = strdup("discard"); + if (!options) + return -ENOMEM; + } + + if (uid_is_valid(uid_shift) && uid_shift != 0 && fstype_can_uid_gid(fstype)) { + _cleanup_free_ char *uid_option = NULL; + + if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0) + return -ENOMEM; + + if (!strextend_with_separator(&options, ",", uid_option, NULL)) + return -ENOMEM; + } + + return mount_verbose(LOG_DEBUG, node, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options); +} + +int dissected_image_mount(DissectedImage *m, const char *where, uid_t uid_shift, DissectImageFlags flags) { + int r; + + assert(m); + assert(where); + + if (!m->partitions[PARTITION_ROOT].found) + return -ENXIO; + + if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) { + r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, uid_shift, flags); + if (r < 0) + return r; + + if (flags & DISSECT_IMAGE_VALIDATE_OS) { + r = path_is_os_tree(where); + if (r < 0) + return r; + if (r == 0) + return -EMEDIUMTYPE; + } + } + + if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY) + return 0; + + r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", uid_shift, flags); + if (r < 0) + return r; + + r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", uid_shift, flags); + if (r < 0) + return r; + + if (m->partitions[PARTITION_ESP].found) { + const char *mp; + + /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */ + + FOREACH_STRING(mp, "/efi", "/boot") { + _cleanup_free_ char *p = NULL; + + r = chase_symlinks(mp, where, CHASE_PREFIX_ROOT, &p); + if (r < 0) + continue; + + r = dir_is_empty(p); + if (r > 0) { + r = mount_partition(m->partitions + PARTITION_ESP, where, mp, uid_shift, flags); + if (r < 0) + return r; + } + } + } + + return 0; +} + +#if HAVE_LIBCRYPTSETUP +typedef struct DecryptedPartition { + struct crypt_device *device; + char *name; + bool relinquished; +} DecryptedPartition; + +struct DecryptedImage { + DecryptedPartition *decrypted; + size_t n_decrypted; + size_t n_allocated; +}; +#endif + +DecryptedImage* decrypted_image_unref(DecryptedImage* d) { +#if HAVE_LIBCRYPTSETUP + size_t i; + int r; + + if (!d) + return NULL; + + for (i = 0; i < d->n_decrypted; i++) { + DecryptedPartition *p = d->decrypted + i; + + if (p->device && p->name && !p->relinquished) { + r = crypt_deactivate(p->device, p->name); + if (r < 0) + log_debug_errno(r, "Failed to deactivate encrypted partition %s", p->name); + } + + if (p->device) + crypt_free(p->device); + free(p->name); + } + + free(d); +#endif + return NULL; +} + +#if HAVE_LIBCRYPTSETUP + +static int make_dm_name_and_node(const void *original_node, const char *suffix, char **ret_name, char **ret_node) { + _cleanup_free_ char *name = NULL, *node = NULL; + const char *base; + + assert(original_node); + assert(suffix); + assert(ret_name); + assert(ret_node); + + base = strrchr(original_node, '/'); + if (!base) + return -EINVAL; + base++; + if (isempty(base)) + return -EINVAL; + + name = strjoin(base, suffix); + if (!name) + return -ENOMEM; + if (!filename_is_valid(name)) + return -EINVAL; + + node = strjoin(crypt_get_dir(), "/", name); + if (!node) + return -ENOMEM; + + *ret_name = TAKE_PTR(name); + *ret_node = TAKE_PTR(node); + + return 0; +} + +static int decrypt_partition( + DissectedPartition *m, + const char *passphrase, + DissectImageFlags flags, + DecryptedImage *d) { + + _cleanup_free_ char *node = NULL, *name = NULL; + _cleanup_(crypt_freep) struct crypt_device *cd = NULL; + int r; + + assert(m); + assert(d); + + if (!m->found || !m->node || !m->fstype) + return 0; + + if (!streq(m->fstype, "crypto_LUKS")) + return 0; + + if (!passphrase) + return -ENOKEY; + + r = make_dm_name_and_node(m->node, "-decrypted", &name, &node); + if (r < 0) + return r; + + if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) + return -ENOMEM; + + r = crypt_init(&cd, m->node); + if (r < 0) + return log_debug_errno(r, "Failed to initialize dm-crypt: %m"); + + r = crypt_load(cd, CRYPT_LUKS, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to load LUKS metadata: %m"); + + r = crypt_activate_by_passphrase(cd, name, CRYPT_ANY_SLOT, passphrase, strlen(passphrase), + ((flags & DISSECT_IMAGE_READ_ONLY) ? CRYPT_ACTIVATE_READONLY : 0) | + ((flags & DISSECT_IMAGE_DISCARD_ON_CRYPTO) ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0)); + if (r < 0) { + log_debug_errno(r, "Failed to activate LUKS device: %m"); + return r == -EPERM ? -EKEYREJECTED : r; + } + + d->decrypted[d->n_decrypted].name = TAKE_PTR(name); + d->decrypted[d->n_decrypted].device = TAKE_PTR(cd); + d->n_decrypted++; + + m->decrypted_node = TAKE_PTR(node); + + return 0; +} + +static int verity_partition( + DissectedPartition *m, + DissectedPartition *v, + const void *root_hash, + size_t root_hash_size, + DissectImageFlags flags, + DecryptedImage *d) { + + _cleanup_free_ char *node = NULL, *name = NULL; + _cleanup_(crypt_freep) struct crypt_device *cd = NULL; + int r; + + assert(m); + assert(v); + + if (!root_hash) + return 0; + + if (!m->found || !m->node || !m->fstype) + return 0; + if (!v->found || !v->node || !v->fstype) + return 0; + + if (!streq(v->fstype, "DM_verity_hash")) + return 0; + + r = make_dm_name_and_node(m->node, "-verity", &name, &node); + if (r < 0) + return r; + + if (!GREEDY_REALLOC0(d->decrypted, d->n_allocated, d->n_decrypted + 1)) + return -ENOMEM; + + r = crypt_init(&cd, v->node); + if (r < 0) + return r; + + r = crypt_load(cd, CRYPT_VERITY, NULL); + if (r < 0) + return r; + + r = crypt_set_data_device(cd, m->node); + if (r < 0) + return r; + + r = crypt_activate_by_volume_key(cd, name, root_hash, root_hash_size, CRYPT_ACTIVATE_READONLY); + if (r < 0) + return r; + + d->decrypted[d->n_decrypted].name = TAKE_PTR(name); + d->decrypted[d->n_decrypted].device = TAKE_PTR(cd); + d->n_decrypted++; + + m->decrypted_node = TAKE_PTR(node); + + return 0; +} +#endif + +int dissected_image_decrypt( + DissectedImage *m, + const char *passphrase, + const void *root_hash, + size_t root_hash_size, + DissectImageFlags flags, + DecryptedImage **ret) { + +#if HAVE_LIBCRYPTSETUP + _cleanup_(decrypted_image_unrefp) DecryptedImage *d = NULL; + unsigned i; + int r; +#endif + + assert(m); + assert(root_hash || root_hash_size == 0); + + /* Returns: + * + * = 0 → There was nothing to decrypt + * > 0 → Decrypted successfully + * -ENOKEY → There's something to decrypt but no key was supplied + * -EKEYREJECTED → Passed key was not correct + */ + + if (root_hash && root_hash_size < sizeof(sd_id128_t)) + return -EINVAL; + + if (!m->encrypted && !m->verity) { + *ret = NULL; + return 0; + } + +#if HAVE_LIBCRYPTSETUP + d = new0(DecryptedImage, 1); + if (!d) + return -ENOMEM; + + for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { + DissectedPartition *p = m->partitions + i; + int k; + + if (!p->found) + continue; + + r = decrypt_partition(p, passphrase, flags, d); + if (r < 0) + return r; + + k = PARTITION_VERITY_OF(i); + if (k >= 0) { + r = verity_partition(p, m->partitions + k, root_hash, root_hash_size, flags, d); + if (r < 0) + return r; + } + + if (!p->decrypted_fstype && p->decrypted_node) { + r = probe_filesystem(p->decrypted_node, &p->decrypted_fstype); + if (r < 0 && r != -EUCLEAN) + return r; + } + } + + *ret = TAKE_PTR(d); + + return 1; +#else + return -EOPNOTSUPP; +#endif +} + +int dissected_image_decrypt_interactively( + DissectedImage *m, + const char *passphrase, + const void *root_hash, + size_t root_hash_size, + DissectImageFlags flags, + DecryptedImage **ret) { + + _cleanup_strv_free_erase_ char **z = NULL; + int n = 3, r; + + if (passphrase) + n--; + + for (;;) { + r = dissected_image_decrypt(m, passphrase, root_hash, root_hash_size, flags, ret); + if (r >= 0) + return r; + if (r == -EKEYREJECTED) + log_error_errno(r, "Incorrect passphrase, try again!"); + else if (r != -ENOKEY) + return log_error_errno(r, "Failed to decrypt image: %m"); + + if (--n < 0) + return log_error_errno(SYNTHETIC_ERRNO(EKEYREJECTED), + "Too many retries."); + + z = strv_free(z); + + r = ask_password_auto("Please enter image passphrase:", NULL, "dissect", "dissect", USEC_INFINITY, 0, &z); + if (r < 0) + return log_error_errno(r, "Failed to query for passphrase: %m"); + + passphrase = z[0]; + } +} + +#if HAVE_LIBCRYPTSETUP +static int deferred_remove(DecryptedPartition *p) { + struct dm_ioctl dm = { + .version = { + DM_VERSION_MAJOR, + DM_VERSION_MINOR, + DM_VERSION_PATCHLEVEL + }, + .data_size = sizeof(dm), + .flags = DM_DEFERRED_REMOVE, + }; + + _cleanup_close_ int fd = -1; + + assert(p); + + /* Unfortunately, libcryptsetup doesn't provide a proper API for this, hence call the ioctl() directly. */ + + fd = open("/dev/mapper/control", O_RDWR|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (strlen(p->name) > sizeof(dm.name)) + return -ENAMETOOLONG; + + strncpy(dm.name, p->name, sizeof(dm.name)); + + if (ioctl(fd, DM_DEV_REMOVE, &dm)) + return -errno; + + return 0; +} +#endif + +int decrypted_image_relinquish(DecryptedImage *d) { + +#if HAVE_LIBCRYPTSETUP + size_t i; + int r; +#endif + + assert(d); + + /* Turns on automatic removal after the last use ended for all DM devices of this image, and sets a boolean so + * that we don't clean it up ourselves either anymore */ + +#if HAVE_LIBCRYPTSETUP + for (i = 0; i < d->n_decrypted; i++) { + DecryptedPartition *p = d->decrypted + i; + + if (p->relinquished) + continue; + + r = deferred_remove(p); + if (r < 0) + return log_debug_errno(r, "Failed to mark %s for auto-removal: %m", p->name); + + p->relinquished = true; + } +#endif + + return 0; +} + +int root_hash_load(const char *image, void **ret, size_t *ret_size) { + _cleanup_free_ char *text = NULL; + _cleanup_free_ void *k = NULL; + size_t l; + int r; + + assert(image); + assert(ret); + assert(ret_size); + + if (is_device_path(image)) { + /* If we are asked to load the root hash for a device node, exit early */ + *ret = NULL; + *ret_size = 0; + return 0; + } + + r = getxattr_malloc(image, "user.verity.roothash", &text, true); + if (r < 0) { + char *fn, *e, *n; + + if (!IN_SET(r, -ENODATA, -EOPNOTSUPP, -ENOENT)) + return r; + + fn = newa(char, strlen(image) + STRLEN(".roothash") + 1); + n = stpcpy(fn, image); + e = endswith(fn, ".raw"); + if (e) + n = e; + + strcpy(n, ".roothash"); + + r = read_one_line_file(fn, &text); + if (r == -ENOENT) { + *ret = NULL; + *ret_size = 0; + return 0; + } + if (r < 0) + return r; + } + + r = unhexmem(text, strlen(text), &k, &l); + if (r < 0) + return r; + if (l < sizeof(sd_id128_t)) + return -EINVAL; + + *ret = TAKE_PTR(k); + *ret_size = l; + + return 1; +} + +int dissected_image_acquire_metadata(DissectedImage *m) { + + enum { + META_HOSTNAME, + META_MACHINE_ID, + META_MACHINE_INFO, + META_OS_RELEASE, + _META_MAX, + }; + + static const char *const paths[_META_MAX] = { + [META_HOSTNAME] = "/etc/hostname\0", + [META_MACHINE_ID] = "/etc/machine-id\0", + [META_MACHINE_INFO] = "/etc/machine-info\0", + [META_OS_RELEASE] = "/etc/os-release\0/usr/lib/os-release\0", + }; + + _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL; + _cleanup_(rmdir_and_freep) char *t = NULL; + _cleanup_(sigkill_waitp) pid_t child = 0; + sd_id128_t machine_id = SD_ID128_NULL; + _cleanup_free_ char *hostname = NULL; + unsigned n_meta_initialized = 0, k; + int fds[2 * _META_MAX], r; + + BLOCK_SIGNALS(SIGCHLD); + + assert(m); + + for (; n_meta_initialized < _META_MAX; n_meta_initialized ++) + if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) { + r = -errno; + goto finish; + } + + r = mkdtemp_malloc("/tmp/dissect-XXXXXX", &t); + if (r < 0) + goto finish; + + r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child); + if (r < 0) + goto finish; + if (r == 0) { + r = dissected_image_mount(m, t, UID_INVALID, DISSECT_IMAGE_READ_ONLY|DISSECT_IMAGE_MOUNT_ROOT_ONLY|DISSECT_IMAGE_VALIDATE_OS); + if (r < 0) { + log_debug_errno(r, "Failed to mount dissected image: %m"); + _exit(EXIT_FAILURE); + } + + for (k = 0; k < _META_MAX; k++) { + _cleanup_close_ int fd = -1; + const char *p; + + fds[2*k] = safe_close(fds[2*k]); + + NULSTR_FOREACH(p, paths[k]) { + fd = chase_symlinks_and_open(p, t, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd >= 0) + break; + } + if (fd < 0) { + log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]); + continue; + } + + r = copy_bytes(fd, fds[2*k+1], (uint64_t) -1, 0); + if (r < 0) + _exit(EXIT_FAILURE); + + fds[2*k+1] = safe_close(fds[2*k+1]); + } + + _exit(EXIT_SUCCESS); + } + + for (k = 0; k < _META_MAX; k++) { + _cleanup_fclose_ FILE *f = NULL; + + fds[2*k+1] = safe_close(fds[2*k+1]); + + f = fdopen(fds[2*k], "r"); + if (!f) { + r = -errno; + goto finish; + } + + fds[2*k] = -1; + + switch (k) { + + case META_HOSTNAME: + r = read_etc_hostname_stream(f, &hostname); + if (r < 0) + log_debug_errno(r, "Failed to read /etc/hostname: %m"); + + break; + + case META_MACHINE_ID: { + _cleanup_free_ char *line = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + log_debug_errno(r, "Failed to read /etc/machine-id: %m"); + else if (r == 33) { + r = sd_id128_from_string(line, &machine_id); + if (r < 0) + log_debug_errno(r, "Image contains invalid /etc/machine-id: %s", line); + } else if (r == 0) + log_debug("/etc/machine-id file is empty."); + else + log_debug("/etc/machine-id has unexpected length %i.", r); + + break; + } + + case META_MACHINE_INFO: + r = load_env_file_pairs(f, "machine-info", &machine_info); + if (r < 0) + log_debug_errno(r, "Failed to read /etc/machine-info: %m"); + + break; + + case META_OS_RELEASE: + r = load_env_file_pairs(f, "os-release", &os_release); + if (r < 0) + log_debug_errno(r, "Failed to read OS release file: %m"); + + break; + } + } + + r = wait_for_terminate_and_check("(sd-dissect)", child, 0); + child = 0; + if (r < 0) + goto finish; + if (r != EXIT_SUCCESS) + return -EPROTO; + + free_and_replace(m->hostname, hostname); + m->machine_id = machine_id; + strv_free_and_replace(m->machine_info, machine_info); + strv_free_and_replace(m->os_release, os_release); + +finish: + for (k = 0; k < n_meta_initialized; k++) + safe_close_pair(fds + 2*k); + + return r; +} + +int dissect_image_and_warn( + int fd, + const char *name, + const void *root_hash, + size_t root_hash_size, + DissectImageFlags flags, + DissectedImage **ret) { + + _cleanup_free_ char *buffer = NULL; + int r; + + if (!name) { + r = fd_get_path(fd, &buffer); + if (r < 0) + return r; + + name = buffer; + } + + r = dissect_image(fd, root_hash, root_hash_size, flags, ret); + + switch (r) { + + case -EOPNOTSUPP: + return log_error_errno(r, "Dissecting images is not supported, compiled without blkid support."); + + case -ENOPKG: + return log_error_errno(r, "Couldn't identify a suitable partition table or file system in '%s'.", name); + + case -EADDRNOTAVAIL: + return log_error_errno(r, "No root partition for specified root hash found in '%s'.", name); + + case -ENOTUNIQ: + return log_error_errno(r, "Multiple suitable root partitions found in image '%s'.", name); + + case -ENXIO: + return log_error_errno(r, "No suitable root partition found in image '%s'.", name); + + case -EPROTONOSUPPORT: + return log_error_errno(r, "Device '%s' is loopback block device with partition scanning turned off, please turn it on.", name); + + default: + if (r < 0) + return log_error_errno(r, "Failed to dissect image '%s': %m", name); + + return r; + } +} + +static const char *const partition_designator_table[] = { + [PARTITION_ROOT] = "root", + [PARTITION_ROOT_SECONDARY] = "root-secondary", + [PARTITION_HOME] = "home", + [PARTITION_SRV] = "srv", + [PARTITION_ESP] = "esp", + [PARTITION_SWAP] = "swap", + [PARTITION_ROOT_VERITY] = "root-verity", + [PARTITION_ROOT_SECONDARY_VERITY] = "root-secondary-verity", +}; + +DEFINE_STRING_TABLE_LOOKUP(partition_designator, int); diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h new file mode 100644 index 0000000..f50b40e --- /dev/null +++ b/src/shared/dissect-image.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "sd-id128.h" + +#include "macro.h" + +typedef struct DissectedImage DissectedImage; +typedef struct DissectedPartition DissectedPartition; +typedef struct DecryptedImage DecryptedImage; + +struct DissectedPartition { + bool found:1; + bool rw:1; + int partno; /* -1 if there was no partition and the images contains a file system directly */ + int architecture; /* Intended architecture: either native, secondary or unset (-1). */ + sd_id128_t uuid; /* Partition entry UUID as reported by the GPT */ + char *fstype; + char *node; + char *decrypted_node; + char *decrypted_fstype; +}; + +enum { + PARTITION_ROOT, + PARTITION_ROOT_SECONDARY, /* Secondary architecture */ + PARTITION_HOME, + PARTITION_SRV, + PARTITION_ESP, + PARTITION_SWAP, + PARTITION_ROOT_VERITY, /* verity data for the PARTITION_ROOT partition */ + PARTITION_ROOT_SECONDARY_VERITY, /* verity data for the PARTITION_ROOT_SECONDARY partition */ + _PARTITION_DESIGNATOR_MAX, + _PARTITION_DESIGNATOR_INVALID = -1 +}; + +static inline int PARTITION_VERITY_OF(int p) { + if (p == PARTITION_ROOT) + return PARTITION_ROOT_VERITY; + if (p == PARTITION_ROOT_SECONDARY) + return PARTITION_ROOT_SECONDARY_VERITY; + return _PARTITION_DESIGNATOR_INVALID; +} + +typedef enum DissectImageFlags { + DISSECT_IMAGE_READ_ONLY = 1 << 0, + DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */ + DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */ + DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */ + DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP | + DISSECT_IMAGE_DISCARD | + DISSECT_IMAGE_DISCARD_ON_CRYPTO, + DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */ + DISSECT_IMAGE_REQUIRE_ROOT = 1 << 5, /* Don't accept disks without root partition */ + DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root partition */ + DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only non-root partitions */ + DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifyable as OS images */ + DISSECT_IMAGE_NO_UDEV = 1 << 9, /* Don't wait for udev initializing things */ +} DissectImageFlags; + +struct DissectedImage { + bool encrypted:1; + bool verity:1; /* verity available and usable */ + bool can_verity:1; /* verity available, but not necessarily used */ + + DissectedPartition partitions[_PARTITION_DESIGNATOR_MAX]; + + char *hostname; + sd_id128_t machine_id; + char **machine_info; + char **os_release; +}; + +int probe_filesystem(const char *node, char **ret_fstype); +int dissect_image(int fd, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret); +int dissect_image_and_warn(int fd, const char *name, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DissectedImage **ret); + +DissectedImage* dissected_image_unref(DissectedImage *m); +DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref); + +int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const void *root_hash, size_t root_hash_size, DissectImageFlags flags, DecryptedImage **ret); +int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, DissectImageFlags flags); + +int dissected_image_acquire_metadata(DissectedImage *m); + +DecryptedImage* decrypted_image_unref(DecryptedImage *p); +DEFINE_TRIVIAL_CLEANUP_FUNC(DecryptedImage*, decrypted_image_unref); +int decrypted_image_relinquish(DecryptedImage *d); + +const char* partition_designator_to_string(int i) _const_; +int partition_designator_from_string(const char *name) _pure_; + +int root_hash_load(const char *image, void **ret, size_t *ret_size); diff --git a/src/shared/dns-domain.c b/src/shared/dns-domain.c new file mode 100644 index 0000000..4b31cb3 --- /dev/null +++ b/src/shared/dns-domain.c @@ -0,0 +1,1375 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#if HAVE_LIBIDN2 +# include <idn2.h> +#elif HAVE_LIBIDN +# include <idna.h> +# include <stringprep.h> +#endif + +#include <endian.h> +#include <netinet/in.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> + +#include "alloc-util.h" +#include "dns-domain.h" +#include "hashmap.h" +#include "hexdecoct.h" +#include "hostname-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" +#include "strv.h" +#include "utf8.h" + +int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags) { + const char *n; + char *d, last_char = 0; + int r = 0; + + assert(name); + assert(*name); + + n = *name; + d = dest; + + for (;;) { + if (*n == 0 || *n == '.') { + if (FLAGS_SET(flags, DNS_LABEL_LDH) && last_char == '-') + /* Trailing dash */ + return -EINVAL; + + if (*n == '.') + n++; + break; + } + + if (r >= DNS_LABEL_MAX) + return -EINVAL; + + if (sz <= 0) + return -ENOBUFS; + + if (*n == '\\') { + /* Escaped character */ + if (FLAGS_SET(flags, DNS_LABEL_NO_ESCAPES)) + return -EINVAL; + + n++; + + if (*n == 0) + /* Ending NUL */ + return -EINVAL; + + else if (IN_SET(*n, '\\', '.')) { + /* Escaped backslash or dot */ + + if (FLAGS_SET(flags, DNS_LABEL_LDH)) + return -EINVAL; + + last_char = *n; + if (d) + *(d++) = *n; + sz--; + r++; + n++; + + } else if (n[0] >= '0' && n[0] <= '9') { + unsigned k; + + /* Escaped literal ASCII character */ + + if (!(n[1] >= '0' && n[1] <= '9') || + !(n[2] >= '0' && n[2] <= '9')) + return -EINVAL; + + k = ((unsigned) (n[0] - '0') * 100) + + ((unsigned) (n[1] - '0') * 10) + + ((unsigned) (n[2] - '0')); + + /* Don't allow anything that doesn't + * fit in 8bit. Note that we do allow + * control characters, as some servers + * (e.g. cloudflare) are happy to + * generate labels with them + * inside. */ + if (k > 255) + return -EINVAL; + + if (FLAGS_SET(flags, DNS_LABEL_LDH) && + !valid_ldh_char((char) k)) + return -EINVAL; + + last_char = (char) k; + if (d) + *(d++) = (char) k; + sz--; + r++; + + n += 3; + } else + return -EINVAL; + + } else if ((uint8_t) *n >= (uint8_t) ' ' && *n != 127) { + + /* Normal character */ + + if (FLAGS_SET(flags, DNS_LABEL_LDH)) { + if (!valid_ldh_char(*n)) + return -EINVAL; + if (r == 0 && *n == '-') + /* Leading dash */ + return -EINVAL; + } + + last_char = *n; + if (d) + *(d++) = *n; + sz--; + r++; + n++; + } else + return -EINVAL; + } + + /* Empty label that is not at the end? */ + if (r == 0 && *n) + return -EINVAL; + + /* More than one trailing dot? */ + if (*n == '.') + return -EINVAL; + + if (sz >= 1 && d) + *d = 0; + + *name = n; + return r; +} + +/* @label_terminal: terminal character of a label, updated to point to the terminal character of + * the previous label (always skipping one dot) or to NULL if there are no more + * labels. */ +int dns_label_unescape_suffix(const char *name, const char **label_terminal, char *dest, size_t sz) { + const char *terminal; + int r; + + assert(name); + assert(label_terminal); + assert(dest); + + /* no more labels */ + if (!*label_terminal) { + if (sz >= 1) + *dest = 0; + + return 0; + } + + terminal = *label_terminal; + assert(IN_SET(*terminal, 0, '.')); + + /* Skip current terminal character (and accept domain names ending it ".") */ + if (*terminal == 0) + terminal--; + if (terminal >= name && *terminal == '.') + terminal--; + + /* Point name to the last label, and terminal to the preceding terminal symbol (or make it a NULL pointer) */ + for (;;) { + if (terminal < name) { + /* Reached the first label, so indicate that there are no more */ + terminal = NULL; + break; + } + + /* Find the start of the last label */ + if (*terminal == '.') { + const char *y; + unsigned slashes = 0; + + for (y = terminal - 1; y >= name && *y == '\\'; y--) + slashes++; + + if (slashes % 2 == 0) { + /* The '.' was not escaped */ + name = terminal + 1; + break; + } else { + terminal = y; + continue; + } + } + + terminal--; + } + + r = dns_label_unescape(&name, dest, sz, 0); + if (r < 0) + return r; + + *label_terminal = terminal; + + return r; +} + +int dns_label_escape(const char *p, size_t l, char *dest, size_t sz) { + char *q; + + /* DNS labels must be between 1 and 63 characters long. A + * zero-length label does not exist. See RFC 2182, Section + * 11. */ + + if (l <= 0 || l > DNS_LABEL_MAX) + return -EINVAL; + if (sz < 1) + return -ENOBUFS; + + assert(p); + assert(dest); + + q = dest; + while (l > 0) { + + if (IN_SET(*p, '.', '\\')) { + + /* Dot or backslash */ + + if (sz < 3) + return -ENOBUFS; + + *(q++) = '\\'; + *(q++) = *p; + + sz -= 2; + + } else if (IN_SET(*p, '_', '-') || + (*p >= '0' && *p <= '9') || + (*p >= 'a' && *p <= 'z') || + (*p >= 'A' && *p <= 'Z')) { + + /* Proper character */ + + if (sz < 2) + return -ENOBUFS; + + *(q++) = *p; + sz -= 1; + + } else { + + /* Everything else */ + + if (sz < 5) + return -ENOBUFS; + + *(q++) = '\\'; + *(q++) = '0' + (char) ((uint8_t) *p / 100); + *(q++) = '0' + (char) (((uint8_t) *p / 10) % 10); + *(q++) = '0' + (char) ((uint8_t) *p % 10); + + sz -= 4; + } + + p++; + l--; + } + + *q = 0; + return (int) (q - dest); +} + +int dns_label_escape_new(const char *p, size_t l, char **ret) { + _cleanup_free_ char *s = NULL; + int r; + + assert(p); + assert(ret); + + if (l <= 0 || l > DNS_LABEL_MAX) + return -EINVAL; + + s = new(char, DNS_LABEL_ESCAPED_MAX); + if (!s) + return -ENOMEM; + + r = dns_label_escape(p, l, s, DNS_LABEL_ESCAPED_MAX); + if (r < 0) + return r; + + *ret = TAKE_PTR(s); + + return r; +} + +#if HAVE_LIBIDN +int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) { + _cleanup_free_ uint32_t *input = NULL; + size_t input_size, l; + const char *p; + bool contains_8bit = false; + char buffer[DNS_LABEL_MAX+1]; + + assert(encoded); + assert(decoded); + + /* Converts an U-label into an A-label */ + + if (encoded_size <= 0) + return -EINVAL; + + for (p = encoded; p < encoded + encoded_size; p++) + if ((uint8_t) *p > 127) + contains_8bit = true; + + if (!contains_8bit) { + if (encoded_size > DNS_LABEL_MAX) + return -EINVAL; + + return 0; + } + + input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size); + if (!input) + return -ENOMEM; + + if (idna_to_ascii_4i(input, input_size, buffer, 0) != 0) + return -EINVAL; + + l = strlen(buffer); + + /* Verify that the result is not longer than one DNS label. */ + if (l <= 0 || l > DNS_LABEL_MAX) + return -EINVAL; + if (l > decoded_max) + return -ENOBUFS; + + memcpy(decoded, buffer, l); + + /* If there's room, append a trailing NUL byte, but only then */ + if (decoded_max > l) + decoded[l] = 0; + + return (int) l; +} + +int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max) { + size_t input_size, output_size; + _cleanup_free_ uint32_t *input = NULL; + _cleanup_free_ char *result = NULL; + uint32_t *output = NULL; + size_t w; + + /* To be invoked after unescaping. Converts an A-label into an U-label. */ + + assert(encoded); + assert(decoded); + + if (encoded_size <= 0 || encoded_size > DNS_LABEL_MAX) + return -EINVAL; + + if (!memory_startswith(encoded, encoded_size, IDNA_ACE_PREFIX)) + return 0; + + input = stringprep_utf8_to_ucs4(encoded, encoded_size, &input_size); + if (!input) + return -ENOMEM; + + output_size = input_size; + output = newa(uint32_t, output_size); + + idna_to_unicode_44i(input, input_size, output, &output_size, 0); + + result = stringprep_ucs4_to_utf8(output, output_size, NULL, &w); + if (!result) + return -ENOMEM; + if (w <= 0) + return -EINVAL; + if (w > decoded_max) + return -ENOBUFS; + + memcpy(decoded, result, w); + + /* Append trailing NUL byte if there's space, but only then. */ + if (decoded_max > w) + decoded[w] = 0; + + return w; +} +#endif + +int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **_ret) { + _cleanup_free_ char *ret = NULL; + size_t n = 0, allocated = 0; + const char *p; + bool first = true; + int r; + + if (a) + p = a; + else if (b) + p = TAKE_PTR(b); + else + goto finish; + + for (;;) { + char label[DNS_LABEL_MAX]; + + r = dns_label_unescape(&p, label, sizeof label, flags); + if (r < 0) + return r; + if (r == 0) { + if (*p != 0) + return -EINVAL; + + if (b) { + /* Now continue with the second string, if there is one */ + p = TAKE_PTR(b); + continue; + } + + break; + } + + if (_ret) { + if (!GREEDY_REALLOC(ret, allocated, n + !first + DNS_LABEL_ESCAPED_MAX)) + return -ENOMEM; + + r = dns_label_escape(label, r, ret + n + !first, DNS_LABEL_ESCAPED_MAX); + if (r < 0) + return r; + + if (!first) + ret[n] = '.'; + } else { + char escaped[DNS_LABEL_ESCAPED_MAX]; + + r = dns_label_escape(label, r, escaped, sizeof(escaped)); + if (r < 0) + return r; + } + + if (!first) + n++; + else + first = false; + + n += r; + } + +finish: + if (n > DNS_HOSTNAME_MAX) + return -EINVAL; + + if (_ret) { + if (n == 0) { + /* Nothing appended? If so, generate at least a single dot, to indicate the DNS root domain */ + if (!GREEDY_REALLOC(ret, allocated, 2)) + return -ENOMEM; + + ret[n++] = '.'; + } else { + if (!GREEDY_REALLOC(ret, allocated, n + 1)) + return -ENOMEM; + } + + ret[n] = 0; + *_ret = TAKE_PTR(ret); + } + + return 0; +} + +void dns_name_hash_func(const char *p, struct siphash *state) { + int r; + + assert(p); + + for (;;) { + char label[DNS_LABEL_MAX+1]; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r < 0) + break; + if (r == 0) + break; + + ascii_strlower_n(label, r); + siphash24_compress(label, r, state); + siphash24_compress_byte(0, state); /* make sure foobar and foo.bar result in different hashes */ + } + + /* enforce that all names are terminated by the empty label */ + string_hash_func("", state); +} + +int dns_name_compare_func(const char *a, const char *b) { + const char *x, *y; + int r, q; + + assert(a); + assert(b); + + x = a + strlen(a); + y = b + strlen(b); + + for (;;) { + char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX]; + + if (x == NULL && y == NULL) + return 0; + + r = dns_label_unescape_suffix(a, &x, la, sizeof(la)); + q = dns_label_unescape_suffix(b, &y, lb, sizeof(lb)); + if (r < 0 || q < 0) + return CMP(r, q); + + r = ascii_strcasecmp_nn(la, r, lb, q); + if (r != 0) + return r; + } +} + +DEFINE_HASH_OPS(dns_name_hash_ops, char, dns_name_hash_func, dns_name_compare_func); + +int dns_name_equal(const char *x, const char *y) { + int r, q; + + assert(x); + assert(y); + + for (;;) { + char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX]; + + r = dns_label_unescape(&x, la, sizeof la, 0); + if (r < 0) + return r; + + q = dns_label_unescape(&y, lb, sizeof lb, 0); + if (q < 0) + return q; + + if (r != q) + return false; + if (r == 0) + return true; + + if (ascii_strcasecmp_n(la, lb, r) != 0) + return false; + } +} + +int dns_name_endswith(const char *name, const char *suffix) { + const char *n, *s, *saved_n = NULL; + int r, q; + + assert(name); + assert(suffix); + + n = name; + s = suffix; + + for (;;) { + char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX]; + + r = dns_label_unescape(&n, ln, sizeof ln, 0); + if (r < 0) + return r; + + if (!saved_n) + saved_n = n; + + q = dns_label_unescape(&s, ls, sizeof ls, 0); + if (q < 0) + return q; + + if (r == 0 && q == 0) + return true; + if (r == 0 && saved_n == n) + return false; + + if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) { + + /* Not the same, let's jump back, and try with the next label again */ + s = suffix; + n = TAKE_PTR(saved_n); + } + } +} + +int dns_name_startswith(const char *name, const char *prefix) { + const char *n, *p; + int r, q; + + assert(name); + assert(prefix); + + n = name; + p = prefix; + + for (;;) { + char ln[DNS_LABEL_MAX], lp[DNS_LABEL_MAX]; + + r = dns_label_unescape(&p, lp, sizeof lp, 0); + if (r < 0) + return r; + if (r == 0) + return true; + + q = dns_label_unescape(&n, ln, sizeof ln, 0); + if (q < 0) + return q; + + if (r != q) + return false; + if (ascii_strcasecmp_n(ln, lp, r) != 0) + return false; + } +} + +int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret) { + const char *n, *s, *saved_before = NULL, *saved_after = NULL, *prefix; + int r, q; + + assert(name); + assert(old_suffix); + assert(new_suffix); + assert(ret); + + n = name; + s = old_suffix; + + for (;;) { + char ln[DNS_LABEL_MAX], ls[DNS_LABEL_MAX]; + + if (!saved_before) + saved_before = n; + + r = dns_label_unescape(&n, ln, sizeof ln, 0); + if (r < 0) + return r; + + if (!saved_after) + saved_after = n; + + q = dns_label_unescape(&s, ls, sizeof ls, 0); + if (q < 0) + return q; + + if (r == 0 && q == 0) + break; + if (r == 0 && saved_after == n) { + *ret = NULL; /* doesn't match */ + return 0; + } + + if (r != q || ascii_strcasecmp_n(ln, ls, r) != 0) { + + /* Not the same, let's jump back, and try with the next label again */ + s = old_suffix; + n = TAKE_PTR(saved_after); + saved_before = NULL; + } + } + + /* Found it! Now generate the new name */ + prefix = strndupa(name, saved_before - name); + + r = dns_name_concat(prefix, new_suffix, 0, ret); + if (r < 0) + return r; + + return 1; +} + +int dns_name_between(const char *a, const char *b, const char *c) { + /* Determine if b is strictly greater than a and strictly smaller than c. + We consider the order of names to be circular, so that if a is + strictly greater than c, we consider b to be between them if it is + either greater than a or smaller than c. This is how the canonical + DNS name order used in NSEC records work. */ + + if (dns_name_compare_func(a, c) < 0) + /* + a and c are properly ordered: + a<---b--->c + */ + return dns_name_compare_func(a, b) < 0 && + dns_name_compare_func(b, c) < 0; + else + /* + a and c are equal or 'reversed': + <--b--c a-----> + or: + <-----c a--b--> + */ + return dns_name_compare_func(b, c) < 0 || + dns_name_compare_func(a, b) < 0; +} + +int dns_name_reverse(int family, const union in_addr_union *a, char **ret) { + const uint8_t *p; + int r; + + assert(a); + assert(ret); + + p = (const uint8_t*) a; + + if (family == AF_INET) + r = asprintf(ret, "%u.%u.%u.%u.in-addr.arpa", p[3], p[2], p[1], p[0]); + else if (family == AF_INET6) + r = asprintf(ret, "%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.%c.ip6.arpa", + hexchar(p[15] & 0xF), hexchar(p[15] >> 4), hexchar(p[14] & 0xF), hexchar(p[14] >> 4), + hexchar(p[13] & 0xF), hexchar(p[13] >> 4), hexchar(p[12] & 0xF), hexchar(p[12] >> 4), + hexchar(p[11] & 0xF), hexchar(p[11] >> 4), hexchar(p[10] & 0xF), hexchar(p[10] >> 4), + hexchar(p[ 9] & 0xF), hexchar(p[ 9] >> 4), hexchar(p[ 8] & 0xF), hexchar(p[ 8] >> 4), + hexchar(p[ 7] & 0xF), hexchar(p[ 7] >> 4), hexchar(p[ 6] & 0xF), hexchar(p[ 6] >> 4), + hexchar(p[ 5] & 0xF), hexchar(p[ 5] >> 4), hexchar(p[ 4] & 0xF), hexchar(p[ 4] >> 4), + hexchar(p[ 3] & 0xF), hexchar(p[ 3] >> 4), hexchar(p[ 2] & 0xF), hexchar(p[ 2] >> 4), + hexchar(p[ 1] & 0xF), hexchar(p[ 1] >> 4), hexchar(p[ 0] & 0xF), hexchar(p[ 0] >> 4)); + else + return -EAFNOSUPPORT; + if (r < 0) + return -ENOMEM; + + return 0; +} + +int dns_name_address(const char *p, int *family, union in_addr_union *address) { + int r; + + assert(p); + assert(family); + assert(address); + + r = dns_name_endswith(p, "in-addr.arpa"); + if (r < 0) + return r; + if (r > 0) { + uint8_t a[4]; + unsigned i; + + for (i = 0; i < ELEMENTSOF(a); i++) { + char label[DNS_LABEL_MAX+1]; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + if (r > 3) + return -EINVAL; + + r = safe_atou8(label, &a[i]); + if (r < 0) + return r; + } + + r = dns_name_equal(p, "in-addr.arpa"); + if (r <= 0) + return r; + + *family = AF_INET; + address->in.s_addr = htobe32(((uint32_t) a[3] << 24) | + ((uint32_t) a[2] << 16) | + ((uint32_t) a[1] << 8) | + (uint32_t) a[0]); + + return 1; + } + + r = dns_name_endswith(p, "ip6.arpa"); + if (r < 0) + return r; + if (r > 0) { + struct in6_addr a; + unsigned i; + + for (i = 0; i < ELEMENTSOF(a.s6_addr); i++) { + char label[DNS_LABEL_MAX+1]; + int x, y; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r <= 0) + return r; + if (r != 1) + return -EINVAL; + x = unhexchar(label[0]); + if (x < 0) + return -EINVAL; + + r = dns_label_unescape(&p, label, sizeof label, 0); + if (r <= 0) + return r; + if (r != 1) + return -EINVAL; + y = unhexchar(label[0]); + if (y < 0) + return -EINVAL; + + a.s6_addr[ELEMENTSOF(a.s6_addr) - i - 1] = (uint8_t) y << 4 | (uint8_t) x; + } + + r = dns_name_equal(p, "ip6.arpa"); + if (r <= 0) + return r; + + *family = AF_INET6; + address->in6 = a; + return 1; + } + + return 0; +} + +bool dns_name_is_root(const char *name) { + + assert(name); + + /* There are exactly two ways to encode the root domain name: + * as empty string, or with a single dot. */ + + return STR_IN_SET(name, "", "."); +} + +bool dns_name_is_single_label(const char *name) { + int r; + + assert(name); + + r = dns_name_parent(&name); + if (r <= 0) + return false; + + return dns_name_is_root(name); +} + +/* Encode a domain name according to RFC 1035 Section 3.1, without compression */ +int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical) { + uint8_t *label_length, *out; + int r; + + assert(domain); + assert(buffer); + + out = buffer; + + do { + /* Reserve a byte for label length */ + if (len <= 0) + return -ENOBUFS; + len--; + label_length = out; + out++; + + /* Convert and copy a single label. Note that + * dns_label_unescape() returns 0 when it hits the end + * of the domain name, which we rely on here to encode + * the trailing NUL byte. */ + r = dns_label_unescape(&domain, (char *) out, len, 0); + if (r < 0) + return r; + + /* Optionally, output the name in DNSSEC canonical + * format, as described in RFC 4034, section 6.2. Or + * in other words: in lower-case. */ + if (canonical) + ascii_strlower_n((char*) out, (size_t) r); + + /* Fill label length, move forward */ + *label_length = r; + out += r; + len -= r; + + } while (r != 0); + + /* Verify the maximum size of the encoded name. The trailing + * dot + NUL byte account are included this time, hence + * compare against DNS_HOSTNAME_MAX + 2 (which is 255) this + * time. */ + if (out - buffer > DNS_HOSTNAME_MAX + 2) + return -EINVAL; + + return out - buffer; +} + +static bool srv_type_label_is_valid(const char *label, size_t n) { + size_t k; + + assert(label); + + if (n < 2) /* Label needs to be at least 2 chars long */ + return false; + + if (label[0] != '_') /* First label char needs to be underscore */ + return false; + + /* Second char must be a letter */ + if (!(label[1] >= 'A' && label[1] <= 'Z') && + !(label[1] >= 'a' && label[1] <= 'z')) + return false; + + /* Third and further chars must be alphanumeric or a hyphen */ + for (k = 2; k < n; k++) { + if (!(label[k] >= 'A' && label[k] <= 'Z') && + !(label[k] >= 'a' && label[k] <= 'z') && + !(label[k] >= '0' && label[k] <= '9') && + label[k] != '-') + return false; + } + + return true; +} + +bool dns_srv_type_is_valid(const char *name) { + unsigned c = 0; + int r; + + if (!name) + return false; + + for (;;) { + char label[DNS_LABEL_MAX]; + + /* This more or less implements RFC 6335, Section 5.1 */ + + r = dns_label_unescape(&name, label, sizeof label, 0); + if (r < 0) + return false; + if (r == 0) + break; + + if (c >= 2) + return false; + + if (!srv_type_label_is_valid(label, r)) + return false; + + c++; + } + + return c == 2; /* exactly two labels */ +} + +bool dnssd_srv_type_is_valid(const char *name) { + return dns_srv_type_is_valid(name) && + ((dns_name_endswith(name, "_tcp") > 0) || + (dns_name_endswith(name, "_udp") > 0)); /* Specific to DNS-SD. RFC 6763, Section 7 */ +} + +bool dns_service_name_is_valid(const char *name) { + size_t l; + + /* This more or less implements RFC 6763, Section 4.1.1 */ + + if (!name) + return false; + + if (!utf8_is_valid(name)) + return false; + + if (string_has_cc(name, NULL)) + return false; + + l = strlen(name); + if (l <= 0) + return false; + if (l > 63) + return false; + + return true; +} + +int dns_service_join(const char *name, const char *type, const char *domain, char **ret) { + char escaped[DNS_LABEL_ESCAPED_MAX]; + _cleanup_free_ char *n = NULL; + int r; + + assert(type); + assert(domain); + assert(ret); + + if (!dns_srv_type_is_valid(type)) + return -EINVAL; + + if (!name) + return dns_name_concat(type, domain, 0, ret); + + if (!dns_service_name_is_valid(name)) + return -EINVAL; + + r = dns_label_escape(name, strlen(name), escaped, sizeof(escaped)); + if (r < 0) + return r; + + r = dns_name_concat(type, domain, 0, &n); + if (r < 0) + return r; + + return dns_name_concat(escaped, n, 0, ret); +} + +static bool dns_service_name_label_is_valid(const char *label, size_t n) { + char *s; + + assert(label); + + if (memchr(label, 0, n)) + return false; + + s = strndupa(label, n); + return dns_service_name_is_valid(s); +} + +int dns_service_split(const char *joined, char **_name, char **_type, char **_domain) { + _cleanup_free_ char *name = NULL, *type = NULL, *domain = NULL; + const char *p = joined, *q = NULL, *d = NULL; + char a[DNS_LABEL_MAX], b[DNS_LABEL_MAX], c[DNS_LABEL_MAX]; + int an, bn, cn, r; + unsigned x = 0; + + assert(joined); + + /* Get first label from the full name */ + an = dns_label_unescape(&p, a, sizeof(a), 0); + if (an < 0) + return an; + + if (an > 0) { + x++; + + /* If there was a first label, try to get the second one */ + bn = dns_label_unescape(&p, b, sizeof(b), 0); + if (bn < 0) + return bn; + + if (bn > 0) { + x++; + + /* If there was a second label, try to get the third one */ + q = p; + cn = dns_label_unescape(&p, c, sizeof(c), 0); + if (cn < 0) + return cn; + + if (cn > 0) + x++; + } else + cn = 0; + } else + an = 0; + + if (x >= 2 && srv_type_label_is_valid(b, bn)) { + + if (x >= 3 && srv_type_label_is_valid(c, cn)) { + + if (dns_service_name_label_is_valid(a, an)) { + /* OK, got <name> . <type> . <type2> . <domain> */ + + name = strndup(a, an); + if (!name) + return -ENOMEM; + + type = strjoin(b, ".", c); + if (!type) + return -ENOMEM; + + d = p; + goto finish; + } + + } else if (srv_type_label_is_valid(a, an)) { + + /* OK, got <type> . <type2> . <domain> */ + + name = NULL; + + type = strjoin(a, ".", b); + if (!type) + return -ENOMEM; + + d = q; + goto finish; + } + } + + name = NULL; + type = NULL; + d = joined; + +finish: + r = dns_name_normalize(d, 0, &domain); + if (r < 0) + return r; + + if (_domain) + *_domain = TAKE_PTR(domain); + + if (_type) + *_type = TAKE_PTR(type); + + if (_name) + *_name = TAKE_PTR(name); + + return 0; +} + +static int dns_name_build_suffix_table(const char *name, const char *table[]) { + const char *p; + unsigned n = 0; + int r; + + assert(name); + assert(table); + + p = name; + for (;;) { + if (n > DNS_N_LABELS_MAX) + return -EINVAL; + + table[n] = p; + r = dns_name_parent(&p); + if (r < 0) + return r; + if (r == 0) + break; + + n++; + } + + return (int) n; +} + +int dns_name_suffix(const char *name, unsigned n_labels, const char **ret) { + const char* labels[DNS_N_LABELS_MAX+1]; + int n; + + assert(name); + assert(ret); + + n = dns_name_build_suffix_table(name, labels); + if (n < 0) + return n; + + if ((unsigned) n < n_labels) + return -EINVAL; + + *ret = labels[n - n_labels]; + return (int) (n - n_labels); +} + +int dns_name_skip(const char *a, unsigned n_labels, const char **ret) { + int r; + + assert(a); + assert(ret); + + for (; n_labels > 0; n_labels--) { + r = dns_name_parent(&a); + if (r < 0) + return r; + if (r == 0) { + *ret = ""; + return 0; + } + } + + *ret = a; + return 1; +} + +int dns_name_count_labels(const char *name) { + unsigned n = 0; + const char *p; + int r; + + assert(name); + + p = name; + for (;;) { + r = dns_name_parent(&p); + if (r < 0) + return r; + if (r == 0) + break; + + if (n >= DNS_N_LABELS_MAX) + return -EINVAL; + + n++; + } + + return (int) n; +} + +int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b) { + int r; + + assert(a); + assert(b); + + r = dns_name_skip(a, n_labels, &a); + if (r <= 0) + return r; + + return dns_name_equal(a, b); +} + +int dns_name_common_suffix(const char *a, const char *b, const char **ret) { + const char *a_labels[DNS_N_LABELS_MAX+1], *b_labels[DNS_N_LABELS_MAX+1]; + int n = 0, m = 0, k = 0, r, q; + + assert(a); + assert(b); + assert(ret); + + /* Determines the common suffix of domain names a and b */ + + n = dns_name_build_suffix_table(a, a_labels); + if (n < 0) + return n; + + m = dns_name_build_suffix_table(b, b_labels); + if (m < 0) + return m; + + for (;;) { + char la[DNS_LABEL_MAX], lb[DNS_LABEL_MAX]; + const char *x, *y; + + if (k >= n || k >= m) { + *ret = a_labels[n - k]; + return 0; + } + + x = a_labels[n - 1 - k]; + r = dns_label_unescape(&x, la, sizeof la, 0); + if (r < 0) + return r; + + y = b_labels[m - 1 - k]; + q = dns_label_unescape(&y, lb, sizeof lb, 0); + if (q < 0) + return q; + + if (r != q || ascii_strcasecmp_n(la, lb, r) != 0) { + *ret = a_labels[n - k]; + return 0; + } + + k++; + } +} + +int dns_name_apply_idna(const char *name, char **ret) { + /* Return negative on error, 0 if not implemented, positive on success. */ + +#if HAVE_LIBIDN2 + int r; + _cleanup_free_ char *t = NULL; + + assert(name); + assert(ret); + + r = idn2_lookup_u8((uint8_t*) name, (uint8_t**) &t, + IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL); + log_debug("idn2_lookup_u8: %s → %s", name, t); + if (r == IDN2_OK) { + if (!startswith(name, "xn--")) { + _cleanup_free_ char *s = NULL; + + r = idn2_to_unicode_8z8z(t, &s, 0); + if (r != IDN2_OK) { + log_debug("idn2_to_unicode_8z8z(\"%s\") failed: %d/%s", + t, r, idn2_strerror(r)); + return 0; + } + + if (!streq_ptr(name, s)) { + log_debug("idn2 roundtrip failed: \"%s\" → \"%s\" → \"%s\", ignoring.", + name, t, s); + return 0; + } + } + + *ret = TAKE_PTR(t); + + return 1; /* *ret has been written */ + } + + log_debug("idn2_lookup_u8(\"%s\") failed: %d/%s", name, r, idn2_strerror(r)); + if (r == IDN2_2HYPHEN) + /* The name has two hyphens — forbidden by IDNA2008 in some cases */ + return 0; + if (IN_SET(r, IDN2_TOO_BIG_DOMAIN, IDN2_TOO_BIG_LABEL)) + return -ENOSPC; + return -EINVAL; +#elif HAVE_LIBIDN + _cleanup_free_ char *buf = NULL; + size_t n = 0, allocated = 0; + bool first = true; + int r, q; + + assert(name); + assert(ret); + + for (;;) { + char label[DNS_LABEL_MAX]; + + r = dns_label_unescape(&name, label, sizeof label, 0); + if (r < 0) + return r; + if (r == 0) + break; + + q = dns_label_apply_idna(label, r, label, sizeof label); + if (q < 0) + return q; + if (q > 0) + r = q; + + if (!GREEDY_REALLOC(buf, allocated, n + !first + DNS_LABEL_ESCAPED_MAX)) + return -ENOMEM; + + r = dns_label_escape(label, r, buf + n + !first, DNS_LABEL_ESCAPED_MAX); + if (r < 0) + return r; + + if (first) + first = false; + else + buf[n++] = '.'; + + n += r; + } + + if (n > DNS_HOSTNAME_MAX) + return -EINVAL; + + if (!GREEDY_REALLOC(buf, allocated, n + 1)) + return -ENOMEM; + + buf[n] = 0; + *ret = TAKE_PTR(buf); + + return 1; +#else + return 0; +#endif +} + +int dns_name_is_valid_or_address(const char *name) { + /* Returns > 0 if the specified name is either a valid IP address formatted as string or a valid DNS name */ + + if (isempty(name)) + return 0; + + if (in_addr_from_string_auto(name, NULL, NULL) >= 0) + return 1; + + return dns_name_is_valid(name); +} diff --git a/src/shared/dns-domain.h b/src/shared/dns-domain.h new file mode 100644 index 0000000..6ed512c --- /dev/null +++ b/src/shared/dns-domain.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "hashmap.h" +#include "in-addr-util.h" + +/* Length of a single label, with all escaping removed, excluding any trailing dot or NUL byte */ +#define DNS_LABEL_MAX 63 + +/* Worst case length of a single label, with all escaping applied and room for a trailing NUL byte. */ +#define DNS_LABEL_ESCAPED_MAX (DNS_LABEL_MAX*4+1) + +/* Maximum length of a full hostname, consisting of a series of unescaped labels, and no trailing dot or NUL byte */ +#define DNS_HOSTNAME_MAX 253 + +/* Maximum length of a full hostname, on the wire, including the final NUL byte */ +#define DNS_WIRE_FORMAT_HOSTNAME_MAX 255 + +/* Maximum number of labels per valid hostname */ +#define DNS_N_LABELS_MAX 127 + +typedef enum DNSLabelFlags { + DNS_LABEL_LDH = 1 << 0, /* Follow the "LDH" rule — only letters, digits, and internal hyphens. */ + DNS_LABEL_NO_ESCAPES = 1 << 1, /* Do not treat backslashes specially */ +} DNSLabelFlags; + +int dns_label_unescape(const char **name, char *dest, size_t sz, DNSLabelFlags flags); +int dns_label_unescape_suffix(const char *name, const char **label_end, char *dest, size_t sz); +int dns_label_escape(const char *p, size_t l, char *dest, size_t sz); +int dns_label_escape_new(const char *p, size_t l, char **ret); + +static inline int dns_name_parent(const char **name) { + return dns_label_unescape(name, NULL, DNS_LABEL_MAX, 0); +} + +#if HAVE_LIBIDN +int dns_label_apply_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max); +int dns_label_undo_idna(const char *encoded, size_t encoded_size, char *decoded, size_t decoded_max); +#endif + +int dns_name_concat(const char *a, const char *b, DNSLabelFlags flags, char **ret); + +static inline int dns_name_normalize(const char *s, DNSLabelFlags flags, char **ret) { + /* dns_name_concat() normalizes as a side-effect */ + return dns_name_concat(s, NULL, flags, ret); +} + +static inline int dns_name_is_valid(const char *s) { + int r; + + /* dns_name_normalize() verifies as a side effect */ + r = dns_name_normalize(s, 0, NULL); + if (r == -EINVAL) + return 0; + if (r < 0) + return r; + return 1; +} + +static inline int dns_name_is_valid_ldh(const char *s) { + int r; + + r = dns_name_concat(s, NULL, DNS_LABEL_LDH|DNS_LABEL_NO_ESCAPES, NULL); + if (r == -EINVAL) + return 0; + if (r < 0) + return r; + return 1; +} + +void dns_name_hash_func(const char *s, struct siphash *state); +int dns_name_compare_func(const char *a, const char *b); +extern const struct hash_ops dns_name_hash_ops; + +int dns_name_between(const char *a, const char *b, const char *c); +int dns_name_equal(const char *x, const char *y); +int dns_name_endswith(const char *name, const char *suffix); +int dns_name_startswith(const char *name, const char *prefix); + +int dns_name_change_suffix(const char *name, const char *old_suffix, const char *new_suffix, char **ret); + +int dns_name_reverse(int family, const union in_addr_union *a, char **ret); +int dns_name_address(const char *p, int *family, union in_addr_union *a); + +bool dns_name_is_root(const char *name); +bool dns_name_is_single_label(const char *name); + +int dns_name_to_wire_format(const char *domain, uint8_t *buffer, size_t len, bool canonical); + +bool dns_srv_type_is_valid(const char *name); +bool dnssd_srv_type_is_valid(const char *name); +bool dns_service_name_is_valid(const char *name); + +int dns_service_join(const char *name, const char *type, const char *domain, char **ret); +int dns_service_split(const char *joined, char **name, char **type, char **domain); + +int dns_name_suffix(const char *name, unsigned n_labels, const char **ret); +int dns_name_count_labels(const char *name); + +int dns_name_skip(const char *a, unsigned n_labels, const char **ret); +int dns_name_equal_skip(const char *a, unsigned n_labels, const char *b); + +int dns_name_common_suffix(const char *a, const char *b, const char **ret); + +int dns_name_apply_idna(const char *name, char **ret); + +int dns_name_is_valid_or_address(const char *name); diff --git a/src/shared/dropin.c b/src/shared/dropin.c new file mode 100644 index 0000000..409eef2 --- /dev/null +++ b/src/shared/dropin.c @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "dirent-util.h" +#include "dropin.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio-label.h" +#include "fs-util.h" +#include "hashmap.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-util.h" +#include "set.h" +#include "string-util.h" +#include "strv.h" +#include "unit-name.h" + +int drop_in_file(const char *dir, const char *unit, unsigned level, + const char *name, char **_p, char **_q) { + + char prefix[DECIMAL_STR_MAX(unsigned)]; + _cleanup_free_ char *b = NULL; + char *p, *q; + + assert(unit); + assert(name); + assert(_p); + assert(_q); + + sprintf(prefix, "%u", level); + + b = xescape(name, "/."); + if (!b) + return -ENOMEM; + + if (!filename_is_valid(b)) + return -EINVAL; + + p = strjoin(dir, "/", unit, ".d"); + if (!p) + return -ENOMEM; + + q = strjoin(p, "/", prefix, "-", b, ".conf"); + if (!q) { + free(p); + return -ENOMEM; + } + + *_p = p; + *_q = q; + return 0; +} + +int write_drop_in(const char *dir, const char *unit, unsigned level, + const char *name, const char *data) { + + _cleanup_free_ char *p = NULL, *q = NULL; + int r; + + assert(dir); + assert(unit); + assert(name); + assert(data); + + r = drop_in_file(dir, unit, level, name, &p, &q); + if (r < 0) + return r; + + (void) mkdir_p(p, 0755); + return write_string_file_atomic_label(q, data); +} + +int write_drop_in_format(const char *dir, const char *unit, unsigned level, + const char *name, const char *format, ...) { + _cleanup_free_ char *p = NULL; + va_list ap; + int r; + + assert(dir); + assert(unit); + assert(name); + assert(format); + + va_start(ap, format); + r = vasprintf(&p, format, ap); + va_end(ap); + + if (r < 0) + return -ENOMEM; + + return write_drop_in(dir, unit, level, name, p); +} + +static int unit_file_find_dir( + const char *original_root, + const char *path, + char ***dirs) { + + _cleanup_free_ char *chased = NULL; + int r; + + assert(path); + + r = chase_symlinks(path, original_root, 0, &chased); + if (r == -ENOENT) /* Ignore -ENOENT, after all most units won't have a drop-in dir. */ + return 0; + if (r == -ENAMETOOLONG) { + /* Also, ignore -ENAMETOOLONG but log about it. After all, users are not even able to create the + * drop-in dir in such case. This mostly happens for device units with an overly long /sys path. */ + log_debug_errno(r, "Path '%s' too long, couldn't canonicalize, ignoring.", path); + return 0; + } + if (r < 0) + return log_warning_errno(r, "Failed to canonicalize path '%s': %m", path); + + r = strv_push(dirs, chased); + if (r < 0) + return log_oom(); + + chased = NULL; + return 0; +} + +static int unit_file_find_dirs( + const char *original_root, + Set *unit_path_cache, + const char *unit_path, + const char *name, + const char *suffix, + char ***dirs) { + + _cleanup_free_ char *prefix = NULL, *instance = NULL, *built = NULL; + bool is_instance, chopped; + const char *dash; + UnitType type; + char *path; + size_t n; + int r; + + assert(unit_path); + assert(name); + assert(suffix); + + path = strjoina(unit_path, "/", name, suffix); + if (!unit_path_cache || set_get(unit_path_cache, path)) { + r = unit_file_find_dir(original_root, path, dirs); + if (r < 0) + return r; + } + + is_instance = unit_name_is_valid(name, UNIT_NAME_INSTANCE); + if (is_instance) { /* Also try the template dir */ + _cleanup_free_ char *template = NULL; + + r = unit_name_template(name, &template); + if (r < 0) + return log_error_errno(r, "Failed to generate template from unit name: %m"); + + r = unit_file_find_dirs(original_root, unit_path_cache, unit_path, template, suffix, dirs); + if (r < 0) + return r; + } + + /* Let's see if there's a "-" prefix for this unit name. If so, let's invoke ourselves for it. This will then + * recursively do the same for all our prefixes. i.e. this means given "foo-bar-waldo.service" we'll also + * search "foo-bar-.service" and "foo-.service". + * + * Note the order in which we do it: we traverse up adding drop-ins on each step. This means the more specific + * drop-ins may override the more generic drop-ins, which is the intended behaviour. */ + + r = unit_name_to_prefix(name, &prefix); + if (r < 0) + return log_error_errno(r, "Failed to derive unit name prefix from unit name: %m"); + + chopped = false; + for (;;) { + dash = strrchr(prefix, '-'); + if (!dash) /* No dash? if so we are done */ + return 0; + + n = (size_t) (dash - prefix); + if (n == 0) /* Leading dash? If so, we are done */ + return 0; + + if (prefix[n+1] != 0 || chopped) { + prefix[n+1] = 0; + break; + } + + /* Trailing dash? If so, chop it off and try again, but not more than once. */ + prefix[n] = 0; + chopped = true; + } + + if (!unit_prefix_is_valid(prefix)) + return 0; + + type = unit_name_to_type(name); + if (type < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to to derive unit type from unit name: %s", + name); + + if (is_instance) { + r = unit_name_to_instance(name, &instance); + if (r < 0) + return log_error_errno(r, "Failed to derive unit name instance from unit name: %m"); + } + + r = unit_name_build_from_type(prefix, instance, type, &built); + if (r < 0) + return log_error_errno(r, "Failed to build prefix unit name: %m"); + + return unit_file_find_dirs(original_root, unit_path_cache, unit_path, built, suffix, dirs); +} + +int unit_file_find_dropin_paths( + const char *original_root, + char **lookup_path, + Set *unit_path_cache, + const char *dir_suffix, + const char *file_suffix, + Set *names, + char ***ret) { + + _cleanup_strv_free_ char **dirs = NULL; + char *t, **p; + Iterator i; + int r; + + assert(ret); + + SET_FOREACH(t, names, i) + STRV_FOREACH(p, lookup_path) + (void) unit_file_find_dirs(original_root, unit_path_cache, *p, t, dir_suffix, &dirs); + + if (strv_isempty(dirs)) { + *ret = NULL; + return 0; + } + + r = conf_files_list_strv(ret, file_suffix, NULL, 0, (const char**) dirs); + if (r < 0) + return log_warning_errno(r, "Failed to create the list of configuration files: %m"); + + return 1; +} diff --git a/src/shared/dropin.h b/src/shared/dropin.h new file mode 100644 index 0000000..ae7379b --- /dev/null +++ b/src/shared/dropin.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "hashmap.h" +#include "macro.h" +#include "set.h" +#include "unit-name.h" + +int drop_in_file(const char *dir, const char *unit, unsigned level, + const char *name, char **_p, char **_q); + +int write_drop_in(const char *dir, const char *unit, unsigned level, + const char *name, const char *data); + +int write_drop_in_format(const char *dir, const char *unit, unsigned level, + const char *name, const char *format, ...) _printf_(5, 6); + +int unit_file_find_dropin_paths( + const char *original_root, + char **lookup_path, + Set *unit_path_cache, + const char *dir_suffix, + const char *file_suffix, + Set *names, + char ***paths); + +static inline int unit_file_find_dropin_conf_paths( + const char *original_root, + char **lookup_path, + Set *unit_path_cache, + Set *names, + char ***paths) { + + return unit_file_find_dropin_paths(original_root, + lookup_path, + unit_path_cache, + ".d", ".conf", + names, paths); +} diff --git a/src/shared/efivars.c b/src/shared/efivars.c new file mode 100644 index 0000000..26f905b --- /dev/null +++ b/src/shared/efivars.c @@ -0,0 +1,914 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/fs.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "chattr-util.h" +#include "dirent-util.h" +#include "efivars.h" +#include "fd-util.h" +#include "io-util.h" +#include "macro.h" +#include "parse-util.h" +#include "stdio-util.h" +#include "strv.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" +#include "virt.h" + +#if ENABLE_EFI + +#define LOAD_OPTION_ACTIVE 0x00000001 +#define MEDIA_DEVICE_PATH 0x04 +#define MEDIA_HARDDRIVE_DP 0x01 +#define MEDIA_FILEPATH_DP 0x04 +#define SIGNATURE_TYPE_GUID 0x02 +#define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02 +#define END_DEVICE_PATH_TYPE 0x7f +#define END_ENTIRE_DEVICE_PATH_SUBTYPE 0xff +#define EFI_OS_INDICATIONS_BOOT_TO_FW_UI 0x0000000000000001 + +#define boot_option__contents { \ + uint32_t attr; \ + uint16_t path_len; \ + uint16_t title[]; \ + } + +struct boot_option boot_option__contents; +struct boot_option__packed boot_option__contents _packed_; +assert_cc(offsetof(struct boot_option, title) == offsetof(struct boot_option__packed, title)); +/* sizeof(struct boot_option) != sizeof(struct boot_option__packed), so + * the *size* of the structure should not be used anywhere below. */ + +struct drive_path { + uint32_t part_nr; + uint64_t part_start; + uint64_t part_size; + char signature[16]; + uint8_t mbr_type; + uint8_t signature_type; +} _packed_; + +#define device_path__contents { \ + uint8_t type; \ + uint8_t sub_type; \ + uint16_t length; \ + union { \ + uint16_t path[0]; \ + struct drive_path drive; \ + }; \ + } + +struct device_path device_path__contents; +struct device_path__packed device_path__contents _packed_; +assert_cc(sizeof(struct device_path) == sizeof(struct device_path__packed)); + +bool is_efi_boot(void) { + if (detect_container() > 0) + return false; + + return access("/sys/firmware/efi/", F_OK) >= 0; +} + +static int read_flag(const char *varname) { + _cleanup_free_ void *v = NULL; + uint8_t b; + size_t s; + int r; + + if (!is_efi_boot()) /* If this is not an EFI boot, assume the queried flags are zero */ + return 0; + + r = efi_get_variable(EFI_VENDOR_GLOBAL, varname, NULL, &v, &s); + if (r < 0) + return r; + + if (s != 1) + return -EINVAL; + + b = *(uint8_t *)v; + return !!b; +} + +bool is_efi_secure_boot(void) { + return read_flag("SecureBoot") > 0; +} + +bool is_efi_secure_boot_setup_mode(void) { + return read_flag("SetupMode") > 0; +} + +int efi_reboot_to_firmware_supported(void) { + _cleanup_free_ void *v = NULL; + uint64_t b; + size_t s; + int r; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndicationsSupported", NULL, &v, &s); + if (r == -ENOENT) /* variable doesn't exist? it's not supported then */ + return -EOPNOTSUPP; + if (r < 0) + return r; + if (s != sizeof(uint64_t)) + return -EINVAL; + + b = *(uint64_t*) v; + if (!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI)) + return -EOPNOTSUPP; /* bit unset? it's not supported then */ + + return 0; +} + +static int get_os_indications(uint64_t *os_indication) { + _cleanup_free_ void *v = NULL; + size_t s; + int r; + + /* Let's verify general support first */ + r = efi_reboot_to_firmware_supported(); + if (r < 0) + return r; + + r = efi_get_variable(EFI_VENDOR_GLOBAL, "OsIndications", NULL, &v, &s); + if (r == -ENOENT) { + /* Some firmware implementations that do support OsIndications and report that with + * OsIndicationsSupported will remove the OsIndications variable when it is unset. Let's pretend it's 0 + * then, to hide this implementation detail. Note that this call will return -ENOENT then only if the + * support for OsIndications is missing entirely, as determined by efi_reboot_to_firmware_supported() + * above. */ + *os_indication = 0; + return 0; + } else if (r < 0) + return r; + else if (s != sizeof(uint64_t)) + return -EINVAL; + + *os_indication = *(uint64_t *)v; + return 0; +} + +int efi_get_reboot_to_firmware(void) { + int r; + uint64_t b; + + r = get_os_indications(&b); + if (r < 0) + return r; + + return !!(b & EFI_OS_INDICATIONS_BOOT_TO_FW_UI); +} + +int efi_set_reboot_to_firmware(bool value) { + int r; + uint64_t b, b_new; + + r = get_os_indications(&b); + if (r < 0) + return r; + + if (value) + b_new = b | EFI_OS_INDICATIONS_BOOT_TO_FW_UI; + else + b_new = b & ~EFI_OS_INDICATIONS_BOOT_TO_FW_UI; + + /* Avoid writing to efi vars store if we can due to firmware bugs. */ + if (b != b_new) + return efi_set_variable(EFI_VENDOR_GLOBAL, "OsIndications", &b_new, sizeof(uint64_t)); + + return 0; +} + +int efi_get_variable( + sd_id128_t vendor, + const char *name, + uint32_t *attribute, + void **value, + size_t *size) { + + _cleanup_close_ int fd = -1; + _cleanup_free_ char *p = NULL; + uint32_t a; + ssize_t n; + struct stat st; + _cleanup_free_ void *buf = NULL; + + assert(name); + assert(value); + assert(size); + + if (asprintf(&p, + "/sys/firmware/efi/efivars/%s-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + name, SD_ID128_FORMAT_VAL(vendor)) < 0) + return -ENOMEM; + + fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + if (st.st_size < 4) + return -EIO; + if (st.st_size > 4*1024*1024 + 4) + return -E2BIG; + + n = read(fd, &a, sizeof(a)); + if (n < 0) + return -errno; + if (n != sizeof(a)) + return -EIO; + + buf = malloc(st.st_size - 4 + 2); + if (!buf) + return -ENOMEM; + + n = read(fd, buf, (size_t) st.st_size - 4); + if (n < 0) + return -errno; + if (n != (ssize_t) st.st_size - 4) + return -EIO; + + /* Always NUL terminate (2 bytes, to protect UTF-16) */ + ((char*) buf)[st.st_size - 4] = 0; + ((char*) buf)[st.st_size - 4 + 1] = 0; + + *value = TAKE_PTR(buf); + *size = (size_t) st.st_size - 4; + + if (attribute) + *attribute = a; + + return 0; +} + +int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) { + _cleanup_free_ void *s = NULL; + size_t ss = 0; + int r; + char *x; + + r = efi_get_variable(vendor, name, NULL, &s, &ss); + if (r < 0) + return r; + + x = utf16_to_utf8(s, ss); + if (!x) + return -ENOMEM; + + *p = x; + return 0; +} + +int efi_set_variable( + sd_id128_t vendor, + const char *name, + const void *value, + size_t size) { + + struct var { + uint32_t attr; + char buf[]; + } _packed_ * _cleanup_free_ buf = NULL; + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + bool saved_flags_valid = false; + unsigned saved_flags; + int r; + + assert(name); + assert(value || size == 0); + + if (asprintf(&p, + "/sys/firmware/efi/efivars/%s-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + name, SD_ID128_FORMAT_VAL(vendor)) < 0) + return -ENOMEM; + + /* Newer efivarfs protects variables that are not in a whitelist with FS_IMMUTABLE_FL by default, to protect + * them for accidental removal and modification. We are not changing these variables accidentally however, + * hence let's unset the bit first. */ + + r = chattr_path(p, 0, FS_IMMUTABLE_FL, &saved_flags); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to drop FS_IMMUTABLE_FL flag from '%s', ignoring: %m", p); + + saved_flags_valid = r >= 0; + + if (size == 0) { + if (unlink(p) < 0) { + r = -errno; + goto finish; + } + + return 0; + } + + fd = open(p, O_WRONLY|O_CREAT|O_NOCTTY|O_CLOEXEC, 0644); + if (fd < 0) { + r = -errno; + goto finish; + } + + buf = malloc(sizeof(uint32_t) + size); + if (!buf) { + r = -ENOMEM; + goto finish; + } + + buf->attr = EFI_VARIABLE_NON_VOLATILE|EFI_VARIABLE_BOOTSERVICE_ACCESS|EFI_VARIABLE_RUNTIME_ACCESS; + memcpy(buf->buf, value, size); + + r = loop_write(fd, buf, sizeof(uint32_t) + size, false); + if (r < 0) + goto finish; + + r = 0; + +finish: + if (saved_flags_valid) { + int q; + + /* Restore the original flags field, just in case */ + if (fd < 0) + q = chattr_path(p, saved_flags, FS_IMMUTABLE_FL, NULL); + else + q = chattr_fd(fd, saved_flags, FS_IMMUTABLE_FL, NULL); + if (q < 0) + log_debug_errno(q, "Failed to restore FS_IMMUTABLE_FL on '%s', ignoring: %m", p); + } + + return r; +} + +int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *v) { + _cleanup_free_ char16_t *u16 = NULL; + + u16 = utf8_to_utf16(v, strlen(v)); + if (!u16) + return -ENOMEM; + + return efi_set_variable(vendor, name, u16, (char16_strlen(u16) + 1) * sizeof(char16_t)); +} + +static ssize_t utf16_size(const uint16_t *s, size_t buf_len_bytes) { + size_t l = 0; + + /* Returns the size of the string in bytes without the terminating two zero bytes */ + + if (buf_len_bytes % sizeof(uint16_t) != 0) + return -EINVAL; + + while (l < buf_len_bytes / sizeof(uint16_t)) { + if (s[l] == 0) + return (l + 1) * sizeof(uint16_t); + l++; + } + + return -EINVAL; /* The terminator was not found */ +} + +struct guid { + uint32_t u1; + uint16_t u2; + uint16_t u3; + uint8_t u4[8]; +} _packed_; + +static void efi_guid_to_id128(const void *guid, sd_id128_t *id128) { + uint32_t u1; + uint16_t u2, u3; + const struct guid *uuid = guid; + + memcpy(&u1, &uuid->u1, sizeof(uint32_t)); + id128->bytes[0] = (u1 >> 24) & 0xff; + id128->bytes[1] = (u1 >> 16) & 0xff; + id128->bytes[2] = (u1 >> 8) & 0xff; + id128->bytes[3] = u1 & 0xff; + memcpy(&u2, &uuid->u2, sizeof(uint16_t)); + id128->bytes[4] = (u2 >> 8) & 0xff; + id128->bytes[5] = u2 & 0xff; + memcpy(&u3, &uuid->u3, sizeof(uint16_t)); + id128->bytes[6] = (u3 >> 8) & 0xff; + id128->bytes[7] = u3 & 0xff; + memcpy(&id128->bytes[8], uuid->u4, sizeof(uuid->u4)); +} + +int efi_get_boot_option( + uint16_t id, + char **title, + sd_id128_t *part_uuid, + char **path, + bool *active) { + + char boot_id[9]; + _cleanup_free_ uint8_t *buf = NULL; + size_t l; + struct boot_option *header; + ssize_t title_size; + _cleanup_free_ char *s = NULL, *p = NULL; + sd_id128_t p_uuid = SD_ID128_NULL; + int r; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + xsprintf(boot_id, "Boot%04X", id); + r = efi_get_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, (void **)&buf, &l); + if (r < 0) + return r; + if (l < offsetof(struct boot_option, title)) + return -ENOENT; + + header = (struct boot_option *)buf; + title_size = utf16_size(header->title, l - offsetof(struct boot_option, title)); + if (title_size < 0) + return title_size; + + if (title) { + s = utf16_to_utf8(header->title, title_size); + if (!s) + return -ENOMEM; + } + + if (header->path_len > 0) { + uint8_t *dbuf; + size_t dnext, doff; + + doff = offsetof(struct boot_option, title) + title_size; + dbuf = buf + doff; + if (header->path_len > l - doff) + return -EINVAL; + + dnext = 0; + while (dnext < header->path_len) { + struct device_path *dpath; + + dpath = (struct device_path *)(dbuf + dnext); + if (dpath->length < 4) + break; + + /* Type 0x7F – End of Hardware Device Path, Sub-Type 0xFF – End Entire Device Path */ + if (dpath->type == END_DEVICE_PATH_TYPE && dpath->sub_type == END_ENTIRE_DEVICE_PATH_SUBTYPE) + break; + + dnext += dpath->length; + + /* Type 0x04 – Media Device Path */ + if (dpath->type != MEDIA_DEVICE_PATH) + continue; + + /* Sub-Type 1 – Hard Drive */ + if (dpath->sub_type == MEDIA_HARDDRIVE_DP) { + /* 0x02 – GUID Partition Table */ + if (dpath->drive.mbr_type != MBR_TYPE_EFI_PARTITION_TABLE_HEADER) + continue; + + /* 0x02 – GUID signature */ + if (dpath->drive.signature_type != SIGNATURE_TYPE_GUID) + continue; + + if (part_uuid) + efi_guid_to_id128(dpath->drive.signature, &p_uuid); + continue; + } + + /* Sub-Type 4 – File Path */ + if (dpath->sub_type == MEDIA_FILEPATH_DP && !p && path) { + p = utf16_to_utf8(dpath->path, dpath->length-4); + if (!p) + return -ENOMEM; + + efi_tilt_backslashes(p); + continue; + } + } + } + + if (title) + *title = TAKE_PTR(s); + if (part_uuid) + *part_uuid = p_uuid; + if (path) + *path = TAKE_PTR(p); + if (active) + *active = !!(header->attr & LOAD_OPTION_ACTIVE); + + return 0; +} + +static void to_utf16(uint16_t *dest, const char *src) { + int i; + + for (i = 0; src[i] != '\0'; i++) + dest[i] = src[i]; + dest[i] = '\0'; +} + +static void id128_to_efi_guid(sd_id128_t id, void *guid) { + struct guid uuid = { + .u1 = id.bytes[0] << 24 | id.bytes[1] << 16 | id.bytes[2] << 8 | id.bytes[3], + .u2 = id.bytes[4] << 8 | id.bytes[5], + .u3 = id.bytes[6] << 8 | id.bytes[7], + }; + memcpy(uuid.u4, id.bytes+8, sizeof(uuid.u4)); + memcpy(guid, &uuid, sizeof(uuid)); +} + +static uint16_t *tilt_slashes(uint16_t *s) { + uint16_t *p; + + for (p = s; *p; p++) + if (*p == '/') + *p = '\\'; + + return s; +} + +int efi_add_boot_option( + uint16_t id, + const char *title, + uint32_t part, + uint64_t pstart, + uint64_t psize, + sd_id128_t part_uuid, + const char *path) { + + size_t size, title_len, path_len; + _cleanup_free_ char *buf = NULL; + struct boot_option *option; + struct device_path *devicep; + char boot_id[9]; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + title_len = (strlen(title)+1) * 2; + path_len = (strlen(path)+1) * 2; + + buf = malloc0(offsetof(struct boot_option, title) + title_len + + sizeof(struct drive_path) + + sizeof(struct device_path) + path_len); + if (!buf) + return -ENOMEM; + + /* header */ + option = (struct boot_option *)buf; + option->attr = LOAD_OPTION_ACTIVE; + option->path_len = offsetof(struct device_path, drive) + sizeof(struct drive_path) + + offsetof(struct device_path, path) + path_len + + offsetof(struct device_path, path); + to_utf16(option->title, title); + size = offsetof(struct boot_option, title) + title_len; + + /* partition info */ + devicep = (struct device_path *)(buf + size); + devicep->type = MEDIA_DEVICE_PATH; + devicep->sub_type = MEDIA_HARDDRIVE_DP; + devicep->length = offsetof(struct device_path, drive) + sizeof(struct drive_path); + memcpy(&devicep->drive.part_nr, &part, sizeof(uint32_t)); + memcpy(&devicep->drive.part_start, &pstart, sizeof(uint64_t)); + memcpy(&devicep->drive.part_size, &psize, sizeof(uint64_t)); + id128_to_efi_guid(part_uuid, devicep->drive.signature); + devicep->drive.mbr_type = MBR_TYPE_EFI_PARTITION_TABLE_HEADER; + devicep->drive.signature_type = SIGNATURE_TYPE_GUID; + size += devicep->length; + + /* path to loader */ + devicep = (struct device_path *)(buf + size); + devicep->type = MEDIA_DEVICE_PATH; + devicep->sub_type = MEDIA_FILEPATH_DP; + devicep->length = offsetof(struct device_path, path) + path_len; + to_utf16(devicep->path, path); + tilt_slashes(devicep->path); + size += devicep->length; + + /* end of path */ + devicep = (struct device_path *)(buf + size); + devicep->type = END_DEVICE_PATH_TYPE; + devicep->sub_type = END_ENTIRE_DEVICE_PATH_SUBTYPE; + devicep->length = offsetof(struct device_path, path); + size += devicep->length; + + xsprintf(boot_id, "Boot%04X", id); + return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, buf, size); +} + +int efi_remove_boot_option(uint16_t id) { + char boot_id[9]; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + xsprintf(boot_id, "Boot%04X", id); + return efi_set_variable(EFI_VENDOR_GLOBAL, boot_id, NULL, 0); +} + +int efi_get_boot_order(uint16_t **order) { + _cleanup_free_ void *buf = NULL; + size_t l; + int r; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable(EFI_VENDOR_GLOBAL, "BootOrder", NULL, &buf, &l); + if (r < 0) + return r; + + if (l <= 0) + return -ENOENT; + + if (l % sizeof(uint16_t) > 0 || + l / sizeof(uint16_t) > INT_MAX) + return -EINVAL; + + *order = TAKE_PTR(buf); + return (int) (l / sizeof(uint16_t)); +} + +int efi_set_boot_order(uint16_t *order, size_t n) { + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + return efi_set_variable(EFI_VENDOR_GLOBAL, "BootOrder", order, n * sizeof(uint16_t)); +} + +static int boot_id_hex(const char s[static 4]) { + int id = 0, i; + + for (i = 0; i < 4; i++) + if (s[i] >= '0' && s[i] <= '9') + id |= (s[i] - '0') << (3 - i) * 4; + else if (s[i] >= 'A' && s[i] <= 'F') + id |= (s[i] - 'A' + 10) << (3 - i) * 4; + else + return -EINVAL; + + return id; +} + +static int cmp_uint16(const uint16_t *a, const uint16_t *b) { + return CMP(*a, *b); +} + +int efi_get_boot_options(uint16_t **options) { + _cleanup_closedir_ DIR *dir = NULL; + _cleanup_free_ uint16_t *list = NULL; + struct dirent *de; + size_t alloc = 0; + int count = 0; + + assert(options); + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + dir = opendir("/sys/firmware/efi/efivars/"); + if (!dir) + return -errno; + + FOREACH_DIRENT(de, dir, return -errno) { + int id; + + if (strncmp(de->d_name, "Boot", 4) != 0) + continue; + + if (strlen(de->d_name) != 45) + continue; + + if (strcmp(de->d_name + 8, "-8be4df61-93ca-11d2-aa0d-00e098032b8c") != 0) + continue; + + id = boot_id_hex(de->d_name + 4); + if (id < 0) + continue; + + if (!GREEDY_REALLOC(list, alloc, count + 1)) + return -ENOMEM; + + list[count++] = id; + } + + typesafe_qsort(list, count, cmp_uint16); + + *options = TAKE_PTR(list); + + return count; +} + +static int read_usec(sd_id128_t vendor, const char *name, usec_t *u) { + _cleanup_free_ char *j = NULL; + int r; + uint64_t x = 0; + + assert(name); + assert(u); + + r = efi_get_variable_string(EFI_VENDOR_LOADER, name, &j); + if (r < 0) + return r; + + r = safe_atou64(j, &x); + if (r < 0) + return r; + + *u = x; + return 0; +} + +int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) { + uint64_t x, y; + int r; + + assert(firmware); + assert(loader); + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeInitUSec", &x); + if (r < 0) + return r; + + r = read_usec(EFI_VENDOR_LOADER, "LoaderTimeExecUSec", &y); + if (r < 0) + return r; + + if (y == 0 || y < x) + return -EIO; + + if (y > USEC_PER_HOUR) + return -EIO; + + *firmware = x; + *loader = y; + + return 0; +} + +int efi_loader_get_device_part_uuid(sd_id128_t *u) { + _cleanup_free_ char *p = NULL; + int r, parsed[16]; + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderDevicePartUUID", &p); + if (r < 0) + return r; + + if (sscanf(p, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + &parsed[0], &parsed[1], &parsed[2], &parsed[3], + &parsed[4], &parsed[5], &parsed[6], &parsed[7], + &parsed[8], &parsed[9], &parsed[10], &parsed[11], + &parsed[12], &parsed[13], &parsed[14], &parsed[15]) != 16) + return -EIO; + + if (u) { + unsigned i; + + for (i = 0; i < ELEMENTSOF(parsed); i++) + u->bytes[i] = parsed[i]; + } + + return 0; +} + +bool efi_loader_entry_name_valid(const char *s) { + if (isempty(s)) + return false; + + if (strlen(s) > FILENAME_MAX) /* Make sure entry names fit in filenames */ + return false; + + return in_charset(s, ALPHANUMERICAL "-"); +} + +int efi_loader_get_entries(char ***ret) { + _cleanup_free_ char16_t *entries = NULL; + _cleanup_strv_free_ char **l = NULL; + size_t size, i, start; + int r; + + assert(ret); + + if (!is_efi_boot()) + return -EOPNOTSUPP; + + r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderEntries", NULL, (void**) &entries, &size); + if (r < 0) + return r; + + /* The variable contains a series of individually NUL terminated UTF-16 strings. */ + + for (i = 0, start = 0;; i++) { + _cleanup_free_ char *decoded = NULL; + bool end; + + /* Is this the end of the variable's data? */ + end = i * sizeof(char16_t) >= size; + + /* Are we in the middle of a string? (i.e. not at the end of the variable, nor at a NUL terminator?) If + * so, let's go to the next entry. */ + if (!end && entries[i] != 0) + continue; + + /* We reached the end of a string, let's decode it into UTF-8 */ + decoded = utf16_to_utf8(entries + start, (i - start) * sizeof(char16_t)); + if (!decoded) + return -ENOMEM; + + if (efi_loader_entry_name_valid(decoded)) { + r = strv_consume(&l, TAKE_PTR(decoded)); + if (r < 0) + return r; + } else + log_debug("Ignoring invalid loader entry '%s'.", decoded); + + /* We reached the end of the variable */ + if (end) + break; + + /* Continue after the NUL byte */ + start = i + 1; + } + + *ret = TAKE_PTR(l); + return 0; +} + +int efi_loader_get_features(uint64_t *ret) { + _cleanup_free_ void *v = NULL; + size_t s; + int r; + + if (!is_efi_boot()) { + *ret = 0; + return 0; + } + + r = efi_get_variable(EFI_VENDOR_LOADER, "LoaderFeatures", NULL, &v, &s); + if (r == -ENOENT) { + _cleanup_free_ char *info = NULL; + + /* The new (v240+) LoaderFeatures variable is not supported, let's see if it's systemd-boot at all */ + r = efi_get_variable_string(EFI_VENDOR_LOADER, "LoaderInfo", &info); + if (r < 0) { + if (r != -ENOENT) + return r; + + /* Variable not set, definitely means not systemd-boot */ + + } else if (first_word(info, "systemd-boot")) { + + /* An older systemd-boot version. Let's hardcode the feature set, since it was pretty + * static in all its versions. */ + + *ret = EFI_LOADER_FEATURE_CONFIG_TIMEOUT | + EFI_LOADER_FEATURE_ENTRY_DEFAULT | + EFI_LOADER_FEATURE_ENTRY_ONESHOT; + + return 0; + } + + /* No features supported */ + *ret = 0; + return 0; + } + if (r < 0) + return r; + + if (s != sizeof(uint64_t)) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "LoaderFeatures EFI variable doesn't have the right size."); + + memcpy(ret, v, sizeof(uint64_t)); + return 0; +} + +#endif + +char *efi_tilt_backslashes(char *s) { + char *p; + + for (p = s; *p; p++) + if (*p == '\\') + *p = '/'; + + return s; +} diff --git a/src/shared/efivars.h b/src/shared/efivars.h new file mode 100644 index 0000000..92670c8 --- /dev/null +++ b/src/shared/efivars.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#if ! ENABLE_EFI +#include <errno.h> +#endif +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "sd-id128.h" + +#include "time-util.h" + +#define EFI_VENDOR_LOADER SD_ID128_MAKE(4a,67,b0,82,0a,4c,41,cf,b6,c7,44,0b,29,bb,8c,4f) +#define EFI_VENDOR_GLOBAL SD_ID128_MAKE(8b,e4,df,61,93,ca,11,d2,aa,0d,00,e0,98,03,2b,8c) +#define EFI_VARIABLE_NON_VOLATILE 0x0000000000000001 +#define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002 +#define EFI_VARIABLE_RUNTIME_ACCESS 0x0000000000000004 + +#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT (UINT64_C(1) << 0) +#define EFI_LOADER_FEATURE_CONFIG_TIMEOUT_ONE_SHOT (UINT64_C(1) << 1) +#define EFI_LOADER_FEATURE_ENTRY_DEFAULT (UINT64_C(1) << 2) +#define EFI_LOADER_FEATURE_ENTRY_ONESHOT (UINT64_C(1) << 3) +#define EFI_LOADER_FEATURE_BOOT_COUNTING (UINT64_C(1) << 4) + +#if ENABLE_EFI + +bool is_efi_boot(void); +bool is_efi_secure_boot(void); +bool is_efi_secure_boot_setup_mode(void); +int efi_reboot_to_firmware_supported(void); +int efi_get_reboot_to_firmware(void); +int efi_set_reboot_to_firmware(bool value); + +int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size); +int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p); +int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size); +int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p); + +int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active); +int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path); +int efi_remove_boot_option(uint16_t id); +int efi_get_boot_order(uint16_t **order); +int efi_set_boot_order(uint16_t *order, size_t n); +int efi_get_boot_options(uint16_t **options); + +int efi_loader_get_device_part_uuid(sd_id128_t *u); +int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader); + +int efi_loader_get_entries(char ***ret); + +bool efi_loader_entry_name_valid(const char *s); + +int efi_loader_get_features(uint64_t *ret); + +#else + +static inline bool is_efi_boot(void) { + return false; +} + +static inline bool is_efi_secure_boot(void) { + return false; +} + +static inline bool is_efi_secure_boot_setup_mode(void) { + return false; +} + +static inline int efi_reboot_to_firmware_supported(void) { + return -EOPNOTSUPP; +} + +static inline int efi_get_reboot_to_firmware(void) { + return -EOPNOTSUPP; +} + +static inline int efi_set_reboot_to_firmware(bool value) { + return -EOPNOTSUPP; +} + +static inline int efi_get_variable(sd_id128_t vendor, const char *name, uint32_t *attribute, void **value, size_t *size) { + return -EOPNOTSUPP; +} + +static inline int efi_get_variable_string(sd_id128_t vendor, const char *name, char **p) { + return -EOPNOTSUPP; +} + +static inline int efi_set_variable(sd_id128_t vendor, const char *name, const void *value, size_t size) { + return -EOPNOTSUPP; +} + +static inline int efi_set_variable_string(sd_id128_t vendor, const char *name, const char *p) { + return -EOPNOTSUPP; +} + +static inline int efi_get_boot_option(uint16_t nr, char **title, sd_id128_t *part_uuid, char **path, bool *active) { + return -EOPNOTSUPP; +} + +static inline int efi_add_boot_option(uint16_t id, const char *title, uint32_t part, uint64_t pstart, uint64_t psize, sd_id128_t part_uuid, const char *path) { + return -EOPNOTSUPP; +} + +static inline int efi_remove_boot_option(uint16_t id) { + return -EOPNOTSUPP; +} + +static inline int efi_get_boot_order(uint16_t **order) { + return -EOPNOTSUPP; +} + +static inline int efi_set_boot_order(uint16_t *order, size_t n) { + return -EOPNOTSUPP; +} + +static inline int efi_get_boot_options(uint16_t **options) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_device_part_uuid(sd_id128_t *u) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_boot_usec(usec_t *firmware, usec_t *loader) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_entries(char ***ret) { + return -EOPNOTSUPP; +} + +static inline int efi_loader_get_features(uint64_t *ret) { + return -EOPNOTSUPP; +} + +#endif + +char *efi_tilt_backslashes(char *s); diff --git a/src/shared/enable-mempool.c b/src/shared/enable-mempool.c new file mode 100644 index 0000000..a571b43 --- /dev/null +++ b/src/shared/enable-mempool.c @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "mempool.h" + +const bool mempool_use_allowed = true; diff --git a/src/shared/env-file-label.c b/src/shared/env-file-label.c new file mode 100644 index 0000000..add68d2 --- /dev/null +++ b/src/shared/env-file-label.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/stat.h> + +#include "env-file-label.h" +#include "env-file.h" +#include "selinux-util.h" + +int write_env_file_label(const char *fname, char **l) { + int r; + + r = mac_selinux_create_file_prepare(fname, S_IFREG); + if (r < 0) + return r; + + r = write_env_file(fname, l); + + mac_selinux_create_file_clear(); + + return r; +} diff --git a/src/shared/env-file-label.h b/src/shared/env-file-label.h new file mode 100644 index 0000000..158fc4e --- /dev/null +++ b/src/shared/env-file-label.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to + * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not + * for all */ + +int write_env_file_label(const char *fname, char **l); diff --git a/src/shared/exec-util.c b/src/shared/exec-util.c new file mode 100644 index 0000000..17a278a --- /dev/null +++ b/src/shared/exec-util.c @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "env-file.h" +#include "env-util.h" +#include "exec-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "hashmap.h" +#include "macro.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "serialize.h" +#include "set.h" +#include "signal-util.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "tmpfile-util.h" +#include "util.h" + +/* Put this test here for a lack of better place */ +assert_cc(EAGAIN == EWOULDBLOCK); + +static int do_spawn(const char *path, char *argv[], int stdout_fd, pid_t *pid) { + + pid_t _pid; + int r; + + if (null_or_empty_path(path)) { + log_debug("%s is empty (a mask).", path); + return 0; + } + + r = safe_fork("(direxec)", FORK_DEATHSIG|FORK_LOG, &_pid); + if (r < 0) + return r; + if (r == 0) { + char *_argv[2]; + + if (stdout_fd >= 0) { + r = rearrange_stdio(STDIN_FILENO, stdout_fd, STDERR_FILENO); + if (r < 0) + _exit(EXIT_FAILURE); + } + + (void) rlimit_nofile_safe(); + + if (!argv) { + _argv[0] = (char*) path; + _argv[1] = NULL; + argv = _argv; + } else + argv[0] = (char*) path; + + execv(path, argv); + log_error_errno(errno, "Failed to execute %s: %m", path); + _exit(EXIT_FAILURE); + } + + *pid = _pid; + return 1; +} + +static int do_execute( + char **directories, + usec_t timeout, + gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX], + void* const callback_args[_STDOUT_CONSUME_MAX], + int output_fd, + char *argv[], + char *envp[]) { + + _cleanup_hashmap_free_free_ Hashmap *pids = NULL; + _cleanup_strv_free_ char **paths = NULL; + char **path, **e; + int r; + + /* We fork this all off from a child process so that we can somewhat cleanly make + * use of SIGALRM to set a time limit. + * + * If callbacks is nonnull, execution is serial. Otherwise, we default to parallel. + */ + + r = conf_files_list_strv(&paths, NULL, NULL, CONF_FILES_EXECUTABLE|CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char* const*) directories); + if (r < 0) + return log_error_errno(r, "Failed to enumerate executables: %m"); + + if (!callbacks) { + pids = hashmap_new(NULL); + if (!pids) + return log_oom(); + } + + /* Abort execution of this process after the timout. We simply rely on SIGALRM as + * default action terminating the process, and turn on alarm(). */ + + if (timeout != USEC_INFINITY) + alarm(DIV_ROUND_UP(timeout, USEC_PER_SEC)); + + STRV_FOREACH(e, envp) + if (putenv(*e) != 0) + return log_error_errno(errno, "Failed to set environment variable: %m"); + + STRV_FOREACH(path, paths) { + _cleanup_free_ char *t = NULL; + _cleanup_close_ int fd = -1; + pid_t pid; + + t = strdup(*path); + if (!t) + return log_oom(); + + if (callbacks) { + fd = open_serialization_fd(basename(*path)); + if (fd < 0) + return log_error_errno(fd, "Failed to open serialization file: %m"); + } + + r = do_spawn(t, argv, fd, &pid); + if (r <= 0) + continue; + + if (pids) { + r = hashmap_put(pids, PID_TO_PTR(pid), t); + if (r < 0) + return log_oom(); + t = NULL; + } else { + r = wait_for_terminate_and_check(t, pid, WAIT_LOG); + if (r < 0) + continue; + + if (lseek(fd, 0, SEEK_SET) < 0) + return log_error_errno(errno, "Failed to seek on serialization fd: %m"); + + r = callbacks[STDOUT_GENERATE](fd, callback_args[STDOUT_GENERATE]); + fd = -1; + if (r < 0) + return log_error_errno(r, "Failed to process output from %s: %m", *path); + } + } + + if (callbacks) { + r = callbacks[STDOUT_COLLECT](output_fd, callback_args[STDOUT_COLLECT]); + if (r < 0) + return log_error_errno(r, "Callback two failed: %m"); + } + + while (!hashmap_isempty(pids)) { + _cleanup_free_ char *t = NULL; + pid_t pid; + + pid = PTR_TO_PID(hashmap_first_key(pids)); + assert(pid > 0); + + t = hashmap_remove(pids, PID_TO_PTR(pid)); + assert(t); + + (void) wait_for_terminate_and_check(t, pid, WAIT_LOG); + } + + return 0; +} + +int execute_directories( + const char* const* directories, + usec_t timeout, + gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX], + void* const callback_args[_STDOUT_CONSUME_MAX], + char *argv[], + char *envp[]) { + + char **dirs = (char**) directories; + _cleanup_close_ int fd = -1; + char *name; + int r; + + assert(!strv_isempty(dirs)); + + name = basename(dirs[0]); + assert(!isempty(name)); + + if (callbacks) { + assert(callback_args); + assert(callbacks[STDOUT_GENERATE]); + assert(callbacks[STDOUT_COLLECT]); + assert(callbacks[STDOUT_CONSUME]); + + fd = open_serialization_fd(name); + if (fd < 0) + return log_error_errno(fd, "Failed to open serialization file: %m"); + } + + /* Executes all binaries in the directories serially or in parallel and waits for + * them to finish. Optionally a timeout is applied. If a file with the same name + * exists in more than one directory, the earliest one wins. */ + + r = safe_fork("(sd-executor)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_LOG|FORK_WAIT, NULL); + if (r < 0) + return r; + if (r == 0) { + r = do_execute(dirs, timeout, callbacks, callback_args, fd, argv, envp); + _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS); + } + + if (!callbacks) + return 0; + + if (lseek(fd, 0, SEEK_SET) < 0) + return log_error_errno(errno, "Failed to rewind serialization fd: %m"); + + r = callbacks[STDOUT_CONSUME](fd, callback_args[STDOUT_CONSUME]); + fd = -1; + if (r < 0) + return log_error_errno(r, "Failed to parse returned data: %m"); + return 0; +} + +static int gather_environment_generate(int fd, void *arg) { + char ***env = arg, **x, **y; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_strv_free_ char **new = NULL; + int r; + + /* Read a series of VAR=value assignments from fd, use them to update the list of + * variables in env. Also update the exported environment. + * + * fd is always consumed, even on error. + */ + + assert(env); + + f = fdopen(fd, "r"); + if (!f) { + safe_close(fd); + return -errno; + } + + r = load_env_file_pairs(f, NULL, &new); + if (r < 0) + return r; + + STRV_FOREACH_PAIR(x, y, new) { + char *p; + + if (!env_name_is_valid(*x)) { + log_warning("Invalid variable assignment \"%s=...\", ignoring.", *x); + continue; + } + + p = strjoin(*x, "=", *y); + if (!p) + return -ENOMEM; + + r = strv_env_replace(env, p); + if (r < 0) + return r; + + if (setenv(*x, *y, true) < 0) + return -errno; + } + + return r; +} + +static int gather_environment_collect(int fd, void *arg) { + _cleanup_fclose_ FILE *f = NULL; + char ***env = arg; + int r; + + /* Write out a series of env=cescape(VAR=value) assignments to fd. */ + + assert(env); + + f = fdopen(fd, "w"); + if (!f) { + safe_close(fd); + return -errno; + } + + r = serialize_strv(f, "env", *env); + if (r < 0) + return r; + + r = fflush_and_check(f); + if (r < 0) + return r; + + return 0; +} + +static int gather_environment_consume(int fd, void *arg) { + _cleanup_fclose_ FILE *f = NULL; + char ***env = arg; + int r = 0; + + /* Read a series of env=cescape(VAR=value) assignments from fd into env. */ + + assert(env); + + f = fdopen(fd, "r"); + if (!f) { + safe_close(fd); + return -errno; + } + + for (;;) { + _cleanup_free_ char *line = NULL; + const char *v; + int k; + + k = read_line(f, LONG_LINE_MAX, &line); + if (k < 0) + return k; + if (k == 0) + break; + + v = startswith(line, "env="); + if (!v) { + log_debug("Serialization line \"%s\" unexpectedly didn't start with \"env=\".", line); + if (r == 0) + r = -EINVAL; + + continue; + } + + k = deserialize_environment(v, env); + if (k < 0) { + log_debug_errno(k, "Invalid serialization line \"%s\": %m", line); + + if (r == 0) + r = k; + } + } + + return r; +} + +const gather_stdout_callback_t gather_environment[] = { + gather_environment_generate, + gather_environment_collect, + gather_environment_consume, +}; diff --git a/src/shared/exec-util.h b/src/shared/exec-util.h new file mode 100644 index 0000000..6ac3c90 --- /dev/null +++ b/src/shared/exec-util.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" + +typedef int (*gather_stdout_callback_t) (int fd, void *arg); + +enum { + STDOUT_GENERATE, /* from generators to helper process */ + STDOUT_COLLECT, /* from helper process to main process */ + STDOUT_CONSUME, /* process data in main process */ + _STDOUT_CONSUME_MAX, +}; + +int execute_directories( + const char* const* directories, + usec_t timeout, + gather_stdout_callback_t const callbacks[_STDOUT_CONSUME_MAX], + void* const callback_args[_STDOUT_CONSUME_MAX], + char *argv[], + char *envp[]); + +extern const gather_stdout_callback_t gather_environment[_STDOUT_CONSUME_MAX]; diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c new file mode 100644 index 0000000..26b3060 --- /dev/null +++ b/src/shared/exit-status.c @@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <signal.h> +#include <stdlib.h> +#include <sysexits.h> + +#include "exit-status.h" +#include "macro.h" +#include "set.h" + +const char* exit_status_to_string(int status, ExitStatusLevel level) { + + /* Exit status ranges: + * + * 0…1 │ ISO C, EXIT_SUCCESS + EXIT_FAILURE + * 2…7 │ LSB exit codes for init scripts + * 8…63 │ (Currently unmapped) + * 64…78 │ BSD defined exit codes + * 79…199 │ (Currently unmapped) + * 200…241 │ systemd's private error codes (might be extended to 254 in future development) + * 242…254 │ (Currently unmapped, but see above) + * + * 255 │ EXIT_EXCEPTION (We use this to propagate exit-by-signal events. It's frequently used by others apps (like bash) + * │ to indicate exit reason that cannot really be expressed in a single exit status value — such as a propagated + * │ signal or such, and we follow that logic here.) + */ + + switch (status) { /* We always cover the ISO C ones */ + + case EXIT_SUCCESS: + return "SUCCESS"; + + case EXIT_FAILURE: + return "FAILURE"; + } + + if (IN_SET(level, EXIT_STATUS_SYSTEMD, EXIT_STATUS_LSB, EXIT_STATUS_FULL)) { + switch (status) { /* Optionally we cover our own ones */ + + case EXIT_CHDIR: + return "CHDIR"; + + case EXIT_NICE: + return "NICE"; + + case EXIT_FDS: + return "FDS"; + + case EXIT_EXEC: + return "EXEC"; + + case EXIT_MEMORY: + return "MEMORY"; + + case EXIT_LIMITS: + return "LIMITS"; + + case EXIT_OOM_ADJUST: + return "OOM_ADJUST"; + + case EXIT_SIGNAL_MASK: + return "SIGNAL_MASK"; + + case EXIT_STDIN: + return "STDIN"; + + case EXIT_STDOUT: + return "STDOUT"; + + case EXIT_CHROOT: + return "CHROOT"; + + case EXIT_IOPRIO: + return "IOPRIO"; + + case EXIT_TIMERSLACK: + return "TIMERSLACK"; + + case EXIT_SECUREBITS: + return "SECUREBITS"; + + case EXIT_SETSCHEDULER: + return "SETSCHEDULER"; + + case EXIT_CPUAFFINITY: + return "CPUAFFINITY"; + + case EXIT_GROUP: + return "GROUP"; + + case EXIT_USER: + return "USER"; + + case EXIT_CAPABILITIES: + return "CAPABILITIES"; + + case EXIT_CGROUP: + return "CGROUP"; + + case EXIT_SETSID: + return "SETSID"; + + case EXIT_CONFIRM: + return "CONFIRM"; + + case EXIT_STDERR: + return "STDERR"; + + case EXIT_PAM: + return "PAM"; + + case EXIT_NETWORK: + return "NETWORK"; + + case EXIT_NAMESPACE: + return "NAMESPACE"; + + case EXIT_NO_NEW_PRIVILEGES: + return "NO_NEW_PRIVILEGES"; + + case EXIT_SECCOMP: + return "SECCOMP"; + + case EXIT_SELINUX_CONTEXT: + return "SELINUX_CONTEXT"; + + case EXIT_PERSONALITY: + return "PERSONALITY"; + + case EXIT_APPARMOR_PROFILE: + return "APPARMOR"; + + case EXIT_ADDRESS_FAMILIES: + return "ADDRESS_FAMILIES"; + + case EXIT_RUNTIME_DIRECTORY: + return "RUNTIME_DIRECTORY"; + + case EXIT_CHOWN: + return "CHOWN"; + + case EXIT_SMACK_PROCESS_LABEL: + return "SMACK_PROCESS_LABEL"; + + case EXIT_KEYRING: + return "KEYRING"; + + case EXIT_STATE_DIRECTORY: + return "STATE_DIRECTORY"; + + case EXIT_CACHE_DIRECTORY: + return "CACHE_DIRECTORY"; + + case EXIT_LOGS_DIRECTORY: + return "LOGS_DIRECTORY"; + + case EXIT_CONFIGURATION_DIRECTORY: + return "CONFIGURATION_DIRECTORY"; + + case EXIT_EXCEPTION: + return "EXCEPTION"; + } + } + + if (IN_SET(level, EXIT_STATUS_LSB, EXIT_STATUS_FULL)) { + switch (status) { /* Optionally we support LSB ones */ + + case EXIT_INVALIDARGUMENT: + return "INVALIDARGUMENT"; + + case EXIT_NOTIMPLEMENTED: + return "NOTIMPLEMENTED"; + + case EXIT_NOPERMISSION: + return "NOPERMISSION"; + + case EXIT_NOTINSTALLED: + return "NOTINSTALLED"; + + case EXIT_NOTCONFIGURED: + return "NOTCONFIGURED"; + + case EXIT_NOTRUNNING: + return "NOTRUNNING"; + } + } + + if (level == EXIT_STATUS_FULL) { + switch (status) { /* Optionally, we support BSD exit statusses */ + + case EX_USAGE: + return "USAGE"; + + case EX_DATAERR: + return "DATAERR"; + + case EX_NOINPUT: + return "NOINPUT"; + + case EX_NOUSER: + return "NOUSER"; + + case EX_NOHOST: + return "NOHOST"; + + case EX_UNAVAILABLE: + return "UNAVAILABLE"; + + case EX_SOFTWARE: + return "SOFTWARE"; + + case EX_OSERR: + return "OSERR"; + + case EX_OSFILE: + return "OSFILE"; + + case EX_CANTCREAT: + return "CANTCREAT"; + + case EX_IOERR: + return "IOERR"; + + case EX_TEMPFAIL: + return "TEMPFAIL"; + + case EX_PROTOCOL: + return "PROTOCOL"; + + case EX_NOPERM: + return "NOPERM"; + + case EX_CONFIG: + return "CONFIG"; + } + } + + return NULL; +} + +bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status) { + + if (code == CLD_EXITED) + return status == 0 || + (success_status && + set_contains(success_status->status, INT_TO_PTR(status))); + + /* If a daemon does not implement handlers for some of the signals that's not considered an unclean shutdown */ + if (code == CLD_KILLED) + return + (clean == EXIT_CLEAN_DAEMON && IN_SET(status, SIGHUP, SIGINT, SIGTERM, SIGPIPE)) || + (success_status && + set_contains(success_status->signal, INT_TO_PTR(status))); + + return false; +} + +void exit_status_set_free(ExitStatusSet *x) { + assert(x); + + x->status = set_free(x->status); + x->signal = set_free(x->signal); +} + +bool exit_status_set_is_empty(ExitStatusSet *x) { + if (!x) + return true; + + return set_isempty(x->status) && set_isempty(x->signal); +} + +bool exit_status_set_test(ExitStatusSet *x, int code, int status) { + + if (exit_status_set_is_empty(x)) + return false; + + if (code == CLD_EXITED && set_contains(x->status, INT_TO_PTR(status))) + return true; + + if (IN_SET(code, CLD_KILLED, CLD_DUMPED) && set_contains(x->signal, INT_TO_PTR(status))) + return true; + + return false; +} diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h new file mode 100644 index 0000000..510eb31 --- /dev/null +++ b/src/shared/exit-status.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" +#include "set.h" + +/* This defines pretty names for the LSB 'start' verb exit codes. Note that they shouldn't be confused with the LSB + * 'status' verb exit codes which are defined very differently. For details see: + * + * https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html + */ + +enum { + /* EXIT_SUCCESS defined by libc */ + /* EXIT_FAILURE defined by libc */ + EXIT_INVALIDARGUMENT = 2, + EXIT_NOTIMPLEMENTED = 3, + EXIT_NOPERMISSION = 4, + EXIT_NOTINSTALLED = 5, + EXIT_NOTCONFIGURED = 6, + EXIT_NOTRUNNING = 7, + + /* BSD's sysexits.h defines a couple EX_xyz exit codes in the range 64 … 78 */ + + /* The LSB suggests that error codes >= 200 are "reserved". We use them here under the assumption that they + * hence are unused by init scripts. */ + EXIT_CHDIR = 200, + EXIT_NICE, + EXIT_FDS, + EXIT_EXEC, + EXIT_MEMORY, + EXIT_LIMITS, + EXIT_OOM_ADJUST, + EXIT_SIGNAL_MASK, + EXIT_STDIN, + EXIT_STDOUT, + EXIT_CHROOT, /* 210 */ + EXIT_IOPRIO, + EXIT_TIMERSLACK, + EXIT_SECUREBITS, + EXIT_SETSCHEDULER, + EXIT_CPUAFFINITY, + EXIT_GROUP, + EXIT_USER, + EXIT_CAPABILITIES, + EXIT_CGROUP, + EXIT_SETSID, /* 220 */ + EXIT_CONFIRM, + EXIT_STDERR, + _EXIT_RESERVED, /* used to be tcpwrap, don't reuse! */ + EXIT_PAM, + EXIT_NETWORK, + EXIT_NAMESPACE, + EXIT_NO_NEW_PRIVILEGES, + EXIT_SECCOMP, + EXIT_SELINUX_CONTEXT, + EXIT_PERSONALITY, /* 230 */ + EXIT_APPARMOR_PROFILE, + EXIT_ADDRESS_FAMILIES, + EXIT_RUNTIME_DIRECTORY, + _EXIT_RESERVED2, /* used to be used by kdbus, don't reuse */ + EXIT_CHOWN, + EXIT_SMACK_PROCESS_LABEL, + EXIT_KEYRING, + EXIT_STATE_DIRECTORY, + EXIT_CACHE_DIRECTORY, + EXIT_LOGS_DIRECTORY, /* 240 */ + EXIT_CONFIGURATION_DIRECTORY, + + EXIT_EXCEPTION = 255, /* Whenever we want to propagate an abnormal/signal exit, in line with bash */ +}; + +typedef enum ExitStatusLevel { + EXIT_STATUS_MINIMAL, /* only cover libc EXIT_STATUS/EXIT_FAILURE */ + EXIT_STATUS_SYSTEMD, /* cover libc and systemd's own exit codes */ + EXIT_STATUS_LSB, /* cover libc, systemd's own and LSB exit codes */ + EXIT_STATUS_FULL, /* cover libc, systemd's own, LSB and BSD (EX_xyz) exit codes */ +} ExitStatusLevel; + +typedef struct ExitStatusSet { + Set *status; + Set *signal; +} ExitStatusSet; + +const char* exit_status_to_string(int status, ExitStatusLevel level) _const_; + +typedef enum ExitClean { + EXIT_CLEAN_DAEMON, + EXIT_CLEAN_COMMAND, +} ExitClean; + +bool is_clean_exit(int code, int status, ExitClean clean, ExitStatusSet *success_status); + +void exit_status_set_free(ExitStatusSet *x); +bool exit_status_set_is_empty(ExitStatusSet *x); +bool exit_status_set_test(ExitStatusSet *x, int code, int status); diff --git a/src/shared/fdset.c b/src/shared/fdset.c new file mode 100644 index 0000000..5d27732 --- /dev/null +++ b/src/shared/fdset.c @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <alloca.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> + +#include "sd-daemon.h" + +#include "alloc-util.h" +#include "dirent-util.h" +#include "fd-util.h" +#include "fdset.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "set.h" + +#define MAKE_SET(s) ((Set*) s) +#define MAKE_FDSET(s) ((FDSet*) s) + +FDSet *fdset_new(void) { + return MAKE_FDSET(set_new(NULL)); +} + +int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds) { + size_t i; + FDSet *s; + int r; + + assert(ret); + + s = fdset_new(); + if (!s) + return -ENOMEM; + + for (i = 0; i < n_fds; i++) { + + r = fdset_put(s, fds[i]); + if (r < 0) { + set_free(MAKE_SET(s)); + return r; + } + } + + *ret = s; + return 0; +} + +FDSet* fdset_free(FDSet *s) { + void *p; + + while ((p = set_steal_first(MAKE_SET(s)))) { + /* Valgrind's fd might have ended up in this set here, + * due to fdset_new_fill(). We'll ignore all failures + * here, so that the EBADFD that valgrind will return + * us on close() doesn't influence us */ + + /* When reloading duplicates of the private bus + * connection fds and suchlike are closed here, which + * has no effect at all, since they are only + * duplicates. So don't be surprised about these log + * messages. */ + + log_debug("Closing left-over fd %i", PTR_TO_FD(p)); + close_nointr(PTR_TO_FD(p)); + } + + set_free(MAKE_SET(s)); + return NULL; +} + +int fdset_put(FDSet *s, int fd) { + assert(s); + assert(fd >= 0); + + return set_put(MAKE_SET(s), FD_TO_PTR(fd)); +} + +int fdset_put_dup(FDSet *s, int fd) { + int copy, r; + + assert(s); + assert(fd >= 0); + + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + r = fdset_put(s, copy); + if (r < 0) { + safe_close(copy); + return r; + } + + return copy; +} + +bool fdset_contains(FDSet *s, int fd) { + assert(s); + assert(fd >= 0); + + return !!set_get(MAKE_SET(s), FD_TO_PTR(fd)); +} + +int fdset_remove(FDSet *s, int fd) { + assert(s); + assert(fd >= 0); + + return set_remove(MAKE_SET(s), FD_TO_PTR(fd)) ? fd : -ENOENT; +} + +int fdset_new_fill(FDSet **_s) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + FDSet *s; + + assert(_s); + + /* Creates an fdset and fills in all currently open file + * descriptors. */ + + d = opendir("/proc/self/fd"); + if (!d) + return -errno; + + s = fdset_new(); + if (!s) { + r = -ENOMEM; + goto finish; + } + + FOREACH_DIRENT(de, d, return -errno) { + int fd = -1; + + r = safe_atoi(de->d_name, &fd); + if (r < 0) + goto finish; + + if (fd < 3) + continue; + + if (fd == dirfd(d)) + continue; + + r = fdset_put(s, fd); + if (r < 0) + goto finish; + } + + r = 0; + *_s = TAKE_PTR(s); + +finish: + /* We won't close the fds here! */ + if (s) + set_free(MAKE_SET(s)); + + return r; +} + +int fdset_cloexec(FDSet *fds, bool b) { + Iterator i; + void *p; + int r; + + assert(fds); + + SET_FOREACH(p, MAKE_SET(fds), i) { + r = fd_cloexec(PTR_TO_FD(p), b); + if (r < 0) + return r; + } + + return 0; +} + +int fdset_new_listen_fds(FDSet **_s, bool unset) { + int n, fd, r; + FDSet *s; + + assert(_s); + + /* Creates an fdset and fills in all passed file descriptors */ + + s = fdset_new(); + if (!s) { + r = -ENOMEM; + goto fail; + } + + n = sd_listen_fds(unset); + for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd ++) { + r = fdset_put(s, fd); + if (r < 0) + goto fail; + } + + *_s = s; + return 0; + +fail: + if (s) + set_free(MAKE_SET(s)); + + return r; +} + +int fdset_close_others(FDSet *fds) { + void *e; + Iterator i; + int *a = NULL; + size_t j = 0, m; + + m = fdset_size(fds); + + if (m > 0) { + a = newa(int, m); + SET_FOREACH(e, MAKE_SET(fds), i) + a[j++] = PTR_TO_FD(e); + } + + assert(j == m); + + return close_all_fds(a, j); +} + +unsigned fdset_size(FDSet *fds) { + return set_size(MAKE_SET(fds)); +} + +bool fdset_isempty(FDSet *fds) { + return set_isempty(MAKE_SET(fds)); +} + +int fdset_iterate(FDSet *s, Iterator *i) { + void *p; + + if (!set_iterate(MAKE_SET(s), i, &p)) + return -ENOENT; + + return PTR_TO_FD(p); +} + +int fdset_steal_first(FDSet *fds) { + void *p; + + p = set_steal_first(MAKE_SET(fds)); + if (!p) + return -ENOENT; + + return PTR_TO_FD(p); +} diff --git a/src/shared/fdset.h b/src/shared/fdset.h new file mode 100644 index 0000000..d31062b --- /dev/null +++ b/src/shared/fdset.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" +#include "set.h" + +typedef struct FDSet FDSet; + +FDSet* fdset_new(void); +FDSet* fdset_free(FDSet *s); + +int fdset_put(FDSet *s, int fd); +int fdset_put_dup(FDSet *s, int fd); + +bool fdset_contains(FDSet *s, int fd); +int fdset_remove(FDSet *s, int fd); + +int fdset_new_array(FDSet **ret, const int *fds, size_t n_fds); +int fdset_new_fill(FDSet **ret); +int fdset_new_listen_fds(FDSet **ret, bool unset); + +int fdset_cloexec(FDSet *fds, bool b); + +int fdset_close_others(FDSet *fds); + +unsigned fdset_size(FDSet *fds); +bool fdset_isempty(FDSet *fds); + +int fdset_iterate(FDSet *s, Iterator *i); + +int fdset_steal_first(FDSet *fds); + +#define FDSET_FOREACH(fd, fds, i) \ + for ((i) = ITERATOR_FIRST, (fd) = fdset_iterate((fds), &(i)); (fd) >= 0; (fd) = fdset_iterate((fds), &(i))) + +DEFINE_TRIVIAL_CLEANUP_FUNC(FDSet*, fdset_free); +#define _cleanup_fdset_free_ _cleanup_(fdset_freep) diff --git a/src/shared/fileio-label.c b/src/shared/fileio-label.c new file mode 100644 index 0000000..49ab297 --- /dev/null +++ b/src/shared/fileio-label.c @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/stat.h> + +#include "fileio-label.h" +#include "fileio.h" +#include "selinux-util.h" + +int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts) { + int r; + + r = mac_selinux_create_file_prepare(fn, S_IFREG); + if (r < 0) + return r; + + r = write_string_file_ts(fn, line, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC, ts); + + mac_selinux_create_file_clear(); + + return r; +} + +int create_shutdown_run_nologin_or_warn(void) { + int r; + + /* This is used twice: once in systemd-user-sessions.service, in order to block logins when we actually go + * down, and once in systemd-logind.service when shutdowns are scheduled, and logins are to be turned off a bit + * in advance. We use the same wording of the message in both cases. */ + + r = write_string_file_atomic_label("/run/nologin", + "System is going down. Unprivileged users are not permitted to log in anymore. " + "For technical details, see pam_nologin(8)."); + if (r < 0) + return log_error_errno(r, "Failed to create /run/nologin: %m"); + + return 0; +} diff --git a/src/shared/fileio-label.h b/src/shared/fileio-label.h new file mode 100644 index 0000000..8f88955 --- /dev/null +++ b/src/shared/fileio-label.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdio.h> + +/* These functions are split out of fileio.h (and not for example just flags to the functions they wrap) in order to + * optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but not + * for all */ + +int write_string_file_atomic_label_ts(const char *fn, const char *line, struct timespec *ts); +static inline int write_string_file_atomic_label(const char *fn, const char *line) { + return write_string_file_atomic_label_ts(fn, line, NULL); +} + +int create_shutdown_run_nologin_or_warn(void); diff --git a/src/shared/firewall-util.c b/src/shared/firewall-util.c new file mode 100644 index 0000000..cba52fb --- /dev/null +++ b/src/shared/firewall-util.c @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +/* Temporary work-around for broken glibc vs. linux kernel header definitions + * This is already fixed upstream, remove this when distributions have updated. + */ +#define _NET_IF_H 1 + +#include <alloca.h> +#include <arpa/inet.h> +#include <endian.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <sys/socket.h> +#include <net/if.h> +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif +#include <linux/if.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/nf_nat.h> +#include <linux/netfilter/xt_addrtype.h> +#include <libiptc/libiptc.h> + +#include "alloc-util.h" +#include "firewall-util.h" +#include "in-addr-util.h" +#include "macro.h" +#include "socket-util.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct xtc_handle*, iptc_free); + +static int entry_fill_basics( + struct ipt_entry *entry, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + + assert(entry); + + if (out_interface && !ifname_valid(out_interface)) + return -EINVAL; + if (in_interface && !ifname_valid(in_interface)) + return -EINVAL; + + entry->ip.proto = protocol; + + if (in_interface) { + size_t l; + + l = strlen(in_interface); + assert(l < sizeof entry->ip.iniface); + assert(l < sizeof entry->ip.iniface_mask); + + strcpy(entry->ip.iniface, in_interface); + memset(entry->ip.iniface_mask, 0xFF, l + 1); + } + if (source) { + entry->ip.src = source->in; + in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen); + } + + if (out_interface) { + size_t l = strlen(out_interface); + assert(l < sizeof entry->ip.outiface); + assert(l < sizeof entry->ip.outiface_mask); + + strcpy(entry->ip.outiface, out_interface); + memset(entry->ip.outiface_mask, 0xFF, l + 1); + } + if (destination) { + entry->ip.dst = destination->in; + in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen); + } + + return 0; +} + +int fw_add_masquerade( + bool add, + int af, + int protocol, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + + _cleanup_(iptc_freep) struct xtc_handle *h = NULL; + struct ipt_entry *entry, *mask; + struct ipt_entry_target *t; + size_t sz; + struct nf_nat_ipv4_multi_range_compat *mr; + int r; + + if (af != AF_INET) + return -EOPNOTSUPP; + + if (!IN_SET(protocol, 0, IPPROTO_TCP, IPPROTO_UDP)) + return -EOPNOTSUPP; + + h = iptc_init("nat"); + if (!h) + return -errno; + + sz = XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + + /* Put together the entry we want to add or remove */ + entry = alloca0(sz); + entry->next_offset = sz; + entry->target_offset = XT_ALIGN(sizeof(struct ipt_entry)); + r = entry_fill_basics(entry, protocol, NULL, source, source_prefixlen, out_interface, destination, destination_prefixlen); + if (r < 0) + return r; + + /* Fill in target part */ + t = ipt_get_target(entry); + t->u.target_size = + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + strncpy(t->u.user.name, "MASQUERADE", sizeof(t->u.user.name)); + mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; + mr->rangesize = 1; + + /* Create a search mask entry */ + mask = alloca(sz); + memset(mask, 0xFF, sz); + + if (add) { + if (iptc_check_entry("POSTROUTING", entry, (unsigned char*) mask, h)) + return 0; + if (errno != ENOENT) /* if other error than not existing yet, fail */ + return -errno; + + if (!iptc_insert_entry("POSTROUTING", entry, 0, h)) + return -errno; + } else { + if (!iptc_delete_entry("POSTROUTING", entry, (unsigned char*) mask, h)) { + if (errno == ENOENT) /* if it's already gone, all is good! */ + return 0; + + return -errno; + } + } + + if (!iptc_commit(h)) + return -errno; + + return 0; +} + +int fw_add_local_dnat( + bool add, + int af, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const union in_addr_union *destination, + unsigned destination_prefixlen, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote) { + + _cleanup_(iptc_freep) struct xtc_handle *h = NULL; + struct ipt_entry *entry, *mask; + struct ipt_entry_target *t; + struct ipt_entry_match *m; + struct xt_addrtype_info_v1 *at; + struct nf_nat_ipv4_multi_range_compat *mr; + size_t sz, msz; + int r; + + assert(add || !previous_remote); + + if (af != AF_INET) + return -EOPNOTSUPP; + + if (!IN_SET(protocol, IPPROTO_TCP, IPPROTO_UDP)) + return -EOPNOTSUPP; + + if (local_port <= 0) + return -EINVAL; + + if (remote_port <= 0) + return -EINVAL; + + h = iptc_init("nat"); + if (!h) + return -errno; + + sz = XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + + if (protocol == IPPROTO_TCP) + msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_tcp)); + else + msz = XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_udp)); + + sz += msz; + + /* Fill in basic part */ + entry = alloca0(sz); + entry->next_offset = sz; + entry->target_offset = + XT_ALIGN(sizeof(struct ipt_entry)) + + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)) + + msz; + r = entry_fill_basics(entry, protocol, in_interface, source, source_prefixlen, NULL, destination, destination_prefixlen); + if (r < 0) + return r; + + /* Fill in first match */ + m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry))); + m->u.match_size = msz; + if (protocol == IPPROTO_TCP) { + struct xt_tcp *tcp; + + strncpy(m->u.user.name, "tcp", sizeof(m->u.user.name)); + tcp = (struct xt_tcp*) m->data; + tcp->dpts[0] = tcp->dpts[1] = local_port; + tcp->spts[0] = 0; + tcp->spts[1] = 0xFFFF; + + } else { + struct xt_udp *udp; + + strncpy(m->u.user.name, "udp", sizeof(m->u.user.name)); + udp = (struct xt_udp*) m->data; + udp->dpts[0] = udp->dpts[1] = local_port; + udp->spts[0] = 0; + udp->spts[1] = 0xFFFF; + } + + /* Fill in second match */ + m = (struct ipt_entry_match*) ((uint8_t*) entry + XT_ALIGN(sizeof(struct ipt_entry)) + msz); + m->u.match_size = + XT_ALIGN(sizeof(struct ipt_entry_match)) + + XT_ALIGN(sizeof(struct xt_addrtype_info_v1)); + strncpy(m->u.user.name, "addrtype", sizeof(m->u.user.name)); + m->u.user.revision = 1; + at = (struct xt_addrtype_info_v1*) m->data; + at->dest = XT_ADDRTYPE_LOCAL; + + /* Fill in target part */ + t = ipt_get_target(entry); + t->u.target_size = + XT_ALIGN(sizeof(struct ipt_entry_target)) + + XT_ALIGN(sizeof(struct nf_nat_ipv4_multi_range_compat)); + strncpy(t->u.user.name, "DNAT", sizeof(t->u.user.name)); + mr = (struct nf_nat_ipv4_multi_range_compat*) t->data; + mr->rangesize = 1; + mr->range[0].flags = NF_NAT_RANGE_PROTO_SPECIFIED|NF_NAT_RANGE_MAP_IPS; + mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; + if (protocol == IPPROTO_TCP) + mr->range[0].min.tcp.port = mr->range[0].max.tcp.port = htons(remote_port); + else + mr->range[0].min.udp.port = mr->range[0].max.udp.port = htons(remote_port); + + mask = alloca0(sz); + memset(mask, 0xFF, sz); + + if (add) { + /* Add the PREROUTING rule, if it is missing so far */ + if (!iptc_check_entry("PREROUTING", entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -EINVAL; + + if (!iptc_insert_entry("PREROUTING", entry, 0, h)) + return -errno; + } + + /* If a previous remote is set, remove its entry */ + if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { + mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; + + if (!iptc_delete_entry("PREROUTING", entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + + mr->range[0].min_ip = mr->range[0].max_ip = remote->in.s_addr; + } + + /* Add the OUTPUT rule, if it is missing so far */ + if (!in_interface) { + + /* Don't apply onto loopback addresses */ + if (!destination) { + entry->ip.dst.s_addr = htobe32(0x7F000000); + entry->ip.dmsk.s_addr = htobe32(0xFF000000); + entry->ip.invflags = IPT_INV_DSTIP; + } + + if (!iptc_check_entry("OUTPUT", entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + + if (!iptc_insert_entry("OUTPUT", entry, 0, h)) + return -errno; + } + + /* If a previous remote is set, remove its entry */ + if (previous_remote && previous_remote->in.s_addr != remote->in.s_addr) { + mr->range[0].min_ip = mr->range[0].max_ip = previous_remote->in.s_addr; + + if (!iptc_delete_entry("OUTPUT", entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + } + } + } else { + if (!iptc_delete_entry("PREROUTING", entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + + if (!in_interface) { + if (!destination) { + entry->ip.dst.s_addr = htobe32(0x7F000000); + entry->ip.dmsk.s_addr = htobe32(0xFF000000); + entry->ip.invflags = IPT_INV_DSTIP; + } + + if (!iptc_delete_entry("OUTPUT", entry, (unsigned char*) mask, h)) { + if (errno != ENOENT) + return -errno; + } + } + } + + if (!iptc_commit(h)) + return -errno; + + return 0; +} diff --git a/src/shared/firewall-util.h b/src/shared/firewall-util.h new file mode 100644 index 0000000..4fc71da --- /dev/null +++ b/src/shared/firewall-util.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "in-addr-util.h" + +#if HAVE_LIBIPTC + +int fw_add_masquerade( + bool add, + int af, + int protocol, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen); + +int fw_add_local_dnat( + bool add, + int af, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const union in_addr_union *destination, + unsigned destination_prefixlen, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote); + +#else + +static inline int fw_add_masquerade( + bool add, + int af, + int protocol, + const union in_addr_union *source, + unsigned source_prefixlen, + const char *out_interface, + const union in_addr_union *destination, + unsigned destination_prefixlen) { + return -EOPNOTSUPP; +} + +static inline int fw_add_local_dnat( + bool add, + int af, + int protocol, + const char *in_interface, + const union in_addr_union *source, + unsigned source_prefixlen, + const union in_addr_union *destination, + unsigned destination_prefixlen, + uint16_t local_port, + const union in_addr_union *remote, + uint16_t remote_port, + const union in_addr_union *previous_remote) { + return -EOPNOTSUPP; +} + +#endif diff --git a/src/shared/format-table.c b/src/shared/format-table.c new file mode 100644 index 0000000..7d52980 --- /dev/null +++ b/src/shared/format-table.c @@ -0,0 +1,1625 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <ctype.h> +#include <stdio_ext.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "format-table.h" +#include "gunicode.h" +#include "pager.h" +#include "parse-util.h" +#include "pretty-print.h" +#include "string-util.h" +#include "terminal-util.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" + +#define DEFAULT_WEIGHT 100 + +/* + A few notes on implementation details: + + - TableCell is a 'fake' structure, it's just used as data type to pass references to specific cell positions in the + table. It can be easily converted to an index number and back. + + - TableData is where the actual data is stored: it encapsulates the data and formatting for a specific cell. It's + 'pseudo-immutable' and ref-counted. When a cell's data's formatting is to be changed, we duplicate the object if the + ref-counting is larger than 1. Note that TableData and its ref-counting is mostly not visible to the outside. The + outside only sees Table and TableCell. + + - The Table object stores a simple one-dimensional array of references to TableData objects, one row after the + previous one. + + - There's no special concept of a "row" or "column" in the table, and no special concept of the "header" row. It's all + derived from the cell index: we know how many cells are to be stored in a row, and can determine the rest from + that. The first row is always the header row. If header display is turned off we simply skip outputting the first + row. Also, when sorting rows we always leave the first row where it is, as the header shouldn't move. + + - Note because there's no row and no column object some properties that might be appropriate as row/column properties + are exposed as cell properties instead. For example, the "weight" of a column (which is used to determine where to + add/remove space preferable when expanding/compressing tables horizontally) is actually made the "weight" of a + cell. Given that we usually need it per-column though we will calculate the average across every cell of the column + instead. + + - To make things easy, when cells are added without any explicit configured formatting, then we'll copy the formatting + from the same cell in the previous cell. This is particularly useful for the "weight" of the cell (see above), as + this means setting the weight of the cells of the header row will nicely propagate to all cells in the other rows. +*/ + +typedef struct TableData { + unsigned n_ref; + TableDataType type; + + size_t minimum_width; /* minimum width for the column */ + size_t maximum_width; /* maximum width for the column */ + unsigned weight; /* the horizontal weight for this column, in case the table is expanded/compressed */ + unsigned ellipsize_percent; /* 0 … 100, where to place the ellipsis when compression is needed */ + unsigned align_percent; /* 0 … 100, where to pad with spaces when expanding is needed. 0: left-aligned, 100: right-aligned */ + + bool uppercase; /* Uppercase string on display */ + + const char *color; /* ANSI color string to use for this cell. When written to terminal should not move cursor. Will automatically be reset after the cell */ + char *url; /* A URL to use for a clickable hyperlink */ + char *formatted; /* A cached textual representation of the cell data, before ellipsation/alignment */ + + union { + uint8_t data[0]; /* data is generic array */ + bool boolean; + usec_t timestamp; + usec_t timespan; + uint64_t size; + char string[0]; + uint32_t uint32; + uint64_t uint64; + int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */ + /* … add more here as we start supporting more cell data types … */ + }; +} TableData; + +static size_t TABLE_CELL_TO_INDEX(TableCell *cell) { + size_t i; + + assert(cell); + + i = PTR_TO_SIZE(cell); + assert(i > 0); + + return i-1; +} + +static TableCell* TABLE_INDEX_TO_CELL(size_t index) { + assert(index != (size_t) -1); + return SIZE_TO_PTR(index + 1); +} + +struct Table { + size_t n_columns; + size_t n_cells; + + bool header; /* Whether to show the header row? */ + size_t width; /* If != (size_t) -1 the width to format this table in */ + + TableData **data; + size_t n_allocated; + + size_t *display_map; /* List of columns to show (by their index). It's fine if columns are listed multiple times or not at all */ + size_t n_display_map; + + size_t *sort_map; /* The columns to order rows by, in order of preference. */ + size_t n_sort_map; + + bool *reverse_map; +}; + +Table *table_new_raw(size_t n_columns) { + _cleanup_(table_unrefp) Table *t = NULL; + + assert(n_columns > 0); + + t = new(Table, 1); + if (!t) + return NULL; + + *t = (struct Table) { + .n_columns = n_columns, + .header = true, + .width = (size_t) -1, + }; + + return TAKE_PTR(t); +} + +Table *table_new_internal(const char *first_header, ...) { + _cleanup_(table_unrefp) Table *t = NULL; + size_t n_columns = 1; + const char *h; + va_list ap; + int r; + + assert(first_header); + + va_start(ap, first_header); + for (;;) { + h = va_arg(ap, const char*); + if (!h) + break; + + n_columns++; + } + va_end(ap); + + t = table_new_raw(n_columns); + if (!t) + return NULL; + + va_start(ap, first_header); + for (h = first_header; h; h = va_arg(ap, const char*)) { + TableCell *cell; + + r = table_add_cell(t, &cell, TABLE_STRING, h); + if (r < 0) { + va_end(ap); + return NULL; + } + + /* Make the table header uppercase */ + r = table_set_uppercase(t, cell, true); + if (r < 0) { + va_end(ap); + return NULL; + } + } + va_end(ap); + + assert(t->n_columns == t->n_cells); + return TAKE_PTR(t); +} + +static TableData *table_data_free(TableData *d) { + assert(d); + + free(d->formatted); + free(d->url); + + return mfree(d); +} + +DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(TableData, table_data, table_data_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(TableData*, table_data_unref); + +Table *table_unref(Table *t) { + size_t i; + + if (!t) + return NULL; + + for (i = 0; i < t->n_cells; i++) + table_data_unref(t->data[i]); + + free(t->data); + free(t->display_map); + free(t->sort_map); + free(t->reverse_map); + + return mfree(t); +} + +static size_t table_data_size(TableDataType type, const void *data) { + + switch (type) { + + case TABLE_EMPTY: + return 0; + + case TABLE_STRING: + return strlen(data) + 1; + + case TABLE_BOOLEAN: + return sizeof(bool); + + case TABLE_TIMESTAMP: + case TABLE_TIMESPAN: + return sizeof(usec_t); + + case TABLE_SIZE: + case TABLE_UINT64: + return sizeof(uint64_t); + + case TABLE_UINT32: + return sizeof(uint32_t); + + case TABLE_PERCENT: + return sizeof(int); + + default: + assert_not_reached("Uh? Unexpected cell type"); + } +} + +static bool table_data_matches( + TableData *d, + TableDataType type, + const void *data, + size_t minimum_width, + size_t maximum_width, + unsigned weight, + unsigned align_percent, + unsigned ellipsize_percent) { + + size_t k, l; + assert(d); + + if (d->type != type) + return false; + + if (d->minimum_width != minimum_width) + return false; + + if (d->maximum_width != maximum_width) + return false; + + if (d->weight != weight) + return false; + + if (d->align_percent != align_percent) + return false; + + if (d->ellipsize_percent != ellipsize_percent) + return false; + + /* If a color/url/uppercase flag is set, refuse to merge */ + if (d->color) + return false; + if (d->url) + return false; + if (d->uppercase) + return false; + + k = table_data_size(type, data); + l = table_data_size(d->type, d->data); + + if (k != l) + return false; + + return memcmp_safe(data, d->data, l) == 0; +} + +static TableData *table_data_new( + TableDataType type, + const void *data, + size_t minimum_width, + size_t maximum_width, + unsigned weight, + unsigned align_percent, + unsigned ellipsize_percent) { + + size_t data_size; + TableData *d; + + data_size = table_data_size(type, data); + + d = malloc0(offsetof(TableData, data) + data_size); + if (!d) + return NULL; + + d->n_ref = 1; + d->type = type; + d->minimum_width = minimum_width; + d->maximum_width = maximum_width; + d->weight = weight; + d->align_percent = align_percent; + d->ellipsize_percent = ellipsize_percent; + memcpy_safe(d->data, data, data_size); + + return d; +} + +int table_add_cell_full( + Table *t, + TableCell **ret_cell, + TableDataType type, + const void *data, + size_t minimum_width, + size_t maximum_width, + unsigned weight, + unsigned align_percent, + unsigned ellipsize_percent) { + + _cleanup_(table_data_unrefp) TableData *d = NULL; + TableData *p; + + assert(t); + assert(type >= 0); + assert(type < _TABLE_DATA_TYPE_MAX); + + /* Determine the cell adjacent to the current one, but one row up */ + if (t->n_cells >= t->n_columns) + assert_se(p = t->data[t->n_cells - t->n_columns]); + else + p = NULL; + + /* If formatting parameters are left unspecified, copy from the previous row */ + if (minimum_width == (size_t) -1) + minimum_width = p ? p->minimum_width : 1; + + if (weight == (unsigned) -1) + weight = p ? p->weight : DEFAULT_WEIGHT; + + if (align_percent == (unsigned) -1) + align_percent = p ? p->align_percent : 0; + + if (ellipsize_percent == (unsigned) -1) + ellipsize_percent = p ? p->ellipsize_percent : 100; + + assert(align_percent <= 100); + assert(ellipsize_percent <= 100); + + /* Small optimization: Pretty often adjacent cells in two subsequent lines have the same data and + * formatting. Let's see if we can reuse the cell data and ref it once more. */ + + if (p && table_data_matches(p, type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent)) + d = table_data_ref(p); + else { + d = table_data_new(type, data, minimum_width, maximum_width, weight, align_percent, ellipsize_percent); + if (!d) + return -ENOMEM; + } + + if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns))) + return -ENOMEM; + + if (ret_cell) + *ret_cell = TABLE_INDEX_TO_CELL(t->n_cells); + + t->data[t->n_cells++] = TAKE_PTR(d); + + return 0; +} + +int table_dup_cell(Table *t, TableCell *cell) { + size_t i; + + assert(t); + + /* Add the data of the specified cell a second time as a new cell to the end. */ + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return -ENXIO; + + if (!GREEDY_REALLOC(t->data, t->n_allocated, MAX(t->n_cells + 1, t->n_columns))) + return -ENOMEM; + + t->data[t->n_cells++] = table_data_ref(t->data[i]); + return 0; +} + +static int table_dedup_cell(Table *t, TableCell *cell) { + _cleanup_free_ char *curl = NULL; + TableData *nd, *od; + size_t i; + + assert(t); + + /* Helper call that ensures the specified cell's data object has a ref count of 1, which we can use before + * changing a cell's formatting without effecting every other cell's formatting that shares the same data */ + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return -ENXIO; + + assert_se(od = t->data[i]); + if (od->n_ref == 1) + return 0; + + assert(od->n_ref > 1); + + if (od->url) { + curl = strdup(od->url); + if (!curl) + return -ENOMEM; + } + + nd = table_data_new( + od->type, + od->data, + od->minimum_width, + od->maximum_width, + od->weight, + od->align_percent, + od->ellipsize_percent); + if (!nd) + return -ENOMEM; + + nd->color = od->color; + nd->url = TAKE_PTR(curl); + nd->uppercase = od->uppercase; + + table_data_unref(od); + t->data[i] = nd; + + assert(nd->n_ref == 1); + + return 1; +} + +static TableData *table_get_data(Table *t, TableCell *cell) { + size_t i; + + assert(t); + assert(cell); + + /* Get the data object of the specified cell, or NULL if it doesn't exist */ + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return NULL; + + assert(t->data[i]); + assert(t->data[i]->n_ref > 0); + + return t->data[i]; +} + +int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width) { + int r; + + assert(t); + assert(cell); + + if (minimum_width == (size_t) -1) + minimum_width = 1; + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->minimum_width = minimum_width; + return 0; +} + +int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width) { + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->maximum_width = maximum_width; + return 0; +} + +int table_set_weight(Table *t, TableCell *cell, unsigned weight) { + int r; + + assert(t); + assert(cell); + + if (weight == (unsigned) -1) + weight = DEFAULT_WEIGHT; + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->weight = weight; + return 0; +} + +int table_set_align_percent(Table *t, TableCell *cell, unsigned percent) { + int r; + + assert(t); + assert(cell); + + if (percent == (unsigned) -1) + percent = 0; + + assert(percent <= 100); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->align_percent = percent; + return 0; +} + +int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent) { + int r; + + assert(t); + assert(cell); + + if (percent == (unsigned) -1) + percent = 100; + + assert(percent <= 100); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->ellipsize_percent = percent; + return 0; +} + +int table_set_color(Table *t, TableCell *cell, const char *color) { + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + table_get_data(t, cell)->color = empty_to_null(color); + return 0; +} + +int table_set_url(Table *t, TableCell *cell, const char *url) { + _cleanup_free_ char *copy = NULL; + int r; + + assert(t); + assert(cell); + + if (url) { + copy = strdup(url); + if (!copy) + return -ENOMEM; + } + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + return free_and_replace(table_get_data(t, cell)->url, copy); +} + +int table_set_uppercase(Table *t, TableCell *cell, bool b) { + TableData *d; + int r; + + assert(t); + assert(cell); + + r = table_dedup_cell(t, cell); + if (r < 0) + return r; + + assert_se(d = table_get_data(t, cell)); + + if (d->uppercase == b) + return 0; + + d->formatted = mfree(d->formatted); + d->uppercase = b; + return 1; +} + +int table_update(Table *t, TableCell *cell, TableDataType type, const void *data) { + _cleanup_free_ char *curl = NULL; + TableData *nd, *od; + size_t i; + + assert(t); + assert(cell); + + i = TABLE_CELL_TO_INDEX(cell); + if (i >= t->n_cells) + return -ENXIO; + + assert_se(od = t->data[i]); + + if (od->url) { + curl = strdup(od->url); + if (!curl) + return -ENOMEM; + } + + nd = table_data_new( + type, + data, + od->minimum_width, + od->maximum_width, + od->weight, + od->align_percent, + od->ellipsize_percent); + if (!nd) + return -ENOMEM; + + nd->color = od->color; + nd->url = TAKE_PTR(curl); + nd->uppercase = od->uppercase; + + table_data_unref(od); + t->data[i] = nd; + + return 0; +} + +int table_add_many_internal(Table *t, TableDataType first_type, ...) { + TableDataType type; + va_list ap; + int r; + + assert(t); + assert(first_type >= 0); + assert(first_type < _TABLE_DATA_TYPE_MAX); + + type = first_type; + + va_start(ap, first_type); + for (;;) { + const void *data; + union { + uint64_t size; + usec_t usec; + uint32_t uint32; + uint64_t uint64; + int percent; + bool b; + } buffer; + + switch (type) { + + case TABLE_EMPTY: + data = NULL; + break; + + case TABLE_STRING: + data = va_arg(ap, const char *); + break; + + case TABLE_BOOLEAN: + buffer.b = va_arg(ap, int); + data = &buffer.b; + break; + + case TABLE_TIMESTAMP: + case TABLE_TIMESPAN: + buffer.usec = va_arg(ap, usec_t); + data = &buffer.usec; + break; + + case TABLE_SIZE: + buffer.size = va_arg(ap, uint64_t); + data = &buffer.size; + break; + + case TABLE_UINT32: + buffer.uint32 = va_arg(ap, uint32_t); + data = &buffer.uint32; + break; + + case TABLE_UINT64: + buffer.uint64 = va_arg(ap, uint64_t); + data = &buffer.uint64; + break; + + case TABLE_PERCENT: + buffer.percent = va_arg(ap, int); + data = &buffer.percent; + break; + + case _TABLE_DATA_TYPE_MAX: + /* Used as end marker */ + va_end(ap); + return 0; + + default: + assert_not_reached("Uh? Unexpected data type."); + } + + r = table_add_cell(t, NULL, type, data); + if (r < 0) { + va_end(ap); + return r; + } + + type = va_arg(ap, TableDataType); + } +} + +void table_set_header(Table *t, bool b) { + assert(t); + + t->header = b; +} + +void table_set_width(Table *t, size_t width) { + assert(t); + + t->width = width; +} + +int table_set_display(Table *t, size_t first_column, ...) { + size_t allocated, column; + va_list ap; + + assert(t); + + allocated = t->n_display_map; + column = first_column; + + va_start(ap, first_column); + for (;;) { + assert(column < t->n_columns); + + if (!GREEDY_REALLOC(t->display_map, allocated, MAX(t->n_columns, t->n_display_map+1))) { + va_end(ap); + return -ENOMEM; + } + + t->display_map[t->n_display_map++] = column; + + column = va_arg(ap, size_t); + if (column == (size_t) -1) + break; + + } + va_end(ap); + + return 0; +} + +int table_set_sort(Table *t, size_t first_column, ...) { + size_t allocated, column; + va_list ap; + + assert(t); + + allocated = t->n_sort_map; + column = first_column; + + va_start(ap, first_column); + for (;;) { + assert(column < t->n_columns); + + if (!GREEDY_REALLOC(t->sort_map, allocated, MAX(t->n_columns, t->n_sort_map+1))) { + va_end(ap); + return -ENOMEM; + } + + t->sort_map[t->n_sort_map++] = column; + + column = va_arg(ap, size_t); + if (column == (size_t) -1) + break; + } + va_end(ap); + + return 0; +} + +static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t index_b) { + assert(a); + assert(b); + + if (a->type == b->type) { + + /* We only define ordering for cells of the same data type. If cells with different data types are + * compared we follow the order the cells were originally added in */ + + switch (a->type) { + + case TABLE_STRING: + return strcmp(a->string, b->string); + + case TABLE_BOOLEAN: + if (!a->boolean && b->boolean) + return -1; + if (a->boolean && !b->boolean) + return 1; + return 0; + + case TABLE_TIMESTAMP: + return CMP(a->timestamp, b->timestamp); + + case TABLE_TIMESPAN: + return CMP(a->timespan, b->timespan); + + case TABLE_SIZE: + return CMP(a->size, b->size); + + case TABLE_UINT32: + return CMP(a->uint32, b->uint32); + + case TABLE_UINT64: + return CMP(a->uint64, b->uint64); + + case TABLE_PERCENT: + return CMP(a->percent, b->percent); + + default: + ; + } + } + + /* Generic fallback using the orginal order in which the cells where added. */ + return CMP(index_a, index_b); +} + +static int table_data_compare(const size_t *a, const size_t *b, Table *t) { + size_t i; + int r; + + assert(t); + assert(t->sort_map); + + /* Make sure the header stays at the beginning */ + if (*a < t->n_columns && *b < t->n_columns) + return 0; + if (*a < t->n_columns) + return -1; + if (*b < t->n_columns) + return 1; + + /* Order other lines by the sorting map */ + for (i = 0; i < t->n_sort_map; i++) { + TableData *d, *dd; + + d = t->data[*a + t->sort_map[i]]; + dd = t->data[*b + t->sort_map[i]]; + + r = cell_data_compare(d, *a, dd, *b); + if (r != 0) + return t->reverse_map && t->reverse_map[t->sort_map[i]] ? -r : r; + } + + /* Order identical lines by the order there were originally added in */ + return CMP(*a, *b); +} + +static const char *table_data_format(TableData *d) { + assert(d); + + if (d->formatted) + return d->formatted; + + switch (d->type) { + case TABLE_EMPTY: + return ""; + + case TABLE_STRING: + if (d->uppercase) { + char *p, *q; + + d->formatted = new(char, strlen(d->string) + 1); + if (!d->formatted) + return NULL; + + for (p = d->string, q = d->formatted; *p; p++, q++) + *q = (char) toupper((unsigned char) *p); + *q = 0; + + return d->formatted; + } + + return d->string; + + case TABLE_BOOLEAN: + return yes_no(d->boolean); + + case TABLE_TIMESTAMP: { + _cleanup_free_ char *p; + + p = new(char, FORMAT_TIMESTAMP_MAX); + if (!p) + return NULL; + + if (!format_timestamp(p, FORMAT_TIMESTAMP_MAX, d->timestamp)) + return "n/a"; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_TIMESPAN: { + _cleanup_free_ char *p; + + p = new(char, FORMAT_TIMESPAN_MAX); + if (!p) + return NULL; + + if (!format_timespan(p, FORMAT_TIMESPAN_MAX, d->timespan, 0)) + return "n/a"; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_SIZE: { + _cleanup_free_ char *p; + + p = new(char, FORMAT_BYTES_MAX); + if (!p) + return NULL; + + if (!format_bytes(p, FORMAT_BYTES_MAX, d->size)) + return "n/a"; + + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT32: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint32) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIu32, d->uint32); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_UINT64: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->uint64) + 1); + if (!p) + return NULL; + + sprintf(p, "%" PRIu64, d->uint64); + d->formatted = TAKE_PTR(p); + break; + } + + case TABLE_PERCENT: { + _cleanup_free_ char *p; + + p = new(char, DECIMAL_STR_WIDTH(d->percent) + 2); + if (!p) + return NULL; + + sprintf(p, "%i%%" , d->percent); + d->formatted = TAKE_PTR(p); + break; + } + + default: + assert_not_reached("Unexpected type?"); + } + + return d->formatted; +} + +static int table_data_requested_width(TableData *d, size_t *ret) { + const char *t; + size_t l; + + t = table_data_format(d); + if (!t) + return -ENOMEM; + + l = utf8_console_width(t); + if (l == (size_t) -1) + return -EINVAL; + + if (d->maximum_width != (size_t) -1 && l > d->maximum_width) + l = d->maximum_width; + + if (l < d->minimum_width) + l = d->minimum_width; + + *ret = l; + return 0; +} + +static char *align_string_mem(const char *str, const char *url, size_t new_length, unsigned percent) { + size_t w = 0, space, lspace, old_length, clickable_length; + _cleanup_free_ char *clickable = NULL; + const char *p; + char *ret; + size_t i; + int r; + + /* As with ellipsize_mem(), 'old_length' is a byte size while 'new_length' is a width in character cells */ + + assert(str); + assert(percent <= 100); + + old_length = strlen(str); + + if (url) { + r = terminal_urlify(url, str, &clickable); + if (r < 0) + return NULL; + + clickable_length = strlen(clickable); + } else + clickable_length = old_length; + + /* Determine current width on screen */ + p = str; + while (p < str + old_length) { + char32_t c; + + if (utf8_encoded_to_unichar(p, &c) < 0) { + p++, w++; /* count invalid chars as 1 */ + continue; + } + + p = utf8_next_char(p); + w += unichar_iswide(c) ? 2 : 1; + } + + /* Already wider than the target, if so, don't do anything */ + if (w >= new_length) + return clickable ? TAKE_PTR(clickable) : strdup(str); + + /* How much spaces shall we add? An how much on the left side? */ + space = new_length - w; + lspace = space * percent / 100U; + + ret = new(char, space + clickable_length + 1); + if (!ret) + return NULL; + + for (i = 0; i < lspace; i++) + ret[i] = ' '; + memcpy(ret + lspace, clickable ?: str, clickable_length); + for (i = lspace + clickable_length; i < space + clickable_length; i++) + ret[i] = ' '; + + ret[space + clickable_length] = 0; + return ret; +} + +int table_print(Table *t, FILE *f) { + size_t n_rows, *minimum_width, *maximum_width, display_columns, *requested_width, + i, j, table_minimum_width, table_maximum_width, table_requested_width, table_effective_width, + *width; + _cleanup_free_ size_t *sorted = NULL; + uint64_t *column_weight, weight_sum; + int r; + + assert(t); + + if (!f) + f = stdout; + + /* Ensure we have no incomplete rows */ + assert(t->n_cells % t->n_columns == 0); + + n_rows = t->n_cells / t->n_columns; + assert(n_rows > 0); /* at least the header row must be complete */ + + if (t->sort_map) { + /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */ + + sorted = new(size_t, n_rows); + if (!sorted) + return -ENOMEM; + + for (i = 0; i < n_rows; i++) + sorted[i] = i * t->n_columns; + + typesafe_qsort_r(sorted, n_rows, table_data_compare, t); + } + + if (t->display_map) + display_columns = t->n_display_map; + else + display_columns = t->n_columns; + + assert(display_columns > 0); + + minimum_width = newa(size_t, display_columns); + maximum_width = newa(size_t, display_columns); + requested_width = newa(size_t, display_columns); + width = newa(size_t, display_columns); + column_weight = newa0(uint64_t, display_columns); + + for (j = 0; j < display_columns; j++) { + minimum_width[j] = 1; + maximum_width[j] = (size_t) -1; + requested_width[j] = (size_t) -1; + } + + /* First pass: determine column sizes */ + for (i = t->header ? 0 : 1; i < n_rows; i++) { + TableData **row; + + /* Note that we don't care about ordering at this time, as we just want to determine column sizes, + * hence we don't care for sorted[] during the first pass. */ + row = t->data + i * t->n_columns; + + for (j = 0; j < display_columns; j++) { + TableData *d; + size_t req; + + assert_se(d = row[t->display_map ? t->display_map[j] : j]); + + r = table_data_requested_width(d, &req); + if (r < 0) + return r; + + /* Determine the biggest width that any cell in this column would like to have */ + if (requested_width[j] == (size_t) -1 || + requested_width[j] < req) + requested_width[j] = req; + + /* Determine the minimum width any cell in this column needs */ + if (minimum_width[j] < d->minimum_width) + minimum_width[j] = d->minimum_width; + + /* Determine the maximum width any cell in this column needs */ + if (d->maximum_width != (size_t) -1 && + (maximum_width[j] == (size_t) -1 || + maximum_width[j] > d->maximum_width)) + maximum_width[j] = d->maximum_width; + + /* Determine the full columns weight */ + column_weight[j] += d->weight; + } + } + + /* One space between each column */ + table_requested_width = table_minimum_width = table_maximum_width = display_columns - 1; + + /* Calculate the total weight for all columns, plus the minimum, maximum and requested width for the table. */ + weight_sum = 0; + for (j = 0; j < display_columns; j++) { + weight_sum += column_weight[j]; + + table_minimum_width += minimum_width[j]; + + if (maximum_width[j] == (size_t) -1) + table_maximum_width = (size_t) -1; + else + table_maximum_width += maximum_width[j]; + + table_requested_width += requested_width[j]; + } + + /* Calculate effective table width */ + if (t->width == (size_t) -1) + table_effective_width = pager_have() ? table_requested_width : MIN(table_requested_width, columns()); + else + table_effective_width = t->width; + + if (table_maximum_width != (size_t) -1 && table_effective_width > table_maximum_width) + table_effective_width = table_maximum_width; + + if (table_effective_width < table_minimum_width) + table_effective_width = table_minimum_width; + + if (table_effective_width >= table_requested_width) { + size_t extra; + + /* We have extra room, let's distribute it among columns according to their weights. We first provide + * each column with what it asked for and the distribute the rest. */ + + extra = table_effective_width - table_requested_width; + + for (j = 0; j < display_columns; j++) { + size_t delta; + + if (weight_sum == 0) + width[j] = requested_width[j] + extra / (display_columns - j); /* Avoid division by zero */ + else + width[j] = requested_width[j] + (extra * column_weight[j]) / weight_sum; + + if (maximum_width[j] != (size_t) -1 && width[j] > maximum_width[j]) + width[j] = maximum_width[j]; + + if (width[j] < minimum_width[j]) + width[j] = minimum_width[j]; + + assert(width[j] >= requested_width[j]); + delta = width[j] - requested_width[j]; + + /* Subtract what we just added from the rest */ + if (extra > delta) + extra -= delta; + else + extra = 0; + + assert(weight_sum >= column_weight[j]); + weight_sum -= column_weight[j]; + } + + } else { + /* We need to compress the table, columns can't get what they asked for. We first provide each column + * with the minimum they need, and then distribute anything left. */ + bool finalize = false; + size_t extra; + + extra = table_effective_width - table_minimum_width; + + for (j = 0; j < display_columns; j++) + width[j] = (size_t) -1; + + for (;;) { + bool restart = false; + + for (j = 0; j < display_columns; j++) { + size_t delta, w; + + /* Did this column already get something assigned? If so, let's skip to the next */ + if (width[j] != (size_t) -1) + continue; + + if (weight_sum == 0) + w = minimum_width[j] + extra / (display_columns - j); /* avoid division by zero */ + else + w = minimum_width[j] + (extra * column_weight[j]) / weight_sum; + + if (w >= requested_width[j]) { + /* Never give more than requested. If we hit a column like this, there's more + * space to allocate to other columns which means we need to restart the + * iteration. However, if we hit a column like this, let's assign it the space + * it wanted for good early.*/ + + w = requested_width[j]; + restart = true; + + } else if (!finalize) + continue; + + width[j] = w; + + assert(w >= minimum_width[j]); + delta = w - minimum_width[j]; + + assert(delta <= extra); + extra -= delta; + + assert(weight_sum >= column_weight[j]); + weight_sum -= column_weight[j]; + + if (restart && !finalize) + break; + } + + if (finalize) + break; + + if (!restart) + finalize = true; + } + } + + /* Second pass: show output */ + for (i = t->header ? 0 : 1; i < n_rows; i++) { + TableData **row; + + if (sorted) + row = t->data + sorted[i]; + else + row = t->data + i * t->n_columns; + + for (j = 0; j < display_columns; j++) { + _cleanup_free_ char *buffer = NULL; + const char *field; + TableData *d; + size_t l; + + assert_se(d = row[t->display_map ? t->display_map[j] : j]); + + field = table_data_format(d); + if (!field) + return -ENOMEM; + + l = utf8_console_width(field); + if (l > width[j]) { + /* Field is wider than allocated space. Let's ellipsize */ + + buffer = ellipsize(field, width[j], d->ellipsize_percent); + if (!buffer) + return -ENOMEM; + + field = buffer; + + } else if (l < width[j]) { + /* Field is shorter than allocated space. Let's align with spaces */ + + buffer = align_string_mem(field, d->url, width[j], d->align_percent); + if (!buffer) + return -ENOMEM; + + field = buffer; + } + + if (l >= width[j] && d->url) { + _cleanup_free_ char *clickable = NULL; + + r = terminal_urlify(d->url, field, &clickable); + if (r < 0) + return r; + + free_and_replace(buffer, clickable); + field = buffer; + } + + if (row == t->data) /* underline header line fully, including the column separator */ + fputs(ansi_underline(), f); + + if (j > 0) + fputc(' ', f); /* column separator */ + + if (d->color && colors_enabled()) { + if (row == t->data) /* first undo header underliner */ + fputs(ANSI_NORMAL, f); + + fputs(d->color, f); + } + + fputs(field, f); + + if (colors_enabled() && (d->color || row == t->data)) + fputs(ANSI_NORMAL, f); + } + + fputc('\n', f); + } + + return fflush_and_check(f); +} + +int table_format(Table *t, char **ret) { + _cleanup_fclose_ FILE *f = NULL; + char *buf = NULL; + size_t sz = 0; + int r; + + f = open_memstream(&buf, &sz); + if (!f) + return -ENOMEM; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + r = table_print(t, f); + if (r < 0) + return r; + + f = safe_fclose(f); + + *ret = buf; + + return 0; +} + +size_t table_get_rows(Table *t) { + if (!t) + return 0; + + assert(t->n_columns > 0); + return t->n_cells / t->n_columns; +} + +size_t table_get_columns(Table *t) { + if (!t) + return 0; + + assert(t->n_columns > 0); + return t->n_columns; +} + +int table_set_reverse(Table *t, size_t column, bool b) { + assert(t); + assert(column < t->n_columns); + + if (!t->reverse_map) { + if (!b) + return 0; + + t->reverse_map = new0(bool, t->n_columns); + if (!t->reverse_map) + return -ENOMEM; + } + + t->reverse_map[column] = b; + return 0; +} + +TableCell *table_get_cell(Table *t, size_t row, size_t column) { + size_t i; + + assert(t); + + if (column >= t->n_columns) + return NULL; + + i = row * t->n_columns + column; + if (i >= t->n_cells) + return NULL; + + return TABLE_INDEX_TO_CELL(i); +} + +const void *table_get(Table *t, TableCell *cell) { + TableData *d; + + assert(t); + + d = table_get_data(t, cell); + if (!d) + return NULL; + + return d->data; +} + +const void* table_get_at(Table *t, size_t row, size_t column) { + TableCell *cell; + + cell = table_get_cell(t, row, column); + if (!cell) + return NULL; + + return table_get(t, cell); +} + +static int table_data_to_json(TableData *d, JsonVariant **ret) { + + switch (d->type) { + + case TABLE_EMPTY: + return json_variant_new_null(ret); + + case TABLE_STRING: + return json_variant_new_string(ret, d->string); + + case TABLE_BOOLEAN: + return json_variant_new_boolean(ret, d->boolean); + + case TABLE_TIMESTAMP: + if (d->timestamp == USEC_INFINITY) + return json_variant_new_null(ret); + + return json_variant_new_unsigned(ret, d->timestamp); + + case TABLE_TIMESPAN: + if (d->timespan == USEC_INFINITY) + return json_variant_new_null(ret); + + return json_variant_new_unsigned(ret, d->timespan); + + case TABLE_SIZE: + if (d->size == (size_t) -1) + return json_variant_new_null(ret); + + return json_variant_new_unsigned(ret, d->size); + + case TABLE_UINT32: + return json_variant_new_unsigned(ret, d->uint32); + + case TABLE_UINT64: + return json_variant_new_unsigned(ret, d->uint64); + + case TABLE_PERCENT: + return json_variant_new_integer(ret, d->percent); + + default: + return -EINVAL; + } +} + +int table_to_json(Table *t, JsonVariant **ret) { + JsonVariant **rows = NULL, **elements = NULL; + _cleanup_free_ size_t *sorted = NULL; + size_t n_rows, i, j, display_columns; + int r; + + assert(t); + + /* Ensure we have no incomplete rows */ + assert(t->n_cells % t->n_columns == 0); + + n_rows = t->n_cells / t->n_columns; + assert(n_rows > 0); /* at least the header row must be complete */ + + if (t->sort_map) { + /* If sorting is requested, let's calculate an index table we use to lookup the actual index to display with. */ + + sorted = new(size_t, n_rows); + if (!sorted) { + r = -ENOMEM; + goto finish; + } + + for (i = 0; i < n_rows; i++) + sorted[i] = i * t->n_columns; + + typesafe_qsort_r(sorted, n_rows, table_data_compare, t); + } + + if (t->display_map) + display_columns = t->n_display_map; + else + display_columns = t->n_columns; + assert(display_columns > 0); + + elements = new0(JsonVariant*, display_columns * 2); + if (!elements) { + r = -ENOMEM; + goto finish; + } + + for (j = 0; j < display_columns; j++) { + TableData *d; + + assert_se(d = t->data[t->display_map ? t->display_map[j] : j]); + + r = table_data_to_json(d, elements + j*2); + if (r < 0) + goto finish; + } + + rows = new0(JsonVariant*, n_rows-1); + if (!rows) { + r = -ENOMEM; + goto finish; + } + + for (i = 1; i < n_rows; i++) { + TableData **row; + + if (sorted) + row = t->data + sorted[i]; + else + row = t->data + i * t->n_columns; + + for (j = 0; j < display_columns; j++) { + TableData *d; + size_t k; + + assert_se(d = row[t->display_map ? t->display_map[j] : j]); + + k = j*2+1; + elements[k] = json_variant_unref(elements[k]); + + r = table_data_to_json(d, elements + k); + if (r < 0) + goto finish; + } + + r = json_variant_new_object(rows + i - 1, elements, display_columns * 2); + if (r < 0) + goto finish; + } + + r = json_variant_new_array(ret, rows, n_rows - 1); + +finish: + if (rows) { + json_variant_unref_many(rows, n_rows-1); + free(rows); + } + + if (elements) { + json_variant_unref_many(elements, display_columns*2); + free(elements); + } + + return r; +} + +int table_print_json(Table *t, FILE *f, JsonFormatFlags flags) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + int r; + + assert(t); + + if (!f) + f = stdout; + + r = table_to_json(t, &v); + if (r < 0) + return r; + + json_variant_dump(v, flags, f, NULL); + + return fflush_and_check(f); +} diff --git a/src/shared/format-table.h b/src/shared/format-table.h new file mode 100644 index 0000000..5ff2479 --- /dev/null +++ b/src/shared/format-table.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdio.h> +#include <sys/types.h> + +#include "json.h" +#include "macro.h" + +typedef enum TableDataType { + TABLE_EMPTY, + TABLE_STRING, + TABLE_BOOLEAN, + TABLE_TIMESTAMP, + TABLE_TIMESPAN, + TABLE_SIZE, + TABLE_UINT32, + TABLE_UINT64, + TABLE_PERCENT, + _TABLE_DATA_TYPE_MAX, + _TABLE_DATA_TYPE_INVALID = -1, +} TableDataType; + +typedef struct Table Table; +typedef struct TableCell TableCell; + +Table *table_new_internal(const char *first_header, ...) _sentinel_; +#define table_new(...) table_new_internal(__VA_ARGS__, NULL) +Table *table_new_raw(size_t n_columns); +Table *table_unref(Table *t); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Table*, table_unref); + +int table_add_cell_full(Table *t, TableCell **ret_cell, TableDataType type, const void *data, size_t minimum_width, size_t maximum_width, unsigned weight, unsigned align_percent, unsigned ellipsize_percent); +static inline int table_add_cell(Table *t, TableCell **ret_cell, TableDataType type, const void *data) { + return table_add_cell_full(t, ret_cell, type, data, (size_t) -1, (size_t) -1, (unsigned) -1, (unsigned) -1, (unsigned) -1); +} + +int table_dup_cell(Table *t, TableCell *cell); + +int table_set_minimum_width(Table *t, TableCell *cell, size_t minimum_width); +int table_set_maximum_width(Table *t, TableCell *cell, size_t maximum_width); +int table_set_weight(Table *t, TableCell *cell, unsigned weight); +int table_set_align_percent(Table *t, TableCell *cell, unsigned percent); +int table_set_ellipsize_percent(Table *t, TableCell *cell, unsigned percent); +int table_set_color(Table *t, TableCell *cell, const char *color); +int table_set_url(Table *t, TableCell *cell, const char *color); +int table_set_uppercase(Table *t, TableCell *cell, bool b); + +int table_update(Table *t, TableCell *cell, TableDataType type, const void *data); + +int table_add_many_internal(Table *t, TableDataType first_type, ...); +#define table_add_many(t, ...) table_add_many_internal(t, __VA_ARGS__, _TABLE_DATA_TYPE_MAX) + +void table_set_header(Table *table, bool b); +void table_set_width(Table *t, size_t width); +int table_set_display(Table *t, size_t first_column, ...); +int table_set_sort(Table *t, size_t first_column, ...); +int table_set_reverse(Table *t, size_t column, bool b); + +int table_print(Table *t, FILE *f); +int table_format(Table *t, char **ret); + +static inline TableCell* TABLE_HEADER_CELL(size_t i) { + return SIZE_TO_PTR(i + 1); +} + +size_t table_get_rows(Table *t); +size_t table_get_columns(Table *t); + +TableCell *table_get_cell(Table *t, size_t row, size_t column); + +const void *table_get(Table *t, TableCell *cell); +const void *table_get_at(Table *t, size_t row, size_t column); + +int table_to_json(Table *t, JsonVariant **ret); +int table_print_json(Table *t, FILE *f, unsigned json_flags); diff --git a/src/shared/fstab-util.c b/src/shared/fstab-util.c new file mode 100644 index 0000000..6fd9866 --- /dev/null +++ b/src/shared/fstab-util.c @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <mntent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "device-nodes.h" +#include "fstab-util.h" +#include "macro.h" +#include "mount-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "string-util.h" +#include "strv.h" +#include "util.h" + +int fstab_has_fstype(const char *fstype) { + _cleanup_endmntent_ FILE *f = NULL; + struct mntent *m; + + f = setmntent("/etc/fstab", "re"); + if (!f) + return errno == ENOENT ? false : -errno; + + for (;;) { + errno = 0; + m = getmntent(f); + if (!m) + return errno != 0 ? -errno : false; + + if (streq(m->mnt_type, fstype)) + return true; + } + return false; +} + +int fstab_is_mount_point(const char *mount) { + _cleanup_endmntent_ FILE *f = NULL; + struct mntent *m; + + f = setmntent("/etc/fstab", "re"); + if (!f) + return errno == ENOENT ? false : -errno; + + for (;;) { + errno = 0; + m = getmntent(f); + if (!m) + return errno != 0 ? -errno : false; + + if (path_equal(m->mnt_dir, mount)) + return true; + } + return false; +} + +int fstab_filter_options(const char *opts, const char *names, + const char **namefound, char **value, char **filtered) { + const char *name, *n = NULL, *x; + _cleanup_strv_free_ char **stor = NULL; + _cleanup_free_ char *v = NULL, **strv = NULL; + + assert(names && *names); + + if (!opts) + goto answer; + + /* If !value and !filtered, this function is not allowed to fail. */ + + if (!filtered) { + const char *word, *state; + size_t l; + + FOREACH_WORD_SEPARATOR(word, l, opts, ",", state) + NULSTR_FOREACH(name, names) { + if (l < strlen(name)) + continue; + if (!strneq(word, name, strlen(name))) + continue; + + /* we know that the string is NUL + * terminated, so *x is valid */ + x = word + strlen(name); + if (IN_SET(*x, '\0', '=', ',')) { + n = name; + if (value) { + free(v); + if (IN_SET(*x, '\0', ',')) + v = NULL; + else { + assert(*x == '='); + x++; + v = strndup(x, l - strlen(name) - 1); + if (!v) + return -ENOMEM; + } + } + } + } + } else { + char **t, **s; + + stor = strv_split(opts, ","); + if (!stor) + return -ENOMEM; + strv = memdup(stor, sizeof(char*) * (strv_length(stor) + 1)); + if (!strv) + return -ENOMEM; + + for (s = t = strv; *s; s++) { + NULSTR_FOREACH(name, names) { + x = startswith(*s, name); + if (x && IN_SET(*x, '\0', '=')) + goto found; + } + + *t = *s; + t++; + continue; + found: + /* Keep the last occurence found */ + n = name; + if (value) { + free(v); + if (*x == '\0') + v = NULL; + else { + assert(*x == '='); + x++; + v = strdup(x); + if (!v) + return -ENOMEM; + } + } + } + *t = NULL; + } + +answer: + if (namefound) + *namefound = n; + if (filtered) { + char *f; + + f = strv_join(strv, ","); + if (!f) + return -ENOMEM; + + *filtered = f; + } + if (value) + *value = TAKE_PTR(v); + + return !!n; +} + +int fstab_extract_values(const char *opts, const char *name, char ***values) { + _cleanup_strv_free_ char **optsv = NULL, **res = NULL; + char **s; + + assert(opts); + assert(name); + assert(values); + + optsv = strv_split(opts, ","); + if (!optsv) + return -ENOMEM; + + STRV_FOREACH(s, optsv) { + char *arg; + int r; + + arg = startswith(*s, name); + if (!arg || *arg != '=') + continue; + r = strv_extend(&res, arg + 1); + if (r < 0) + return r; + } + + *values = TAKE_PTR(res); + + return !!*values; +} + +int fstab_find_pri(const char *options, int *ret) { + _cleanup_free_ char *opt = NULL; + int r; + unsigned pri; + + assert(ret); + + r = fstab_filter_options(options, "pri\0", NULL, &opt, NULL); + if (r < 0) + return r; + if (r == 0 || !opt) + return 0; + + r = safe_atou(opt, &pri); + if (r < 0) + return r; + + if ((int) pri < 0) + return -ERANGE; + + *ret = (int) pri; + return 1; +} + +static char *unquote(const char *s, const char* quotes) { + size_t l; + assert(s); + + /* This is rather stupid, simply removes the heading and + * trailing quotes if there is one. Doesn't care about + * escaping or anything. + * + * DON'T USE THIS FOR NEW CODE ANYMORE! */ + + l = strlen(s); + if (l < 2) + return strdup(s); + + if (strchr(quotes, s[0]) && s[l-1] == s[0]) + return strndup(s+1, l-2); + + return strdup(s); +} + +static char *tag_to_udev_node(const char *tagvalue, const char *by) { + _cleanup_free_ char *t = NULL, *u = NULL; + size_t enc_len; + + u = unquote(tagvalue, QUOTES); + if (!u) + return NULL; + + enc_len = strlen(u) * 4 + 1; + t = new(char, enc_len); + if (!t) + return NULL; + + if (encode_devnode_name(u, t, enc_len) < 0) + return NULL; + + return strjoin("/dev/disk/by-", by, "/", t); +} + +char *fstab_node_to_udev_node(const char *p) { + assert(p); + + if (startswith(p, "LABEL=")) + return tag_to_udev_node(p+6, "label"); + + if (startswith(p, "UUID=")) + return tag_to_udev_node(p+5, "uuid"); + + if (startswith(p, "PARTUUID=")) + return tag_to_udev_node(p+9, "partuuid"); + + if (startswith(p, "PARTLABEL=")) + return tag_to_udev_node(p+10, "partlabel"); + + return strdup(p); +} diff --git a/src/shared/fstab-util.h b/src/shared/fstab-util.h new file mode 100644 index 0000000..9820f78 --- /dev/null +++ b/src/shared/fstab-util.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> + +#include "macro.h" + +int fstab_is_mount_point(const char *mount); +int fstab_has_fstype(const char *fstype); + +int fstab_filter_options(const char *opts, const char *names, const char **namefound, char **value, char **filtered); + +int fstab_extract_values(const char *opts, const char *name, char ***values); + +static inline bool fstab_test_option(const char *opts, const char *names) { + return !!fstab_filter_options(opts, names, NULL, NULL, NULL); +} + +int fstab_find_pri(const char *options, int *ret); + +static inline bool fstab_test_yes_no_option(const char *opts, const char *yes_no) { + int r; + const char *opt; + + /* If first name given is last, return 1. + * If second name given is last or neither is found, return 0. */ + + r = fstab_filter_options(opts, yes_no, &opt, NULL, NULL); + assert(r >= 0); + + return opt == yes_no; +} + +char *fstab_node_to_udev_node(const char *p); diff --git a/src/shared/generate-ip-protocol-list.sh b/src/shared/generate-ip-protocol-list.sh new file mode 100755 index 0000000..a9b1e0f --- /dev/null +++ b/src/shared/generate-ip-protocol-list.sh @@ -0,0 +1,6 @@ +#!/bin/sh +set -eu + +$1 -dM -include netinet/in.h - </dev/null | \ + awk '/^#define[ \t]+IPPROTO_[^ \t]+[ \t]+[^ \t]/ { print $2; }' | \ + sed -e 's/IPPROTO_//' diff --git a/src/shared/generator.c b/src/shared/generator.c new file mode 100644 index 0000000..0adaaf2 --- /dev/null +++ b/src/shared/generator.c @@ -0,0 +1,504 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdio_ext.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "dropin.h" +#include "escape.h" +#include "fd-util.h" +#include "fileio.h" +#include "fstab-util.h" +#include "generator.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-util.h" +#include "special.h" +#include "specifier.h" +#include "string-util.h" +#include "time-util.h" +#include "unit-name.h" +#include "util.h" + +int generator_open_unit_file( + const char *dest, + const char *source, + const char *name, + FILE **file) { + + const char *unit; + FILE *f; + + unit = strjoina(dest, "/", name); + + f = fopen(unit, "wxe"); + if (!f) { + if (source && errno == EEXIST) + return log_error_errno(errno, + "Failed to create unit file %s, as it already exists. Duplicate entry in %s?", + unit, source); + else + return log_error_errno(errno, + "Failed to create unit file %s: %m", + unit); + } + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + fprintf(f, + "# Automatically generated by %s\n\n", + program_invocation_short_name); + + *file = f; + return 0; +} + +int generator_add_symlink(const char *root, const char *dst, const char *dep_type, const char *src) { + /* Adds a symlink from <dst>.<dep_type>.d/ to ../<src> */ + + const char *from, *to; + + from = strjoina("../", src); + to = strjoina(root, "/", dst, ".", dep_type, "/", src); + + mkdir_parents_label(to, 0755); + if (symlink(from, to) < 0) + if (errno != EEXIST) + return log_error_errno(errno, "Failed to create symlink \"%s\": %m", to); + + return 0; +} + +static int write_fsck_sysroot_service(const char *dir, const char *what) { + _cleanup_free_ char *device = NULL, *escaped = NULL, *escaped2 = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit; + int r; + + escaped = specifier_escape(what); + if (!escaped) + return log_oom(); + + escaped2 = cescape(escaped); + if (!escaped2) + return log_oom(); + + unit = strjoina(dir, "/systemd-fsck-root.service"); + log_debug("Creating %s", unit); + + r = unit_name_from_path(what, ".device", &device); + if (r < 0) + return log_error_errno(r, "Failed to convert device \"%s\" to unit name: %m", what); + + f = fopen(unit, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", unit); + + fprintf(f, + "# Automatically generated by %1$s\n\n" + "[Unit]\n" + "Description=File System Check on %2$s\n" + "Documentation=man:systemd-fsck-root.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%3$s\n" + "Conflicts=shutdown.target\n" + "After=initrd-root-device.target local-fs-pre.target %3$s\n" + "Before=shutdown.target\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart=" SYSTEMD_FSCK_PATH " %4$s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + escaped, + device, + escaped2); + + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write unit file %s: %m", unit); + + return 0; +} + +int generator_write_fsck_deps( + FILE *f, + const char *dir, + const char *what, + const char *where, + const char *fstype) { + + int r; + + assert(f); + assert(dir); + assert(what); + assert(where); + + if (!is_device_path(what)) { + log_warning("Checking was requested for \"%s\", but it is not a device.", what); + return 0; + } + + if (!isempty(fstype) && !streq(fstype, "auto")) { + r = fsck_exists(fstype); + if (r < 0) + log_warning_errno(r, "Checking was requested for %s, but couldn't detect if fsck.%s may be used, proceeding: %m", what, fstype); + else if (r == 0) { + /* treat missing check as essentially OK */ + log_debug("Checking was requested for %s, but fsck.%s does not exist.", what, fstype); + return 0; + } + } + + if (path_equal(where, "/")) { + const char *lnk; + + lnk = strjoina(dir, "/" SPECIAL_LOCAL_FS_TARGET ".wants/systemd-fsck-root.service"); + + mkdir_parents(lnk, 0755); + if (symlink(SYSTEM_DATA_UNIT_PATH "/systemd-fsck-root.service", lnk) < 0) + return log_error_errno(errno, "Failed to create symlink %s: %m", lnk); + + } else { + _cleanup_free_ char *_fsck = NULL; + const char *fsck; + + if (in_initrd() && path_equal(where, "/sysroot")) { + r = write_fsck_sysroot_service(dir, what); + if (r < 0) + return r; + + fsck = "systemd-fsck-root.service"; + } else { + r = unit_name_from_path_instance("systemd-fsck", what, ".service", &_fsck); + if (r < 0) + return log_error_errno(r, "Failed to create fsck service name: %m"); + + fsck = _fsck; + } + + fprintf(f, + "Requires=%1$s\n" + "After=%1$s\n", + fsck); + } + + return 0; +} + +int generator_write_timeouts( + const char *dir, + const char *what, + const char *where, + const char *opts, + char **filtered) { + + /* Configure how long we wait for a device that backs a mount point or a + * swap partition to show up. This is useful to support endless device timeouts + * for devices that show up only after user input, like crypto devices. */ + + _cleanup_free_ char *node = NULL, *unit = NULL, *timeout = NULL; + usec_t u; + int r; + + r = fstab_filter_options(opts, "comment=systemd.device-timeout\0" + "x-systemd.device-timeout\0", + NULL, &timeout, filtered); + if (r <= 0) + return r; + + r = parse_sec_fix_0(timeout, &u); + if (r < 0) { + log_warning("Failed to parse timeout for %s, ignoring: %s", where, timeout); + return 0; + } + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + if (!is_device_path(node)) { + log_warning("x-systemd.device-timeout ignored for %s", what); + return 0; + } + + r = unit_name_from_path(node, ".device", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path: %m"); + + return write_drop_in_format(dir, unit, 50, "device-timeout", + "# Automatically generated by %s\n\n" + "[Unit]\n" + "JobRunningTimeoutSec=%s", + program_invocation_short_name, + timeout); +} + +int generator_write_device_deps( + const char *dir, + const char *what, + const char *where, + const char *opts) { + + /* fstab records that specify _netdev option should apply the network + * ordering on the actual device depending on network connection. If we + * are not mounting real device (NFS, CIFS), we rely on _netdev effect + * on the mount unit itself. */ + + _cleanup_free_ char *node = NULL, *unit = NULL; + int r; + + if (!fstab_test_option(opts, "_netdev\0")) + return 0; + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + + /* Nothing to apply dependencies to. */ + if (!is_device_path(node)) + return 0; + + r = unit_name_from_path(node, ".device", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + node); + + /* See mount_add_default_dependencies for explanation why we create such + * dependencies. */ + return write_drop_in_format(dir, unit, 50, "netdev-dependencies", + "# Automatically generated by %s\n\n" + "[Unit]\n" + "After=" SPECIAL_NETWORK_ONLINE_TARGET " " SPECIAL_NETWORK_TARGET "\n" + "Wants=" SPECIAL_NETWORK_ONLINE_TARGET "\n", + program_invocation_short_name); +} + +int generator_write_initrd_root_device_deps(const char *dir, const char *what) { + _cleanup_free_ char *unit = NULL; + int r; + + r = unit_name_from_path(what, ".device", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + what); + + return write_drop_in_format(dir, SPECIAL_INITRD_ROOT_DEVICE_TARGET, 50, "root-device", + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Requires=%s\n" + "After=%s", + program_invocation_short_name, + unit, + unit); +} + +int generator_hook_up_mkswap( + const char *dir, + const char *what) { + + _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit_file; + int r; + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + + /* Nothing to work on. */ + if (!is_device_path(node)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot format something that is not a device node: %s", + node); + + r = unit_name_from_path_instance("systemd-mkswap", node, ".service", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m", + node); + + unit_file = strjoina(dir, "/", unit); + log_debug("Creating %s", unit_file); + + escaped = cescape(node); + if (!escaped) + return log_oom(); + + r = unit_name_from_path(what, ".swap", &where_unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + what); + + f = fopen(unit_file, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", + unit_file); + + fprintf(f, + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Description=Make Swap on %%f\n" + "Documentation=man:systemd-mkswap@.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%%i.device\n" + "Conflicts=shutdown.target\n" + "After=%%i.device\n" + "Before=shutdown.target %s\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart="SYSTEMD_MAKEFS_PATH " swap %s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + where_unit, + escaped); + + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write unit file %s: %m", unit_file); + + return generator_add_symlink(dir, where_unit, "requires", unit); +} + +int generator_hook_up_mkfs( + const char *dir, + const char *what, + const char *where, + const char *type) { + + _cleanup_free_ char *node = NULL, *unit = NULL, *escaped = NULL, *where_unit = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit_file; + int r; + + node = fstab_node_to_udev_node(what); + if (!node) + return log_oom(); + + /* Nothing to work on. */ + if (!is_device_path(node)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot format something that is not a device node: %s", + node); + + if (!type || streq(type, "auto")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot format partition %s, filesystem type is not specified", + node); + + r = unit_name_from_path_instance("systemd-mkfs", node, ".service", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m", + node); + + unit_file = strjoina(dir, "/", unit); + log_debug("Creating %s", unit_file); + + escaped = cescape(node); + if (!escaped) + return log_oom(); + + r = unit_name_from_path(where, ".mount", &where_unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + where); + + f = fopen(unit_file, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", + unit_file); + + fprintf(f, + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Description=Make File System on %%f\n" + "Documentation=man:systemd-mkfs@.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%%i.device\n" + "Conflicts=shutdown.target\n" + "After=%%i.device\n" + /* fsck might or might not be used, so let's be safe and order + * ourselves before both systemd-fsck@.service and the mount unit. */ + "Before=shutdown.target systemd-fsck@%%i.service %s\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart="SYSTEMD_MAKEFS_PATH " %s %s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + where_unit, + type, + escaped); + // XXX: what about local-fs-pre.target? + + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write unit file %s: %m", unit_file); + + return generator_add_symlink(dir, where_unit, "requires", unit); +} + +int generator_hook_up_growfs( + const char *dir, + const char *where, + const char *target) { + + _cleanup_free_ char *unit = NULL, *escaped = NULL, *where_unit = NULL; + _cleanup_fclose_ FILE *f = NULL; + const char *unit_file; + int r; + + escaped = cescape(where); + if (!escaped) + return log_oom(); + + r = unit_name_from_path_instance("systemd-growfs", where, ".service", &unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit instance name from path \"%s\": %m", + where); + + r = unit_name_from_path(where, ".mount", &where_unit); + if (r < 0) + return log_error_errno(r, "Failed to make unit name from path \"%s\": %m", + where); + + unit_file = strjoina(dir, "/", unit); + log_debug("Creating %s", unit_file); + + f = fopen(unit_file, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to create unit file %s: %m", + unit_file); + + fprintf(f, + "# Automatically generated by %s\n\n" + "[Unit]\n" + "Description=Grow File System on %%f\n" + "Documentation=man:systemd-growfs@.service(8)\n" + "DefaultDependencies=no\n" + "BindsTo=%%i.mount\n" + "Conflicts=shutdown.target\n" + "After=%%i.mount\n" + "Before=shutdown.target %s\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "RemainAfterExit=yes\n" + "ExecStart="SYSTEMD_GROWFS_PATH " %s\n" + "TimeoutSec=0\n", + program_invocation_short_name, + target, + escaped); + + return generator_add_symlink(dir, where_unit, "wants", unit); +} + +void log_setup_generator(void) { + log_set_prohibit_ipc(true); + log_setup_service(); +} diff --git a/src/shared/generator.h b/src/shared/generator.h new file mode 100644 index 0000000..5a1c1e3 --- /dev/null +++ b/src/shared/generator.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdio.h> + +#include "main-func.h" + +int generator_open_unit_file( + const char *dest, + const char *source, + const char *name, + FILE **file); + +int generator_add_symlink(const char *root, const char *dst, const char *dep_type, const char *src); + +int generator_write_fsck_deps( + FILE *f, + const char *dir, + const char *what, + const char *where, + const char *type); + +int generator_write_timeouts( + const char *dir, + const char *what, + const char *where, + const char *opts, + char **filtered); + +int generator_write_device_deps( + const char *dir, + const char *what, + const char *where, + const char *opts); + +int generator_write_initrd_root_device_deps( + const char *dir, + const char *what); + +int generator_hook_up_mkswap( + const char *dir, + const char *what); +int generator_hook_up_mkfs( + const char *dir, + const char *what, + const char *where, + const char *type); +int generator_hook_up_growfs( + const char *dir, + const char *where, + const char *target); + +void log_setup_generator(void); + +/* Similar to DEFINE_MAIN_FUNCTION, but initializes logging and assigns positional arguments. */ +#define DEFINE_MAIN_GENERATOR_FUNCTION(impl) \ + _DEFINE_MAIN_FUNCTION( \ + ({ \ + log_setup_generator(); \ + if (argc > 1 && argc != 4) \ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), \ + "This program takes zero or three arguments."); \ + }), \ + impl(argc > 1 ? argv[1] : "/tmp", \ + argc > 1 ? argv[2] : "/tmp", \ + argc > 1 ? argv[3] : "/tmp"), \ + r < 0 ? EXIT_FAILURE : EXIT_SUCCESS) diff --git a/src/shared/gpt.h b/src/shared/gpt.h new file mode 100644 index 0000000..fd953fa --- /dev/null +++ b/src/shared/gpt.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <endian.h> + +#include "sd-id128.h" + +/* We only support root disk discovery for x86, x86-64, Itanium and ARM for + * now, since EFI for anything else doesn't really exist, and we only + * care for root partitions on the same disk as the EFI ESP. */ + +#define GPT_ROOT_X86 SD_ID128_MAKE(44,47,95,40,f2,97,41,b2,9a,f7,d1,31,d5,f0,45,8a) +#define GPT_ROOT_X86_64 SD_ID128_MAKE(4f,68,bc,e3,e8,cd,4d,b1,96,e7,fb,ca,f9,84,b7,09) +#define GPT_ROOT_ARM SD_ID128_MAKE(69,da,d7,10,2c,e4,4e,3c,b1,6c,21,a1,d4,9a,be,d3) +#define GPT_ROOT_ARM_64 SD_ID128_MAKE(b9,21,b0,45,1d,f0,41,c3,af,44,4c,6f,28,0d,3f,ae) +#define GPT_ROOT_IA64 SD_ID128_MAKE(99,3d,8d,3d,f8,0e,42,25,85,5a,9d,af,8e,d7,ea,97) +#define GPT_ESP SD_ID128_MAKE(c1,2a,73,28,f8,1f,11,d2,ba,4b,00,a0,c9,3e,c9,3b) +#define GPT_SWAP SD_ID128_MAKE(06,57,fd,6d,a4,ab,43,c4,84,e5,09,33,c8,4b,4f,4f) +#define GPT_HOME SD_ID128_MAKE(93,3a,c7,e1,2e,b4,4f,13,b8,44,0e,14,e2,ae,f9,15) +#define GPT_SRV SD_ID128_MAKE(3b,8f,84,25,20,e0,4f,3b,90,7f,1a,25,a7,6f,98,e8) + +/* Verity partitions for the root partitions above (we only define them for the root partitions, because only they are + * are commonly read-only and hence suitable for verity). */ +#define GPT_ROOT_X86_VERITY SD_ID128_MAKE(d1,3c,5d,3b,b5,d1,42,2a,b2,9f,94,54,fd,c8,9d,76) +#define GPT_ROOT_X86_64_VERITY SD_ID128_MAKE(2c,73,57,ed,eb,d2,46,d9,ae,c1,23,d4,37,ec,2b,f5) +#define GPT_ROOT_ARM_VERITY SD_ID128_MAKE(73,86,cd,f2,20,3c,47,a9,a4,98,f2,ec,ce,45,a2,d6) +#define GPT_ROOT_ARM_64_VERITY SD_ID128_MAKE(df,33,00,ce,d6,9f,4c,92,97,8c,9b,fb,0f,38,d8,20) +#define GPT_ROOT_IA64_VERITY SD_ID128_MAKE(86,ed,10,d5,b6,07,45,bb,89,57,d3,50,f2,3d,05,71) + +#if defined(__x86_64__) +# define GPT_ROOT_NATIVE GPT_ROOT_X86_64 +# define GPT_ROOT_SECONDARY GPT_ROOT_X86 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_64_VERITY +# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_X86_VERITY +#elif defined(__i386__) +# define GPT_ROOT_NATIVE GPT_ROOT_X86 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_X86_VERITY +#endif + +#if defined(__ia64__) +# define GPT_ROOT_NATIVE GPT_ROOT_IA64 +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_IA64_VERITY +#endif + +#if defined(__aarch64__) && (__BYTE_ORDER != __BIG_ENDIAN) +# define GPT_ROOT_NATIVE GPT_ROOT_ARM_64 +# define GPT_ROOT_SECONDARY GPT_ROOT_ARM +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_64_VERITY +# define GPT_ROOT_SECONDARY_VERITY GPT_ROOT_ARM_VERITY +#elif defined(__arm__) && (__BYTE_ORDER != __BIG_ENDIAN) +# define GPT_ROOT_NATIVE GPT_ROOT_ARM +# define GPT_ROOT_NATIVE_VERITY GPT_ROOT_ARM_VERITY +#endif + +#define GPT_FLAG_NO_BLOCK_IO_PROTOCOL (1ULL << 1) + +/* Flags we recognize on the root, swap, home and srv partitions when + * doing auto-discovery. These happen to be identical to what + * Microsoft defines for its own Basic Data Partitions, but that's + * just because we saw no point in defining any other values here. */ +#define GPT_FLAG_READ_ONLY (1ULL << 60) +#define GPT_FLAG_NO_AUTO (1ULL << 63) + +#define GPT_LINUX_GENERIC SD_ID128_MAKE(0f,c6,3d,af,84,83,47,72,8e,79,3d,69,d8,47,7d,e4) diff --git a/src/shared/id128-print.c b/src/shared/id128-print.c new file mode 100644 index 0000000..1b20b8f --- /dev/null +++ b/src/shared/id128-print.c @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdio.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "id128-print.h" +#include "log.h" +#include "pretty-print.h" +#include "terminal-util.h" + +int id128_pretty_print(sd_id128_t id, bool pretty) { + unsigned i; + _cleanup_free_ char *man_link = NULL, *mod_link = NULL; + const char *on, *off; + + if (!pretty) { + printf(SD_ID128_FORMAT_STR "\n", + SD_ID128_FORMAT_VAL(id)); + return 0; + } + + on = ansi_highlight(); + off = ansi_normal(); + + if (terminal_urlify("man:systemd-id128(1)", "systemd-id128(1)", &man_link) < 0) + return log_oom(); + + if (terminal_urlify("https://docs.python.org/3/library/uuid.html", "uuid", &mod_link) < 0) + return log_oom(); + + printf("As string:\n" + "%s" SD_ID128_FORMAT_STR "%s\n\n" + "As UUID:\n" + "%s%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x%s\n\n" + "As %s macro:\n" + "%s#define MESSAGE_XYZ SD_ID128_MAKE(", + on, SD_ID128_FORMAT_VAL(id), off, + on, SD_ID128_FORMAT_VAL(id), off, + man_link, + on); + for (i = 0; i < 16; i++) + printf("%02x%s", id.bytes[i], i != 15 ? "," : ""); + printf(")%s\n\n", off); + + printf("As Python constant:\n" + ">>> import %s\n" + ">>> %sMESSAGE_XYZ = uuid.UUID('" SD_ID128_FORMAT_STR "')%s\n", + mod_link, + on, SD_ID128_FORMAT_VAL(id), off); + + return 0; +} + +int id128_print_new(bool pretty) { + sd_id128_t id; + int r; + + r = sd_id128_randomize(&id); + if (r < 0) + return log_error_errno(r, "Failed to generate ID: %m"); + + return id128_pretty_print(id, pretty); +} diff --git a/src/shared/id128-print.h b/src/shared/id128-print.h new file mode 100644 index 0000000..5d50de0 --- /dev/null +++ b/src/shared/id128-print.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#pragma once + +#include <stdbool.h> + +#include "sd-id128.h" + +int id128_pretty_print(sd_id128_t id, bool pretty); +int id128_print_new(bool pretty); diff --git a/src/shared/ima-util.c b/src/shared/ima-util.c new file mode 100644 index 0000000..0d4ce3c --- /dev/null +++ b/src/shared/ima-util.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <unistd.h> + +#include "ima-util.h" + +static int use_ima_cached = -1; + +bool use_ima(void) { + + if (use_ima_cached < 0) + use_ima_cached = access("/sys/kernel/security/ima/", F_OK) >= 0; + + return use_ima_cached; +} diff --git a/src/shared/ima-util.h b/src/shared/ima-util.h new file mode 100644 index 0000000..8f20741 --- /dev/null +++ b/src/shared/ima-util.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +bool use_ima(void); diff --git a/src/shared/import-util.c b/src/shared/import-util.c new file mode 100644 index 0000000..bcd6c0c --- /dev/null +++ b/src/shared/import-util.c @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <string.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "import-util.h" +#include "log.h" +#include "macro.h" +#include "path-util.h" +#include "string-table.h" +#include "string-util.h" +#include "util.h" + +int import_url_last_component(const char *url, char **ret) { + const char *e, *p; + char *s; + + e = strchrnul(url, '?'); + + while (e > url && e[-1] == '/') + e--; + + p = e; + while (p > url && p[-1] != '/') + p--; + + if (e <= p) + return -EINVAL; + + s = strndup(p, e - p); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int import_url_change_last_component(const char *url, const char *suffix, char **ret) { + const char *e; + char *s; + + assert(url); + assert(ret); + + e = strchrnul(url, '?'); + + while (e > url && e[-1] == '/') + e--; + + while (e > url && e[-1] != '/') + e--; + + if (e <= url) + return -EINVAL; + + s = new(char, (e - url) + strlen(suffix) + 1); + if (!s) + return -ENOMEM; + + strcpy(mempcpy(s, url, e - url), suffix); + *ret = s; + return 0; +} + +static const char* const import_verify_table[_IMPORT_VERIFY_MAX] = { + [IMPORT_VERIFY_NO] = "no", + [IMPORT_VERIFY_CHECKSUM] = "checksum", + [IMPORT_VERIFY_SIGNATURE] = "signature", +}; + +DEFINE_STRING_TABLE_LOOKUP(import_verify, ImportVerify); + +int tar_strip_suffixes(const char *name, char **ret) { + const char *e; + char *s; + + e = endswith(name, ".tar"); + if (!e) + e = endswith(name, ".tar.xz"); + if (!e) + e = endswith(name, ".tar.gz"); + if (!e) + e = endswith(name, ".tar.bz2"); + if (!e) + e = endswith(name, ".tgz"); + if (!e) + e = strchr(name, 0); + + if (e <= name) + return -EINVAL; + + s = strndup(name, e - name); + if (!s) + return -ENOMEM; + + *ret = s; + return 0; +} + +int raw_strip_suffixes(const char *p, char **ret) { + + static const char suffixes[] = + ".xz\0" + ".gz\0" + ".bz2\0" + ".raw\0" + ".qcow2\0" + ".img\0" + ".bin\0"; + + _cleanup_free_ char *q = NULL; + + q = strdup(p); + if (!q) + return -ENOMEM; + + for (;;) { + const char *sfx; + bool changed = false; + + NULSTR_FOREACH(sfx, suffixes) { + char *e; + + e = endswith(q, sfx); + if (e) { + *e = 0; + changed = true; + } + } + + if (!changed) + break; + } + + *ret = TAKE_PTR(q); + + return 0; +} + +int import_assign_pool_quota_and_warn(const char *path) { + int r; + + r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true); + if (r == -ENOTTY) { + log_debug_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, as directory is not on btrfs or not a subvolume. Ignoring."); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines: %m"); + if (r > 0) + log_info("Set up default quota hierarchy for /var/lib/machines."); + + r = btrfs_subvol_auto_qgroup(path, 0, true); + if (r == -ENOTTY) { + log_debug_errno(r, "Failed to set up quota hierarchy for %s, as directory is not on btrfs or not a subvolume. Ignoring.", path); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to set up default quota hierarchy for %s: %m", path); + if (r > 0) + log_debug("Set up default quota hierarchy for %s.", path); + + return 0; +} diff --git a/src/shared/import-util.h b/src/shared/import-util.h new file mode 100644 index 0000000..0f2a517 --- /dev/null +++ b/src/shared/import-util.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +typedef enum ImportVerify { + IMPORT_VERIFY_NO, + IMPORT_VERIFY_CHECKSUM, + IMPORT_VERIFY_SIGNATURE, + _IMPORT_VERIFY_MAX, + _IMPORT_VERIFY_INVALID = -1, +} ImportVerify; + +int import_url_last_component(const char *url, char **ret); +int import_url_change_last_component(const char *url, const char *suffix, char **ret); + +const char* import_verify_to_string(ImportVerify v) _const_; +ImportVerify import_verify_from_string(const char *s) _pure_; + +int tar_strip_suffixes(const char *name, char **ret); +int raw_strip_suffixes(const char *name, char **ret); + +int import_assign_pool_quota_and_warn(const char *path); diff --git a/src/shared/initreq.h b/src/shared/initreq.h new file mode 100644 index 0000000..1d7ff81 --- /dev/null +++ b/src/shared/initreq.h @@ -0,0 +1,73 @@ +/* + * initreq.h Interface to talk to init through /dev/initctl. + * + * Copyright (C) 1995-2004 Miquel van Smoorenburg + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * Version: @(#)initreq.h 1.28 31-Mar-2004 MvS + */ + +#pragma once + +#include <sys/param.h> + +#if defined(__FreeBSD_kernel__) +# define INIT_FIFO "/etc/.initctl" +#else +# define INIT_FIFO "/dev/initctl" +#endif + +#define INIT_MAGIC 0x03091969 +#define INIT_CMD_START 0 +#define INIT_CMD_RUNLVL 1 +#define INIT_CMD_POWERFAIL 2 +#define INIT_CMD_POWERFAILNOW 3 +#define INIT_CMD_POWEROK 4 +#define INIT_CMD_BSD 5 +#define INIT_CMD_SETENV 6 +#define INIT_CMD_UNSETENV 7 + +#define INIT_CMD_CHANGECONS 12345 + +#ifdef MAXHOSTNAMELEN +# define INITRQ_HLEN MAXHOSTNAMELEN +#else +# define INITRQ_HLEN 64 +#endif + +/* + * This is what BSD 4.4 uses when talking to init. + * Linux doesn't use this right now. + */ +struct init_request_bsd { + char gen_id[8]; /* Beats me.. telnetd uses "fe" */ + char tty_id[16]; /* Tty name minus /dev/tty */ + char host[INITRQ_HLEN]; /* Hostname */ + char term_type[16]; /* Terminal type */ + int signal; /* Signal to send */ + int pid; /* Process to send to */ + char exec_name[128]; /* Program to execute */ + char reserved[128]; /* For future expansion. */ +}; + +/* + * Because of legacy interfaces, "runlevel" and "sleeptime" + * aren't in a separate struct in the union. + * + * The weird sizes are because init expects the whole + * struct to be 384 bytes. + */ +struct init_request { + int magic; /* Magic number */ + int cmd; /* What kind of request */ + int runlevel; /* Runlevel to change to */ + int sleeptime; /* Time between TERM and KILL */ + union { + struct init_request_bsd bsd; + char data[368]; + } i; +}; diff --git a/src/shared/install-printf.c b/src/shared/install-printf.c new file mode 100644 index 0000000..d2143be --- /dev/null +++ b/src/shared/install-printf.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "format-util.h" +#include "install-printf.h" +#include "install.h" +#include "macro.h" +#include "specifier.h" +#include "string-util.h" +#include "unit-name.h" +#include "user-util.h" + +static int specifier_prefix_and_instance(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + _cleanup_free_ char *prefix = NULL; + int r; + + assert(i); + + r = unit_name_to_prefix_and_instance(i->name, &prefix); + if (r < 0) + return r; + + if (endswith(prefix, "@") && i->default_instance) { + char *ans; + + ans = strjoin(prefix, i->default_instance); + if (!ans) + return -ENOMEM; + *ret = ans; + } else + *ret = TAKE_PTR(prefix); + + return 0; +} + +static int specifier_name(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + char *ans; + + assert(i); + + if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE) && i->default_instance) + return unit_name_replace_instance(i->name, i->default_instance, ret); + + ans = strdup(i->name); + if (!ans) + return -ENOMEM; + *ret = ans; + return 0; +} + +static int specifier_prefix(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + + assert(i); + + return unit_name_to_prefix(i->name, ret); +} + +static int specifier_instance(char specifier, const void *data, const void *userdata, char **ret) { + const UnitFileInstallInfo *i = userdata; + char *instance; + int r; + + assert(i); + + r = unit_name_to_instance(i->name, &instance); + if (r < 0) + return r; + + if (isempty(instance)) { + r = free_and_strdup(&instance, strempty(i->default_instance)); + if (r < 0) + return r; + } + + *ret = instance; + return 0; +} + +static int specifier_last_component(char specifier, const void *data, const void *userdata, char **ret) { + _cleanup_free_ char *prefix = NULL; + char *dash; + int r; + + r = specifier_prefix(specifier, data, userdata, &prefix); + if (r < 0) + return r; + + dash = strrchr(prefix, '-'); + if (dash) { + dash = strdup(dash + 1); + if (!dash) + return -ENOMEM; + *ret = dash; + } else + *ret = TAKE_PTR(prefix); + + return 0; +} + +int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret) { + /* This is similar to unit_full_printf() but does not support + * anything path-related. + * + * %n: the full id of the unit (foo@bar.waldo) + * %N: the id of the unit without the suffix (foo@bar) + * %p: the prefix (foo) + * %i: the instance (bar) + + * %U the UID of the running user + * %u the username of running user + * %m the machine ID of the running system + * %H the host name of the running system + * %b the boot ID of the running system + * %v `uname -r` of the running system + */ + + const Specifier table[] = { + { 'n', specifier_name, NULL }, + { 'N', specifier_prefix_and_instance, NULL }, + { 'p', specifier_prefix, NULL }, + { 'i', specifier_instance, NULL }, + { 'j', specifier_last_component, NULL }, + + { 'g', specifier_group_name, NULL }, + { 'G', specifier_group_id, NULL }, + { 'U', specifier_user_id, NULL }, + { 'u', specifier_user_name, NULL }, + + { 'm', specifier_machine_id, NULL }, + { 'H', specifier_host_name, NULL }, + { 'b', specifier_boot_id, NULL }, + { 'v', specifier_kernel_release, NULL }, + {} + }; + + assert(i); + assert(format); + assert(ret); + + return specifier_printf(format, table, i, ret); +} diff --git a/src/shared/install-printf.h b/src/shared/install-printf.h new file mode 100644 index 0000000..fa8ea7e --- /dev/null +++ b/src/shared/install-printf.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "install.h" + +int install_full_printf(const UnitFileInstallInfo *i, const char *format, char **ret); diff --git a/src/shared/install.c b/src/shared/install.c new file mode 100644 index 0000000..8629304 --- /dev/null +++ b/src/shared/install.c @@ -0,0 +1,3383 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "conf-parser.h" +#include "dirent-util.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hashmap.h" +#include "install-printf.h" +#include "install.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-lookup.h" +#include "path-util.h" +#include "rm-rf.h" +#include "set.h" +#include "special.h" +#include "stat-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "unit-name.h" + +#define UNIT_FILE_FOLLOW_SYMLINK_MAX 64 + +typedef enum SearchFlags { + SEARCH_LOAD = 1 << 0, + SEARCH_FOLLOW_CONFIG_SYMLINKS = 1 << 1, + SEARCH_DROPIN = 1 << 2, +} SearchFlags; + +typedef struct { + OrderedHashmap *will_process; + OrderedHashmap *have_processed; +} InstallContext; + +typedef enum { + PRESET_UNKNOWN, + PRESET_ENABLE, + PRESET_DISABLE, +} PresetAction; + +typedef struct { + char *pattern; + PresetAction action; + char **instances; +} PresetRule; + +typedef struct { + PresetRule *rules; + size_t n_rules; +} Presets; + +static bool unit_file_install_info_has_rules(const UnitFileInstallInfo *i) { + assert(i); + + return !strv_isempty(i->aliases) || + !strv_isempty(i->wanted_by) || + !strv_isempty(i->required_by); +} + +static bool unit_file_install_info_has_also(const UnitFileInstallInfo *i) { + assert(i); + + return !strv_isempty(i->also); +} + +static void presets_freep(Presets *p) { + size_t i; + + if (!p) + return; + + for (i = 0; i < p->n_rules; i++) { + free(p->rules[i].pattern); + strv_free(p->rules[i].instances); + } + + free(p->rules); + p->n_rules = 0; +} + +bool unit_type_may_alias(UnitType type) { + return IN_SET(type, + UNIT_SERVICE, + UNIT_SOCKET, + UNIT_TARGET, + UNIT_DEVICE, + UNIT_TIMER, + UNIT_PATH); +} + +bool unit_type_may_template(UnitType type) { + return IN_SET(type, + UNIT_SERVICE, + UNIT_SOCKET, + UNIT_TARGET, + UNIT_TIMER, + UNIT_PATH); +} + +static const char *unit_file_type_table[_UNIT_FILE_TYPE_MAX] = { + [UNIT_FILE_TYPE_REGULAR] = "regular", + [UNIT_FILE_TYPE_SYMLINK] = "symlink", + [UNIT_FILE_TYPE_MASKED] = "masked", +}; + +DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(unit_file_type, UnitFileType); + +static int in_search_path(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + char **i; + + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + STRV_FOREACH(i, p->search_path) + if (path_equal(parent, *i)) + return true; + + return false; +} + +static const char* skip_root(const LookupPaths *p, const char *path) { + char *e; + + assert(p); + assert(path); + + if (!p->root_dir) + return path; + + e = path_startswith(path, p->root_dir); + if (!e) + return NULL; + + /* Make sure the returned path starts with a slash */ + if (e[0] != '/') { + if (e == path || e[-1] != '/') + return NULL; + + e--; + } + + return e; +} + +static int path_is_generator(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_equal_ptr(parent, p->generator) || + path_equal_ptr(parent, p->generator_early) || + path_equal_ptr(parent, p->generator_late); +} + +static int path_is_transient(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_equal_ptr(parent, p->transient); +} + +static int path_is_control(const LookupPaths *p, const char *path) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + return path_equal_ptr(parent, p->persistent_control) || + path_equal_ptr(parent, p->runtime_control); +} + +static int path_is_config(const LookupPaths *p, const char *path, bool check_parent) { + _cleanup_free_ char *parent = NULL; + + assert(p); + assert(path); + + /* Note that we do *not* have generic checks for /etc or /run in place, since with + * them we couldn't discern configuration from transient or generated units */ + + if (check_parent) { + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + path = parent; + } + + return path_equal_ptr(path, p->persistent_config) || + path_equal_ptr(path, p->runtime_config); +} + +static int path_is_runtime(const LookupPaths *p, const char *path, bool check_parent) { + _cleanup_free_ char *parent = NULL; + const char *rpath; + + assert(p); + assert(path); + + /* Everything in /run is considered runtime. On top of that we also add + * explicit checks for the various runtime directories, as safety net. */ + + rpath = skip_root(p, path); + if (rpath && path_startswith(rpath, "/run")) + return true; + + if (check_parent) { + parent = dirname_malloc(path); + if (!parent) + return -ENOMEM; + + path = parent; + } + + return path_equal_ptr(path, p->runtime_config) || + path_equal_ptr(path, p->generator) || + path_equal_ptr(path, p->generator_early) || + path_equal_ptr(path, p->generator_late) || + path_equal_ptr(path, p->transient) || + path_equal_ptr(path, p->runtime_control); +} + +static int path_is_vendor(const LookupPaths *p, const char *path) { + const char *rpath; + + assert(p); + assert(path); + + rpath = skip_root(p, path); + if (!rpath) + return 0; + + if (path_startswith(rpath, "/usr")) + return true; + +#if HAVE_SPLIT_USR + if (path_startswith(rpath, "/lib")) + return true; +#endif + + return path_equal(rpath, SYSTEM_DATA_UNIT_PATH); +} + +int unit_file_changes_add( + UnitFileChange **changes, + size_t *n_changes, + UnitFileChangeType type, + const char *path, + const char *source) { + + _cleanup_free_ char *p = NULL, *s = NULL; + UnitFileChange *c; + + assert(path); + assert(!changes == !n_changes); + + if (!changes) + return 0; + + c = reallocarray(*changes, *n_changes + 1, sizeof(UnitFileChange)); + if (!c) + return -ENOMEM; + *changes = c; + + p = strdup(path); + if (source) + s = strdup(source); + + if (!p || (source && !s)) + return -ENOMEM; + + path_simplify(p, false); + if (s) + path_simplify(s, false); + + c[*n_changes] = (UnitFileChange) { type, p, s }; + p = s = NULL; + (*n_changes) ++; + return 0; +} + +void unit_file_changes_free(UnitFileChange *changes, size_t n_changes) { + size_t i; + + assert(changes || n_changes == 0); + + for (i = 0; i < n_changes; i++) { + free(changes[i].path); + free(changes[i].source); + } + + free(changes); +} + +void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet) { + size_t i; + bool logged = false; + + assert(changes || n_changes == 0); + /* If verb is not specified, errors are not allowed! */ + assert(verb || r >= 0); + + for (i = 0; i < n_changes; i++) { + assert(verb || changes[i].type >= 0); + + switch(changes[i].type) { + case UNIT_FILE_SYMLINK: + if (!quiet) + log_info("Created symlink %s %s %s.", + changes[i].path, + special_glyph(SPECIAL_GLYPH_ARROW), + changes[i].source); + break; + case UNIT_FILE_UNLINK: + if (!quiet) + log_info("Removed %s.", changes[i].path); + break; + case UNIT_FILE_IS_MASKED: + if (!quiet) + log_info("Unit %s is masked, ignoring.", changes[i].path); + break; + case UNIT_FILE_IS_DANGLING: + if (!quiet) + log_info("Unit %s is an alias to a unit that is not present, ignoring.", + changes[i].path); + break; + case -EEXIST: + if (changes[i].source) + log_error_errno(changes[i].type, + "Failed to %s unit, file %s already exists and is a symlink to %s.", + verb, changes[i].path, changes[i].source); + else + log_error_errno(changes[i].type, + "Failed to %s unit, file %s already exists.", + verb, changes[i].path); + logged = true; + break; + case -ERFKILL: + log_error_errno(changes[i].type, "Failed to %s unit, unit %s is masked.", + verb, changes[i].path); + logged = true; + break; + case -EADDRNOTAVAIL: + log_error_errno(changes[i].type, "Failed to %s unit, unit %s is transient or generated.", + verb, changes[i].path); + logged = true; + break; + case -ELOOP: + log_error_errno(changes[i].type, "Failed to %s unit, refusing to operate on linked unit file %s", + verb, changes[i].path); + logged = true; + break; + + case -ENOENT: + log_error_errno(changes[i].type, "Failed to %s unit, unit %s does not exist.", verb, changes[i].path); + logged = true; + break; + + default: + assert(changes[i].type < 0); + log_error_errno(changes[i].type, "Failed to %s unit, file %s: %m.", + verb, changes[i].path); + logged = true; + } + } + + if (r < 0 && !logged) + log_error_errno(r, "Failed to %s: %m.", verb); +} + +/** + * Checks if two paths or symlinks from wd are the same, when root is the root of the filesystem. + * wc should be the full path in the host file system. + */ +static bool chroot_symlinks_same(const char *root, const char *wd, const char *a, const char *b) { + assert(path_is_absolute(wd)); + + /* This will give incorrect results if the paths are relative and go outside + * of the chroot. False negatives are possible. */ + + if (!root) + root = "/"; + + a = strjoina(path_is_absolute(a) ? root : wd, "/", a); + b = strjoina(path_is_absolute(b) ? root : wd, "/", b); + return path_equal_or_files_same(a, b, 0); +} + +static int create_symlink( + const LookupPaths *paths, + const char *old_path, + const char *new_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_free_ char *dest = NULL, *dirname = NULL; + const char *rp; + int r; + + assert(old_path); + assert(new_path); + + rp = skip_root(paths, old_path); + if (rp) + old_path = rp; + + /* Actually create a symlink, and remember that we did. Is + * smart enough to check if there's already a valid symlink in + * place. + * + * Returns 1 if a symlink was created or already exists and points to + * the right place, or negative on error. + */ + + mkdir_parents_label(new_path, 0755); + + if (symlink(old_path, new_path) >= 0) { + unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path); + return 1; + } + + if (errno != EEXIST) { + unit_file_changes_add(changes, n_changes, -errno, new_path, NULL); + return -errno; + } + + r = readlink_malloc(new_path, &dest); + if (r < 0) { + /* translate EINVAL (non-symlink exists) to EEXIST */ + if (r == -EINVAL) + r = -EEXIST; + + unit_file_changes_add(changes, n_changes, r, new_path, NULL); + return r; + } + + dirname = dirname_malloc(new_path); + if (!dirname) + return -ENOMEM; + + if (chroot_symlinks_same(paths->root_dir, dirname, dest, old_path)) { + log_debug("Symlink %s → %s already exists", new_path, dest); + return 1; + } + + if (!force) { + unit_file_changes_add(changes, n_changes, -EEXIST, new_path, dest); + return -EEXIST; + } + + r = symlink_atomic(old_path, new_path); + if (r < 0) { + unit_file_changes_add(changes, n_changes, r, new_path, NULL); + return r; + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, new_path, NULL); + unit_file_changes_add(changes, n_changes, UNIT_FILE_SYMLINK, new_path, old_path); + + return 1; +} + +static int mark_symlink_for_removal( + Set **remove_symlinks_to, + const char *p) { + + char *n; + int r; + + assert(p); + + r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops); + if (r < 0) + return r; + + n = strdup(p); + if (!n) + return -ENOMEM; + + path_simplify(n, false); + + r = set_consume(*remove_symlinks_to, n); + if (r == -EEXIST) + return 0; + if (r < 0) + return r; + + return 1; +} + +static int remove_marked_symlinks_fd( + Set *remove_symlinks_to, + int fd, + const char *path, + const char *config_path, + const LookupPaths *lp, + bool dry_run, + bool *restart, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + + assert(remove_symlinks_to); + assert(fd >= 0); + assert(path); + assert(config_path); + assert(lp); + assert(restart); + + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + + rewinddir(d); + + FOREACH_DIRENT(de, d, return -errno) { + + dirent_ensure_type(d, de); + + if (de->d_type == DT_DIR) { + _cleanup_free_ char *p = NULL; + int nfd, q; + + nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (nfd < 0) { + if (errno == ENOENT) + continue; + + if (r == 0) + r = -errno; + continue; + } + + p = path_make_absolute(de->d_name, path); + if (!p) { + safe_close(nfd); + return -ENOMEM; + } + + /* This will close nfd, regardless whether it succeeds or not */ + q = remove_marked_symlinks_fd(remove_symlinks_to, nfd, p, config_path, lp, dry_run, restart, changes, n_changes); + if (q < 0 && r == 0) + r = q; + + } else if (de->d_type == DT_LNK) { + _cleanup_free_ char *p = NULL, *dest = NULL; + const char *rp; + bool found; + int q; + + if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) + continue; + + p = path_make_absolute(de->d_name, path); + if (!p) + return -ENOMEM; + path_simplify(p, false); + + q = readlink_malloc(p, &dest); + if (q == -ENOENT) + continue; + if (q < 0) { + if (r == 0) + r = q; + continue; + } + + /* We remove all links pointing to a file or path that is marked, as well as all files sharing + * the same name as a file that is marked. */ + + found = set_contains(remove_symlinks_to, dest) || + set_contains(remove_symlinks_to, basename(dest)) || + set_contains(remove_symlinks_to, de->d_name); + + if (!found) + continue; + + if (!dry_run) { + if (unlinkat(fd, de->d_name, 0) < 0 && errno != ENOENT) { + if (r == 0) + r = -errno; + unit_file_changes_add(changes, n_changes, -errno, p, NULL); + continue; + } + + (void) rmdir_parents(p, config_path); + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, p, NULL); + + /* Now, remember the full path (but with the root prefix removed) of + * the symlink we just removed, and remove any symlinks to it, too. */ + + rp = skip_root(lp, p); + q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: p); + if (q < 0) + return q; + if (q > 0 && !dry_run) + *restart = true; + } + } + + return r; +} + +static int remove_marked_symlinks( + Set *remove_symlinks_to, + const char *config_path, + const LookupPaths *lp, + bool dry_run, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_close_ int fd = -1; + bool restart; + int r = 0; + + assert(config_path); + assert(lp); + + if (set_size(remove_symlinks_to) <= 0) + return 0; + + fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) + return errno == ENOENT ? 0 : -errno; + + do { + int q, cfd; + restart = false; + + cfd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (cfd < 0) + return -errno; + + /* This takes possession of cfd and closes it */ + q = remove_marked_symlinks_fd(remove_symlinks_to, cfd, config_path, config_path, lp, dry_run, &restart, changes, n_changes); + if (r == 0) + r = q; + } while (restart); + + return r; +} + +static int is_symlink_with_known_name(const UnitFileInstallInfo *i, const char *name) { + int r; + + if (streq(name, i->name)) + return true; + + if (strv_contains(i->aliases, name)) + return true; + + /* Look for template symlink matching DefaultInstance */ + if (i->default_instance && unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) { + _cleanup_free_ char *s = NULL; + + r = unit_name_replace_instance(i->name, i->default_instance, &s); + if (r < 0) { + if (r != -EINVAL) + return r; + + } else if (streq(name, s)) + return true; + } + + return false; +} + +static int find_symlinks_fd( + const char *root_dir, + const UnitFileInstallInfo *i, + bool match_aliases, + bool ignore_same_name, + int fd, + const char *path, + const char *config_path, + bool *same_name_link) { + + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + int r = 0; + + assert(i); + assert(fd >= 0); + assert(path); + assert(config_path); + assert(same_name_link); + + d = fdopendir(fd); + if (!d) { + safe_close(fd); + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + + dirent_ensure_type(d, de); + + if (de->d_type == DT_DIR) { + _cleanup_free_ char *p = NULL; + int nfd, q; + + nfd = openat(fd, de->d_name, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + if (nfd < 0) { + if (errno == ENOENT) + continue; + + if (r == 0) + r = -errno; + continue; + } + + p = path_make_absolute(de->d_name, path); + if (!p) { + safe_close(nfd); + return -ENOMEM; + } + + /* This will close nfd, regardless whether it succeeds or not */ + q = find_symlinks_fd(root_dir, i, match_aliases, ignore_same_name, nfd, + p, config_path, same_name_link); + if (q > 0) + return 1; + if (r == 0) + r = q; + + } else if (de->d_type == DT_LNK) { + _cleanup_free_ char *p = NULL, *dest = NULL; + bool found_path = false, found_dest, b = false; + int q; + + /* Acquire symlink name */ + p = path_make_absolute(de->d_name, path); + if (!p) + return -ENOMEM; + + /* Acquire symlink destination */ + q = readlink_malloc(p, &dest); + if (q == -ENOENT) + continue; + if (q < 0) { + if (r == 0) + r = q; + continue; + } + + /* Make absolute */ + if (!path_is_absolute(dest)) { + char *x; + + x = prefix_root(root_dir, dest); + if (!x) + return -ENOMEM; + + free_and_replace(dest, x); + } + + assert(unit_name_is_valid(i->name, UNIT_NAME_ANY)); + if (!ignore_same_name) + /* Check if the symlink itself matches what we are looking for. + * + * If ignore_same_name is specified, we are in one of the directories which + * have lower priority than the unit file, and even if a file or symlink with + * this name was found, we should ignore it. */ + found_path = streq(de->d_name, i->name); + + /* Check if what the symlink points to matches what we are looking for */ + found_dest = streq(basename(dest), i->name); + + if (found_path && found_dest) { + _cleanup_free_ char *t = NULL; + + /* Filter out same name links in the main + * config path */ + t = path_make_absolute(i->name, config_path); + if (!t) + return -ENOMEM; + + b = path_equal(t, p); + } + + if (b) + *same_name_link = true; + else if (found_path || found_dest) { + if (!match_aliases) + return 1; + + /* Check if symlink name is in the set of names used by [Install] */ + q = is_symlink_with_known_name(i, de->d_name); + if (q < 0) + return q; + if (q > 0) + return 1; + } + } + } + + return r; +} + +static int find_symlinks( + const char *root_dir, + const UnitFileInstallInfo *i, + bool match_name, + bool ignore_same_name, + const char *config_path, + bool *same_name_link) { + + int fd; + + assert(i); + assert(config_path); + assert(same_name_link); + + fd = open(config_path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC); + if (fd < 0) { + if (IN_SET(errno, ENOENT, ENOTDIR, EACCES)) + return 0; + return -errno; + } + + /* This takes possession of fd and closes it */ + return find_symlinks_fd(root_dir, i, match_name, ignore_same_name, fd, + config_path, config_path, same_name_link); +} + +static int find_symlinks_in_scope( + UnitFileScope scope, + const LookupPaths *paths, + const UnitFileInstallInfo *i, + bool match_name, + UnitFileState *state) { + + bool same_name_link_runtime = false, same_name_link_config = false; + bool enabled_in_runtime = false, enabled_at_all = false; + bool ignore_same_name = false; + char **p; + int r; + + assert(paths); + assert(i); + + /* As we iterate over the list of search paths in paths->search_path, we may encounter "same name" + * symlinks. The ones which are "below" (i.e. have lower priority) than the unit file itself are + * efectively masked, so we should ignore them. */ + + STRV_FOREACH(p, paths->search_path) { + bool same_name_link = false; + + r = find_symlinks(paths->root_dir, i, match_name, ignore_same_name, *p, &same_name_link); + if (r < 0) + return r; + if (r > 0) { + /* We found symlinks in this dir? Yay! Let's see where precisely it is enabled. */ + + if (path_equal_ptr(*p, paths->persistent_config)) { + /* This is the best outcome, let's return it immediately. */ + *state = UNIT_FILE_ENABLED; + return 1; + } + + /* look for global enablement of user units */ + if (scope == UNIT_FILE_USER && path_is_user_config_dir(*p)) { + *state = UNIT_FILE_ENABLED; + return 1; + } + + r = path_is_runtime(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + enabled_in_runtime = true; + else + enabled_at_all = true; + + } else if (same_name_link) { + if (path_equal_ptr(*p, paths->persistent_config)) + same_name_link_config = true; + else { + r = path_is_runtime(paths, *p, false); + if (r < 0) + return r; + if (r > 0) + same_name_link_runtime = true; + } + } + + /* Check if next iteration will be "below" the unit file (either a regular file + * or a symlink), and hence should be ignored */ + if (!ignore_same_name && path_startswith(i->path, *p)) + ignore_same_name = true; + } + + if (enabled_in_runtime) { + *state = UNIT_FILE_ENABLED_RUNTIME; + return 1; + } + + /* Here's a special rule: if the unit we are looking for is an instance, and it symlinked in the search path + * outside of runtime and configuration directory, then we consider it statically enabled. Note we do that only + * for instance, not for regular names, as those are merely aliases, while instances explicitly instantiate + * something, and hence are a much stronger concept. */ + if (enabled_at_all && unit_name_is_valid(i->name, UNIT_NAME_INSTANCE)) { + *state = UNIT_FILE_STATIC; + return 1; + } + + /* Hmm, we didn't find it, but maybe we found the same name + * link? */ + if (same_name_link_config) { + *state = UNIT_FILE_LINKED; + return 1; + } + if (same_name_link_runtime) { + *state = UNIT_FILE_LINKED_RUNTIME; + return 1; + } + + return 0; +} + +static void install_info_free(UnitFileInstallInfo *i) { + + if (!i) + return; + + free(i->name); + free(i->path); + strv_free(i->aliases); + strv_free(i->wanted_by); + strv_free(i->required_by); + strv_free(i->also); + free(i->default_instance); + free(i->symlink_target); + free(i); +} + +static void install_context_done(InstallContext *c) { + assert(c); + + c->will_process = ordered_hashmap_free_with_destructor(c->will_process, install_info_free); + c->have_processed = ordered_hashmap_free_with_destructor(c->have_processed, install_info_free); +} + +static UnitFileInstallInfo *install_info_find(InstallContext *c, const char *name) { + UnitFileInstallInfo *i; + + i = ordered_hashmap_get(c->have_processed, name); + if (i) + return i; + + return ordered_hashmap_get(c->will_process, name); +} + +static int install_info_may_process( + const UnitFileInstallInfo *i, + const LookupPaths *paths, + UnitFileChange **changes, + size_t *n_changes) { + assert(i); + assert(paths); + + /* Checks whether the loaded unit file is one we should process, or is masked, + * transient or generated and thus not subject to enable/disable operations. */ + + if (i->type == UNIT_FILE_TYPE_MASKED) { + unit_file_changes_add(changes, n_changes, -ERFKILL, i->path, NULL); + return -ERFKILL; + } + if (path_is_generator(paths, i->path) || + path_is_transient(paths, i->path)) { + unit_file_changes_add(changes, n_changes, -EADDRNOTAVAIL, i->path, NULL); + return -EADDRNOTAVAIL; + } + + return 0; +} + +/** + * Adds a new UnitFileInstallInfo entry under name in the InstallContext.will_process + * hashmap, or retrieves the existing one if already present. + * + * Returns negative on error, 0 if the unit was already known, 1 otherwise. + */ +static int install_info_add( + InstallContext *c, + const char *name, + const char *path, + bool auxiliary, + UnitFileInstallInfo **ret) { + + UnitFileInstallInfo *i = NULL; + int r; + + assert(c); + assert(name || path); + + if (!name) + name = basename(path); + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + i = install_info_find(c, name); + if (i) { + i->auxiliary = i->auxiliary && auxiliary; + + if (ret) + *ret = i; + return 0; + } + + r = ordered_hashmap_ensure_allocated(&c->will_process, &string_hash_ops); + if (r < 0) + return r; + + i = new(UnitFileInstallInfo, 1); + if (!i) + return -ENOMEM; + + *i = (UnitFileInstallInfo) { + .type = _UNIT_FILE_TYPE_INVALID, + .auxiliary = auxiliary, + }; + + i->name = strdup(name); + if (!i->name) { + r = -ENOMEM; + goto fail; + } + + if (path) { + i->path = strdup(path); + if (!i->path) { + r = -ENOMEM; + goto fail; + } + } + + r = ordered_hashmap_put(c->will_process, i->name, i); + if (r < 0) + goto fail; + + if (ret) + *ret = i; + + return 1; + +fail: + install_info_free(i); + return r; +} + +static int config_parse_alias( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + UnitType type; + + assert(unit); + assert(filename); + assert(lvalue); + assert(rvalue); + + type = unit_name_to_type(unit); + if (!unit_type_may_alias(type)) + return log_syntax(unit, LOG_WARNING, filename, line, 0, + "Alias= is not allowed for %s units, ignoring.", + unit_type_to_string(type)); + + return config_parse_strv(unit, filename, line, section, section_line, + lvalue, ltype, rvalue, data, userdata); +} + +static int config_parse_also( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + UnitFileInstallInfo *info = userdata, *alsoinfo = NULL; + InstallContext *c = data; + int r; + + assert(unit); + assert(filename); + assert(lvalue); + assert(rvalue); + + for (;;) { + _cleanup_free_ char *word = NULL, *printed = NULL; + + r = extract_first_word(&rvalue, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + r = install_full_printf(info, word, &printed); + if (r < 0) + return r; + + if (!unit_name_is_valid(printed, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_add(c, printed, NULL, true, &alsoinfo); + if (r < 0) + return r; + + r = strv_push(&info->also, printed); + if (r < 0) + return r; + + printed = NULL; + } + + return 0; +} + +static int config_parse_default_instance( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + UnitFileInstallInfo *i = data; + _cleanup_free_ char *printed = NULL; + int r; + + assert(unit); + assert(filename); + assert(lvalue); + assert(rvalue); + + if (unit_name_is_valid(unit, UNIT_NAME_INSTANCE)) + /* When enabling an instance, we might be using a template unit file, + * but we should ignore DefaultInstance silently. */ + return 0; + if (!unit_name_is_valid(unit, UNIT_NAME_TEMPLATE)) + return log_syntax(unit, LOG_WARNING, filename, line, 0, + "DefaultInstance= only makes sense for template units, ignoring."); + + r = install_full_printf(i, rvalue, &printed); + if (r < 0) + return r; + + if (!unit_instance_is_valid(printed)) + return -EINVAL; + + return free_and_replace(i->default_instance, printed); +} + +static int unit_file_load( + InstallContext *c, + UnitFileInstallInfo *info, + const char *path, + const char *root_dir, + SearchFlags flags) { + + const ConfigTableItem items[] = { + { "Install", "Alias", config_parse_alias, 0, &info->aliases }, + { "Install", "WantedBy", config_parse_strv, 0, &info->wanted_by }, + { "Install", "RequiredBy", config_parse_strv, 0, &info->required_by }, + { "Install", "DefaultInstance", config_parse_default_instance, 0, info }, + { "Install", "Also", config_parse_also, 0, c }, + {} + }; + + UnitType type; + _cleanup_fclose_ FILE *f = NULL; + _cleanup_close_ int fd = -1; + struct stat st; + int r; + + assert(info); + assert(path); + + if (!(flags & SEARCH_DROPIN)) { + /* Loading or checking for the main unit file… */ + + type = unit_name_to_type(info->name); + if (type < 0) + return -EINVAL; + if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE|UNIT_NAME_INSTANCE) && !unit_type_may_template(type)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Unit type %s cannot be templated.", unit_type_to_string(type)); + + if (!(flags & SEARCH_LOAD)) { + r = lstat(path, &st); + if (r < 0) + return -errno; + + if (null_or_empty(&st)) + info->type = UNIT_FILE_TYPE_MASKED; + else if (S_ISREG(st.st_mode)) + info->type = UNIT_FILE_TYPE_REGULAR; + else if (S_ISLNK(st.st_mode)) + return -ELOOP; + else if (S_ISDIR(st.st_mode)) + return -EISDIR; + else + return -ENOTTY; + + return 0; + } + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + } else { + /* Operating on a drop-in file. If we aren't supposed to load the unit file drop-ins don't matter, let's hence shortcut this. */ + + if (!(flags & SEARCH_LOAD)) + return 0; + + fd = chase_symlinks_and_open(path, root_dir, 0, O_RDONLY|O_CLOEXEC|O_NOCTTY, NULL); + if (fd < 0) + return fd; + } + + if (fstat(fd, &st) < 0) + return -errno; + + if (null_or_empty(&st)) { + if ((flags & SEARCH_DROPIN) == 0) + info->type = UNIT_FILE_TYPE_MASKED; + + return 0; + } + + r = stat_verify_regular(&st); + if (r < 0) + return r; + + f = fdopen(fd, "r"); + if (!f) + return -errno; + fd = -1; + + /* c is only needed if we actually load the file (it's referenced from items[] btw, in case you wonder.) */ + assert(c); + + r = config_parse(info->name, path, f, + NULL, + config_item_table_lookup, items, + CONFIG_PARSE_RELAXED|CONFIG_PARSE_ALLOW_INCLUDE, info); + if (r < 0) + return log_debug_errno(r, "Failed to parse %s: %m", info->name); + + if ((flags & SEARCH_DROPIN) == 0) + info->type = UNIT_FILE_TYPE_REGULAR; + + return + (int) strv_length(info->aliases) + + (int) strv_length(info->wanted_by) + + (int) strv_length(info->required_by); +} + +static int unit_file_load_or_readlink( + InstallContext *c, + UnitFileInstallInfo *info, + const char *path, + const char *root_dir, + SearchFlags flags) { + + _cleanup_free_ char *target = NULL; + int r; + + r = unit_file_load(c, info, path, root_dir, flags); + if (r != -ELOOP || (flags & SEARCH_DROPIN)) + return r; + + /* This is a symlink, let's read it. */ + + r = readlink_malloc(path, &target); + if (r < 0) + return r; + + if (path_equal(target, "/dev/null")) + info->type = UNIT_FILE_TYPE_MASKED; + else { + const char *bn; + UnitType a, b; + + bn = basename(target); + + if (unit_name_is_valid(info->name, UNIT_NAME_PLAIN)) { + + if (!unit_name_is_valid(bn, UNIT_NAME_PLAIN)) + return -EINVAL; + + } else if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) { + + if (!unit_name_is_valid(bn, UNIT_NAME_INSTANCE|UNIT_NAME_TEMPLATE)) + return -EINVAL; + + } else if (unit_name_is_valid(info->name, UNIT_NAME_TEMPLATE)) { + + if (!unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) + return -EINVAL; + } else + return -EINVAL; + + /* Enforce that the symlink destination does not + * change the unit file type. */ + + a = unit_name_to_type(info->name); + b = unit_name_to_type(bn); + if (a < 0 || b < 0 || a != b) + return -EINVAL; + + if (path_is_absolute(target)) + /* This is an absolute path, prefix the root so that we always deal with fully qualified paths */ + info->symlink_target = prefix_root(root_dir, target); + else + /* This is a relative path, take it relative to the dir the symlink is located in. */ + info->symlink_target = file_in_same_dir(path, target); + if (!info->symlink_target) + return -ENOMEM; + + info->type = UNIT_FILE_TYPE_SYMLINK; + } + + return 0; +} + +static int unit_file_search( + InstallContext *c, + UnitFileInstallInfo *info, + const LookupPaths *paths, + SearchFlags flags) { + + const char *dropin_dir_name = NULL, *dropin_template_dir_name = NULL; + _cleanup_strv_free_ char **dirs = NULL, **files = NULL; + _cleanup_free_ char *template = NULL; + bool found_unit = false; + int r, result; + char **p; + + assert(info); + assert(paths); + + /* Was this unit already loaded? */ + if (info->type != _UNIT_FILE_TYPE_INVALID) + return 0; + + if (info->path) + return unit_file_load_or_readlink(c, info, info->path, paths->root_dir, flags); + + assert(info->name); + + if (unit_name_is_valid(info->name, UNIT_NAME_INSTANCE)) { + r = unit_name_template(info->name, &template); + if (r < 0) + return r; + } + + STRV_FOREACH(p, paths->search_path) { + _cleanup_free_ char *path = NULL; + + path = strjoin(*p, "/", info->name); + if (!path) + return -ENOMEM; + + r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags); + if (r >= 0) { + info->path = TAKE_PTR(path); + result = r; + found_unit = true; + break; + } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES)) + return r; + } + + if (!found_unit && template) { + + /* Unit file doesn't exist, however instance + * enablement was requested. We will check if it is + * possible to load template unit file. */ + + STRV_FOREACH(p, paths->search_path) { + _cleanup_free_ char *path = NULL; + + path = strjoin(*p, "/", template); + if (!path) + return -ENOMEM; + + r = unit_file_load_or_readlink(c, info, path, paths->root_dir, flags); + if (r >= 0) { + info->path = TAKE_PTR(path); + result = r; + found_unit = true; + break; + } else if (!IN_SET(r, -ENOENT, -ENOTDIR, -EACCES)) + return r; + } + } + + if (!found_unit) + return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), + "Cannot find unit %s%s%s.", + info->name, template ? " or " : "", strempty(template)); + + if (info->type == UNIT_FILE_TYPE_MASKED) + return result; + + /* Search for drop-in directories */ + + dropin_dir_name = strjoina(info->name, ".d"); + STRV_FOREACH(p, paths->search_path) { + char *path; + + path = path_join(*p, dropin_dir_name); + if (!path) + return -ENOMEM; + + r = strv_consume(&dirs, path); + if (r < 0) + return r; + } + + if (template) { + dropin_template_dir_name = strjoina(template, ".d"); + STRV_FOREACH(p, paths->search_path) { + char *path; + + path = path_join(*p, dropin_template_dir_name); + if (!path) + return -ENOMEM; + + r = strv_consume(&dirs, path); + if (r < 0) + return r; + } + } + + /* Load drop-in conf files */ + + r = conf_files_list_strv(&files, ".conf", NULL, 0, (const char**) dirs); + if (r < 0) + return log_debug_errno(r, "Failed to get list of conf files: %m"); + + STRV_FOREACH(p, files) { + r = unit_file_load_or_readlink(c, info, *p, paths->root_dir, flags | SEARCH_DROPIN); + if (r < 0) + return log_debug_errno(r, "Failed to load conf file %s: %m", *p); + } + + return result; +} + +static int install_info_follow( + InstallContext *c, + UnitFileInstallInfo *i, + const char *root_dir, + SearchFlags flags, + bool ignore_different_name) { + + assert(c); + assert(i); + + if (i->type != UNIT_FILE_TYPE_SYMLINK) + return -EINVAL; + if (!i->symlink_target) + return -EINVAL; + + /* If the basename doesn't match, the caller should add a + * complete new entry for this. */ + + if (!ignore_different_name && !streq(basename(i->symlink_target), i->name)) + return -EXDEV; + + free_and_replace(i->path, i->symlink_target); + i->type = _UNIT_FILE_TYPE_INVALID; + + return unit_file_load_or_readlink(c, i, i->path, root_dir, flags); +} + +/** + * Search for the unit file. If the unit name is a symlink, follow the symlink to the + * target, maybe more than once. Propagate the instance name if present. + */ +static int install_info_traverse( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + UnitFileInstallInfo *start, + SearchFlags flags, + UnitFileInstallInfo **ret) { + + UnitFileInstallInfo *i; + unsigned k = 0; + int r; + + assert(paths); + assert(start); + assert(c); + + r = unit_file_search(c, start, paths, flags); + if (r < 0) + return r; + + i = start; + while (i->type == UNIT_FILE_TYPE_SYMLINK) { + /* Follow the symlink */ + + if (++k > UNIT_FILE_FOLLOW_SYMLINK_MAX) + return -ELOOP; + + if (!(flags & SEARCH_FOLLOW_CONFIG_SYMLINKS)) { + r = path_is_config(paths, i->path, true); + if (r < 0) + return r; + if (r > 0) + return -ELOOP; + } + + r = install_info_follow(c, i, paths->root_dir, flags, false); + if (r == -EXDEV) { + _cleanup_free_ char *buffer = NULL; + const char *bn; + + /* Target has a different name, create a new + * install info object for that, and continue + * with that. */ + + bn = basename(i->symlink_target); + + if (unit_name_is_valid(i->name, UNIT_NAME_INSTANCE) && + unit_name_is_valid(bn, UNIT_NAME_TEMPLATE)) { + + _cleanup_free_ char *instance = NULL; + + r = unit_name_to_instance(i->name, &instance); + if (r < 0) + return r; + + r = unit_name_replace_instance(bn, instance, &buffer); + if (r < 0) + return r; + + if (streq(buffer, i->name)) { + + /* We filled in the instance, and the target stayed the same? If so, then let's + * honour the link as it is. */ + + r = install_info_follow(c, i, paths->root_dir, flags, true); + if (r < 0) + return r; + + continue; + } + + bn = buffer; + } + + r = install_info_add(c, bn, NULL, false, &i); + if (r < 0) + return r; + + /* Try again, with the new target we found. */ + r = unit_file_search(c, i, paths, flags); + if (r == -ENOENT) + /* Translate error code to highlight this specific case */ + return -ENOLINK; + } + + if (r < 0) + return r; + } + + if (ret) + *ret = i; + + return 0; +} + +/** + * Call install_info_add() with name_or_path as the path (if name_or_path starts with "/") + * or the name (otherwise). root_dir is prepended to the path. + */ +static int install_info_add_auto( + InstallContext *c, + const LookupPaths *paths, + const char *name_or_path, + UnitFileInstallInfo **ret) { + + assert(c); + assert(name_or_path); + + if (path_is_absolute(name_or_path)) { + const char *pp; + + pp = prefix_roota(paths->root_dir, name_or_path); + + return install_info_add(c, NULL, pp, false, ret); + } else + return install_info_add(c, name_or_path, NULL, false, ret); +} + +static int install_info_discover( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + const char *name, + SearchFlags flags, + UnitFileInstallInfo **ret, + UnitFileChange **changes, + size_t *n_changes) { + + UnitFileInstallInfo *i; + int r; + + assert(c); + assert(paths); + assert(name); + + r = install_info_add_auto(c, paths, name, &i); + if (r >= 0) + r = install_info_traverse(scope, c, paths, i, flags, ret); + + if (r < 0) + unit_file_changes_add(changes, n_changes, r, name, NULL); + return r; +} + +static int install_info_discover_and_check( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + const char *name, + SearchFlags flags, + UnitFileInstallInfo **ret, + UnitFileChange **changes, + size_t *n_changes) { + + int r; + + r = install_info_discover(scope, c, paths, name, flags, ret, changes, n_changes); + if (r < 0) + return r; + + return install_info_may_process(ret ? *ret : NULL, paths, changes, n_changes); +} + +static int install_info_symlink_alias( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + char **s; + int r = 0, q; + + assert(i); + assert(paths); + assert(config_path); + + STRV_FOREACH(s, i->aliases) { + _cleanup_free_ char *alias_path = NULL, *dst = NULL; + + q = install_full_printf(i, *s, &dst); + if (q < 0) + return q; + + alias_path = path_make_absolute(dst, config_path); + if (!alias_path) + return -ENOMEM; + + q = create_symlink(paths, i->path, alias_path, force, changes, n_changes); + if (r == 0) + r = q; + } + + return r; +} + +static int install_info_symlink_wants( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + char **list, + const char *suffix, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_free_ char *buf = NULL; + const char *n; + char **s; + int r = 0, q; + + assert(i); + assert(paths); + assert(config_path); + + if (strv_isempty(list)) + return 0; + + if (unit_name_is_valid(i->name, UNIT_NAME_TEMPLATE)) { + UnitFileInstallInfo instance = { + .type = _UNIT_FILE_TYPE_INVALID, + }; + _cleanup_free_ char *path = NULL; + + /* If this is a template, and we have no instance, don't do anything */ + if (!i->default_instance) + return 1; + + r = unit_name_replace_instance(i->name, i->default_instance, &buf); + if (r < 0) + return r; + + instance.name = buf; + r = unit_file_search(NULL, &instance, paths, SEARCH_FOLLOW_CONFIG_SYMLINKS); + if (r < 0) + return r; + + path = TAKE_PTR(instance.path); + + if (instance.type == UNIT_FILE_TYPE_MASKED) { + unit_file_changes_add(changes, n_changes, -ERFKILL, path, NULL); + return -ERFKILL; + } + + n = buf; + } else + n = i->name; + + STRV_FOREACH(s, list) { + _cleanup_free_ char *path = NULL, *dst = NULL; + + q = install_full_printf(i, *s, &dst); + if (q < 0) + return q; + + if (!unit_name_is_valid(dst, UNIT_NAME_ANY)) { + r = -EINVAL; + continue; + } + + path = strjoin(config_path, "/", dst, suffix, n); + if (!path) + return -ENOMEM; + + q = create_symlink(paths, i->path, path, true, changes, n_changes); + if (r == 0) + r = q; + } + + return r; +} + +static int install_info_symlink_link( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_free_ char *path = NULL; + int r; + + assert(i); + assert(paths); + assert(config_path); + assert(i->path); + + r = in_search_path(paths, i->path); + if (r < 0) + return r; + if (r > 0) + return 0; + + path = strjoin(config_path, "/", i->name); + if (!path) + return -ENOMEM; + + return create_symlink(paths, i->path, path, force, changes, n_changes); +} + +static int install_info_apply( + UnitFileInstallInfo *i, + const LookupPaths *paths, + const char *config_path, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + int r, q; + + assert(i); + assert(paths); + assert(config_path); + + if (i->type != UNIT_FILE_TYPE_REGULAR) + return 0; + + r = install_info_symlink_alias(i, paths, config_path, force, changes, n_changes); + + q = install_info_symlink_wants(i, paths, config_path, i->wanted_by, ".wants/", changes, n_changes); + if (r == 0) + r = q; + + q = install_info_symlink_wants(i, paths, config_path, i->required_by, ".requires/", changes, n_changes); + if (r == 0) + r = q; + + q = install_info_symlink_link(i, paths, config_path, force, changes, n_changes); + /* Do not count links to the unit file towards the "carries_install_info" count */ + if (r == 0 && q < 0) + r = q; + + return r; +} + +static int install_context_apply( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + const char *config_path, + bool force, + SearchFlags flags, + UnitFileChange **changes, + size_t *n_changes) { + + UnitFileInstallInfo *i; + int r; + + assert(c); + assert(paths); + assert(config_path); + + if (ordered_hashmap_isempty(c->will_process)) + return 0; + + r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops); + if (r < 0) + return r; + + r = 0; + while ((i = ordered_hashmap_first(c->will_process))) { + int q; + + q = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name); + if (q < 0) + return q; + + q = install_info_traverse(scope, c, paths, i, flags, NULL); + if (q < 0) { + unit_file_changes_add(changes, n_changes, r, i->name, NULL); + return q; + } + + /* We can attempt to process a masked unit when a different unit + * that we were processing specifies it in Also=. */ + if (i->type == UNIT_FILE_TYPE_MASKED) { + unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path, NULL); + if (r >= 0) + /* Assume that something *could* have been enabled here, + * avoid "empty [Install] section" warning. */ + r += 1; + continue; + } + + if (i->type != UNIT_FILE_TYPE_REGULAR) + continue; + + q = install_info_apply(i, paths, config_path, force, changes, n_changes); + if (r >= 0) { + if (q < 0) + r = q; + else + r += q; + } + } + + return r; +} + +static int install_context_mark_for_removal( + UnitFileScope scope, + InstallContext *c, + const LookupPaths *paths, + Set **remove_symlinks_to, + const char *config_path, + UnitFileChange **changes, + size_t *n_changes) { + + UnitFileInstallInfo *i; + int r; + + assert(c); + assert(paths); + assert(config_path); + + /* Marks all items for removal */ + + if (ordered_hashmap_isempty(c->will_process)) + return 0; + + r = ordered_hashmap_ensure_allocated(&c->have_processed, &string_hash_ops); + if (r < 0) + return r; + + while ((i = ordered_hashmap_first(c->will_process))) { + + r = ordered_hashmap_move_one(c->have_processed, c->will_process, i->name); + if (r < 0) + return r; + + r = install_info_traverse(scope, c, paths, i, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, NULL); + if (r == -ENOLINK) { + log_debug_errno(r, "Name %s leads to a dangling symlink, removing name.", i->name); + unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_DANGLING, i->path ?: i->name, NULL); + } else if (r == -ENOENT) { + + if (i->auxiliary) /* some unit specified in Also= or similar is missing */ + log_debug_errno(r, "Auxiliary unit of %s not found, removing name.", i->name); + else { + log_debug_errno(r, "Unit %s not found, removing name.", i->name); + unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL); + } + + } else if (r < 0) { + log_debug_errno(r, "Failed to find unit %s, removing name: %m", i->name); + unit_file_changes_add(changes, n_changes, r, i->path ?: i->name, NULL); + } else if (i->type == UNIT_FILE_TYPE_MASKED) { + log_debug("Unit file %s is masked, ignoring.", i->name); + unit_file_changes_add(changes, n_changes, UNIT_FILE_IS_MASKED, i->path ?: i->name, NULL); + continue; + } else if (i->type != UNIT_FILE_TYPE_REGULAR) { + log_debug("Unit %s has type %s, ignoring.", i->name, unit_file_type_to_string(i->type) ?: "invalid"); + continue; + } + + r = mark_symlink_for_removal(remove_symlinks_to, i->name); + if (r < 0) + return r; + } + + return 0; +} + +int unit_file_mask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + const char *config_path; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + STRV_FOREACH(i, files) { + _cleanup_free_ char *path = NULL; + int q; + + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) { + if (r == 0) + r = -EINVAL; + continue; + } + + path = path_make_absolute(*i, config_path); + if (!path) + return -ENOMEM; + + q = create_symlink(&paths, "/dev/null", path, !!(flags & UNIT_FILE_FORCE), changes, n_changes); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +int unit_file_unmask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + _cleanup_strv_free_ char **todo = NULL; + size_t n_todo = 0, n_allocated = 0; + const char *config_path; + char **i; + bool dry_run; + int r, q; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + dry_run = !!(flags & UNIT_FILE_DRY_RUN); + + STRV_FOREACH(i, files) { + _cleanup_free_ char *path = NULL; + + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) + return -EINVAL; + + path = path_make_absolute(*i, config_path); + if (!path) + return -ENOMEM; + + r = null_or_empty_path(path); + if (r == -ENOENT) + continue; + if (r < 0) + return r; + if (r == 0) + continue; + + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo] = strdup(*i); + if (!todo[n_todo]) + return -ENOMEM; + + n_todo++; + } + + strv_uniq(todo); + + r = 0; + STRV_FOREACH(i, todo) { + _cleanup_free_ char *path = NULL; + const char *rp; + + path = path_make_absolute(*i, config_path); + if (!path) + return -ENOMEM; + + if (!dry_run && unlink(path) < 0) { + if (errno != ENOENT) { + if (r >= 0) + r = -errno; + unit_file_changes_add(changes, n_changes, -errno, path, NULL); + } + + continue; + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, path, NULL); + + rp = skip_root(&paths, path); + q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: path); + if (q < 0) + return q; + } + + q = remove_marked_symlinks(remove_symlinks_to, config_path, &paths, dry_run, changes, n_changes); + if (r >= 0) + r = q; + + return r; +} + +int unit_file_link( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_strv_free_ char **todo = NULL; + size_t n_todo = 0, n_allocated = 0; + const char *config_path; + char **i; + int r, q; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + STRV_FOREACH(i, files) { + _cleanup_free_ char *full = NULL; + struct stat st; + char *fn; + + if (!path_is_absolute(*i)) + return -EINVAL; + + fn = basename(*i); + if (!unit_name_is_valid(fn, UNIT_NAME_ANY)) + return -EINVAL; + + full = prefix_root(paths.root_dir, *i); + if (!full) + return -ENOMEM; + + if (lstat(full, &st) < 0) + return -errno; + r = stat_verify_regular(&st); + if (r < 0) + return r; + + q = in_search_path(&paths, *i); + if (q < 0) + return q; + if (q > 0) + continue; + + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo] = strdup(*i); + if (!todo[n_todo]) + return -ENOMEM; + + n_todo++; + } + + strv_uniq(todo); + + r = 0; + STRV_FOREACH(i, todo) { + _cleanup_free_ char *new_path = NULL; + + new_path = path_make_absolute(basename(*i), config_path); + if (!new_path) + return -ENOMEM; + + q = create_symlink(&paths, *i, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +static int path_shall_revert(const LookupPaths *paths, const char *path) { + int r; + + assert(paths); + assert(path); + + /* Checks whether the path is one where the drop-in directories shall be removed. */ + + r = path_is_config(paths, path, true); + if (r != 0) + return r; + + r = path_is_control(paths, path); + if (r != 0) + return r; + + return path_is_transient(paths, path); +} + +int unit_file_revert( + UnitFileScope scope, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_strv_free_ char **todo = NULL; + size_t n_todo = 0, n_allocated = 0; + char **i; + int r, q; + + /* Puts a unit file back into vendor state. This means: + * + * a) we remove all drop-in snippets added by the user ("config"), add to transient units ("transient"), and + * added via "systemctl set-property" ("control"), but not if the drop-in is generated ("generated"). + * + * c) if there's a vendor unit file (i.e. one in /usr) we remove any configured overriding unit files (i.e. in + * "config", but not in "transient" or "control" or even "generated"). + * + * We remove all that in both the runtime and the persistent directories, if that applies. + */ + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + STRV_FOREACH(i, files) { + bool has_vendor = false; + char **p; + + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) + return -EINVAL; + + STRV_FOREACH(p, paths.search_path) { + _cleanup_free_ char *path = NULL, *dropin = NULL; + struct stat st; + + path = path_make_absolute(*i, *p); + if (!path) + return -ENOMEM; + + r = lstat(path, &st); + if (r < 0) { + if (errno != ENOENT) + return -errno; + } else if (S_ISREG(st.st_mode)) { + /* Check if there's a vendor version */ + r = path_is_vendor(&paths, path); + if (r < 0) + return r; + if (r > 0) + has_vendor = true; + } + + dropin = strappend(path, ".d"); + if (!dropin) + return -ENOMEM; + + r = lstat(dropin, &st); + if (r < 0) { + if (errno != ENOENT) + return -errno; + } else if (S_ISDIR(st.st_mode)) { + /* Remove the drop-ins */ + r = path_shall_revert(&paths, dropin); + if (r < 0) + return r; + if (r > 0) { + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo++] = TAKE_PTR(dropin); + } + } + } + + if (!has_vendor) + continue; + + /* OK, there's a vendor version, hence drop all configuration versions */ + STRV_FOREACH(p, paths.search_path) { + _cleanup_free_ char *path = NULL; + struct stat st; + + path = path_make_absolute(*i, *p); + if (!path) + return -ENOMEM; + + r = lstat(path, &st); + if (r < 0) { + if (errno != ENOENT) + return -errno; + } else if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { + r = path_is_config(&paths, path, true); + if (r < 0) + return r; + if (r > 0) { + if (!GREEDY_REALLOC0(todo, n_allocated, n_todo + 2)) + return -ENOMEM; + + todo[n_todo++] = TAKE_PTR(path); + } + } + } + } + + strv_uniq(todo); + + r = 0; + STRV_FOREACH(i, todo) { + _cleanup_strv_free_ char **fs = NULL; + const char *rp; + char **j; + + (void) get_files_in_directory(*i, &fs); + + q = rm_rf(*i, REMOVE_ROOT|REMOVE_PHYSICAL); + if (q < 0 && q != -ENOENT && r >= 0) { + r = q; + continue; + } + + STRV_FOREACH(j, fs) { + _cleanup_free_ char *t = NULL; + + t = strjoin(*i, "/", *j); + if (!t) + return -ENOMEM; + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, t, NULL); + } + + unit_file_changes_add(changes, n_changes, UNIT_FILE_UNLINK, *i, NULL); + + rp = skip_root(&paths, *i); + q = mark_symlink_for_removal(&remove_symlinks_to, rp ?: *i); + if (q < 0) + return q; + } + + q = remove_marked_symlinks(remove_symlinks_to, paths.runtime_config, &paths, false, changes, n_changes); + if (r >= 0) + r = q; + + q = remove_marked_symlinks(remove_symlinks_to, paths.persistent_config, &paths, false, changes, n_changes); + if (r >= 0) + r = q; + + return r; +} + +int unit_file_add_dependency( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + const char *target, + UnitDependency dep, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i, *target_info; + const char *config_path; + char **f; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(target); + + if (!IN_SET(dep, UNIT_WANTS, UNIT_REQUIRES)) + return -EINVAL; + + if (!unit_name_is_valid(target, UNIT_NAME_ANY)) + return -EINVAL; + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + r = install_info_discover_and_check(scope, &c, &paths, target, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &target_info, changes, n_changes); + if (r < 0) + return r; + + assert(target_info->type == UNIT_FILE_TYPE_REGULAR); + + STRV_FOREACH(f, files) { + char ***l; + + r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + + assert(i->type == UNIT_FILE_TYPE_REGULAR); + + /* We didn't actually load anything from the unit + * file, but instead just add in our new symlink to + * create. */ + + if (dep == UNIT_WANTS) + l = &i->wanted_by; + else + l = &i->required_by; + + strv_free(*l); + *l = strv_new(target_info->name); + if (!*l) + return -ENOMEM; + } + + return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_FOLLOW_CONFIG_SYMLINKS, changes, n_changes); +} + +int unit_file_enable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + const char *config_path; + UnitFileInstallInfo *i; + char **f; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + STRV_FOREACH(f, files) { + r = install_info_discover_and_check(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + + assert(i->type == UNIT_FILE_TYPE_REGULAR); + } + + /* This will return the number of symlink rules that were + supposed to be created, not the ones actually created. This + is useful to determine whether the passed files had any + installation data at all. */ + + return install_context_apply(scope, &c, &paths, config_path, !!(flags & UNIT_FILE_FORCE), SEARCH_LOAD, changes, n_changes); +} + +int unit_file_disable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + const char *config_path; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + STRV_FOREACH(i, files) { + if (!unit_name_is_valid(*i, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_add(&c, *i, NULL, false, NULL); + if (r < 0) + return r; + } + + r = install_context_mark_for_removal(scope, &c, &paths, &remove_symlinks_to, config_path, changes, n_changes); + if (r < 0) + return r; + + return remove_marked_symlinks(remove_symlinks_to, config_path, &paths, !!(flags & UNIT_FILE_DRY_RUN), changes, n_changes); +} + +int unit_file_reenable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes) { + + char **n; + int r; + size_t l, i; + + /* First, we invoke the disable command with only the basename... */ + l = strv_length(files); + n = newa(char*, l+1); + for (i = 0; i < l; i++) + n[i] = basename(files[i]); + n[i] = NULL; + + r = unit_file_disable(scope, flags, root_dir, n, changes, n_changes); + if (r < 0) + return r; + + /* But the enable command with the full name */ + return unit_file_enable(scope, flags, root_dir, files, changes, n_changes); +} + +int unit_file_set_default( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + const char *name, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i; + const char *new_path; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(name); + + if (unit_name_to_type(name) != UNIT_TARGET) /* this also validates the name */ + return -EINVAL; + if (streq(name, SPECIAL_DEFAULT_TARGET)) + return -EINVAL; + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + r = install_info_discover_and_check(scope, &c, &paths, name, 0, &i, changes, n_changes); + if (r < 0) + return r; + + new_path = strjoina(paths.persistent_config, "/" SPECIAL_DEFAULT_TARGET); + return create_symlink(&paths, i->path, new_path, !!(flags & UNIT_FILE_FORCE), changes, n_changes); +} + +int unit_file_get_default( + UnitFileScope scope, + const char *root_dir, + char **name) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i; + char *n; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(name); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + r = install_info_discover(scope, &c, &paths, SPECIAL_DEFAULT_TARGET, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, NULL, NULL); + if (r < 0) + return r; + r = install_info_may_process(i, &paths, NULL, 0); + if (r < 0) + return r; + + n = strdup(i->name); + if (!n) + return -ENOMEM; + + *name = n; + return 0; +} + +int unit_file_lookup_state( + UnitFileScope scope, + const LookupPaths *paths, + const char *name, + UnitFileState *ret) { + + _cleanup_(install_context_done) InstallContext c = {}; + UnitFileInstallInfo *i; + UnitFileState state; + int r; + + assert(paths); + assert(name); + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_discover(scope, &c, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, NULL, NULL); + if (r < 0) + return log_debug_errno(r, "Failed to discover unit %s: %m", name); + + assert(IN_SET(i->type, UNIT_FILE_TYPE_REGULAR, UNIT_FILE_TYPE_MASKED)); + log_debug("Found unit %s at %s (%s)", name, strna(i->path), + i->type == UNIT_FILE_TYPE_REGULAR ? "regular file" : "mask"); + + /* Shortcut things, if the caller just wants to know if this unit exists. */ + if (!ret) + return 0; + + switch (i->type) { + + case UNIT_FILE_TYPE_MASKED: + r = path_is_runtime(paths, i->path, true); + if (r < 0) + return r; + + state = r > 0 ? UNIT_FILE_MASKED_RUNTIME : UNIT_FILE_MASKED; + break; + + case UNIT_FILE_TYPE_REGULAR: + r = path_is_generator(paths, i->path); + if (r < 0) + return r; + if (r > 0) { + state = UNIT_FILE_GENERATED; + break; + } + + r = path_is_transient(paths, i->path); + if (r < 0) + return r; + if (r > 0) { + state = UNIT_FILE_TRANSIENT; + break; + } + + /* Check if any of the Alias= symlinks have been created. + * We ignore other aliases, and only check those that would + * be created by systemctl enable for this unit. */ + r = find_symlinks_in_scope(scope, paths, i, true, &state); + if (r < 0) + return r; + if (r > 0) + break; + + /* Check if the file is known under other names. If it is, + * it might be in use. Report that as UNIT_FILE_INDIRECT. */ + r = find_symlinks_in_scope(scope, paths, i, false, &state); + if (r < 0) + return r; + if (r > 0) + state = UNIT_FILE_INDIRECT; + else { + if (unit_file_install_info_has_rules(i)) + state = UNIT_FILE_DISABLED; + else if (unit_file_install_info_has_also(i)) + state = UNIT_FILE_INDIRECT; + else + state = UNIT_FILE_STATIC; + } + + break; + + default: + assert_not_reached("Unexpect unit file type."); + } + + *ret = state; + return 0; +} + +int unit_file_get_state( + UnitFileScope scope, + const char *root_dir, + const char *name, + UnitFileState *ret) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(name); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + return unit_file_lookup_state(scope, &paths, name, ret); +} + +int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name) { + _cleanup_(install_context_done) InstallContext c = {}; + int r; + + assert(paths); + assert(name); + + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + r = install_info_discover(scope, &c, paths, name, 0, NULL, NULL, NULL); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + return 1; +} + +static int split_pattern_into_name_and_instances(const char *pattern, char **out_unit_name, char ***out_instances) { + _cleanup_strv_free_ char **instances = NULL; + _cleanup_free_ char *unit_name = NULL; + int r; + + assert(pattern); + assert(out_instances); + assert(out_unit_name); + + r = extract_first_word(&pattern, &unit_name, NULL, 0); + if (r < 0) + return r; + + /* We handle the instances logic when unit name is extracted */ + if (pattern) { + /* We only create instances when a rule of templated unit + * is seen. A rule like enable foo@.service a b c will + * result in an array of (a, b, c) as instance names */ + if (!unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) + return -EINVAL; + + instances = strv_split(pattern, WHITESPACE); + if (!instances) + return -ENOMEM; + + *out_instances = TAKE_PTR(instances); + } + + *out_unit_name = TAKE_PTR(unit_name); + + return 0; +} + +static int read_presets(UnitFileScope scope, const char *root_dir, Presets *presets) { + _cleanup_(presets_freep) Presets ps = {}; + size_t n_allocated = 0; + _cleanup_strv_free_ char **files = NULL; + char **p; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(presets); + + switch (scope) { + case UNIT_FILE_SYSTEM: + r = conf_files_list(&files, ".preset", root_dir, 0, + "/etc/systemd/system-preset", + "/run/systemd/system-preset", + "/usr/local/lib/systemd/system-preset", + "/usr/lib/systemd/system-preset", +#if HAVE_SPLIT_USR + "/lib/systemd/system-preset", +#endif + NULL); + break; + + case UNIT_FILE_GLOBAL: + case UNIT_FILE_USER: + r = conf_files_list(&files, ".preset", root_dir, 0, + "/etc/systemd/user-preset", + "/run/systemd/user-preset", + "/usr/local/lib/systemd/user-preset", + "/usr/lib/systemd/user-preset", + NULL); + break; + + default: + assert_not_reached("Invalid unit file scope"); + } + + if (r < 0) + return r; + + STRV_FOREACH(p, files) { + _cleanup_fclose_ FILE *f; + int n = 0; + + f = fopen(*p, "re"); + if (!f) { + if (errno == ENOENT) + continue; + + return -errno; + } + + for (;;) { + _cleanup_free_ char *line = NULL; + PresetRule rule = {}; + const char *parameter; + char *l; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return r; + if (r == 0) + break; + + l = strstrip(line); + n++; + + if (isempty(l)) + continue; + if (strchr(COMMENTS, *l)) + continue; + + parameter = first_word(l, "enable"); + if (parameter) { + char *unit_name; + char **instances = NULL; + + /* Unit_name will remain the same as parameter when no instances are specified */ + r = split_pattern_into_name_and_instances(parameter, &unit_name, &instances); + if (r < 0) { + log_syntax(NULL, LOG_WARNING, *p, n, r, "Couldn't parse line '%s'. Ignoring.", line); + continue; + } + + rule = (PresetRule) { + .pattern = unit_name, + .action = PRESET_ENABLE, + .instances = instances, + }; + } + + parameter = first_word(l, "disable"); + if (parameter) { + char *pattern; + + pattern = strdup(parameter); + if (!pattern) + return -ENOMEM; + + rule = (PresetRule) { + .pattern = pattern, + .action = PRESET_DISABLE, + }; + } + + if (rule.action) { + if (!GREEDY_REALLOC(ps.rules, n_allocated, ps.n_rules + 1)) + return -ENOMEM; + + ps.rules[ps.n_rules++] = rule; + continue; + } + + log_syntax(NULL, LOG_WARNING, *p, n, 0, "Couldn't parse line '%s'. Ignoring.", line); + } + } + + *presets = ps; + ps = (Presets){}; + + return 0; +} + +static int pattern_match_multiple_instances( + const PresetRule rule, + const char *unit_name, + char ***ret) { + + _cleanup_free_ char *templated_name = NULL; + int r; + + /* If no ret is needed or the rule itself does not have instances + * initalized, we return not matching */ + if (!ret || !rule.instances) + return 0; + + r = unit_name_template(unit_name, &templated_name); + if (r < 0) + return r; + if (!streq(rule.pattern, templated_name)) + return 0; + + /* Compose a list of specified instances when unit name is a template */ + if (unit_name_is_valid(unit_name, UNIT_NAME_TEMPLATE)) { + _cleanup_free_ char *prefix = NULL; + _cleanup_strv_free_ char **out_strv = NULL; + char **iter; + + r = unit_name_to_prefix(unit_name, &prefix); + if (r < 0) + return r; + + STRV_FOREACH(iter, rule.instances) { + _cleanup_free_ char *name = NULL; + r = unit_name_build(prefix, *iter, ".service", &name); + if (r < 0) + return r; + r = strv_extend(&out_strv, name); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(out_strv); + return 1; + } else { + /* We now know the input unit name is an instance name */ + _cleanup_free_ char *instance_name = NULL; + + r = unit_name_to_instance(unit_name, &instance_name); + if (r < 0) + return r; + + if (strv_find(rule.instances, instance_name)) + return 1; + } + return 0; +} + +static int query_presets(const char *name, const Presets presets, char ***instance_name_list) { + PresetAction action = PRESET_UNKNOWN; + size_t i; + char **s; + if (!unit_name_is_valid(name, UNIT_NAME_ANY)) + return -EINVAL; + + for (i = 0; i < presets.n_rules; i++) + if (pattern_match_multiple_instances(presets.rules[i], name, instance_name_list) > 0 || + fnmatch(presets.rules[i].pattern, name, FNM_NOESCAPE) == 0) { + action = presets.rules[i].action; + break; + } + + switch (action) { + case PRESET_UNKNOWN: + log_debug("Preset files don't specify rule for %s. Enabling.", name); + return 1; + case PRESET_ENABLE: + if (instance_name_list && *instance_name_list) + STRV_FOREACH(s, *instance_name_list) + log_debug("Preset files say enable %s.", *s); + else + log_debug("Preset files say enable %s.", name); + return 1; + case PRESET_DISABLE: + log_debug("Preset files say disable %s.", name); + return 0; + default: + assert_not_reached("invalid preset action"); + } +} + +int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name) { + _cleanup_(presets_freep) Presets presets = {}; + int r; + + r = read_presets(scope, root_dir, &presets); + if (r < 0) + return r; + + return query_presets(name, presets, NULL); +} + +static int execute_preset( + UnitFileScope scope, + InstallContext *plus, + InstallContext *minus, + const LookupPaths *paths, + const char *config_path, + char **files, + UnitFilePresetMode mode, + bool force, + UnitFileChange **changes, + size_t *n_changes) { + + int r; + + assert(plus); + assert(minus); + assert(paths); + assert(config_path); + + if (mode != UNIT_FILE_PRESET_ENABLE_ONLY) { + _cleanup_set_free_free_ Set *remove_symlinks_to = NULL; + + r = install_context_mark_for_removal(scope, minus, paths, &remove_symlinks_to, config_path, changes, n_changes); + if (r < 0) + return r; + + r = remove_marked_symlinks(remove_symlinks_to, config_path, paths, false, changes, n_changes); + } else + r = 0; + + if (mode != UNIT_FILE_PRESET_DISABLE_ONLY) { + int q; + + /* Returns number of symlinks that where supposed to be installed. */ + q = install_context_apply(scope, plus, paths, config_path, force, SEARCH_LOAD, changes, n_changes); + if (r >= 0) { + if (q < 0) + r = q; + else + r += q; + } + } + + return r; +} + +static int preset_prepare_one( + UnitFileScope scope, + InstallContext *plus, + InstallContext *minus, + LookupPaths *paths, + const char *name, + Presets presets, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(install_context_done) InstallContext tmp = {}; + _cleanup_strv_free_ char **instance_name_list = NULL; + UnitFileInstallInfo *i; + int r; + + if (install_info_find(plus, name) || install_info_find(minus, name)) + return 0; + + r = install_info_discover(scope, &tmp, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + if (!streq(name, i->name)) { + log_debug("Skipping %s because it is an alias for %s.", name, i->name); + return 0; + } + + r = query_presets(name, presets, &instance_name_list); + if (r < 0) + return r; + + if (r > 0) { + if (instance_name_list) { + char **s; + STRV_FOREACH(s, instance_name_list) { + r = install_info_discover_and_check(scope, plus, paths, *s, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + } + } else { + r = install_info_discover_and_check(scope, plus, paths, name, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + if (r < 0) + return r; + } + + } else + r = install_info_discover(scope, minus, paths, name, SEARCH_FOLLOW_CONFIG_SYMLINKS, + &i, changes, n_changes); + + return r; +} + +int unit_file_preset( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(install_context_done) InstallContext plus = {}, minus = {}; + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(presets_freep) Presets presets = {}; + const char *config_path; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(mode < _UNIT_FILE_PRESET_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + r = read_presets(scope, root_dir, &presets); + if (r < 0) + return r; + + STRV_FOREACH(i, files) { + r = preset_prepare_one(scope, &plus, &minus, &paths, *i, presets, changes, n_changes); + if (r < 0) + return r; + } + + return execute_preset(scope, &plus, &minus, &paths, config_path, files, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes); +} + +int unit_file_preset_all( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes) { + + _cleanup_(install_context_done) InstallContext plus = {}, minus = {}; + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + _cleanup_(presets_freep) Presets presets = {}; + const char *config_path = NULL; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(mode < _UNIT_FILE_PRESET_MAX); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + config_path = (flags & UNIT_FILE_RUNTIME) ? paths.runtime_config : paths.persistent_config; + if (!config_path) + return -ENXIO; + + r = read_presets(scope, root_dir, &presets); + if (r < 0) + return r; + + STRV_FOREACH(i, paths.search_path) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir(*i); + if (!d) { + if (errno == ENOENT) + continue; + + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + + if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) + continue; + + dirent_ensure_type(d, de); + + if (!IN_SET(de->d_type, DT_LNK, DT_REG)) + continue; + + /* we don't pass changes[] in, because we want to handle errors on our own */ + r = preset_prepare_one(scope, &plus, &minus, &paths, de->d_name, presets, NULL, 0); + if (r == -ERFKILL) + r = unit_file_changes_add(changes, n_changes, + UNIT_FILE_IS_MASKED, de->d_name, NULL); + else if (r == -ENOLINK) + r = unit_file_changes_add(changes, n_changes, + UNIT_FILE_IS_DANGLING, de->d_name, NULL); + else if (r == -EADDRNOTAVAIL) /* Ignore generated/transient units when applying preset */ + continue; + if (r < 0) + return r; + } + } + + return execute_preset(scope, &plus, &minus, &paths, config_path, NULL, mode, !!(flags & UNIT_FILE_FORCE), changes, n_changes); +} + +static void unit_file_list_free_one(UnitFileList *f) { + if (!f) + return; + + free(f->path); + free(f); +} + +Hashmap* unit_file_list_free(Hashmap *h) { + return hashmap_free_with_destructor(h, unit_file_list_free_one); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(UnitFileList*, unit_file_list_free_one); + +int unit_file_get_list( + UnitFileScope scope, + const char *root_dir, + Hashmap *h, + char **states, + char **patterns) { + + _cleanup_(lookup_paths_free) LookupPaths paths = {}; + char **i; + int r; + + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + assert(h); + + r = lookup_paths_init(&paths, scope, 0, root_dir); + if (r < 0) + return r; + + STRV_FOREACH(i, paths.search_path) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir(*i); + if (!d) { + if (errno == ENOENT) + continue; + if (IN_SET(errno, ENOTDIR, EACCES)) { + log_debug_errno(errno, "Failed to open \"%s\": %m", *i); + continue; + } + + return -errno; + } + + FOREACH_DIRENT(de, d, return -errno) { + _cleanup_(unit_file_list_free_onep) UnitFileList *f = NULL; + + if (!unit_name_is_valid(de->d_name, UNIT_NAME_ANY)) + continue; + + if (!strv_fnmatch_or_empty(patterns, de->d_name, FNM_NOESCAPE)) + continue; + + if (hashmap_get(h, de->d_name)) + continue; + + dirent_ensure_type(d, de); + + if (!IN_SET(de->d_type, DT_LNK, DT_REG)) + continue; + + f = new0(UnitFileList, 1); + if (!f) + return -ENOMEM; + + f->path = path_make_absolute(de->d_name, *i); + if (!f->path) + return -ENOMEM; + + r = unit_file_lookup_state(scope, &paths, de->d_name, &f->state); + if (r < 0) + f->state = UNIT_FILE_BAD; + + if (!strv_isempty(states) && + !strv_contains(states, unit_file_state_to_string(f->state))) + continue; + + r = hashmap_put(h, basename(f->path), f); + if (r < 0) + return r; + + f = NULL; /* prevent cleanup */ + } + } + + return 0; +} + +static const char* const unit_file_state_table[_UNIT_FILE_STATE_MAX] = { + [UNIT_FILE_ENABLED] = "enabled", + [UNIT_FILE_ENABLED_RUNTIME] = "enabled-runtime", + [UNIT_FILE_LINKED] = "linked", + [UNIT_FILE_LINKED_RUNTIME] = "linked-runtime", + [UNIT_FILE_MASKED] = "masked", + [UNIT_FILE_MASKED_RUNTIME] = "masked-runtime", + [UNIT_FILE_STATIC] = "static", + [UNIT_FILE_DISABLED] = "disabled", + [UNIT_FILE_INDIRECT] = "indirect", + [UNIT_FILE_GENERATED] = "generated", + [UNIT_FILE_TRANSIENT] = "transient", + [UNIT_FILE_BAD] = "bad", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_file_state, UnitFileState); + +static const char* const unit_file_change_type_table[_UNIT_FILE_CHANGE_TYPE_MAX] = { + [UNIT_FILE_SYMLINK] = "symlink", + [UNIT_FILE_UNLINK] = "unlink", + [UNIT_FILE_IS_MASKED] = "masked", + [UNIT_FILE_IS_DANGLING] = "dangling", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_file_change_type, UnitFileChangeType); + +static const char* const unit_file_preset_mode_table[_UNIT_FILE_PRESET_MAX] = { + [UNIT_FILE_PRESET_FULL] = "full", + [UNIT_FILE_PRESET_ENABLE_ONLY] = "enable-only", + [UNIT_FILE_PRESET_DISABLE_ONLY] = "disable-only", +}; + +DEFINE_STRING_TABLE_LOOKUP(unit_file_preset_mode, UnitFilePresetMode); diff --git a/src/shared/install.h b/src/shared/install.h new file mode 100644 index 0000000..e452940 --- /dev/null +++ b/src/shared/install.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +typedef enum UnitFileScope UnitFileScope; +typedef enum UnitFileState UnitFileState; +typedef enum UnitFilePresetMode UnitFilePresetMode; +typedef enum UnitFileChangeType UnitFileChangeType; +typedef enum UnitFileFlags UnitFileFlags; +typedef enum UnitFileType UnitFileType; +typedef struct UnitFileChange UnitFileChange; +typedef struct UnitFileList UnitFileList; +typedef struct UnitFileInstallInfo UnitFileInstallInfo; + +#include <stdbool.h> + +#include "hashmap.h" +#include "macro.h" +#include "path-lookup.h" +#include "strv.h" +#include "unit-name.h" + +enum UnitFileScope { + UNIT_FILE_SYSTEM, + UNIT_FILE_GLOBAL, + UNIT_FILE_USER, + _UNIT_FILE_SCOPE_MAX, + _UNIT_FILE_SCOPE_INVALID = -1 +}; + +enum UnitFileState { + UNIT_FILE_ENABLED, + UNIT_FILE_ENABLED_RUNTIME, + UNIT_FILE_LINKED, + UNIT_FILE_LINKED_RUNTIME, + UNIT_FILE_MASKED, + UNIT_FILE_MASKED_RUNTIME, + UNIT_FILE_STATIC, + UNIT_FILE_DISABLED, + UNIT_FILE_INDIRECT, + UNIT_FILE_GENERATED, + UNIT_FILE_TRANSIENT, + UNIT_FILE_BAD, + _UNIT_FILE_STATE_MAX, + _UNIT_FILE_STATE_INVALID = -1 +}; + +enum UnitFilePresetMode { + UNIT_FILE_PRESET_FULL, + UNIT_FILE_PRESET_ENABLE_ONLY, + UNIT_FILE_PRESET_DISABLE_ONLY, + _UNIT_FILE_PRESET_MAX, + _UNIT_FILE_PRESET_INVALID = -1 +}; + +enum UnitFileChangeType { + UNIT_FILE_SYMLINK, + UNIT_FILE_UNLINK, + UNIT_FILE_IS_MASKED, + UNIT_FILE_IS_DANGLING, + _UNIT_FILE_CHANGE_TYPE_MAX, + _UNIT_FILE_CHANGE_TYPE_INVALID = INT_MIN +}; + +enum UnitFileFlags { + UNIT_FILE_RUNTIME = 1 << 0, + UNIT_FILE_FORCE = 1 << 1, + UNIT_FILE_DRY_RUN = 1 << 2, +}; + +/* type can either one of the UnitFileChangeTypes listed above, or a negative error. + * If source is specified, it should be the contents of the path symlink. + * In case of an error, source should be the existing symlink contents or NULL + */ +struct UnitFileChange { + int type; /* UnitFileChangeType or bust */ + char *path; + char *source; +}; + +static inline bool unit_file_changes_have_modification(const UnitFileChange* changes, size_t n_changes) { + size_t i; + for (i = 0; i < n_changes; i++) + if (IN_SET(changes[i].type, UNIT_FILE_SYMLINK, UNIT_FILE_UNLINK)) + return true; + return false; +} + +struct UnitFileList { + char *path; + UnitFileState state; +}; + +enum UnitFileType { + UNIT_FILE_TYPE_REGULAR, + UNIT_FILE_TYPE_SYMLINK, + UNIT_FILE_TYPE_MASKED, + _UNIT_FILE_TYPE_MAX, + _UNIT_FILE_TYPE_INVALID = -1, +}; + +struct UnitFileInstallInfo { + char *name; + char *path; + + char **aliases; + char **wanted_by; + char **required_by; + char **also; + + char *default_instance; + char *symlink_target; + + UnitFileType type; + bool auxiliary; +}; + +bool unit_type_may_alias(UnitType type) _const_; +bool unit_type_may_template(UnitType type) _const_; + +int unit_file_enable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_disable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_reenable( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_preset( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_preset_all( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + UnitFilePresetMode mode, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_mask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_unmask( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_link( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_revert( + UnitFileScope scope, + const char *root_dir, + char **files, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_set_default( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + const char *file, + UnitFileChange **changes, + size_t *n_changes); +int unit_file_get_default( + UnitFileScope scope, + const char *root_dir, + char **name); +int unit_file_add_dependency( + UnitFileScope scope, + UnitFileFlags flags, + const char *root_dir, + char **files, + const char *target, + UnitDependency dep, + UnitFileChange **changes, + size_t *n_changes); + +int unit_file_lookup_state( + UnitFileScope scope, + const LookupPaths *paths, + const char *name, + UnitFileState *ret); + +int unit_file_get_state(UnitFileScope scope, const char *root_dir, const char *filename, UnitFileState *ret); +int unit_file_exists(UnitFileScope scope, const LookupPaths *paths, const char *name); + +int unit_file_get_list(UnitFileScope scope, const char *root_dir, Hashmap *h, char **states, char **patterns); +Hashmap* unit_file_list_free(Hashmap *h); + +int unit_file_changes_add(UnitFileChange **changes, size_t *n_changes, UnitFileChangeType type, const char *path, const char *source); +void unit_file_changes_free(UnitFileChange *changes, size_t n_changes); +void unit_file_dump_changes(int r, const char *verb, const UnitFileChange *changes, size_t n_changes, bool quiet); + +int unit_file_query_preset(UnitFileScope scope, const char *root_dir, const char *name); + +const char *unit_file_state_to_string(UnitFileState s) _const_; +UnitFileState unit_file_state_from_string(const char *s) _pure_; +/* from_string conversion is unreliable because of the overlap between -EPERM and -1 for error. */ + +const char *unit_file_change_type_to_string(UnitFileChangeType s) _const_; +UnitFileChangeType unit_file_change_type_from_string(const char *s) _pure_; + +const char *unit_file_preset_mode_to_string(UnitFilePresetMode m) _const_; +UnitFilePresetMode unit_file_preset_mode_from_string(const char *s) _pure_; diff --git a/src/shared/ip-protocol-list.c b/src/shared/ip-protocol-list.c new file mode 100644 index 0000000..aa675ea --- /dev/null +++ b/src/shared/ip-protocol-list.c @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <netinet/in.h> + +#include "alloc-util.h" +#include "ip-protocol-list.h" +#include "macro.h" +#include "parse-util.h" +#include "string-util.h" + +static const struct ip_protocol_name* lookup_ip_protocol(register const char *str, register GPERF_LEN_TYPE len); + +#include "ip-protocol-from-name.h" +#include "ip-protocol-to-name.h" + +const char *ip_protocol_to_name(int id) { + + if (id < 0) + return NULL; + + if ((size_t) id >= ELEMENTSOF(ip_protocol_names)) + return NULL; + + return ip_protocol_names[id]; +} + +int ip_protocol_from_name(const char *name) { + const struct ip_protocol_name *sc; + + assert(name); + + sc = lookup_ip_protocol(name, strlen(name)); + if (!sc) + return -EINVAL; + + return sc->id; +} + +int parse_ip_protocol(const char *s) { + _cleanup_free_ char *str = NULL; + int i, r; + + assert(s); + + if (isempty(s)) + return IPPROTO_IP; + + /* Do not use strdupa() here, as the input string may come from * + * command line or config files. */ + str = strdup(s); + if (!str) + return -ENOMEM; + + i = ip_protocol_from_name(ascii_strlower(str)); + if (i >= 0) + return i; + + r = safe_atoi(str, &i); + if (r < 0) + return r; + + if (!ip_protocol_to_name(i)) + return -EINVAL; + + return i; +} diff --git a/src/shared/ip-protocol-list.h b/src/shared/ip-protocol-list.h new file mode 100644 index 0000000..5c94969 --- /dev/null +++ b/src/shared/ip-protocol-list.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +const char *ip_protocol_to_name(int id); +int ip_protocol_from_name(const char *name); +int parse_ip_protocol(const char *s); diff --git a/src/shared/ip-protocol-to-name.awk b/src/shared/ip-protocol-to-name.awk new file mode 100644 index 0000000..824f811 --- /dev/null +++ b/src/shared/ip-protocol-to-name.awk @@ -0,0 +1,9 @@ +BEGIN{ + print "static const char* const ip_protocol_names[] = { " +} +!/HOPOPTS/ { + printf " [IPPROTO_%s] = \"%s\",\n", $1, tolower($1) +} +END{ + print "};" +} diff --git a/src/shared/journal-importer.c b/src/shared/journal-importer.c new file mode 100644 index 0000000..8638cd3 --- /dev/null +++ b/src/shared/journal-importer.c @@ -0,0 +1,504 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "escape.h" +#include "fd-util.h" +#include "io-util.h" +#include "journal-file.h" +#include "journal-importer.h" +#include "journal-util.h" +#include "parse-util.h" +#include "string-util.h" +#include "unaligned.h" + +enum { + IMPORTER_STATE_LINE = 0, /* waiting to read, or reading line */ + IMPORTER_STATE_DATA_START, /* reading binary data header */ + IMPORTER_STATE_DATA, /* reading binary data */ + IMPORTER_STATE_DATA_FINISH, /* expecting newline */ + IMPORTER_STATE_EOF, /* done */ +}; + +static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) { + if (iovw->count >= ENTRY_FIELD_COUNT_MAX) + return -E2BIG; + + if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1)) + return log_oom(); + + iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len); + return 0; +} + +static void iovw_free_contents(struct iovec_wrapper *iovw) { + iovw->iovec = mfree(iovw->iovec); + iovw->size_bytes = iovw->count = 0; +} + +static void iovw_rebase(struct iovec_wrapper *iovw, char *old, char *new) { + size_t i; + + for (i = 0; i < iovw->count; i++) + iovw->iovec[i].iov_base = (char*) iovw->iovec[i].iov_base - old + new; +} + +size_t iovw_size(struct iovec_wrapper *iovw) { + size_t n = 0, i; + + for (i = 0; i < iovw->count; i++) + n += iovw->iovec[i].iov_len; + + return n; +} + +void journal_importer_cleanup(JournalImporter *imp) { + if (imp->fd >= 0 && !imp->passive_fd) { + log_debug("Closing %s (fd=%d)", imp->name ?: "importer", imp->fd); + safe_close(imp->fd); + } + + free(imp->name); + free(imp->buf); + iovw_free_contents(&imp->iovw); +} + +static char* realloc_buffer(JournalImporter *imp, size_t size) { + char *b, *old = imp->buf; + + b = GREEDY_REALLOC(imp->buf, imp->size, size); + if (!b) + return NULL; + + iovw_rebase(&imp->iovw, old, imp->buf); + + return b; +} + +static int get_line(JournalImporter *imp, char **line, size_t *size) { + ssize_t n; + char *c = NULL; + + assert(imp); + assert(imp->state == IMPORTER_STATE_LINE); + assert(imp->offset <= imp->filled); + assert(imp->filled <= imp->size); + assert(!imp->buf || imp->size > 0); + assert(imp->fd >= 0); + + for (;;) { + if (imp->buf) { + size_t start = MAX(imp->scanned, imp->offset); + + c = memchr(imp->buf + start, '\n', + imp->filled - start); + if (c != NULL) + break; + } + + imp->scanned = imp->filled; + if (imp->scanned >= DATA_SIZE_MAX) + return log_error_errno(SYNTHETIC_ERRNO(ENOBUFS), + "Entry is bigger than %u bytes.", + DATA_SIZE_MAX); + + if (imp->passive_fd) + /* we have to wait for some data to come to us */ + return -EAGAIN; + + /* We know that imp->filled is at most DATA_SIZE_MAX, so if + we reallocate it, we'll increase the size at least a bit. */ + assert_cc(DATA_SIZE_MAX < ENTRY_SIZE_MAX); + if (imp->size - imp->filled < LINE_CHUNK && + !realloc_buffer(imp, MIN(imp->filled + LINE_CHUNK, ENTRY_SIZE_MAX))) + return log_oom(); + + assert(imp->buf); + assert(imp->size - imp->filled >= LINE_CHUNK || + imp->size == ENTRY_SIZE_MAX); + + n = read(imp->fd, + imp->buf + imp->filled, + imp->size - imp->filled); + if (n < 0) { + if (errno != EAGAIN) + log_error_errno(errno, "read(%d, ..., %zu): %m", + imp->fd, + imp->size - imp->filled); + return -errno; + } else if (n == 0) + return 0; + + imp->filled += n; + } + + *line = imp->buf + imp->offset; + *size = c + 1 - imp->buf - imp->offset; + imp->offset += *size; + + return 1; +} + +static int fill_fixed_size(JournalImporter *imp, void **data, size_t size) { + + assert(imp); + assert(IN_SET(imp->state, IMPORTER_STATE_DATA_START, IMPORTER_STATE_DATA, IMPORTER_STATE_DATA_FINISH)); + assert(size <= DATA_SIZE_MAX); + assert(imp->offset <= imp->filled); + assert(imp->filled <= imp->size); + assert(imp->buf || imp->size == 0); + assert(!imp->buf || imp->size > 0); + assert(imp->fd >= 0); + assert(data); + + while (imp->filled - imp->offset < size) { + int n; + + if (imp->passive_fd) + /* we have to wait for some data to come to us */ + return -EAGAIN; + + if (!realloc_buffer(imp, imp->offset + size)) + return log_oom(); + + n = read(imp->fd, imp->buf + imp->filled, + imp->size - imp->filled); + if (n < 0) { + if (errno != EAGAIN) + log_error_errno(errno, "read(%d, ..., %zu): %m", imp->fd, + imp->size - imp->filled); + return -errno; + } else if (n == 0) + return 0; + + imp->filled += n; + } + + *data = imp->buf + imp->offset; + imp->offset += size; + + return 1; +} + +static int get_data_size(JournalImporter *imp) { + int r; + void *data; + + assert(imp); + assert(imp->state == IMPORTER_STATE_DATA_START); + assert(imp->data_size == 0); + + r = fill_fixed_size(imp, &data, sizeof(uint64_t)); + if (r <= 0) + return r; + + imp->data_size = unaligned_read_le64(data); + if (imp->data_size > DATA_SIZE_MAX) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Stream declares field with size %zu > DATA_SIZE_MAX = %u", + imp->data_size, DATA_SIZE_MAX); + if (imp->data_size == 0) + log_warning("Binary field with zero length"); + + return 1; +} + +static int get_data_data(JournalImporter *imp, void **data) { + int r; + + assert(imp); + assert(data); + assert(imp->state == IMPORTER_STATE_DATA); + + r = fill_fixed_size(imp, data, imp->data_size); + if (r <= 0) + return r; + + return 1; +} + +static int get_data_newline(JournalImporter *imp) { + int r; + char *data; + + assert(imp); + assert(imp->state == IMPORTER_STATE_DATA_FINISH); + + r = fill_fixed_size(imp, (void**) &data, 1); + if (r <= 0) + return r; + + assert(data); + if (*data != '\n') { + char buf[4]; + int l; + + l = cescape_char(*data, buf); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected newline, got '%.*s'", l, buf); + } + + return 1; +} + +static int process_special_field(JournalImporter *imp, char *line) { + const char *value; + char buf[CELLESCAPE_DEFAULT_LENGTH]; + int r; + + assert(line); + + value = startswith(line, "__CURSOR="); + if (value) + /* ignore __CURSOR */ + return 1; + + value = startswith(line, "__REALTIME_TIMESTAMP="); + if (value) { + uint64_t x; + + r = safe_atou64(value, &x); + if (r < 0) + return log_warning_errno(r, "Failed to parse __REALTIME_TIMESTAMP '%s': %m", + cellescape(buf, sizeof buf, value)); + else if (!VALID_REALTIME(x)) { + log_warning("__REALTIME_TIMESTAMP out of range, ignoring: %"PRIu64, x); + return -ERANGE; + } + + imp->ts.realtime = x; + return 1; + } + + value = startswith(line, "__MONOTONIC_TIMESTAMP="); + if (value) { + uint64_t x; + + r = safe_atou64(value, &x); + if (r < 0) + return log_warning_errno(r, "Failed to parse __MONOTONIC_TIMESTAMP '%s': %m", + cellescape(buf, sizeof buf, value)); + else if (!VALID_MONOTONIC(x)) { + log_warning("__MONOTONIC_TIMESTAMP out of range, ignoring: %"PRIu64, x); + return -ERANGE; + } + + imp->ts.monotonic = x; + return 1; + } + + /* Just a single underline, but it needs special treatment too. */ + value = startswith(line, "_BOOT_ID="); + if (value) { + r = sd_id128_from_string(value, &imp->boot_id); + if (r < 0) + return log_warning_errno(r, "Failed to parse _BOOT_ID '%s': %m", + cellescape(buf, sizeof buf, value)); + + /* store the field in the usual fashion too */ + return 0; + } + + value = startswith(line, "__"); + if (value) { + log_notice("Unknown dunder line __%s, ignoring.", cellescape(buf, sizeof buf, value)); + return 1; + } + + /* no dunder */ + return 0; +} + +int journal_importer_process_data(JournalImporter *imp) { + int r; + + switch(imp->state) { + case IMPORTER_STATE_LINE: { + char *line, *sep; + size_t n = 0; + + assert(imp->data_size == 0); + + r = get_line(imp, &line, &n); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + assert(n > 0); + assert(line[n-1] == '\n'); + + if (n == 1) { + log_trace("Received empty line, event is ready"); + return 1; + } + + /* MESSAGE=xxx\n + or + COREDUMP\n + LLLLLLLL0011223344...\n + */ + sep = memchr(line, '=', n); + if (sep) { + /* chomp newline */ + n--; + + if (!journal_field_valid(line, sep - line, true)) { + char buf[64], *t; + + t = strndupa(line, sep - line); + log_debug("Ignoring invalid field: \"%s\"", + cellescape(buf, sizeof buf, t)); + + return 0; + } + + line[n] = '\0'; + r = process_special_field(imp, line); + if (r != 0) + return r < 0 ? r : 0; + + r = iovw_put(&imp->iovw, line, n); + if (r < 0) + return r; + } else { + /* replace \n with = */ + line[n-1] = '='; + + imp->field_len = n; + imp->state = IMPORTER_STATE_DATA_START; + + /* we cannot put the field in iovec until we have all data */ + } + + log_trace("Received: %.*s (%s)", (int) n, line, sep ? "text" : "binary"); + + return 0; /* continue */ + } + + case IMPORTER_STATE_DATA_START: + assert(imp->data_size == 0); + + r = get_data_size(imp); + // log_debug("get_data_size() -> %d", r); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + + imp->state = imp->data_size > 0 ? + IMPORTER_STATE_DATA : IMPORTER_STATE_DATA_FINISH; + + return 0; /* continue */ + + case IMPORTER_STATE_DATA: { + void *data; + char *field; + + assert(imp->data_size > 0); + + r = get_data_data(imp, &data); + // log_debug("get_data_data() -> %d", r); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + + assert(data); + + field = (char*) data - sizeof(uint64_t) - imp->field_len; + memmove(field + sizeof(uint64_t), field, imp->field_len); + + r = iovw_put(&imp->iovw, field + sizeof(uint64_t), imp->field_len + imp->data_size); + if (r < 0) + return r; + + imp->state = IMPORTER_STATE_DATA_FINISH; + + return 0; /* continue */ + } + + case IMPORTER_STATE_DATA_FINISH: + r = get_data_newline(imp); + // log_debug("get_data_newline() -> %d", r); + if (r < 0) + return r; + if (r == 0) { + imp->state = IMPORTER_STATE_EOF; + return 0; + } + + imp->data_size = 0; + imp->state = IMPORTER_STATE_LINE; + + return 0; /* continue */ + default: + assert_not_reached("wtf?"); + } +} + +int journal_importer_push_data(JournalImporter *imp, const char *data, size_t size) { + assert(imp); + assert(imp->state != IMPORTER_STATE_EOF); + + if (!realloc_buffer(imp, imp->filled + size)) + return log_error_errno(SYNTHETIC_ERRNO(ENOMEM), + "Failed to store received data of size %zu " + "(in addition to existing %zu bytes with %zu filled): %s", + size, imp->size, imp->filled, + strerror(ENOMEM)); + + memcpy(imp->buf + imp->filled, data, size); + imp->filled += size; + + return 0; +} + +void journal_importer_drop_iovw(JournalImporter *imp) { + size_t remain, target; + + /* This function drops processed data that along with the iovw that points at it */ + + iovw_free_contents(&imp->iovw); + + /* possibly reset buffer position */ + remain = imp->filled - imp->offset; + + if (remain == 0) /* no brainer */ + imp->offset = imp->scanned = imp->filled = 0; + else if (imp->offset > imp->size - imp->filled && + imp->offset > remain) { + memcpy(imp->buf, imp->buf + imp->offset, remain); + imp->offset = imp->scanned = 0; + imp->filled = remain; + } + + target = imp->size; + while (target > 16 * LINE_CHUNK && imp->filled < target / 2) + target /= 2; + if (target < imp->size) { + char *tmp; + + tmp = realloc(imp->buf, target); + if (!tmp) + log_warning("Failed to reallocate buffer to (smaller) size %zu", + target); + else { + log_debug("Reallocated buffer from %zu to %zu bytes", + imp->size, target); + imp->buf = tmp; + imp->size = target; + } + } +} + +bool journal_importer_eof(const JournalImporter *imp) { + return imp->state == IMPORTER_STATE_EOF; +} diff --git a/src/shared/journal-importer.h b/src/shared/journal-importer.h new file mode 100644 index 0000000..7914c0c --- /dev/null +++ b/src/shared/journal-importer.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#pragma once + +#include <stddef.h> +#include <stdbool.h> +#include <sys/uio.h> + +#include "sd-id128.h" + +#include "time-util.h" + +/* Make sure not to make this smaller than the maximum coredump size. + * See JOURNAL_SIZE_MAX in coredump.c */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#define ENTRY_SIZE_MAX (1024*1024*770u) +#define DATA_SIZE_MAX (1024*1024*768u) +#else +#define ENTRY_SIZE_MAX (1024*1024*13u) +#define DATA_SIZE_MAX (1024*1024*11u) +#endif +#define LINE_CHUNK 8*1024u + +/* The maximum number of fields in an entry */ +#define ENTRY_FIELD_COUNT_MAX 1024 + +struct iovec_wrapper { + struct iovec *iovec; + size_t size_bytes; + size_t count; +}; + +size_t iovw_size(struct iovec_wrapper *iovw); + +typedef struct JournalImporter { + int fd; + bool passive_fd; + char *name; + + char *buf; + size_t size; /* total size of the buffer */ + size_t offset; /* offset to the beginning of live data in the buffer */ + size_t scanned; /* number of bytes since the beginning of data without a newline */ + size_t filled; /* total number of bytes in the buffer */ + + size_t field_len; /* used for binary fields: the field name length */ + size_t data_size; /* and the size of the binary data chunk being processed */ + + struct iovec_wrapper iovw; + + int state; + dual_timestamp ts; + sd_id128_t boot_id; +} JournalImporter; + +void journal_importer_cleanup(JournalImporter *); +int journal_importer_process_data(JournalImporter *); +int journal_importer_push_data(JournalImporter *, const char *data, size_t size); +void journal_importer_drop_iovw(JournalImporter *); +bool journal_importer_eof(const JournalImporter *); + +static inline size_t journal_importer_bytes_remaining(const JournalImporter *imp) { + return imp->filled; +} diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c new file mode 100644 index 0000000..89b76af --- /dev/null +++ b/src/shared/journal-util.c @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "acl-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "journal-internal.h" +#include "journal-util.h" +#include "log.h" +#include "strv.h" +#include "user-util.h" + +static int access_check_var_log_journal(sd_journal *j, bool want_other_users) { +#if HAVE_ACL + _cleanup_strv_free_ char **g = NULL; + const char* dir; +#endif + int r; + + assert(j); + + /* If we are root, we should have access, don't warn. */ + if (getuid() == 0) + return 0; + + /* If we are in the 'systemd-journal' group, we should have + * access too. */ + r = in_group("systemd-journal"); + if (r < 0) + return log_error_errno(r, "Failed to check if we are in the 'systemd-journal' group: %m"); + if (r > 0) + return 0; + +#if HAVE_ACL + if (laccess("/run/log/journal", F_OK) >= 0) + dir = "/run/log/journal"; + else + dir = "/var/log/journal"; + + /* If we are in any of the groups listed in the journal ACLs, + * then all is good, too. Let's enumerate all groups from the + * default ACL of the directory, which generally should allow + * access to most journal files too. */ + r = acl_search_groups(dir, &g); + if (r < 0) + return log_error_errno(r, "Failed to search journal ACL: %m"); + if (r > 0) + return 0; + + /* Print a pretty list, if there were ACLs set. */ + if (!strv_isempty(g)) { + _cleanup_free_ char *s = NULL; + + /* Thre are groups in the ACL, let's list them */ + r = strv_extend(&g, "systemd-journal"); + if (r < 0) + return log_oom(); + + strv_sort(g); + strv_uniq(g); + + s = strv_join(g, "', '"); + if (!s) + return log_oom(); + + log_notice("Hint: You are currently not seeing messages from %s.\n" + " Users in groups '%s' can see all messages.\n" + " Pass -q to turn off this notice.", + want_other_users ? "other users and the system" : "the system", + s); + return 1; + } +#endif + + /* If no ACLs were found, print a short version of the message. */ + log_notice("Hint: You are currently not seeing messages from %s.\n" + " Users in the 'systemd-journal' group can see all messages. Pass -q to\n" + " turn off this notice.", + want_other_users ? "other users and the system" : "the system"); + + return 1; +} + +int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users) { + Iterator it; + void *code; + char *path; + int r = 0; + + assert(j); + + if (hashmap_isempty(j->errors)) { + if (ordered_hashmap_isempty(j->files) && !quiet) + log_notice("No journal files were found."); + + return 0; + } + + if (hashmap_contains(j->errors, INT_TO_PTR(-EACCES))) { + if (!quiet) + (void) access_check_var_log_journal(j, want_other_users); + + if (ordered_hashmap_isempty(j->files)) + r = log_error_errno(EACCES, "No journal files were opened due to insufficient permissions."); + } + + HASHMAP_FOREACH_KEY(path, code, j->errors, it) { + int err; + + err = abs(PTR_TO_INT(code)); + + switch (err) { + case EACCES: + continue; + + case ENODATA: + log_warning_errno(err, "Journal file %s is truncated, ignoring file.", path); + break; + + case EPROTONOSUPPORT: + log_warning_errno(err, "Journal file %1$s uses an unsupported feature, ignoring file.\n" + "Use SYSTEMD_LOG_LEVEL=debug journalctl --file=%1$s to see the details.", + path); + break; + + case EBADMSG: + log_warning_errno(err, "Journal file %s corrupted, ignoring file.", path); + break; + + default: + log_warning_errno(err, "An error was encountered while opening journal file or directory %s, ignoring file: %m", path); + break; + } + } + + return r; +} + +bool journal_field_valid(const char *p, size_t l, bool allow_protected) { + const char *a; + + /* We kinda enforce POSIX syntax recommendations for + environment variables here, but make a couple of additional + requirements. + + http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html */ + + if (l == (size_t) -1) + l = strlen(p); + + /* No empty field names */ + if (l <= 0) + return false; + + /* Don't allow names longer than 64 chars */ + if (l > 64) + return false; + + /* Variables starting with an underscore are protected */ + if (!allow_protected && p[0] == '_') + return false; + + /* Don't allow digits as first character */ + if (p[0] >= '0' && p[0] <= '9') + return false; + + /* Only allow A-Z0-9 and '_' */ + for (a = p; a < p + l; a++) + if ((*a < 'A' || *a > 'Z') && + (*a < '0' || *a > '9') && + *a != '_') + return false; + + return true; +} diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h new file mode 100644 index 0000000..da86434 --- /dev/null +++ b/src/shared/journal-util.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "sd-journal.h" + +bool journal_field_valid(const char *p, size_t l, bool allow_protected); + +int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users); diff --git a/src/shared/json-internal.h b/src/shared/json-internal.h new file mode 100644 index 0000000..bf158bf --- /dev/null +++ b/src/shared/json-internal.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#pragma once + +#include "json.h" + +/* This header should include all prototypes only the JSON parser itself and + * its tests need access to. Normal code consuming the JSON parser should not + * interface with this. */ + +typedef union JsonValue { + /* Encodes a simple value. On x86-64 this structure is 16 bytes wide (as long double is 128bit). */ + bool boolean; + long double real; + intmax_t integer; + uintmax_t unsig; +} JsonValue; + +/* Let's protect us against accidental structure size changes on our most relevant arch */ +#ifdef __x86_64__ +assert_cc(sizeof(JsonValue) == 16U); +#endif + +#define JSON_VALUE_NULL ((JsonValue) {}) + +/* We use fake JsonVariant objects for some special values, in order to avoid memory allocations for them. Note that + * effectively this means that there are multiple ways to encode the same objects: via these magic values or as + * properly allocated JsonVariant. We convert between both on-the-fly as necessary. */ +#define JSON_VARIANT_MAGIC_TRUE ((JsonVariant*) 1) +#define JSON_VARIANT_MAGIC_FALSE ((JsonVariant*) 2) +#define JSON_VARIANT_MAGIC_NULL ((JsonVariant*) 3) +#define JSON_VARIANT_MAGIC_ZERO_INTEGER ((JsonVariant*) 4) +#define JSON_VARIANT_MAGIC_ZERO_UNSIGNED ((JsonVariant*) 5) +#define JSON_VARIANT_MAGIC_ZERO_REAL ((JsonVariant*) 6) +#define JSON_VARIANT_MAGIC_EMPTY_STRING ((JsonVariant*) 7) +#define JSON_VARIANT_MAGIC_EMPTY_ARRAY ((JsonVariant*) 8) +#define JSON_VARIANT_MAGIC_EMPTY_OBJECT ((JsonVariant*) 9) +#define _JSON_VARIANT_MAGIC_MAX ((JsonVariant*) 10) + +/* This is only safe as long as we don't define more than 4K magic pointers, i.e. the page size of the simplest + * architectures we support. That's because we rely on the fact that malloc() will never allocate from the first memory + * page, as it is a faulting page for catching NULL pointer dereferences. */ +assert_cc((uintptr_t) _JSON_VARIANT_MAGIC_MAX < 4096U); + +enum { /* JSON tokens */ + JSON_TOKEN_END, + JSON_TOKEN_COLON, + JSON_TOKEN_COMMA, + JSON_TOKEN_OBJECT_OPEN, + JSON_TOKEN_OBJECT_CLOSE, + JSON_TOKEN_ARRAY_OPEN, + JSON_TOKEN_ARRAY_CLOSE, + JSON_TOKEN_STRING, + JSON_TOKEN_REAL, + JSON_TOKEN_INTEGER, + JSON_TOKEN_UNSIGNED, + JSON_TOKEN_BOOLEAN, + JSON_TOKEN_NULL, + _JSON_TOKEN_MAX, + _JSON_TOKEN_INVALID = -1, +}; + +int json_tokenize(const char **p, char **ret_string, JsonValue *ret_value, unsigned *ret_line, unsigned *ret_column, void **state, unsigned *line, unsigned *column); diff --git a/src/shared/json.c b/src/shared/json.c new file mode 100644 index 0000000..3786ff1 --- /dev/null +++ b/src/shared/json.c @@ -0,0 +1,3480 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <math.h> +#include <stdarg.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "sd-messages.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "float.h" +#include "hexdecoct.h" +#include "json-internal.h" +#include "json.h" +#include "macro.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "utf8.h" + +/* Refuse putting together variants with a larger depth than 4K by default (as a protection against overflowing stacks + * if code processes JSON objects recursively. Note that we store the depth in an uint16_t, hence make sure this + * remains under 2^16. + * The value was 16k, but it was discovered to be too high on llvm/x86-64. See also the issue #10738. */ +#define DEPTH_MAX (4U*1024U) +assert_cc(DEPTH_MAX <= UINT16_MAX); + +typedef struct JsonSource { + /* When we parse from a file or similar, encodes the filename, to indicate the source of a json variant */ + size_t n_ref; + unsigned max_line; + unsigned max_column; + char name[]; +} JsonSource; + +/* On x86-64 this whole structure should have a size of 6 * 64 bit = 48 bytes */ +struct JsonVariant { + union { + /* We either maintain a reference counter for this variant itself, or we are embedded into an + * array/object, in which case only that surrounding object is ref-counted. (If 'embedded' is false, + * see below.) */ + size_t n_ref; + + /* If this JsonVariant is part of an array/object, then this field points to the surrounding + * JSON_VARIANT_ARRAY/JSON_VARIANT_OBJECT object. (If 'embedded' is true, see below.) */ + JsonVariant *parent; + }; + + /* If this was parsed from some file or buffer, this stores where from, as well as the source line/column */ + JsonSource *source; + unsigned line, column; + + JsonVariantType type:5; + + /* A marker whether this variant is embedded into in array/object or not. If true, the 'parent' pointer above + * is valid. If false, the 'n_ref' field above is valid instead. */ + bool is_embedded:1; + + /* In some conditions (for example, if this object is part of an array of strings or objects), we don't store + * any data inline, but instead simply reference an external object and act as surrogate of it. In that case + * this bool is set, and the external object is referenced through the .reference field below. */ + bool is_reference:1; + + /* While comparing two arrays, we use this for marking what we already have seen */ + bool is_marked:1; + + /* The current 'depth' of the JsonVariant, i.e. how many levels of member variants this has */ + uint16_t depth; + + union { + /* For simple types we store the value in-line. */ + JsonValue value; + + /* For objects and arrays we store the number of elements immediately following */ + size_t n_elements; + + /* If is_reference as indicated above is set, this is where the reference object is actually stored. */ + JsonVariant *reference; + + /* Strings are placed immediately after the structure. Note that when this is a JsonVariant embedded + * into an array we might encode strings up to INLINE_STRING_LENGTH characters directly inside the + * element, while longer strings are stored as references. When this object is not embedded into an + * array, but stand-alone we allocate the right size for the whole structure, i.e. the array might be + * much larger than INLINE_STRING_LENGTH. + * + * Note that because we want to allocate arrays of the JsonVariant structure we specify [0] here, + * rather than the prettier []. If we wouldn't, then this char array would have undefined size, and so + * would the union and then the struct this is included in. And of structures with undefined size we + * can't allocate arrays (at least not easily). */ + char string[0]; + }; +}; + +/* Inside string arrays we have a series of JasonVariant structures one after the other. In this case, strings longer + * than INLINE_STRING_MAX are stored as references, and all shorter ones inline. (This means — on x86-64 — strings up + * to 15 chars are stored within the array elements, and all others in separate allocations) */ +#define INLINE_STRING_MAX (sizeof(JsonVariant) - offsetof(JsonVariant, string) - 1U) + +/* Let's make sure this structure isn't increased in size accidentally. This check is only for our most relevant arch + * (x86-64). */ +#ifdef __x86_64__ +assert_cc(sizeof(JsonVariant) == 48U); +assert_cc(INLINE_STRING_MAX == 15U); +#endif + +static JsonSource* json_source_new(const char *name) { + JsonSource *s; + + assert(name); + + s = malloc(offsetof(JsonSource, name) + strlen(name) + 1); + if (!s) + return NULL; + + *s = (JsonSource) { + .n_ref = 1, + }; + strcpy(s->name, name); + + return s; +} + +DEFINE_PRIVATE_TRIVIAL_REF_UNREF_FUNC(JsonSource, json_source, mfree); + +static bool json_source_equal(JsonSource *a, JsonSource *b) { + if (a == b) + return true; + + if (!a || !b) + return false; + + return streq(a->name, b->name); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(JsonSource*, json_source_unref); + +/* There are four kind of JsonVariant* pointers: + * + * 1. NULL + * 2. A 'regular' one, i.e. pointing to malloc() memory + * 3. A 'magic' one, i.e. one of the special JSON_VARIANT_MAGIC_XYZ values, that encode a few very basic values directly in the pointer. + * 4. A 'const string' one, i.e. a pointer to a const string. + * + * The four kinds of pointers can be discerned like this: + * + * Detecting #1 is easy, just compare with NULL. Detecting #3 is similarly easy: all magic pointers are below + * _JSON_VARIANT_MAGIC_MAX (which is pretty low, within the first memory page, which is special on Linux and other + * OSes, as it is a faulting page). In order to discern #2 and #4 we check the lowest bit. If it's off it's #2, + * otherwise #4. This makes use of the fact that malloc() will return "maximum aligned" memory, which definitely + * means the pointer is even. This means we can use the uneven pointers to reference static strings, as long as we + * make sure that all static strings used like this are aligned to 2 (or higher), and that we mask the bit on + * access. The JSON_VARIANT_STRING_CONST() macro encodes strings as JsonVariant* pointers, with the bit set. */ + +static bool json_variant_is_magic(const JsonVariant *v) { + if (!v) + return false; + + return v < _JSON_VARIANT_MAGIC_MAX; +} + +static bool json_variant_is_const_string(const JsonVariant *v) { + + if (v < _JSON_VARIANT_MAGIC_MAX) + return false; + + /* A proper JsonVariant is aligned to whatever malloc() aligns things too, which is definitely not uneven. We + * hence use all uneven pointers as indicators for const strings. */ + + return (((uintptr_t) v) & 1) != 0; +} + +static bool json_variant_is_regular(const JsonVariant *v) { + + if (v < _JSON_VARIANT_MAGIC_MAX) + return false; + + return (((uintptr_t) v) & 1) == 0; +} + +static JsonVariant *json_variant_dereference(JsonVariant *v) { + + /* Recursively dereference variants that are references to other variants */ + + if (!v) + return NULL; + + if (!json_variant_is_regular(v)) + return v; + + if (!v->is_reference) + return v; + + return json_variant_dereference(v->reference); +} + +static uint16_t json_variant_depth(JsonVariant *v) { + + v = json_variant_dereference(v); + if (!v) + return 0; + + if (!json_variant_is_regular(v)) + return 0; + + return v->depth; +} + +static JsonVariant *json_variant_normalize(JsonVariant *v) { + + /* Converts json variants to their normalized form, i.e. fully dereferenced and wherever possible converted to + * the "magic" version if there is one */ + + if (!v) + return NULL; + + v = json_variant_dereference(v); + + switch (json_variant_type(v)) { + + case JSON_VARIANT_BOOLEAN: + return json_variant_boolean(v) ? JSON_VARIANT_MAGIC_TRUE : JSON_VARIANT_MAGIC_FALSE; + + case JSON_VARIANT_NULL: + return JSON_VARIANT_MAGIC_NULL; + + case JSON_VARIANT_INTEGER: + return json_variant_integer(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_INTEGER : v; + + case JSON_VARIANT_UNSIGNED: + return json_variant_unsigned(v) == 0 ? JSON_VARIANT_MAGIC_ZERO_UNSIGNED : v; + + case JSON_VARIANT_REAL: +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" + return json_variant_real(v) == 0.0 ? JSON_VARIANT_MAGIC_ZERO_REAL : v; +#pragma GCC diagnostic pop + + case JSON_VARIANT_STRING: + return isempty(json_variant_string(v)) ? JSON_VARIANT_MAGIC_EMPTY_STRING : v; + + case JSON_VARIANT_ARRAY: + return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_ARRAY : v; + + case JSON_VARIANT_OBJECT: + return json_variant_elements(v) == 0 ? JSON_VARIANT_MAGIC_EMPTY_OBJECT : v; + + default: + return v; + } +} + +static JsonVariant *json_variant_conservative_normalize(JsonVariant *v) { + + /* Much like json_variant_normalize(), but won't simplify if the variant has a source/line location attached to + * it, in order not to lose context */ + + if (!v) + return NULL; + + if (!json_variant_is_regular(v)) + return v; + + if (v->source || v->line > 0 || v->column > 0) + return v; + + return json_variant_normalize(v); +} + +static int json_variant_new(JsonVariant **ret, JsonVariantType type, size_t space) { + JsonVariant *v; + + assert_return(ret, -EINVAL); + + v = malloc0(offsetof(JsonVariant, value) + space); + if (!v) + return -ENOMEM; + + v->n_ref = 1; + v->type = type; + + *ret = v; + return 0; +} + +int json_variant_new_integer(JsonVariant **ret, intmax_t i) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + + if (i == 0) { + *ret = JSON_VARIANT_MAGIC_ZERO_INTEGER; + return 0; + } + + r = json_variant_new(&v, JSON_VARIANT_INTEGER, sizeof(i)); + if (r < 0) + return r; + + v->value.integer = i; + *ret = v; + + return 0; +} + +int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + if (u == 0) { + *ret = JSON_VARIANT_MAGIC_ZERO_UNSIGNED; + return 0; + } + + r = json_variant_new(&v, JSON_VARIANT_UNSIGNED, sizeof(u)); + if (r < 0) + return r; + + v->value.unsig = u; + *ret = v; + + return 0; +} + +int json_variant_new_real(JsonVariant **ret, long double d) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" + if (d == 0.0) { +#pragma GCC diagnostic pop + *ret = JSON_VARIANT_MAGIC_ZERO_REAL; + return 0; + } + + r = json_variant_new(&v, JSON_VARIANT_REAL, sizeof(d)); + if (r < 0) + return r; + + v->value.real = d; + *ret = v; + + return 0; +} + +int json_variant_new_boolean(JsonVariant **ret, bool b) { + assert_return(ret, -EINVAL); + + if (b) + *ret = JSON_VARIANT_MAGIC_TRUE; + else + *ret = JSON_VARIANT_MAGIC_FALSE; + + return 0; +} + +int json_variant_new_null(JsonVariant **ret) { + assert_return(ret, -EINVAL); + + *ret = JSON_VARIANT_MAGIC_NULL; + return 0; +} + +int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) { + JsonVariant *v; + int r; + + assert_return(ret, -EINVAL); + if (!s) { + assert_return(n == 0, -EINVAL); + return json_variant_new_null(ret); + } + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_STRING; + return 0; + } + + r = json_variant_new(&v, JSON_VARIANT_STRING, n + 1); + if (r < 0) + return r; + + memcpy(v->string, s, n); + v->string[n] = 0; + + *ret = v; + return 0; +} + +static void json_variant_set(JsonVariant *a, JsonVariant *b) { + assert(a); + + b = json_variant_dereference(b); + if (!b) { + a->type = JSON_VARIANT_NULL; + return; + } + + a->type = json_variant_type(b); + switch (a->type) { + + case JSON_VARIANT_INTEGER: + a->value.integer = json_variant_integer(b); + break; + + case JSON_VARIANT_UNSIGNED: + a->value.unsig = json_variant_unsigned(b); + break; + + case JSON_VARIANT_REAL: + a->value.real = json_variant_real(b); + break; + + case JSON_VARIANT_BOOLEAN: + a->value.boolean = json_variant_boolean(b); + break; + + case JSON_VARIANT_STRING: { + const char *s; + + assert_se(s = json_variant_string(b)); + + /* Short strings we can store inline */ + if (strnlen(s, INLINE_STRING_MAX+1) <= INLINE_STRING_MAX) { + strcpy(a->string, s); + break; + } + + /* For longer strings, use a reference… */ + _fallthrough_; + } + + case JSON_VARIANT_ARRAY: + case JSON_VARIANT_OBJECT: + a->is_reference = true; + a->reference = json_variant_ref(json_variant_conservative_normalize(b)); + break; + + case JSON_VARIANT_NULL: + break; + + default: + assert_not_reached("Unexpected variant type"); + } +} + +static void json_variant_copy_source(JsonVariant *v, JsonVariant *from) { + assert(v); + assert(from); + + if (!json_variant_is_regular(from)) + return; + + v->line = from->line; + v->column = from->column; + v->source = json_source_ref(from->source); +} + +int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + + assert_return(ret, -EINVAL); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY; + return 0; + } + assert_return(array, -EINVAL); + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_ARRAY, + }; + + for (v->n_elements = 0; v->n_elements < n; v->n_elements++) { + JsonVariant *w = v + 1 + v->n_elements, + *c = array[v->n_elements]; + uint16_t d; + + d = json_variant_depth(c); + if (d >= DEPTH_MAX) /* Refuse too deep nesting */ + return -ELNRNG; + if (d >= v->depth) + v->depth = d + 1; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + }; + + json_variant_set(w, c); + json_variant_copy_source(w, c); + } + + *ret = TAKE_PTR(v); + return 0; +} + +int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n) { + JsonVariant *v; + size_t i; + + assert_return(ret, -EINVAL); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY; + return 0; + } + assert_return(p, -EINVAL); + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_ARRAY, + .n_elements = n, + .depth = 1, + }; + + for (i = 0; i < n; i++) { + JsonVariant *w = v + 1 + i; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + .type = JSON_VARIANT_UNSIGNED, + .value.unsig = ((const uint8_t*) p)[i], + }; + } + + *ret = v; + return 0; +} + +int json_variant_new_array_strv(JsonVariant **ret, char **l) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + size_t n; + int r; + + assert(ret); + + n = strv_length(l); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_ARRAY; + return 0; + } + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_ARRAY, + .depth = 1, + }; + + for (v->n_elements = 0; v->n_elements < n; v->n_elements++) { + JsonVariant *w = v + 1 + v->n_elements; + size_t k; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + .type = JSON_VARIANT_STRING, + }; + + k = strlen(l[v->n_elements]); + + if (k > INLINE_STRING_MAX) { + /* If string is too long, store it as reference. */ + + r = json_variant_new_stringn(&w->reference, l[v->n_elements], k); + if (r < 0) + return r; + + w->is_reference = true; + } else + memcpy(w->string, l[v->n_elements], k+1); + } + + *ret = TAKE_PTR(v); + return 0; +} + +int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n) { + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + + assert_return(ret, -EINVAL); + if (n == 0) { + *ret = JSON_VARIANT_MAGIC_EMPTY_OBJECT; + return 0; + } + assert_return(array, -EINVAL); + assert_return(n % 2 == 0, -EINVAL); + + v = new(JsonVariant, n + 1); + if (!v) + return -ENOMEM; + + *v = (JsonVariant) { + .n_ref = 1, + .type = JSON_VARIANT_OBJECT, + }; + + for (v->n_elements = 0; v->n_elements < n; v->n_elements++) { + JsonVariant *w = v + 1 + v->n_elements, + *c = array[v->n_elements]; + uint16_t d; + + if ((v->n_elements & 1) == 0 && + !json_variant_is_string(c)) + return -EINVAL; /* Every second one needs to be a string, as it is the key name */ + + d = json_variant_depth(c); + if (d >= DEPTH_MAX) /* Refuse too deep nesting */ + return -ELNRNG; + if (d >= v->depth) + v->depth = d + 1; + + *w = (JsonVariant) { + .is_embedded = true, + .parent = v, + }; + + json_variant_set(w, c); + json_variant_copy_source(w, c); + } + + *ret = TAKE_PTR(v); + return 0; +} + +static void json_variant_free_inner(JsonVariant *v) { + assert(v); + + if (!json_variant_is_regular(v)) + return; + + json_source_unref(v->source); + + if (v->is_reference) { + json_variant_unref(v->reference); + return; + } + + if (IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) { + size_t i; + + for (i = 0; i < v->n_elements; i++) + json_variant_free_inner(v + 1 + i); + } +} + +JsonVariant *json_variant_ref(JsonVariant *v) { + if (!v) + return NULL; + if (!json_variant_is_regular(v)) + return v; + + if (v->is_embedded) + json_variant_ref(v->parent); /* ref the compounding variant instead */ + else { + assert(v->n_ref > 0); + v->n_ref++; + } + + return v; +} + +JsonVariant *json_variant_unref(JsonVariant *v) { + if (!v) + return NULL; + if (!json_variant_is_regular(v)) + return NULL; + + if (v->is_embedded) + json_variant_unref(v->parent); + else { + assert(v->n_ref > 0); + v->n_ref--; + + if (v->n_ref == 0) { + json_variant_free_inner(v); + free(v); + } + } + + return NULL; +} + +void json_variant_unref_many(JsonVariant **array, size_t n) { + size_t i; + + assert(array || n == 0); + + for (i = 0; i < n; i++) + json_variant_unref(array[i]); +} + +const char *json_variant_string(JsonVariant *v) { + if (!v) + return NULL; + if (v == JSON_VARIANT_MAGIC_EMPTY_STRING) + return ""; + if (json_variant_is_magic(v)) + goto mismatch; + if (json_variant_is_const_string(v)) { + uintptr_t p = (uintptr_t) v; + + assert((p & 1) != 0); + return (const char*) (p ^ 1U); + } + + if (v->is_reference) + return json_variant_string(v->reference); + if (v->type != JSON_VARIANT_STRING) + goto mismatch; + + return v->string; + +mismatch: + log_debug("Non-string JSON variant requested as string, returning NULL."); + return NULL; +} + +bool json_variant_boolean(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_TRUE) + return true; + if (v == JSON_VARIANT_MAGIC_FALSE) + return false; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->type != JSON_VARIANT_BOOLEAN) + goto mismatch; + if (v->is_reference) + return json_variant_boolean(v->reference); + + return v->value.boolean; + +mismatch: + log_debug("Non-boolean JSON variant requested as boolean, returning false."); + return false; +} + +intmax_t json_variant_integer(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return 0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_integer(v->reference); + + switch (v->type) { + + case JSON_VARIANT_INTEGER: + return v->value.integer; + + case JSON_VARIANT_UNSIGNED: + if (v->value.unsig <= INTMAX_MAX) + return (intmax_t) v->value.unsig; + + log_debug("Unsigned integer %ju requested as signed integer and out of range, returning 0.", v->value.unsig); + return 0; + + case JSON_VARIANT_REAL: { + intmax_t converted; + + converted = (intmax_t) v->value.real; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" + if ((long double) converted == v->value.real) +#pragma GCC diagnostic pop + return converted; + + log_debug("Real %Lg requested as integer, and cannot be converted losslessly, returning 0.", v->value.real); + return 0; + } + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant requested as integer, returning 0."); + return 0; +} + +uintmax_t json_variant_unsigned(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return 0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_integer(v->reference); + + switch (v->type) { + + case JSON_VARIANT_INTEGER: + if (v->value.integer >= 0) + return (uintmax_t) v->value.integer; + + log_debug("Signed integer %ju requested as unsigned integer and out of range, returning 0.", v->value.integer); + return 0; + + case JSON_VARIANT_UNSIGNED: + return v->value.unsig; + + case JSON_VARIANT_REAL: { + uintmax_t converted; + + converted = (uintmax_t) v->value.real; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" + if ((long double) converted == v->value.real) +#pragma GCC diagnostic pop + return converted; + + log_debug("Real %Lg requested as unsigned integer, and cannot be converted losslessly, returning 0.", v->value.real); + return 0; + } + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant requested as unsigned, returning 0."); + return 0; +} + +long double json_variant_real(JsonVariant *v) { + if (!v) + return 0.0; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return 0.0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_real(v->reference); + + switch (v->type) { + + case JSON_VARIANT_REAL: + return v->value.real; + + case JSON_VARIANT_INTEGER: { + long double converted; + + converted = (long double) v->value.integer; + + if ((intmax_t) converted == v->value.integer) + return converted; + + log_debug("Signed integer %ji requested as real, and cannot be converted losslessly, returning 0.", v->value.integer); + return 0.0; + } + + case JSON_VARIANT_UNSIGNED: { + long double converted; + + converted = (long double) v->value.unsig; + + if ((uintmax_t) converted == v->value.unsig) + return converted; + + log_debug("Unsigned integer %ju requested as real, and cannot be converted losslessly, returning 0.", v->value.unsig); + return 0.0; + } + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant requested as integer, returning 0."); + return 0.0; +} + +bool json_variant_is_negative(JsonVariant *v) { + if (!v) + goto mismatch; + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER || + v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || + v == JSON_VARIANT_MAGIC_ZERO_REAL) + return false; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->is_reference) + return json_variant_is_negative(v->reference); + + /* This function is useful as checking whether numbers are negative is pretty complex since we have three types + * of numbers. And some JSON code (OCI for example) uses negative numbers to mark "not defined" numeric + * values. */ + + switch (v->type) { + + case JSON_VARIANT_REAL: + return v->value.real < 0; + + case JSON_VARIANT_INTEGER: + return v->value.integer < 0; + + case JSON_VARIANT_UNSIGNED: + return false; + + default: + break; + } + +mismatch: + log_debug("Non-integer JSON variant tested for negativity, returning false."); + return false; +} + +JsonVariantType json_variant_type(JsonVariant *v) { + + if (!v) + return _JSON_VARIANT_TYPE_INVALID; + + if (json_variant_is_const_string(v)) + return JSON_VARIANT_STRING; + + if (v == JSON_VARIANT_MAGIC_TRUE || v == JSON_VARIANT_MAGIC_FALSE) + return JSON_VARIANT_BOOLEAN; + + if (v == JSON_VARIANT_MAGIC_NULL) + return JSON_VARIANT_NULL; + + if (v == JSON_VARIANT_MAGIC_ZERO_INTEGER) + return JSON_VARIANT_INTEGER; + + if (v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED) + return JSON_VARIANT_UNSIGNED; + + if (v == JSON_VARIANT_MAGIC_ZERO_REAL) + return JSON_VARIANT_REAL; + + if (v == JSON_VARIANT_MAGIC_EMPTY_STRING) + return JSON_VARIANT_STRING; + + if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY) + return JSON_VARIANT_ARRAY; + + if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + return JSON_VARIANT_OBJECT; + + return v->type; +} + +bool json_variant_has_type(JsonVariant *v, JsonVariantType type) { + JsonVariantType rt; + + v = json_variant_dereference(v); + if (!v) + return false; + + rt = json_variant_type(v); + if (rt == type) + return true; + + /* If it's a const string, then it only can be a string, and if it is not, it's not */ + if (json_variant_is_const_string(v)) + return false; + + /* All three magic zeroes qualify as integer, unsigned and as real */ + if ((v == JSON_VARIANT_MAGIC_ZERO_INTEGER || v == JSON_VARIANT_MAGIC_ZERO_UNSIGNED || v == JSON_VARIANT_MAGIC_ZERO_REAL) && + IN_SET(type, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL, JSON_VARIANT_NUMBER)) + return true; + + /* All other magic variant types are only equal to themselves */ + if (json_variant_is_magic(v)) + return false; + + /* Handle the "number" pseudo type */ + if (type == JSON_VARIANT_NUMBER) + return IN_SET(rt, JSON_VARIANT_INTEGER, JSON_VARIANT_UNSIGNED, JSON_VARIANT_REAL); + + /* Integer conversions are OK in many cases */ + if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_UNSIGNED) + return v->value.integer >= 0; + if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_INTEGER) + return v->value.unsig <= INTMAX_MAX; + + /* Any integer that can be converted lossley to a real and back may also be considered a real */ + if (rt == JSON_VARIANT_INTEGER && type == JSON_VARIANT_REAL) + return (intmax_t) (long double) v->value.integer == v->value.integer; + if (rt == JSON_VARIANT_UNSIGNED && type == JSON_VARIANT_REAL) + return (uintmax_t) (long double) v->value.unsig == v->value.unsig; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" + /* Any real that can be converted losslessly to an integer and back may also be considered an integer */ + if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_INTEGER) + return (long double) (intmax_t) v->value.real == v->value.real; + if (rt == JSON_VARIANT_REAL && type == JSON_VARIANT_UNSIGNED) + return (long double) (uintmax_t) v->value.real == v->value.real; +#pragma GCC diagnostic pop + + return false; +} + +size_t json_variant_elements(JsonVariant *v) { + if (!v) + return 0; + if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY || + v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + return 0; + if (!json_variant_is_regular(v)) + goto mismatch; + if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) + goto mismatch; + if (v->is_reference) + return json_variant_elements(v->reference); + + return v->n_elements; + +mismatch: + log_debug("Number of elements in non-array/non-object JSON variant requested, returning 0."); + return 0; +} + +JsonVariant *json_variant_by_index(JsonVariant *v, size_t idx) { + if (!v) + return NULL; + if (v == JSON_VARIANT_MAGIC_EMPTY_ARRAY || + v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + return NULL; + if (!json_variant_is_regular(v)) + goto mismatch; + if (!IN_SET(v->type, JSON_VARIANT_ARRAY, JSON_VARIANT_OBJECT)) + goto mismatch; + if (v->is_reference) + return json_variant_by_index(v->reference, idx); + if (idx >= v->n_elements) + return NULL; + + return json_variant_conservative_normalize(v + 1 + idx); + +mismatch: + log_debug("Element in non-array/non-object JSON variant requested by index, returning NULL."); + return NULL; +} + +JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key) { + size_t i; + + if (!v) + goto not_found; + if (!key) + goto not_found; + if (v == JSON_VARIANT_MAGIC_EMPTY_OBJECT) + goto not_found; + if (!json_variant_is_regular(v)) + goto mismatch; + if (v->type != JSON_VARIANT_OBJECT) + goto mismatch; + if (v->is_reference) + return json_variant_by_key(v->reference, key); + + for (i = 0; i < v->n_elements; i += 2) { + JsonVariant *p; + + p = json_variant_dereference(v + 1 + i); + + if (!json_variant_has_type(p, JSON_VARIANT_STRING)) + continue; + + if (streq(json_variant_string(p), key)) { + + if (ret_key) + *ret_key = json_variant_conservative_normalize(v + 1 + i); + + return json_variant_conservative_normalize(v + 1 + i + 1); + } + } + +not_found: + if (ret_key) + *ret_key = NULL; + + return NULL; + +mismatch: + log_debug("Element in non-object JSON variant requested by key, returning NULL."); + if (ret_key) + *ret_key = NULL; + + return NULL; +} + +JsonVariant *json_variant_by_key(JsonVariant *v, const char *key) { + return json_variant_by_key_full(v, key, NULL); +} + +bool json_variant_equal(JsonVariant *a, JsonVariant *b) { + JsonVariantType t; + + a = json_variant_normalize(a); + b = json_variant_normalize(b); + + if (a == b) + return true; + + t = json_variant_type(a); + if (!json_variant_has_type(b, t)) + return false; + + switch (t) { + + case JSON_VARIANT_STRING: + return streq(json_variant_string(a), json_variant_string(b)); + + case JSON_VARIANT_INTEGER: + return json_variant_integer(a) == json_variant_integer(b); + + case JSON_VARIANT_UNSIGNED: + return json_variant_unsigned(a) == json_variant_unsigned(b); + + case JSON_VARIANT_REAL: +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" + return json_variant_real(a) == json_variant_real(b); +#pragma GCC diagnostic pop + + case JSON_VARIANT_BOOLEAN: + return json_variant_boolean(a) == json_variant_boolean(b); + + case JSON_VARIANT_NULL: + return true; + + case JSON_VARIANT_ARRAY: { + size_t i, n; + + n = json_variant_elements(a); + if (n != json_variant_elements(b)) + return false; + + for (i = 0; i < n; i++) { + if (!json_variant_equal(json_variant_by_index(a, i), json_variant_by_index(b, i))) + return false; + } + + return true; + } + + case JSON_VARIANT_OBJECT: { + size_t i, n; + + n = json_variant_elements(a); + if (n != json_variant_elements(b)) + return false; + + /* Iterate through all keys in 'a' */ + for (i = 0; i < n; i += 2) { + bool found = false; + size_t j; + + /* Match them against all keys in 'b' */ + for (j = 0; j < n; j += 2) { + JsonVariant *key_b; + + key_b = json_variant_by_index(b, j); + + /* During the first iteration unmark everything */ + if (i == 0) + key_b->is_marked = false; + else if (key_b->is_marked) /* In later iterations if we already marked something, don't bother with it again */ + continue; + + if (found) + continue; + + if (json_variant_equal(json_variant_by_index(a, i), key_b) && + json_variant_equal(json_variant_by_index(a, i+1), json_variant_by_index(b, j+1))) { + /* Key and values match! */ + key_b->is_marked = found = true; + + /* In the first iteration we continue the inner loop since we want to mark + * everything, otherwise exit the loop quickly after we found what we were + * looking for. */ + if (i != 0) + break; + } + } + + if (!found) + return false; + } + + return true; + } + + default: + assert_not_reached("Unknown variant type."); + } +} + +int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column) { + assert_return(v, -EINVAL); + + if (ret_source) + *ret_source = json_variant_is_regular(v) && v->source ? v->source->name : NULL; + + if (ret_line) + *ret_line = json_variant_is_regular(v) ? v->line : 0; + + if (ret_column) + *ret_column = json_variant_is_regular(v) ? v->column : 0; + + return 0; +} + +static int print_source(FILE *f, JsonVariant *v, JsonFormatFlags flags, bool whitespace) { + size_t w, k; + + if (!FLAGS_SET(flags, JSON_FORMAT_SOURCE|JSON_FORMAT_PRETTY)) + return 0; + + if (!json_variant_is_regular(v)) + return 0; + + if (!v->source && v->line == 0 && v->column == 0) + return 0; + + /* The max width we need to format the line numbers for this source file */ + w = (v->source && v->source->max_line > 0) ? + DECIMAL_STR_WIDTH(v->source->max_line) : + DECIMAL_STR_MAX(unsigned)-1; + k = (v->source && v->source->max_column > 0) ? + DECIMAL_STR_WIDTH(v->source->max_column) : + DECIMAL_STR_MAX(unsigned) -1; + + if (whitespace) { + size_t i, n; + + n = 1 + (v->source ? strlen(v->source->name) : 0) + + ((v->source && (v->line > 0 || v->column > 0)) ? 1 : 0) + + (v->line > 0 ? w : 0) + + (((v->source || v->line > 0) && v->column > 0) ? 1 : 0) + + (v->column > 0 ? k : 0) + + 2; + + for (i = 0; i < n; i++) + fputc(' ', f); + } else { + fputc('[', f); + + if (v->source) + fputs(v->source->name, f); + if (v->source && (v->line > 0 || v->column > 0)) + fputc(':', f); + if (v->line > 0) + fprintf(f, "%*u", (int) w, v->line); + if ((v->source || v->line > 0) || v->column > 0) + fputc(':', f); + if (v->column > 0) + fprintf(f, "%*u", (int) k, v->column); + + fputc(']', f); + fputc(' ', f); + } + + return 0; +} + +static int json_format(FILE *f, JsonVariant *v, JsonFormatFlags flags, const char *prefix) { + int r; + + assert(f); + assert(v); + + switch (json_variant_type(v)) { + + case JSON_VARIANT_REAL: { + locale_t loc; + + loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0); + if (loc == (locale_t) 0) + return -errno; + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT_BLUE, f); + + fprintf(f, "%.*Le", DECIMAL_DIG, json_variant_real(v)); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + + freelocale(loc); + break; + } + + case JSON_VARIANT_INTEGER: + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT_BLUE, f); + + fprintf(f, "%" PRIdMAX, json_variant_integer(v)); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + break; + + case JSON_VARIANT_UNSIGNED: + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT_BLUE, f); + + fprintf(f, "%" PRIuMAX, json_variant_unsigned(v)); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + break; + + case JSON_VARIANT_BOOLEAN: + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT, f); + + if (json_variant_boolean(v)) + fputs("true", f); + else + fputs("false", f); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + + break; + + case JSON_VARIANT_NULL: + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_HIGHLIGHT, f); + + fputs("null", f); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + break; + + case JSON_VARIANT_STRING: { + const char *q; + + fputc('"', f); + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_GREEN, f); + + for (q = json_variant_string(v); *q; q++) { + + switch (*q) { + + case '"': + fputs("\\\"", f); + break; + + case '\\': + fputs("\\\\", f); + break; + + case '\b': + fputs("\\b", f); + break; + + case '\f': + fputs("\\f", f); + break; + + case '\n': + fputs("\\n", f); + break; + + case '\r': + fputs("\\r", f); + break; + + case '\t': + fputs("\\t", f); + break; + + default: + if ((signed char) *q >= 0 && *q < ' ') + fprintf(f, "\\u%04x", *q); + else + fputc(*q, f); + break; + } + } + + if (flags & JSON_FORMAT_COLOR) + fputs(ANSI_NORMAL, f); + + fputc('"', f); + break; + } + + case JSON_VARIANT_ARRAY: { + size_t i, n; + + n = json_variant_elements(v); + + if (n == 0) + fputs("[]", f); + else { + _cleanup_free_ char *joined = NULL; + const char *prefix2; + + if (flags & JSON_FORMAT_PRETTY) { + joined = strjoin(strempty(prefix), "\t"); + if (!joined) + return -ENOMEM; + + prefix2 = joined; + fputs("[\n", f); + } else { + prefix2 = strempty(prefix); + fputc('[', f); + } + + for (i = 0; i < n; i++) { + JsonVariant *e; + + assert_se(e = json_variant_by_index(v, i)); + + if (i > 0) { + if (flags & JSON_FORMAT_PRETTY) + fputs(",\n", f); + else + fputc(',', f); + } + + if (flags & JSON_FORMAT_PRETTY) { + print_source(f, e, flags, false); + fputs(prefix2, f); + } + + r = json_format(f, e, flags, prefix2); + if (r < 0) + return r; + } + + if (flags & JSON_FORMAT_PRETTY) { + fputc('\n', f); + print_source(f, v, flags, true); + fputs(strempty(prefix), f); + } + + fputc(']', f); + } + break; + } + + case JSON_VARIANT_OBJECT: { + size_t i, n; + + n = json_variant_elements(v); + + if (n == 0) + fputs("{}", f); + else { + _cleanup_free_ char *joined = NULL; + const char *prefix2; + + if (flags & JSON_FORMAT_PRETTY) { + joined = strjoin(strempty(prefix), "\t"); + if (!joined) + return -ENOMEM; + + prefix2 = joined; + fputs("{\n", f); + } else { + prefix2 = strempty(prefix); + fputc('{', f); + } + + for (i = 0; i < n; i += 2) { + JsonVariant *e; + + e = json_variant_by_index(v, i); + + if (i > 0) { + if (flags & JSON_FORMAT_PRETTY) + fputs(",\n", f); + else + fputc(',', f); + } + + if (flags & JSON_FORMAT_PRETTY) { + print_source(f, e, flags, false); + fputs(prefix2, f); + } + + r = json_format(f, e, flags, prefix2); + if (r < 0) + return r; + + fputs(flags & JSON_FORMAT_PRETTY ? " : " : ":", f); + + r = json_format(f, json_variant_by_index(v, i+1), flags, prefix2); + if (r < 0) + return r; + } + + if (flags & JSON_FORMAT_PRETTY) { + fputc('\n', f); + print_source(f, v, flags, true); + fputs(strempty(prefix), f); + } + + fputc('}', f); + } + break; + } + + default: + assert_not_reached("Unexpected variant type."); + } + + return 0; +} + +int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret) { + _cleanup_free_ char *s = NULL; + size_t sz = 0; + int r; + + assert_return(v, -EINVAL); + assert_return(ret, -EINVAL); + + { + _cleanup_fclose_ FILE *f = NULL; + + f = open_memstream(&s, &sz); + if (!f) + return -ENOMEM; + + (void) __fsetlocking(f, FSETLOCKING_BYCALLER); + + json_variant_dump(v, flags, f, NULL); + + r = fflush_and_check(f); + } + if (r < 0) + return r; + + assert(s); + *ret = TAKE_PTR(s); + + return (int) sz; +} + +void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix) { + if (!v) + return; + + if (!f) + f = stdout; + + print_source(f, v, flags, false); + + if (((flags & (JSON_FORMAT_COLOR_AUTO|JSON_FORMAT_COLOR)) == JSON_FORMAT_COLOR_AUTO) && colors_enabled()) + flags |= JSON_FORMAT_COLOR; + + if (flags & JSON_FORMAT_SSE) + fputs("data: ", f); + if (flags & JSON_FORMAT_SEQ) + fputc('\x1e', f); /* ASCII Record Separator */ + + json_format(f, v, flags, prefix); + + if (flags & (JSON_FORMAT_PRETTY|JSON_FORMAT_SEQ|JSON_FORMAT_SSE|JSON_FORMAT_NEWLINE)) + fputc('\n', f); + if (flags & JSON_FORMAT_SSE) + fputc('\n', f); /* In case of SSE add a second newline */ +} + +static int json_variant_copy(JsonVariant **nv, JsonVariant *v) { + JsonVariantType t; + JsonVariant *c; + JsonValue value; + const void *source; + size_t k; + + assert(nv); + assert(v); + + /* Let's copy the simple types literally, and the larger types by references */ + t = json_variant_type(v); + switch (t) { + case JSON_VARIANT_INTEGER: + k = sizeof(intmax_t); + value.integer = json_variant_integer(v); + source = &value; + break; + + case JSON_VARIANT_UNSIGNED: + k = sizeof(uintmax_t); + value.unsig = json_variant_unsigned(v); + source = &value; + break; + + case JSON_VARIANT_REAL: + k = sizeof(long double); + value.real = json_variant_real(v); + source = &value; + break; + + case JSON_VARIANT_BOOLEAN: + k = sizeof(bool); + value.boolean = json_variant_boolean(v); + source = &value; + break; + + case JSON_VARIANT_NULL: + k = 0; + source = NULL; + break; + + case JSON_VARIANT_STRING: + source = json_variant_string(v); + k = strnlen(source, INLINE_STRING_MAX + 1); + if (k <= INLINE_STRING_MAX) { + k ++; + break; + } + + _fallthrough_; + + default: + /* Everything else copy by reference */ + + c = malloc0(offsetof(JsonVariant, reference) + sizeof(JsonVariant*)); + if (!c) + return -ENOMEM; + + c->n_ref = 1; + c->type = t; + c->is_reference = true; + c->reference = json_variant_ref(json_variant_normalize(v)); + + *nv = c; + return 0; + } + + c = malloc0(offsetof(JsonVariant, value) + k); + if (!c) + return -ENOMEM; + + c->n_ref = 1; + c->type = t; + + memcpy_safe(&c->value, source, k); + + *nv = c; + return 0; +} + +static bool json_single_ref(JsonVariant *v) { + + /* Checks whether the caller is the single owner of the object, i.e. can get away with changing it */ + + if (!json_variant_is_regular(v)) + return false; + + if (v->is_embedded) + return json_single_ref(v->parent); + + assert(v->n_ref > 0); + return v->n_ref == 1; +} + +static int json_variant_set_source(JsonVariant **v, JsonSource *source, unsigned line, unsigned column) { + JsonVariant *w; + int r; + + assert(v); + + /* Patch in source and line/column number. Tries to do this in-place if the caller is the sole referencer of + * the object. If not, allocates a new object, possibly a surrogate for the original one */ + + if (!*v) + return 0; + + if (source && line > source->max_line) + source->max_line = line; + if (source && column > source->max_column) + source->max_column = column; + + if (!json_variant_is_regular(*v)) { + + if (!source && line == 0 && column == 0) + return 0; + + } else { + if (json_source_equal((*v)->source, source) && + (*v)->line == line && + (*v)->column == column) + return 0; + + if (json_single_ref(*v)) { /* Sole reference? */ + json_source_unref((*v)->source); + (*v)->source = json_source_ref(source); + (*v)->line = line; + (*v)->column = column; + return 1; + } + } + + r = json_variant_copy(&w, *v); + if (r < 0) + return r; + + assert(json_variant_is_regular(w)); + assert(!w->is_embedded); + assert(w->n_ref == 1); + assert(!w->source); + + w->source = json_source_ref(source); + w->line = line; + w->column = column; + + json_variant_unref(*v); + *v = w; + + return 1; +} + +static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, size_t n) { + assert(line); + assert(column); + assert(s || n == 0); + + while (n > 0) { + + if (*s == '\n') { + (*line)++; + *column = 1; + } else if ((signed char) *s >= 0 && *s < 127) /* Process ASCII chars quickly */ + (*column)++; + else { + int w; + + w = utf8_encoded_valid_unichar(s); + if (w < 0) /* count invalid unichars as normal characters */ + w = 1; + else if ((size_t) w > n) /* never read more than the specified number of characters */ + w = (int) n; + + (*column)++; + + s += w; + n -= w; + continue; + } + + s++; + n--; + } +} + +static int unhex_ucs2(const char *c, uint16_t *ret) { + int aa, bb, cc, dd; + uint16_t x; + + assert(c); + assert(ret); + + aa = unhexchar(c[0]); + if (aa < 0) + return -EINVAL; + + bb = unhexchar(c[1]); + if (bb < 0) + return -EINVAL; + + cc = unhexchar(c[2]); + if (cc < 0) + return -EINVAL; + + dd = unhexchar(c[3]); + if (dd < 0) + return -EINVAL; + + x = ((uint16_t) aa << 12) | + ((uint16_t) bb << 8) | + ((uint16_t) cc << 4) | + ((uint16_t) dd); + + if (x <= 0) + return -EINVAL; + + *ret = x; + + return 0; +} + +static int json_parse_string(const char **p, char **ret) { + _cleanup_free_ char *s = NULL; + size_t n = 0, allocated = 0; + const char *c; + + assert(p); + assert(*p); + assert(ret); + + c = *p; + + if (*c != '"') + return -EINVAL; + + c++; + + for (;;) { + int len; + + /* Check for EOF */ + if (*c == 0) + return -EINVAL; + + /* Check for control characters 0x00..0x1f */ + if (*c > 0 && *c < ' ') + return -EINVAL; + + /* Check for control character 0x7f */ + if (*c == 0x7f) + return -EINVAL; + + if (*c == '"') { + if (!s) { + s = strdup(""); + if (!s) + return -ENOMEM; + } else + s[n] = 0; + + *p = c + 1; + + *ret = TAKE_PTR(s); + return JSON_TOKEN_STRING; + } + + if (*c == '\\') { + char ch = 0; + c++; + + if (*c == 0) + return -EINVAL; + + if (IN_SET(*c, '"', '\\', '/')) + ch = *c; + else if (*c == 'b') + ch = '\b'; + else if (*c == 'f') + ch = '\f'; + else if (*c == 'n') + ch = '\n'; + else if (*c == 'r') + ch = '\r'; + else if (*c == 't') + ch = '\t'; + else if (*c == 'u') { + char16_t x; + int r; + + r = unhex_ucs2(c + 1, &x); + if (r < 0) + return r; + + c += 5; + + if (!GREEDY_REALLOC(s, allocated, n + 5)) + return -ENOMEM; + + if (!utf16_is_surrogate(x)) + n += utf8_encode_unichar(s + n, (char32_t) x); + else if (utf16_is_trailing_surrogate(x)) + return -EINVAL; + else { + char16_t y; + + if (c[0] != '\\' || c[1] != 'u') + return -EINVAL; + + r = unhex_ucs2(c + 2, &y); + if (r < 0) + return r; + + c += 6; + + if (!utf16_is_trailing_surrogate(y)) + return -EINVAL; + + n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y)); + } + + continue; + } else + return -EINVAL; + + if (!GREEDY_REALLOC(s, allocated, n + 2)) + return -ENOMEM; + + s[n++] = ch; + c ++; + continue; + } + + len = utf8_encoded_valid_unichar(c); + if (len < 0) + return len; + + if (!GREEDY_REALLOC(s, allocated, n + len + 1)) + return -ENOMEM; + + memcpy(s + n, c, len); + n += len; + c += len; + } +} + +static int json_parse_number(const char **p, JsonValue *ret) { + bool negative = false, exponent_negative = false, is_real = false; + long double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0; + intmax_t i = 0; + uintmax_t u = 0; + const char *c; + + assert(p); + assert(*p); + assert(ret); + + c = *p; + + if (*c == '-') { + negative = true; + c++; + } + + if (*c == '0') + c++; + else { + if (!strchr("123456789", *c) || *c == 0) + return -EINVAL; + + do { + if (!is_real) { + if (negative) { + + if (i < INTMAX_MIN / 10) /* overflow */ + is_real = true; + else { + intmax_t t = 10 * i; + + if (t < INTMAX_MIN + (*c - '0')) /* overflow */ + is_real = true; + else + i = t - (*c - '0'); + } + } else { + if (u > UINTMAX_MAX / 10) /* overflow */ + is_real = true; + else { + uintmax_t t = 10 * u; + + if (t > UINTMAX_MAX - (*c - '0')) /* overflow */ + is_real = true; + else + u = t + (*c - '0'); + } + } + } + + x = 10.0 * x + (*c - '0'); + + c++; + } while (strchr("0123456789", *c) && *c != 0); + } + + if (*c == '.') { + is_real = true; + c++; + + if (!strchr("0123456789", *c) || *c == 0) + return -EINVAL; + + do { + y = 10.0 * y + (*c - '0'); + shift = 10.0 * shift; + c++; + } while (strchr("0123456789", *c) && *c != 0); + } + + if (IN_SET(*c, 'e', 'E')) { + is_real = true; + c++; + + if (*c == '-') { + exponent_negative = true; + c++; + } else if (*c == '+') + c++; + + if (!strchr("0123456789", *c) || *c == 0) + return -EINVAL; + + do { + exponent = 10.0 * exponent + (*c - '0'); + c++; + } while (strchr("0123456789", *c) && *c != 0); + } + + *p = c; + + if (is_real) { + ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10l((exponent_negative ? -1.0 : 1.0) * exponent); + return JSON_TOKEN_REAL; + } else if (negative) { + ret->integer = i; + return JSON_TOKEN_INTEGER; + } else { + ret->unsig = u; + return JSON_TOKEN_UNSIGNED; + } +} + +int json_tokenize( + const char **p, + char **ret_string, + JsonValue *ret_value, + unsigned *ret_line, /* 'ret_line' returns the line at the beginning of this token */ + unsigned *ret_column, + void **state, + unsigned *line, /* 'line' is used as a line state, it always reflect the line we are at after the token was read */ + unsigned *column) { + + unsigned start_line, start_column; + const char *start, *c; + size_t n; + int t, r; + + enum { + STATE_NULL, + STATE_VALUE, + STATE_VALUE_POST, + }; + + assert(p); + assert(*p); + assert(ret_string); + assert(ret_value); + assert(ret_line); + assert(ret_column); + assert(line); + assert(column); + assert(state); + + t = PTR_TO_INT(*state); + if (t == STATE_NULL) { + *line = 1; + *column = 1; + t = STATE_VALUE; + } + + /* Skip over the whitespace */ + n = strspn(*p, WHITESPACE); + inc_lines_columns(line, column, *p, n); + c = *p + n; + + /* Remember where we started processing this token */ + start = c; + start_line = *line; + start_column = *column; + + if (*c == 0) { + *ret_string = NULL; + *ret_value = JSON_VALUE_NULL; + r = JSON_TOKEN_END; + goto finish; + } + + switch (t) { + + case STATE_VALUE: + + if (*c == '{') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_OBJECT_OPEN; + goto null_return; + + } else if (*c == '}') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_OBJECT_CLOSE; + goto null_return; + + } else if (*c == '[') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_ARRAY_OPEN; + goto null_return; + + } else if (*c == ']') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_ARRAY_CLOSE; + goto null_return; + + } else if (*c == '"') { + + r = json_parse_string(&c, ret_string); + if (r < 0) + return r; + + *ret_value = JSON_VALUE_NULL; + *state = INT_TO_PTR(STATE_VALUE_POST); + goto finish; + + } else if (strchr("-0123456789", *c)) { + + r = json_parse_number(&c, ret_value); + if (r < 0) + return r; + + *ret_string = NULL; + *state = INT_TO_PTR(STATE_VALUE_POST); + goto finish; + + } else if (startswith(c, "true")) { + *ret_string = NULL; + ret_value->boolean = true; + c += 4; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_BOOLEAN; + goto finish; + + } else if (startswith(c, "false")) { + *ret_string = NULL; + ret_value->boolean = false; + c += 5; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_BOOLEAN; + goto finish; + + } else if (startswith(c, "null")) { + *ret_string = NULL; + *ret_value = JSON_VALUE_NULL; + c += 4; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_NULL; + goto finish; + + } + + return -EINVAL; + + case STATE_VALUE_POST: + + if (*c == ':') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_COLON; + goto null_return; + + } else if (*c == ',') { + c++; + *state = INT_TO_PTR(STATE_VALUE); + r = JSON_TOKEN_COMMA; + goto null_return; + + } else if (*c == '}') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_OBJECT_CLOSE; + goto null_return; + + } else if (*c == ']') { + c++; + *state = INT_TO_PTR(STATE_VALUE_POST); + r = JSON_TOKEN_ARRAY_CLOSE; + goto null_return; + } + + return -EINVAL; + + default: + assert_not_reached("Unexpected tokenizer state"); + } + +null_return: + *ret_string = NULL; + *ret_value = JSON_VALUE_NULL; + +finish: + inc_lines_columns(line, column, start, c - start); + *p = c; + + *ret_line = start_line; + *ret_column = start_column; + + return r; +} + +typedef enum JsonExpect { + /* The following values are used by json_parse() */ + EXPECT_TOPLEVEL, + EXPECT_END, + EXPECT_OBJECT_FIRST_KEY, + EXPECT_OBJECT_NEXT_KEY, + EXPECT_OBJECT_COLON, + EXPECT_OBJECT_VALUE, + EXPECT_OBJECT_COMMA, + EXPECT_ARRAY_FIRST_ELEMENT, + EXPECT_ARRAY_NEXT_ELEMENT, + EXPECT_ARRAY_COMMA, + + /* And these are used by json_build() */ + EXPECT_ARRAY_ELEMENT, + EXPECT_OBJECT_KEY, +} JsonExpect; + +typedef struct JsonStack { + JsonExpect expect; + JsonVariant **elements; + size_t n_elements, n_elements_allocated; + unsigned line_before; + unsigned column_before; + size_t n_suppress; /* When building: if > 0, suppress this many subsequent elements. If == (size_t) -1, suppress all subsequent elements */ +} JsonStack; + +static void json_stack_release(JsonStack *s) { + assert(s); + + json_variant_unref_many(s->elements, s->n_elements); + s->elements = mfree(s->elements); +} + +static int json_parse_internal( + const char **input, + JsonSource *source, + JsonVariant **ret, + unsigned *line, + unsigned *column, + bool continue_end) { + + size_t n_stack = 1, n_stack_allocated = 0, i; + unsigned line_buffer = 0, column_buffer = 0; + void *tokenizer_state = NULL; + JsonStack *stack = NULL; + const char *p; + int r; + + assert_return(input, -EINVAL); + assert_return(ret, -EINVAL); + + p = *input; + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack)) + return -ENOMEM; + + stack[0] = (JsonStack) { + .expect = EXPECT_TOPLEVEL, + }; + + if (!line) + line = &line_buffer; + if (!column) + column = &column_buffer; + + for (;;) { + _cleanup_free_ char *string = NULL; + unsigned line_token, column_token; + JsonVariant *add = NULL; + JsonStack *current; + JsonValue value; + int token; + + assert(n_stack > 0); + current = stack + n_stack - 1; + + if (continue_end && current->expect == EXPECT_END) + goto done; + + token = json_tokenize(&p, &string, &value, &line_token, &column_token, &tokenizer_state, line, column); + if (token < 0) { + r = token; + goto finish; + } + + switch (token) { + + case JSON_TOKEN_END: + if (current->expect != EXPECT_END) { + r = -EINVAL; + goto finish; + } + + assert(current->n_elements == 1); + assert(n_stack == 1); + goto done; + + case JSON_TOKEN_COLON: + + if (current->expect != EXPECT_OBJECT_COLON) { + r = -EINVAL; + goto finish; + } + + current->expect = EXPECT_OBJECT_VALUE; + break; + + case JSON_TOKEN_COMMA: + + if (current->expect == EXPECT_OBJECT_COMMA) + current->expect = EXPECT_OBJECT_NEXT_KEY; + else if (current->expect == EXPECT_ARRAY_COMMA) + current->expect = EXPECT_ARRAY_NEXT_ELEMENT; + else { + r = -EINVAL; + goto finish; + } + + break; + + case JSON_TOKEN_OBJECT_OPEN: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + /* Prepare the expect for when we return from the child */ + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_OBJECT_FIRST_KEY, + .line_before = line_token, + .column_before = column_token, + }; + + current = stack + n_stack - 1; + break; + + case JSON_TOKEN_OBJECT_CLOSE: + if (!IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_COMMA)) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + r = json_variant_new_object(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + + line_token = current->line_before; + column_token = current->column_before; + + json_stack_release(current); + n_stack--, current--; + + break; + + case JSON_TOKEN_ARRAY_OPEN: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + /* Prepare the expect for when we return from the child */ + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_ARRAY_FIRST_ELEMENT, + .line_before = line_token, + .column_before = column_token, + }; + + break; + + case JSON_TOKEN_ARRAY_CLOSE: + if (!IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_COMMA)) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + r = json_variant_new_array(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + + line_token = current->line_before; + column_token = current->column_before; + + json_stack_release(current); + n_stack--, current--; + break; + + case JSON_TOKEN_STRING: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_string(&add, string); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (IN_SET(current->expect, EXPECT_OBJECT_FIRST_KEY, EXPECT_OBJECT_NEXT_KEY)) + current->expect = EXPECT_OBJECT_COLON; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_REAL: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_real(&add, value.real); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_INTEGER: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_integer(&add, value.integer); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_UNSIGNED: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_unsigned(&add, value.unsig); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_BOOLEAN: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_boolean(&add, value.boolean); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + case JSON_TOKEN_NULL: + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + r = json_variant_new_null(&add); + if (r < 0) + goto finish; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_COMMA; + else { + assert(IN_SET(current->expect, EXPECT_ARRAY_FIRST_ELEMENT, EXPECT_ARRAY_NEXT_ELEMENT)); + current->expect = EXPECT_ARRAY_COMMA; + } + + break; + + default: + assert_not_reached("Unexpected token"); + } + + if (add) { + (void) json_variant_set_source(&add, source, line_token, column_token); + + if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) { + r = -ENOMEM; + goto finish; + } + + current->elements[current->n_elements++] = add; + } + } + +done: + assert(n_stack == 1); + assert(stack[0].n_elements == 1); + + *ret = json_variant_ref(stack[0].elements[0]); + *input = p; + r = 0; + +finish: + for (i = 0; i < n_stack; i++) + json_stack_release(stack + i); + + free(stack); + + return r; +} + +int json_parse(const char *input, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + return json_parse_internal(&input, NULL, ret, ret_line, ret_column, false); +} + +int json_parse_continue(const char **p, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + return json_parse_internal(p, NULL, ret, ret_line, ret_column, true); +} + +int json_parse_file(FILE *f, const char *path, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column) { + _cleanup_(json_source_unrefp) JsonSource *source = NULL; + _cleanup_free_ char *text = NULL; + const char *p; + int r; + + if (f) + r = read_full_stream(f, &text, NULL); + else if (path) + r = read_full_file(path, &text, NULL); + else + return -EINVAL; + if (r < 0) + return r; + + if (path) { + source = json_source_new(path); + if (!source) + return -ENOMEM; + } + + p = text; + return json_parse_internal(&p, source, ret, ret_line, ret_column, false); +} + +int json_buildv(JsonVariant **ret, va_list ap) { + JsonStack *stack = NULL; + size_t n_stack = 1, n_stack_allocated = 0, i; + int r; + + assert_return(ret, -EINVAL); + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack)) + return -ENOMEM; + + stack[0] = (JsonStack) { + .expect = EXPECT_TOPLEVEL, + }; + + for (;;) { + _cleanup_(json_variant_unrefp) JsonVariant *add = NULL; + size_t n_subtract = 0; /* how much to subtract from current->n_suppress, i.e. how many elements would + * have been added to the current variant */ + JsonStack *current; + int command; + + assert(n_stack > 0); + current = stack + n_stack - 1; + + if (current->expect == EXPECT_END) + goto done; + + command = va_arg(ap, int); + + switch (command) { + + case _JSON_BUILD_STRING: { + const char *p; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + p = va_arg(ap, const char *); + + if (current->n_suppress == 0) { + r = json_variant_new_string(&add, p); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_INTEGER: { + intmax_t j; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + j = va_arg(ap, intmax_t); + + if (current->n_suppress == 0) { + r = json_variant_new_integer(&add, j); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_UNSIGNED: { + uintmax_t j; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + j = va_arg(ap, uintmax_t); + + if (current->n_suppress == 0) { + r = json_variant_new_unsigned(&add, j); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_REAL: { + long double d; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + d = va_arg(ap, long double); + + if (current->n_suppress == 0) { + r = json_variant_new_real(&add, d); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_BOOLEAN: { + bool b; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + b = va_arg(ap, int); + + if (current->n_suppress == 0) { + r = json_variant_new_boolean(&add, b); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_NULL: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (current->n_suppress == 0) { + r = json_variant_new_null(&add); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + + case _JSON_BUILD_VARIANT: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + /* Note that we don't care for current->n_suppress here, after all the variant is already + * allocated anyway... */ + add = va_arg(ap, JsonVariant*); + if (!add) + add = JSON_VARIANT_MAGIC_NULL; + else + json_variant_ref(add); + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + + case _JSON_BUILD_LITERAL: { + const char *l; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + l = va_arg(ap, const char *); + + if (l) { + /* Note that we don't care for current->n_suppress here, we should generate parsing + * errors even in suppressed object properties */ + + r = json_parse(l, &add, NULL, NULL); + if (r < 0) + goto finish; + } else + add = JSON_VARIANT_MAGIC_NULL; + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_ARRAY_BEGIN: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_ARRAY_ELEMENT, + .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the + * new array, then we should + * also suppress all array + * members */ + }; + + break; + + case _JSON_BUILD_ARRAY_END: + if (current->expect != EXPECT_ARRAY_ELEMENT) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + if (current->n_suppress == 0) { + r = json_variant_new_array(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + json_stack_release(current); + n_stack--, current--; + + break; + + case _JSON_BUILD_STRV: { + char **l; + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + l = va_arg(ap, char **); + + if (current->n_suppress == 0) { + r = json_variant_new_array_strv(&add, l); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + break; + } + + case _JSON_BUILD_OBJECT_BEGIN: + + if (!IN_SET(current->expect, EXPECT_TOPLEVEL, EXPECT_OBJECT_VALUE, EXPECT_ARRAY_ELEMENT)) { + r = -EINVAL; + goto finish; + } + + if (!GREEDY_REALLOC(stack, n_stack_allocated, n_stack+1)) { + r = -ENOMEM; + goto finish; + } + current = stack + n_stack - 1; + + if (current->expect == EXPECT_TOPLEVEL) + current->expect = EXPECT_END; + else if (current->expect == EXPECT_OBJECT_VALUE) + current->expect = EXPECT_OBJECT_KEY; + else + assert(current->expect == EXPECT_ARRAY_ELEMENT); + + stack[n_stack++] = (JsonStack) { + .expect = EXPECT_OBJECT_KEY, + .n_suppress = current->n_suppress != 0 ? (size_t) -1 : 0, /* if we shall suppress the + * new object, then we should + * also suppress all object + * members */ + }; + + break; + + case _JSON_BUILD_OBJECT_END: + + if (current->expect != EXPECT_OBJECT_KEY) { + r = -EINVAL; + goto finish; + } + + assert(n_stack > 1); + + if (current->n_suppress == 0) { + r = json_variant_new_object(&add, current->elements, current->n_elements); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + json_stack_release(current); + n_stack--, current--; + + break; + + case _JSON_BUILD_PAIR: { + const char *n; + + if (current->expect != EXPECT_OBJECT_KEY) { + r = -EINVAL; + goto finish; + } + + n = va_arg(ap, const char *); + + if (current->n_suppress == 0) { + r = json_variant_new_string(&add, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; + + current->expect = EXPECT_OBJECT_VALUE; + break; + } + + case _JSON_BUILD_PAIR_CONDITION: { + const char *n; + bool b; + + if (current->expect != EXPECT_OBJECT_KEY) { + r = -EINVAL; + goto finish; + } + + b = va_arg(ap, int); + n = va_arg(ap, const char *); + + if (b && current->n_suppress == 0) { + r = json_variant_new_string(&add, n); + if (r < 0) + goto finish; + } + + n_subtract = 1; /* we generated one item */ + + if (!b && current->n_suppress != (size_t) -1) + current->n_suppress += 2; /* Suppress this one and the next item */ + + current->expect = EXPECT_OBJECT_VALUE; + break; + }} + + /* If a variant was generated, add it to our current variant, but only if we are not supposed to suppress additions */ + if (add && current->n_suppress == 0) { + if (!GREEDY_REALLOC(current->elements, current->n_elements_allocated, current->n_elements + 1)) { + r = -ENOMEM; + goto finish; + } + + current->elements[current->n_elements++] = TAKE_PTR(add); + } + + /* If we are supposed to suppress items, let's subtract how many items where generated from that + * counter. Except if the counter is (size_t) -1, i.e. we shall suppress an infinite number of elements + * on this stack level */ + if (current->n_suppress != (size_t) -1) { + if (current->n_suppress <= n_subtract) /* Saturated */ + current->n_suppress = 0; + else + current->n_suppress -= n_subtract; + } + } + +done: + assert(n_stack == 1); + assert(stack[0].n_elements == 1); + + *ret = json_variant_ref(stack[0].elements[0]); + r = 0; + +finish: + for (i = 0; i < n_stack; i++) + json_stack_release(stack + i); + + free(stack); + + va_end(ap); + + return r; +} + +int json_build(JsonVariant **ret, ...) { + va_list ap; + int r; + + va_start(ap, ret); + r = json_buildv(ret, ap); + va_end(ap); + + return r; +} + +int json_log_internal( + JsonVariant *variant, + int level, + int error, + const char *file, + int line, + const char *func, + const char *format, ...) { + + PROTECT_ERRNO; + + unsigned source_line, source_column; + char buffer[LINE_MAX]; + const char *source; + va_list ap; + int r; + + errno = ERRNO_VALUE(error); + + va_start(ap, format); + (void) vsnprintf(buffer, sizeof buffer, format, ap); + va_end(ap); + + if (variant) { + r = json_variant_get_source(variant, &source, &source_line, &source_column); + if (r < 0) + return r; + } else { + source = NULL; + source_line = 0; + source_column = 0; + } + + if (source && source_line > 0 && source_column > 0) + return log_struct_internal( + LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level), + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + "CONFIG_FILE=%s", source, + "CONFIG_LINE=%u", source_line, + "CONFIG_COLUMN=%u", source_column, + LOG_MESSAGE("%s:%u: %s", source, line, buffer), + NULL); + else + return log_struct_internal( + LOG_REALM_PLUS_LEVEL(LOG_REALM_SYSTEMD, level), + error, + file, line, func, + "MESSAGE_ID=" SD_MESSAGE_INVALID_CONFIGURATION_STR, + LOG_MESSAGE("%s", buffer), + NULL); +} + +int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata) { + const JsonDispatch *p; + size_t i, n, m; + int r, done = 0; + bool *found; + + if (!json_variant_is_object(v)) { + json_log(v, flags, 0, "JSON variant is not an object."); + + if (flags & JSON_PERMISSIVE) + return 0; + + return -EINVAL; + } + + for (p = table, m = 0; p->name; p++) + m++; + + found = newa0(bool, m); + + n = json_variant_elements(v); + for (i = 0; i < n; i += 2) { + JsonVariant *key, *value; + + assert_se(key = json_variant_by_index(v, i)); + assert_se(value = json_variant_by_index(v, i+1)); + + for (p = table; p->name; p++) + if (p->name == (const char*) -1 || + streq_ptr(json_variant_string(key), p->name)) + break; + + if (p->name) { /* Found a matching entry! :-) */ + JsonDispatchFlags merged_flags; + + merged_flags = flags | p->flags; + + if (p->type != _JSON_VARIANT_TYPE_INVALID && + !json_variant_has_type(value, p->type)) { + + json_log(value, merged_flags, 0, + "Object field '%s' has wrong type %s, expected %s.", json_variant_string(key), + json_variant_type_to_string(json_variant_type(value)), json_variant_type_to_string(p->type)); + + if (merged_flags & JSON_PERMISSIVE) + continue; + + return -EINVAL; + } + + if (found[p-table]) { + json_log(value, merged_flags, 0, "Duplicate object field '%s'.", json_variant_string(key)); + + if (merged_flags & JSON_PERMISSIVE) + continue; + + return -ENOTUNIQ; + } + + found[p-table] = true; + + if (p->callback) { + r = p->callback(json_variant_string(key), value, merged_flags, (uint8_t*) userdata + p->offset); + if (r < 0) { + if (merged_flags & JSON_PERMISSIVE) + continue; + + return r; + } + } + + done ++; + + } else { /* Didn't find a matching entry! :-( */ + + if (bad) { + r = bad(json_variant_string(key), value, flags, userdata); + if (r < 0) { + if (flags & JSON_PERMISSIVE) + continue; + + return r; + } else + done ++; + + } else { + json_log(value, flags, 0, "Unexpected object field '%s'.", json_variant_string(key)); + + if (flags & JSON_PERMISSIVE) + continue; + + return -EADDRNOTAVAIL; + } + } + } + + for (p = table; p->name; p++) { + JsonDispatchFlags merged_flags = p->flags | flags; + + if ((merged_flags & JSON_MANDATORY) && !found[p-table]) { + json_log(v, merged_flags, 0, "Missing object field '%s'.", p->name); + + if ((merged_flags & JSON_PERMISSIVE)) + continue; + + return -ENXIO; + } + } + + return done; +} + +int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + bool *b = userdata; + + assert(variant); + assert(b); + + if (!json_variant_is_boolean(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not a boolean.", strna(name)); + return -EINVAL; + } + + *b = json_variant_boolean(variant); + return 0; +} + +int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + int *b = userdata; + + assert(variant); + assert(b); + + if (!json_variant_is_boolean(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not a boolean.", strna(name)); + return -EINVAL; + } + + *b = json_variant_boolean(variant); + return 0; +} + +int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + intmax_t *i = userdata; + + assert(variant); + assert(i); + + if (!json_variant_is_integer(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not an integer.", strna(name)); + return -EINVAL; + } + + *i = json_variant_integer(variant); + return 0; +} + +int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uintmax_t *u = userdata; + + assert(variant); + assert(u); + + if (!json_variant_is_unsigned(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not an unsigned integer.", strna(name)); + return -EINVAL; + } + + *u = json_variant_unsigned(variant); + return 0; +} + +int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + uint32_t *u = userdata; + + assert(variant); + assert(u); + + if (!json_variant_is_unsigned(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not an unsigned integer.", strna(name)); + return -EINVAL; + } + + if (json_variant_unsigned(variant) > UINT32_MAX) { + json_log(variant, flags, 0, "JSON field '%s' out of bounds.", strna(name)); + return -ERANGE; + } + + *u = (uint32_t) json_variant_unsigned(variant); + return 0; +} + +int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + int32_t *i = userdata; + + assert(variant); + assert(i); + + if (!json_variant_is_integer(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not an integer.", strna(name)); + return -EINVAL; + } + + if (json_variant_integer(variant) < INT32_MIN || json_variant_integer(variant) > INT32_MAX) { + json_log(variant, flags, 0, "JSON field '%s' out of bounds.", strna(name)); + return -ERANGE; + } + + *i = (int32_t) json_variant_integer(variant); + return 0; +} + +int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + char **s = userdata; + int r; + + assert(variant); + assert(s); + + if (json_variant_is_null(variant)) { + *s = mfree(*s); + return 0; + } + + if (!json_variant_is_string(variant)) { + json_log(variant, flags, 0, "JSON field '%s' is not a string.", strna(name)); + return -EINVAL; + } + + r = free_and_strdup(s, json_variant_string(variant)); + if (r < 0) + return json_log(variant, flags, r, "Failed to allocate string: %m"); + + return 0; +} + +int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + _cleanup_strv_free_ char **l = NULL; + char ***s = userdata; + size_t i; + int r; + + assert(variant); + assert(s); + + if (json_variant_is_null(variant)) { + *s = strv_free(*s); + return 0; + } + + if (!json_variant_is_array(variant)) { + json_log(variant, 0, flags, "JSON field '%s' is not an array.", strna(name)); + return -EINVAL; + } + + for (i = 0; i < json_variant_elements(variant); i++) { + JsonVariant *e; + + assert_se(e = json_variant_by_index(variant, i)); + + if (!json_variant_is_string(e)) { + json_log(e, 0, flags, "JSON array element is not a string."); + return -EINVAL; + } + + r = strv_extend(&l, json_variant_string(e)); + if (r < 0) + return json_log(variant, flags, r, "Failed to append array element: %m"); + } + + strv_free_and_replace(*s, l); + return 0; +} + +int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata) { + JsonVariant **p = userdata; + + assert(variant); + assert(p); + + json_variant_unref(*p); + *p = json_variant_ref(variant); + + return 0; +} + +static const char* const json_variant_type_table[_JSON_VARIANT_TYPE_MAX] = { + [JSON_VARIANT_STRING] = "string", + [JSON_VARIANT_INTEGER] = "integer", + [JSON_VARIANT_UNSIGNED] = "unsigned", + [JSON_VARIANT_REAL] = "real", + [JSON_VARIANT_NUMBER] = "number", + [JSON_VARIANT_BOOLEAN] = "boolean", + [JSON_VARIANT_ARRAY] = "array", + [JSON_VARIANT_OBJECT] = "object", + [JSON_VARIANT_NULL] = "null", +}; + +DEFINE_STRING_TABLE_LOOKUP(json_variant_type, JsonVariantType); diff --git a/src/shared/json.h b/src/shared/json.h new file mode 100644 index 0000000..f8e035c --- /dev/null +++ b/src/shared/json.h @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "macro.h" +#include "string-util.h" +#include "util.h" + +/* + In case you wonder why we have our own JSON implementation, here are a couple of reasons why this implementation has + benefits over various other implementatins: + + - We need support for 64bit signed and unsigned integers, i.e. the full 64,5bit range of -9223372036854775808…18446744073709551615 + - All our variants are immutable after creation + - Special values such as true, false, zero, null, empty strings, empty array, empty objects require zero dynamic memory + - Progressive parsing + - Our integer/real type implicitly converts, but only if that's safe and loss-lessly possible + - There's a "builder" for putting together objects easily in varargs function calls + - There's a "dispatcher" for mapping objects to C data structures + - Every variant optionally carries parsing location information, which simplifies debugging and parse log error generation + - Formatter has color, line, column support + + Limitations: + - Doesn't allow embedded NUL in strings + - Can't store integers outside of the -9223372036854775808…18446744073709551615 range (it will use 'long double' for + values outside this range, which is lossy) + - Can't store negative zero (will be treated identical to positive zero, and not retained across serialization) + - Can't store non-integer numbers that can't be stored in "long double" losslessly + - Allows creation and parsing of objects with duplicate keys. The "dispatcher" will refuse them however. This means + we can parse and pass around such objects, but will carefully refuse them when we convert them into our own data. + + (These limitations should be pretty much in line with those of other JSON implementations, in fact might be less + limiting in most cases even.) +*/ + +typedef struct JsonVariant JsonVariant; + +typedef enum JsonVariantType { + JSON_VARIANT_STRING, + JSON_VARIANT_INTEGER, + JSON_VARIANT_UNSIGNED, + JSON_VARIANT_REAL, + JSON_VARIANT_NUMBER, /* This a pseudo-type: we can never create variants of this type, but we use it as wildcard check for the above three types */ + JSON_VARIANT_BOOLEAN, + JSON_VARIANT_ARRAY, + JSON_VARIANT_OBJECT, + JSON_VARIANT_NULL, + _JSON_VARIANT_TYPE_MAX, + _JSON_VARIANT_TYPE_INVALID = -1 +} JsonVariantType; + +int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n); +int json_variant_new_integer(JsonVariant **ret, intmax_t i); +int json_variant_new_unsigned(JsonVariant **ret, uintmax_t u); +int json_variant_new_real(JsonVariant **ret, long double d); +int json_variant_new_boolean(JsonVariant **ret, bool b); +int json_variant_new_array(JsonVariant **ret, JsonVariant **array, size_t n); +int json_variant_new_array_bytes(JsonVariant **ret, const void *p, size_t n); +int json_variant_new_array_strv(JsonVariant **ret, char **l); +int json_variant_new_object(JsonVariant **ret, JsonVariant **array, size_t n); +int json_variant_new_null(JsonVariant **ret); + +static inline int json_variant_new_string(JsonVariant **ret, const char *s) { + return json_variant_new_stringn(ret, s, strlen_ptr(s)); +} + +JsonVariant *json_variant_ref(JsonVariant *v); +JsonVariant *json_variant_unref(JsonVariant *v); +void json_variant_unref_many(JsonVariant **array, size_t n); + +DEFINE_TRIVIAL_CLEANUP_FUNC(JsonVariant *, json_variant_unref); + +const char *json_variant_string(JsonVariant *v); +intmax_t json_variant_integer(JsonVariant *v); +uintmax_t json_variant_unsigned(JsonVariant *v); +long double json_variant_real(JsonVariant *v); +bool json_variant_boolean(JsonVariant *v); + +JsonVariantType json_variant_type(JsonVariant *v); +bool json_variant_has_type(JsonVariant *v, JsonVariantType type); + +static inline bool json_variant_is_string(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_STRING); +} + +static inline bool json_variant_is_integer(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_INTEGER); +} + +static inline bool json_variant_is_unsigned(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_UNSIGNED); +} + +static inline bool json_variant_is_real(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_REAL); +} + +static inline bool json_variant_is_number(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_NUMBER); +} + +static inline bool json_variant_is_boolean(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_BOOLEAN); +} + +static inline bool json_variant_is_array(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_ARRAY); +} + +static inline bool json_variant_is_object(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_OBJECT); +} + +static inline bool json_variant_is_null(JsonVariant *v) { + return json_variant_has_type(v, JSON_VARIANT_NULL); +} + +bool json_variant_is_negative(JsonVariant *v); + +size_t json_variant_elements(JsonVariant *v); +JsonVariant *json_variant_by_index(JsonVariant *v, size_t index); +JsonVariant *json_variant_by_key(JsonVariant *v, const char *key); +JsonVariant *json_variant_by_key_full(JsonVariant *v, const char *key, JsonVariant **ret_key); + +bool json_variant_equal(JsonVariant *a, JsonVariant *b); + +struct json_variant_foreach_state { + JsonVariant *variant; + size_t idx; +}; + +#define JSON_VARIANT_ARRAY_FOREACH(i, v) \ + for (struct json_variant_foreach_state _state = { (v), 0 }; \ + _state.idx < json_variant_elements(_state.variant) && \ + ({ i = json_variant_by_index(_state.variant, _state.idx); \ + true; }); \ + _state.idx++) + +#define JSON_VARIANT_OBJECT_FOREACH(k, e, v) \ + for (struct json_variant_foreach_state _state = { (v), 0 }; \ + _state.idx < json_variant_elements(_state.variant) && \ + ({ k = json_variant_by_index(_state.variant, _state.idx); \ + e = json_variant_by_index(_state.variant, _state.idx + 1); \ + true; }); \ + _state.idx += 2) + +int json_variant_get_source(JsonVariant *v, const char **ret_source, unsigned *ret_line, unsigned *ret_column); + +typedef enum JsonFormatFlags { + JSON_FORMAT_NEWLINE = 1 << 0, /* suffix with newline */ + JSON_FORMAT_PRETTY = 1 << 1, /* add internal whitespace to appeal to human readers */ + JSON_FORMAT_COLOR = 1 << 2, /* insert ANSI color sequences */ + JSON_FORMAT_COLOR_AUTO = 1 << 3, /* insert ANSI color sequences if colors_enabled() says so */ + JSON_FORMAT_SOURCE = 1 << 4, /* prefix with source filename/line/column */ + JSON_FORMAT_SSE = 1 << 5, /* prefix/suffix with W3C server-sent events */ + JSON_FORMAT_SEQ = 1 << 6, /* prefix/suffix with RFC 7464 application/json-seq */ +} JsonFormatFlags; + +int json_variant_format(JsonVariant *v, JsonFormatFlags flags, char **ret); +void json_variant_dump(JsonVariant *v, JsonFormatFlags flags, FILE *f, const char *prefix); + +int json_parse(const char *string, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column); +int json_parse_continue(const char **p, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column); +int json_parse_file(FILE *f, const char *path, JsonVariant **ret, unsigned *ret_line, unsigned *ret_column); + +enum { + _JSON_BUILD_STRING, + _JSON_BUILD_INTEGER, + _JSON_BUILD_UNSIGNED, + _JSON_BUILD_REAL, + _JSON_BUILD_BOOLEAN, + _JSON_BUILD_ARRAY_BEGIN, + _JSON_BUILD_ARRAY_END, + _JSON_BUILD_OBJECT_BEGIN, + _JSON_BUILD_OBJECT_END, + _JSON_BUILD_PAIR, + _JSON_BUILD_PAIR_CONDITION, + _JSON_BUILD_NULL, + _JSON_BUILD_VARIANT, + _JSON_BUILD_LITERAL, + _JSON_BUILD_STRV, + _JSON_BUILD_MAX, +}; + +#define JSON_BUILD_STRING(s) _JSON_BUILD_STRING, ({ const char *_x = s; _x; }) +#define JSON_BUILD_INTEGER(i) _JSON_BUILD_INTEGER, ({ intmax_t _x = i; _x; }) +#define JSON_BUILD_UNSIGNED(u) _JSON_BUILD_UNSIGNED, ({ uintmax_t _x = u; _x; }) +#define JSON_BUILD_REAL(d) _JSON_BUILD_REAL, ({ long double _x = d; _x; }) +#define JSON_BUILD_BOOLEAN(b) _JSON_BUILD_BOOLEAN, ({ bool _x = b; _x; }) +#define JSON_BUILD_ARRAY(...) _JSON_BUILD_ARRAY_BEGIN, __VA_ARGS__, _JSON_BUILD_ARRAY_END +#define JSON_BUILD_OBJECT(...) _JSON_BUILD_OBJECT_BEGIN, __VA_ARGS__, _JSON_BUILD_OBJECT_END +#define JSON_BUILD_PAIR(n, ...) _JSON_BUILD_PAIR, ({ const char *_x = n; _x; }), __VA_ARGS__ +#define JSON_BUILD_PAIR_CONDITION(c, n, ...) _JSON_BUILD_PAIR_CONDITION, ({ bool _x = c; _x; }), ({ const char *_x = n; _x; }), __VA_ARGS__ +#define JSON_BUILD_NULL _JSON_BUILD_NULL +#define JSON_BUILD_VARIANT(v) _JSON_BUILD_VARIANT, ({ JsonVariant *_x = v; _x; }) +#define JSON_BUILD_LITERAL(l) _JSON_BUILD_LITERAL, ({ const char *_x = l; _x; }) +#define JSON_BUILD_STRV(l) _JSON_BUILD_STRV, ({ char **_x = l; _x; }) + +int json_build(JsonVariant **ret, ...); +int json_buildv(JsonVariant **ret, va_list ap); + +/* A bitmask of flags used by the dispatch logic. Note that this is a combined bit mask, that is generated from the bit + * mask originally passed into json_dispatch(), the individual bitmask associated with the static JsonDispatch callout + * entry, as well the bitmask specified for json_log() calls */ +typedef enum JsonDispatchFlags { + /* The following three may be set in JsonDispatch's .flags field or the json_dispatch() flags parameter */ + JSON_PERMISSIVE = 1 << 0, /* Shall parsing errors be considered fatal for this property? */ + JSON_MANDATORY = 1 << 1, /* Should existance of this property be mandatory? */ + JSON_LOG = 1 << 2, /* Should the parser log about errors? */ + + /* The following two may be passed into log_json() in addition to the three above */ + JSON_DEBUG = 1 << 3, /* Indicates that this log message is a debug message */ + JSON_WARNING = 1 << 4, /* Indicates that this log message is a warning message */ +} JsonDispatchFlags; + +typedef int (*JsonDispatchCallback)(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); + +typedef struct JsonDispatch { + const char *name; + JsonVariantType type; + JsonDispatchCallback callback; + size_t offset; + JsonDispatchFlags flags; +} JsonDispatch; + +int json_dispatch(JsonVariant *v, const JsonDispatch table[], JsonDispatchCallback bad, JsonDispatchFlags flags, void *userdata); + +int json_dispatch_string(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_strv(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_boolean(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_tristate(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_variant(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_integer(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_unsigned(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_uint32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); +int json_dispatch_int32(const char *name, JsonVariant *variant, JsonDispatchFlags flags, void *userdata); + +assert_cc(sizeof(uintmax_t) == sizeof(uint64_t)) +#define json_dispatch_uint64 json_dispatch_unsigned + +assert_cc(sizeof(intmax_t) == sizeof(int64_t)) +#define json_dispatch_int64 json_dispatch_integer + +static inline int json_dispatch_level(JsonDispatchFlags flags) { + + /* Did the user request no logging? If so, then never log higher than LOG_DEBUG. Also, if this is marked as + * debug message, then also log at debug level. */ + + if (!(flags & JSON_LOG) || + (flags & JSON_DEBUG)) + return LOG_DEBUG; + + /* Are we invoked in permissive mode, or is this explicitly marked as warning message? Then this should be + * printed at LOG_WARNING */ + if (flags & (JSON_PERMISSIVE|JSON_WARNING)) + return LOG_WARNING; + + /* Otherwise it's an error. */ + return LOG_ERR; +} + +int json_log_internal(JsonVariant *variant, int level, int error, const char *file, int line, const char *func, const char *format, ...) _printf_(7, 8); + +#define json_log(variant, flags, error, ...) \ + ({ \ + int _level = json_dispatch_level(flags), _e = (error); \ + (log_get_max_level() >= LOG_PRI(_level)) \ + ? json_log_internal(variant, _level, _e, __FILE__, __LINE__, __func__, __VA_ARGS__) \ + : -abs(_e); \ + }) + +#define JSON_VARIANT_STRING_CONST(x) _JSON_VARIANT_STRING_CONST(UNIQ, (x)) + +#define _JSON_VARIANT_STRING_CONST(xq, x) \ + ({ \ + _align_(2) static const char UNIQ_T(json_string_const, xq)[] = (x); \ + assert((((uintptr_t) UNIQ_T(json_string_const, xq)) & 1) == 0); \ + (JsonVariant*) ((uintptr_t) UNIQ_T(json_string_const, xq) + 1); \ + }) + +const char *json_variant_type_to_string(JsonVariantType t); +JsonVariantType json_variant_type_from_string(const char *s); diff --git a/src/shared/libshared.sym b/src/shared/libshared.sym new file mode 100644 index 0000000..6a7495a --- /dev/null +++ b/src/shared/libshared.sym @@ -0,0 +1,3 @@ +SD_SHARED { + global: *; +}; diff --git a/src/shared/linux-3.13/dm-ioctl.h b/src/shared/linux-3.13/dm-ioctl.h new file mode 100644 index 0000000..c8a4302 --- /dev/null +++ b/src/shared/linux-3.13/dm-ioctl.h @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited. + * Copyright (C) 2004 - 2009 Red Hat, Inc. All rights reserved. + * + * This file is released under the LGPL. + */ + +#ifndef _LINUX_DM_IOCTL_V4_H +#define _LINUX_DM_IOCTL_V4_H + +#include <linux/types.h> + +#define DM_DIR "mapper" /* Slashes not supported */ +#define DM_CONTROL_NODE "control" +#define DM_MAX_TYPE_NAME 16 +#define DM_NAME_LEN 128 +#define DM_UUID_LEN 129 + +/* + * A traditional ioctl interface for the device mapper. + * + * Each device can have two tables associated with it, an + * 'active' table which is the one currently used by io passing + * through the device, and an 'inactive' one which is a table + * that is being prepared as a replacement for the 'active' one. + * + * DM_VERSION: + * Just get the version information for the ioctl interface. + * + * DM_REMOVE_ALL: + * Remove all dm devices, destroy all tables. Only really used + * for debug. + * + * DM_LIST_DEVICES: + * Get a list of all the dm device names. + * + * DM_DEV_CREATE: + * Create a new device, neither the 'active' or 'inactive' table + * slots will be filled. The device will be in suspended state + * after creation, however any io to the device will get errored + * since it will be out-of-bounds. + * + * DM_DEV_REMOVE: + * Remove a device, destroy any tables. + * + * DM_DEV_RENAME: + * Rename a device or set its uuid if none was previously supplied. + * + * DM_SUSPEND: + * This performs both suspend and resume, depending which flag is + * passed in. + * Suspend: This command will not return until all pending io to + * the device has completed. Further io will be deferred until + * the device is resumed. + * Resume: It is no longer an error to issue this command on an + * unsuspended device. If a table is present in the 'inactive' + * slot, it will be moved to the active slot, then the old table + * from the active slot will be _destroyed_. Finally the device + * is resumed. + * + * DM_DEV_STATUS: + * Retrieves the status for the table in the 'active' slot. + * + * DM_DEV_WAIT: + * Wait for a significant event to occur to the device. This + * could either be caused by an event triggered by one of the + * targets of the table in the 'active' slot, or a table change. + * + * DM_TABLE_LOAD: + * Load a table into the 'inactive' slot for the device. The + * device does _not_ need to be suspended prior to this command. + * + * DM_TABLE_CLEAR: + * Destroy any table in the 'inactive' slot (ie. abort). + * + * DM_TABLE_DEPS: + * Return a set of device dependencies for the 'active' table. + * + * DM_TABLE_STATUS: + * Return the targets status for the 'active' table. + * + * DM_TARGET_MSG: + * Pass a message string to the target at a specific offset of a device. + * + * DM_DEV_SET_GEOMETRY: + * Set the geometry of a device by passing in a string in this format: + * + * "cylinders heads sectors_per_track start_sector" + * + * Beware that CHS geometry is nearly obsolete and only provided + * for compatibility with dm devices that can be booted by a PC + * BIOS. See struct hd_geometry for range limits. Also note that + * the geometry is erased if the device size changes. + */ + +/* + * All ioctl arguments consist of a single chunk of memory, with + * this structure at the start. If a uuid is specified any + * lookup (eg. for a DM_INFO) will be done on that, *not* the + * name. + */ +struct dm_ioctl { + /* + * The version number is made up of three parts: + * major - no backward or forward compatibility, + * minor - only backwards compatible, + * patch - both backwards and forwards compatible. + * + * All clients of the ioctl interface should fill in the + * version number of the interface that they were + * compiled with. + * + * All recognised ioctl commands (ie. those that don't + * return -ENOTTY) fill out this field, even if the + * command failed. + */ + __u32 version[3]; /* in/out */ + __u32 data_size; /* total size of data passed in + * including this struct */ + + __u32 data_start; /* offset to start of data + * relative to start of this struct */ + + __u32 target_count; /* in/out */ + __s32 open_count; /* out */ + __u32 flags; /* in/out */ + + /* + * event_nr holds either the event number (input and output) or the + * udev cookie value (input only). + * The DM_DEV_WAIT ioctl takes an event number as input. + * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls + * use the field as a cookie to return in the DM_COOKIE + * variable with the uevents they issue. + * For output, the ioctls return the event number, not the cookie. + */ + __u32 event_nr; /* in/out */ + __u32 padding; + + __u64 dev; /* in/out */ + + char name[DM_NAME_LEN]; /* device name */ + char uuid[DM_UUID_LEN]; /* unique identifier for + * the block device */ + char data[7]; /* padding or data */ +}; + +/* + * Used to specify tables. These structures appear after the + * dm_ioctl. + */ +struct dm_target_spec { + __u64 sector_start; + __u64 length; + __s32 status; /* used when reading from kernel only */ + + /* + * Location of the next dm_target_spec. + * - When specifying targets on a DM_TABLE_LOAD command, this value is + * the number of bytes from the start of the "current" dm_target_spec + * to the start of the "next" dm_target_spec. + * - When retrieving targets on a DM_TABLE_STATUS command, this value + * is the number of bytes from the start of the first dm_target_spec + * (that follows the dm_ioctl struct) to the start of the "next" + * dm_target_spec. + */ + __u32 next; + + char target_type[DM_MAX_TYPE_NAME]; + + /* + * Parameter string starts immediately after this object. + * Be careful to add padding after string to ensure correct + * alignment of subsequent dm_target_spec. + */ +}; + +/* + * Used to retrieve the target dependencies. + */ +struct dm_target_deps { + __u32 count; /* Array size */ + __u32 padding; /* unused */ + __u64 dev[0]; /* out */ +}; + +/* + * Used to get a list of all dm devices. + */ +struct dm_name_list { + __u64 dev; + __u32 next; /* offset to the next record from + the _start_ of this */ + char name[0]; +}; + +/* + * Used to retrieve the target versions + */ +struct dm_target_versions { + __u32 next; + __u32 version[3]; + + char name[0]; +}; + +/* + * Used to pass message to a target + */ +struct dm_target_msg { + __u64 sector; /* Device sector */ + + char message[0]; +}; + +/* + * If you change this make sure you make the corresponding change + * to dm-ioctl.c:lookup_ioctl() + */ +enum { + /* Top level cmds */ + DM_VERSION_CMD = 0, + DM_REMOVE_ALL_CMD, + DM_LIST_DEVICES_CMD, + + /* device level cmds */ + DM_DEV_CREATE_CMD, + DM_DEV_REMOVE_CMD, + DM_DEV_RENAME_CMD, + DM_DEV_SUSPEND_CMD, + DM_DEV_STATUS_CMD, + DM_DEV_WAIT_CMD, + + /* Table level cmds */ + DM_TABLE_LOAD_CMD, + DM_TABLE_CLEAR_CMD, + DM_TABLE_DEPS_CMD, + DM_TABLE_STATUS_CMD, + + /* Added later */ + DM_LIST_VERSIONS_CMD, + DM_TARGET_MSG_CMD, + DM_DEV_SET_GEOMETRY_CMD +}; + +#define DM_IOCTL 0xfd + +#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) +#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) +#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl) + +#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) +#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) +#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) +#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) +#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) +#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl) + +#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl) +#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl) +#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl) +#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl) + +#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl) + +#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) +#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) + +#define DM_VERSION_MAJOR 4 +#define DM_VERSION_MINOR 27 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl (2013-10-30)" + +/* Status bits */ +#define DM_READONLY_FLAG (1 << 0) /* In/Out */ +#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */ +#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */ + +/* + * Flag passed into ioctl STATUS command to get table information + * rather than current status. + */ +#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */ + +/* + * Flags that indicate whether a table is present in either of + * the two table slots that a device has. + */ +#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */ +#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */ + +/* + * Indicates that the buffer passed in wasn't big enough for the + * results. + */ +#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */ + +/* + * This flag is now ignored. + */ +#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */ + +/* + * Set this to avoid attempting to freeze any filesystem when suspending. + */ +#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */ + +/* + * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. + */ +#define DM_NOFLUSH_FLAG (1 << 11) /* In */ + +/* + * If set, any table information returned will relate to the inactive + * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG + * is set before using the data returned. + */ +#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */ + +/* + * If set, a uevent was generated for which the caller may need to wait. + */ +#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */ + +/* + * If set, rename changes the uuid not the name. Only permitted + * if no uuid was previously supplied: an existing uuid cannot be changed. + */ +#define DM_UUID_FLAG (1 << 14) /* In */ + +/* + * If set, all buffers are wiped after use. Use when sending + * or requesting sensitive data such as an encryption key. + */ +#define DM_SECURE_DATA_FLAG (1 << 15) /* In */ + +/* + * If set, a message generated output data. + */ +#define DM_DATA_OUT_FLAG (1 << 16) /* Out */ + +/* + * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if + * the device cannot be removed immediately because it is still in use + * it should instead be scheduled for removal when it gets closed. + * + * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this + * flag indicates that the device is scheduled to be removed when it + * gets closed. + */ +#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */ + +#endif /* _LINUX_DM_IOCTL_H */ diff --git a/src/shared/linux/auto_dev-ioctl.h b/src/shared/linux/auto_dev-ioctl.h new file mode 100644 index 0000000..d9838eb --- /dev/null +++ b/src/shared/linux/auto_dev-ioctl.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright © 2008 Red Hat, Inc. All rights reserved. + * Copyright © 2008 Ian Kent <raven@themaw.net> + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + */ + +#ifndef _LINUX_AUTO_DEV_IOCTL_H +#define _LINUX_AUTO_DEV_IOCTL_H + +#include <linux/auto_fs.h> + +#ifdef __KERNEL__ +#include <linux/string.h> +#else +#include <string.h> +#endif /* __KERNEL__ */ + +#define AUTOFS_DEVICE_NAME "autofs" + +#define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 +#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 + +#define AUTOFS_DEVID_LEN 16 + +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) + +/* + * An ioctl interface for autofs mount point control. + */ + +struct args_protover { + __u32 version; +}; + +struct args_protosubver { + __u32 sub_version; +}; + +struct args_openmount { + __u32 devid; +}; + +struct args_ready { + __u32 token; +}; + +struct args_fail { + __u32 token; + __s32 status; +}; + +struct args_setpipefd { + __s32 pipefd; +}; + +struct args_timeout { + __u64 timeout; +}; + +struct args_requester { + __u32 uid; + __u32 gid; +}; + +struct args_expire { + __u32 how; +}; + +struct args_askumount { + __u32 may_umount; +}; + +struct args_ismountpoint { + union { + struct args_in { + __u32 type; + } in; + struct args_out { + __u32 devid; + __u32 magic; + } out; + }; +}; + +/* + * All the ioctls use this structure. + * When sending a path size must account for the total length + * of the chunk of memory otherwise is is the size of the + * structure. + */ + +struct autofs_dev_ioctl { + __u32 ver_major; + __u32 ver_minor; + __u32 size; /* total size of data passed in + * including this struct */ + __s32 ioctlfd; /* automount command fd */ + + /* Command parameters */ + + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; + + char path[0]; +}; + +static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) { + memset(in, 0, sizeof(struct autofs_dev_ioctl)); + in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; + in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; + in->size = sizeof(struct autofs_dev_ioctl); + in->ioctlfd = -1; + return; +} + +/* + * If you change this make sure you make the corresponding change + * to autofs-dev-ioctl.c:lookup_ioctl() + */ +enum { + /* Get various version info */ + AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71, + AUTOFS_DEV_IOCTL_PROTOVER_CMD, + AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, + + /* Open mount ioctl fd */ + AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, + + /* Close mount ioctl fd */ + AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, + + /* Mount/expire status returns */ + AUTOFS_DEV_IOCTL_READY_CMD, + AUTOFS_DEV_IOCTL_FAIL_CMD, + + /* Activate/deactivate autofs mount */ + AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, + AUTOFS_DEV_IOCTL_CATATONIC_CMD, + + /* Expiry timeout */ + AUTOFS_DEV_IOCTL_TIMEOUT_CMD, + + /* Get mount last requesting uid and gid */ + AUTOFS_DEV_IOCTL_REQUESTER_CMD, + + /* Check for eligible expire candidates */ + AUTOFS_DEV_IOCTL_EXPIRE_CMD, + + /* Request busy status */ + AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, + + /* Check if path is a mountpoint */ + AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, +}; + +#define AUTOFS_IOCTL 0x93 + +#define AUTOFS_DEV_IOCTL_VERSION \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_PROTOVER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_PROTOSUBVER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_OPENMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_READY \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_FAIL \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_SETPIPEFD \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_CATATONIC \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_TIMEOUT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_REQUESTER \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_EXPIRE \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_ASKUMOUNT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl) + +#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \ + _IOWR(AUTOFS_IOCTL, \ + AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl) + +#endif /* _LINUX_AUTO_DEV_IOCTL_H */ diff --git a/src/shared/linux/bpf.h b/src/shared/linux/bpf.h new file mode 100644 index 0000000..1df9e7e --- /dev/null +++ b/src/shared/linux/bpf.h @@ -0,0 +1,1109 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include <linux/types.h> +#include <linux/bpf_common.h> + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word (64-bit) */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +/* jmp encodings */ +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +struct bpf_lpm_trie_key { + __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ + __u8 data[0]; /* Arbitrary size */ +}; + +/* BPF syscall commands, see bpf(2) man-page for details. */ +enum bpf_cmd { + BPF_MAP_CREATE, + BPF_MAP_LOOKUP_ELEM, + BPF_MAP_UPDATE_ELEM, + BPF_MAP_DELETE_ELEM, + BPF_MAP_GET_NEXT_KEY, + BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, +}; + +enum bpf_map_type { + BPF_MAP_TYPE_UNSPEC, + BPF_MAP_TYPE_HASH, + BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, + BPF_MAP_TYPE_LRU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH, + BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, + BPF_MAP_TYPE_CPUMAP, +}; + +enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, + BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, + BPF_PROG_TYPE_CGROUP_DEVICE, +}; + +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_SOCK_OPS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, + BPF_CGROUP_DEVICE, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) + +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ +#define BPF_PSEUDO_MAP_FD 1 + +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + +/* flags for BPF_MAP_UPDATE_ELEM command */ +#define BPF_ANY 0 /* create new element or update existing */ +#define BPF_NOEXIST 1 /* create new element if it didn't exist */ +#define BPF_EXIST 2 /* update existing element */ + +/* flags for BPF_MAP_CREATE command */ +#define BPF_F_NO_PREALLOC (1U << 0) +/* Instead of having one common LRU list in the + * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list + * which can scale and perform better. + * Note, the LRU nodes (including free nodes) cannot be moved + * across different LRU lists. + */ +#define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) + +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + +#define BPF_OBJ_NAME_LEN 16U + +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + +union bpf_attr { + struct { /* anonymous struct used by BPF_MAP_CREATE command */ + __u32 map_type; /* one of enum bpf_map_type */ + __u32 key_size; /* size of key in bytes */ + __u32 value_size; /* size of value in bytes */ + __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ + __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ + char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ + }; + + struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + __u32 map_fd; + __aligned_u64 key; + union { + __aligned_u64 value; + __aligned_u64 next_key; + }; + __u64 flags; + }; + + struct { /* anonymous struct used by BPF_PROG_LOAD command */ + __u32 prog_type; /* one of enum bpf_prog_type */ + __u32 insn_cnt; + __aligned_u64 insns; + __aligned_u64 license; + __u32 log_level; /* verbosity level of verifier */ + __u32 log_size; /* size of user buffer */ + __aligned_u64 log_buf; /* user supplied buffer */ + __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; + char prog_name[BPF_OBJ_NAME_LEN]; + __u32 prog_ifindex; /* ifindex of netdev to prep for */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + __u32 file_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + __u32 attach_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; + __u32 data_size_out; + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + } test; + + struct { /* anonymous struct used by BPF_*_GET_*_ID */ + union { + __u32 start_id; + __u32 prog_id; + __u32 map_id; + }; + __u32 next_id; + __u32 open_flags; + }; + + struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ + __u32 bpf_fd; + __u32 info_len; + __aligned_u64 info; + } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; +} __attribute__((aligned(8))); + +/* BPF helper function descriptions: + * + * void *bpf_map_lookup_elem(&map, &key) + * Return: Map value or NULL + * + * int bpf_map_update_elem(&map, &key, &value, flags) + * Return: 0 on success or negative error + * + * int bpf_map_delete_elem(&map, &key) + * Return: 0 on success or negative error + * + * int bpf_probe_read(void *dst, int size, void *src) + * Return: 0 on success or negative error + * + * u64 bpf_ktime_get_ns(void) + * Return: current ktime + * + * int bpf_trace_printk(const char *fmt, int fmt_size, ...) + * Return: length of buffer written or negative error + * + * u32 bpf_prandom_u32(void) + * Return: random value + * + * u32 bpf_raw_smp_processor_id(void) + * Return: SMP processor ID + * + * int bpf_skb_store_bytes(skb, offset, from, len, flags) + * store bytes into packet + * @skb: pointer to skb + * @offset: offset within packet from skb->mac_header + * @from: pointer where to copy bytes from + * @len: number of bytes to store into packet + * @flags: bit 0 - if true, recompute skb->csum + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l3_csum_replace(skb, offset, from, to, flags) + * recompute IP checksum + * @skb: pointer to skb + * @offset: offset within packet where IP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_l4_csum_replace(skb, offset, from, to, flags) + * recompute TCP/UDP checksum + * @skb: pointer to skb + * @offset: offset within packet where TCP/UDP checksum is located + * @from: old value of header field + * @to: new value of header field + * @flags: bits 0-3 - size of header field + * bit 4 - is pseudo header + * other bits - reserved + * Return: 0 on success or negative error + * + * int bpf_tail_call(ctx, prog_array_map, index) + * jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: 32-bit index inside array that selects specific program to run + * Return: 0 on success or negative error + * + * int bpf_clone_redirect(skb, ifindex, flags) + * redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success or negative error + * + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + * + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + * + * int bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success or negative error + * + * u32 bpf_get_cgroup_classid(skb) + * retrieve a proc's classid + * @skb: pointer to skb + * Return: classid if != 0 + * + * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) + * Return: 0 on success or negative error + * + * int bpf_skb_vlan_pop(skb) + * Return: 0 on success or negative error + * + * int bpf_skb_get_tunnel_key(skb, key, size, flags) + * int bpf_skb_set_tunnel_key(skb, key, size, flags) + * retrieve or populate tunnel metadata + * @skb: pointer to skb + * @key: pointer to 'struct bpf_tunnel_key' + * @size: size of 'struct bpf_tunnel_key' + * @flags: room for future extensions + * Return: 0 on success or negative error + * + * u64 bpf_perf_event_read(map, flags) + * read perf event counter value + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * Return: value of perf event counter read or error code + * + * int bpf_redirect(ifindex, flags) + * redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) + * redirect to endpoint in map + * @map: pointer to dev map + * @key: index in map to lookup + * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error + * + * u32 bpf_get_route_realm(skb) + * retrieve a dst's tclassid + * @skb: pointer to skb + * Return: realm if != 0 + * + * int bpf_perf_event_output(ctx, map, flags, data, size) + * output perf raw sample + * @ctx: struct pt_regs* + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @data: data on stack to be output as raw data + * @size: size of data + * Return: 0 on success or negative error + * + * int bpf_get_stackid(ctx, map, flags) + * walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + * + * s64 bpf_csum_diff(from, from_size, to, to_size, seed) + * calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result or negative error code + * + * int bpf_skb_get_tunnel_opt(skb, opt, size) + * retrieve tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: option size + * + * int bpf_skb_set_tunnel_opt(skb, opt, size) + * populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success or negative error + * + * int bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is v4 -> v6, + * v6 -> v4 transitions. The helper will also resize the skb. eBPF + * program is expected to fill the new headers via skb_store_bytes + * and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + * + * int bpf_skb_under_cgroup(skb, map, index) + * Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + * + * u32 bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + * + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + * + * int bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + * + * int bpf_current_task_under_cgroup(map, index) + * Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + * + * int bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, to be used f.e. + * with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + * + * int bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the skb is non-linear + * and not all of len are part of the linear section. Only needed for + * read/write with direct packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + * + * s64 bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + * + * void bpf_set_hash_invalid(skb) + * Invalidate current skb->hash. + * @skb: pointer to skb + * + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + * + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. + * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error + * + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error + * + * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) + * Copy a NUL terminated string from unsafe address. In case the string + * length is smaller than size, the target is not padded with further NUL + * bytes. In case the string length is larger than size, just count-1 + * bytes are copied and the last byte is set to NUL. + * @dst: destination address + * @size: maximum number of bytes to copy, including the trailing NUL + * @unsafe_ptr: unsafe address + * Return: + * > 0 length of the string including the trailing NUL on success + * < 0 error + * + * u64 bpf_get_socket_cookie(skb) + * Get the cookie for the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: 8 Bytes non-decreasing number on success or 0 if the socket + * field is missing inside sk_buff + * + * u32 bpf_get_socket_uid(skb) + * Get the owner uid of the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: uid of the socket owner on success or overflowuid if failed. + * + * u32 bpf_set_hash(skb, hash) + * Set full skb->hash. + * @skb: pointer to skb + * @hash: hash to set + * + * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) + * Calls setsockopt. Not all opts are available, only those with + * integer optvals plus TCP_CONGESTION. + * Supported levels: SOL_SOCKET and IPPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: SOL_SOCKET or IPPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in bytes + * Return: 0 or negative error + * + * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) + * Calls getsockopt. Not all opts are available. + * Supported levels: IPPROTO_TCP + * @bpf_socket: pointer to bpf_socket + * @level: IPPROTO_TCP + * @optname: option name + * @optval: pointer to option value + * @optlen: length of optval in bytes + * Return: 0 or negative error + * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * + * int bpf_skb_adjust_room(skb, len_diff, mode, flags) + * Grow or shrink room in sk_buff. + * @skb: pointer to skb + * @len_diff: (signed) amount of room to grow/shrink + * @mode: operation mode (enum bpf_adj_room_mode) + * @flags: reserved for future use + * Return: 0 on success or negative error code + * + * int bpf_sk_redirect_map(map, key, flags) + * Redirect skb to a sock in map using key as a lookup key for the + * sock in map. + * @map: pointer to sockmap + * @key: key to lookup sock in map + * @flags: reserved for future use + * Return: SK_PASS + * + * int bpf_sock_map_update(skops, map, key, flags) + * @skops: pointer to bpf_sock_ops + * @map: pointer to sockmap to update + * @key: key to insert/update sock in map + * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error + * + * int bpf_perf_event_read_value(map, flags, buf, buf_size) + * read perf event counter value and perf event enabled/running time + * @map: pointer to perf_event_array map + * @flags: index of event in the map or bitmask flags + * @buf: buf to fill + * @buf_size: size of the buf + * Return: 0 on success or negative error code + * + * int bpf_perf_prog_read_value(ctx, buf, buf_size) + * read perf prog attached perf event counter and enabled/running time + * @ctx: pointer to ctx + * @buf: buf to fill + * @buf_size: size of the buf + * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set + */ +#define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ + FN(map_lookup_elem), \ + FN(map_update_elem), \ + FN(map_delete_elem), \ + FN(probe_read), \ + FN(ktime_get_ns), \ + FN(trace_printk), \ + FN(get_prandom_u32), \ + FN(get_smp_processor_id), \ + FN(skb_store_bytes), \ + FN(l3_csum_replace), \ + FN(l4_csum_replace), \ + FN(tail_call), \ + FN(clone_redirect), \ + FN(get_current_pid_tgid), \ + FN(get_current_uid_gid), \ + FN(get_current_comm), \ + FN(get_cgroup_classid), \ + FN(skb_vlan_push), \ + FN(skb_vlan_pop), \ + FN(skb_get_tunnel_key), \ + FN(skb_set_tunnel_key), \ + FN(perf_event_read), \ + FN(redirect), \ + FN(get_route_realm), \ + FN(perf_event_output), \ + FN(skb_load_bytes), \ + FN(get_stackid), \ + FN(csum_diff), \ + FN(skb_get_tunnel_opt), \ + FN(skb_set_tunnel_opt), \ + FN(skb_change_proto), \ + FN(skb_change_type), \ + FN(skb_under_cgroup), \ + FN(get_hash_recalc), \ + FN(get_current_task), \ + FN(probe_write_user), \ + FN(current_task_under_cgroup), \ + FN(skb_change_tail), \ + FN(skb_pull_data), \ + FN(csum_update), \ + FN(set_hash_invalid), \ + FN(get_numa_node_id), \ + FN(skb_change_head), \ + FN(xdp_adjust_head), \ + FN(probe_read_str), \ + FN(get_socket_cookie), \ + FN(get_socket_uid), \ + FN(set_hash), \ + FN(setsockopt), \ + FN(skb_adjust_room), \ + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ + FN(xdp_adjust_meta), \ + FN(perf_event_read_value), \ + FN(perf_prog_read_value), \ + FN(getsockopt), \ + FN(override_return), \ + FN(sock_ops_cb_flags_set), + +/* integer value in 'imm' field of BPF_CALL instruction selects which helper + * function eBPF program intends to call + */ +#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +enum bpf_func_id { + __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + __BPF_FUNC_MAX_ID, +}; +#undef __BPF_ENUM_FN + +/* All flags used by eBPF helper functions, placed here. */ + +/* BPF_FUNC_skb_store_bytes flags. */ +#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) +#define BPF_F_INVALIDATE_HASH (1ULL << 1) + +/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. + * First 4 bits are for passing the header field size. + */ +#define BPF_F_HDR_FIELD_MASK 0xfULL + +/* BPF_FUNC_l4_csum_replace flags. */ +#define BPF_F_PSEUDO_HDR (1ULL << 4) +#define BPF_F_MARK_MANGLED_0 (1ULL << 5) +#define BPF_F_MARK_ENFORCE (1ULL << 6) + +/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +#define BPF_F_INGRESS (1ULL << 0) + +/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +#define BPF_F_TUNINFO_IPV6 (1ULL << 0) + +/* BPF_FUNC_get_stackid flags. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) + +/* BPF_FUNC_skb_set_tunnel_key flags. */ +#define BPF_F_ZERO_CSUM_TX (1ULL << 1) +#define BPF_F_DONT_FRAGMENT (1ULL << 2) + +/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and + * BPF_FUNC_perf_event_read_value flags. + */ +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + +/* Mode for BPF_FUNC_skb_adjust_room helper. */ +enum bpf_adj_room_mode { + BPF_ADJ_ROOM_NET, +}; + +/* user accessible mirror of in-kernel sk_buff. + * new fields can only be added to the end of this structure + */ +struct __sk_buff { + __u32 len; + __u32 pkt_type; + __u32 mark; + __u32 queue_mapping; + __u32 protocol; + __u32 vlan_present; + __u32 vlan_tci; + __u32 vlan_proto; + __u32 priority; + __u32 ingress_ifindex; + __u32 ifindex; + __u32 tc_index; + __u32 cb[5]; + __u32 hash; + __u32 tc_classid; + __u32 data; + __u32 data_end; + __u32 napi_id; + + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; +}; + +struct bpf_tunnel_key { + __u32 tunnel_id; + union { + __u32 remote_ipv4; + __u32 remote_ipv6[4]; + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; + __u16 tunnel_ext; + __u32 tunnel_label; +}; + +/* Generic BPF return codes which all BPF program types may support. + * The values are binary compatible with their TC_ACT_* counter-part to + * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT + * programs. + * + * XDP is handled seprately, see XDP_*. + */ +enum bpf_ret_code { + BPF_OK = 0, + /* 1 reserved */ + BPF_DROP = 2, + /* 3-6 reserved */ + BPF_REDIRECT = 7, + /* >127 are reserved for prog type specific return codes */ +}; + +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; + __u32 mark; + __u32 priority; +}; + +#define XDP_PACKET_HEADROOM 256 + +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, + XDP_TX, + XDP_REDIRECT, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; + __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ +}; + +enum sk_action { + SK_DROP = 0, + SK_PASS, +}; + +#define BPF_TAG_SIZE 8 + +struct bpf_prog_info { + __u32 type; + __u32 id; + __u8 tag[BPF_TAG_SIZE]; + __u32 jited_prog_len; + __u32 xlated_prog_len; + __aligned_u64 jited_prog_insns; + __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; +} __attribute__((aligned(8))); + +struct bpf_map_info { + __u32 type; + __u32 id; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 map_flags; + char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; +} __attribute__((aligned(8))); + +/* User bpf_sock_ops struct to access socket values and specify request ops + * and their replies. + * Some of this fields are in network (bigendian) byte order and may need + * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). + * New fields can only be added at the end of this structure + */ +struct bpf_sock_ops { + __u32 op; + union { + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ + }; + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; +}; + +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + +/* List of known BPF sock_ops operators. + * New entries can only be added at the end + */ +enum { + BPF_SOCK_OPS_VOID, + BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ + BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized + * window (in packets) or -1 if default + * value should be used + */ + BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an + * active connection is initialized + */ + BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an + * active connection is + * established + */ + BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a + * passive connection is + * established + */ + BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control + * needs ECN + */ + BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is + * based on the path and may be + * dependent on the congestion control + * algorithm. In general it indicates + * a congestion threshold. RTTs above + * this indicate congestion + */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ +}; + +#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ + +struct bpf_perf_event_value { + __u64 counter; + __u64 enabled; + __u64 running; +}; + +#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +#define BPF_DEVCG_ACC_READ (1ULL << 1) +#define BPF_DEVCG_ACC_WRITE (1ULL << 2) + +#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +#define BPF_DEVCG_DEV_CHAR (1ULL << 1) + +struct bpf_cgroup_dev_ctx { + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; + __u32 major; + __u32 minor; +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/src/shared/linux/bpf_common.h b/src/shared/linux/bpf_common.h new file mode 100644 index 0000000..afe7433 --- /dev/null +++ b/src/shared/linux/bpf_common.h @@ -0,0 +1,55 @@ +#ifndef __LINUX_BPF_COMMON_H__ +#define __LINUX_BPF_COMMON_H__ + +/* Instruction classes */ +#define BPF_CLASS(code) ((code) & 0x07) +#define BPF_LD 0x00 +#define BPF_LDX 0x01 +#define BPF_ST 0x02 +#define BPF_STX 0x03 +#define BPF_ALU 0x04 +#define BPF_JMP 0x05 +#define BPF_RET 0x06 +#define BPF_MISC 0x07 + +/* ld/ldx fields */ +#define BPF_SIZE(code) ((code) & 0x18) +#define BPF_W 0x00 +#define BPF_H 0x08 +#define BPF_B 0x10 +#define BPF_MODE(code) ((code) & 0xe0) +#define BPF_IMM 0x00 +#define BPF_ABS 0x20 +#define BPF_IND 0x40 +#define BPF_MEM 0x60 +#define BPF_LEN 0x80 +#define BPF_MSH 0xa0 + +/* alu/jmp fields */ +#define BPF_OP(code) ((code) & 0xf0) +#define BPF_ADD 0x00 +#define BPF_SUB 0x10 +#define BPF_MUL 0x20 +#define BPF_DIV 0x30 +#define BPF_OR 0x40 +#define BPF_AND 0x50 +#define BPF_LSH 0x60 +#define BPF_RSH 0x70 +#define BPF_NEG 0x80 +#define BPF_MOD 0x90 +#define BPF_XOR 0xa0 + +#define BPF_JA 0x00 +#define BPF_JEQ 0x10 +#define BPF_JGT 0x20 +#define BPF_JGE 0x30 +#define BPF_JSET 0x40 +#define BPF_SRC(code) ((code) & 0x08) +#define BPF_K 0x00 +#define BPF_X 0x08 + +#ifndef BPF_MAXINSNS +#define BPF_MAXINSNS 4096 +#endif + +#endif /* __LINUX_BPF_COMMON_H__ */ diff --git a/src/shared/linux/libbpf.h b/src/shared/linux/libbpf.h new file mode 100644 index 0000000..391eee5 --- /dev/null +++ b/src/shared/linux/libbpf.h @@ -0,0 +1,207 @@ +/* eBPF mini library */ +#ifndef __LIBBPF_H +#define __LIBBPF_H + +#include <linux/bpf.h> + +struct bpf_insn; + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Unconditional jumps */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#endif diff --git a/src/shared/lockfile-util.c b/src/shared/lockfile-util.c new file mode 100644 index 0000000..260c208 --- /dev/null +++ b/src/shared/lockfile-util.c @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/file.h> +#include <sys/stat.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "lockfile-util.h" +#include "macro.h" +#include "missing_fcntl.h" +#include "path-util.h" + +int make_lock_file(const char *p, int operation, LockFile *ret) { + _cleanup_close_ int fd = -1; + _cleanup_free_ char *t = NULL; + int r; + + /* + * We use UNPOSIX locks if they are available. They have nice + * semantics, and are mostly compatible with NFS. However, + * they are only available on new kernels. When we detect we + * are running on an older kernel, then we fall back to good + * old BSD locks. They also have nice semantics, but are + * slightly problematic on NFS, where they are upgraded to + * POSIX locks, even though locally they are orthogonal to + * POSIX locks. + */ + + t = strdup(p); + if (!t) + return -ENOMEM; + + for (;;) { + struct flock fl = { + .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK, + .l_whence = SEEK_SET, + }; + struct stat st; + + fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600); + if (fd < 0) + return -errno; + + r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl); + if (r < 0) { + + /* If the kernel is too old, use good old BSD locks */ + if (errno == EINVAL) + r = flock(fd, operation); + + if (r < 0) + return errno == EAGAIN ? -EBUSY : -errno; + } + + /* If we acquired the lock, let's check if the file + * still exists in the file system. If not, then the + * previous exclusive owner removed it and then closed + * it. In such a case our acquired lock is worthless, + * hence try again. */ + + r = fstat(fd, &st); + if (r < 0) + return -errno; + if (st.st_nlink > 0) + break; + + fd = safe_close(fd); + } + + ret->path = t; + ret->fd = fd; + ret->operation = operation; + + fd = -1; + t = NULL; + + return r; +} + +int make_lock_file_for(const char *p, int operation, LockFile *ret) { + const char *fn; + char *t; + + assert(p); + assert(ret); + + fn = basename(p); + if (!filename_is_valid(fn)) + return -EINVAL; + + t = newa(char, strlen(p) + 2 + 4 + 1); + stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), fn), ".lck"); + + return make_lock_file(t, operation, ret); +} + +void release_lock_file(LockFile *f) { + int r; + + if (!f) + return; + + if (f->path) { + + /* If we are the exclusive owner we can safely delete + * the lock file itself. If we are not the exclusive + * owner, we can try becoming it. */ + + if (f->fd >= 0 && + (f->operation & ~LOCK_NB) == LOCK_SH) { + static const struct flock fl = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + }; + + r = fcntl(f->fd, F_OFD_SETLK, &fl); + if (r < 0 && errno == EINVAL) + r = flock(f->fd, LOCK_EX|LOCK_NB); + + if (r >= 0) + f->operation = LOCK_EX|LOCK_NB; + } + + if ((f->operation & ~LOCK_NB) == LOCK_EX) + unlink_noerrno(f->path); + + f->path = mfree(f->path); + } + + f->fd = safe_close(f->fd); + f->operation = 0; +} diff --git a/src/shared/lockfile-util.h b/src/shared/lockfile-util.h new file mode 100644 index 0000000..e0eef34 --- /dev/null +++ b/src/shared/lockfile-util.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +typedef struct LockFile { + char *path; + int fd; + int operation; +} LockFile; + +int make_lock_file(const char *p, int operation, LockFile *ret); +int make_lock_file_for(const char *p, int operation, LockFile *ret); +void release_lock_file(LockFile *f); + +#define LOCK_FILE_INIT { .fd = -1, .path = NULL } diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c new file mode 100644 index 0000000..15ef0f1 --- /dev/null +++ b/src/shared/logs-show.c @@ -0,0 +1,1462 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <syslog.h> +#include <time.h> +#include <unistd.h> + +#include "sd-id128.h" +#include "sd-journal.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "format-util.h" +#include "hashmap.h" +#include "hostname-util.h" +#include "io-util.h" +#include "journal-internal.h" +#include "json.h" +#include "log.h" +#include "logs-show.h" +#include "macro.h" +#include "output-mode.h" +#include "parse-util.h" +#include "process-util.h" +#include "sparse-endian.h" +#include "stdio-util.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" + +/* up to three lines (each up to 100 characters) or 300 characters, whichever is less */ +#define PRINT_LINE_THRESHOLD 3 +#define PRINT_CHAR_THRESHOLD 300 + +#define JSON_THRESHOLD 4096U + +static int print_catalog(FILE *f, sd_journal *j) { + int r; + _cleanup_free_ char *t = NULL, *z = NULL; + + r = sd_journal_get_catalog(j, &t); + if (r < 0) + return r; + + z = strreplace(strstrip(t), "\n", "\n-- "); + if (!z) + return log_oom(); + + fputs("-- ", f); + fputs(z, f); + fputc('\n', f); + + return 0; +} + +static int parse_field(const void *data, size_t length, const char *field, size_t field_len, char **target, size_t *target_len) { + size_t nl; + char *buf; + + assert(data); + assert(field); + assert(target); + + if (length < field_len) + return 0; + + if (memcmp(data, field, field_len)) + return 0; + + nl = length - field_len; + + buf = newdup_suffix0(char, (const char*) data + field_len, nl); + if (!buf) + return log_oom(); + + free(*target); + *target = buf; + + if (target_len) + *target_len = nl; + + return 1; +} + +typedef struct ParseFieldVec { + const char *field; + size_t field_len; + char **target; + size_t *target_len; +} ParseFieldVec; + +#define PARSE_FIELD_VEC_ENTRY(_field, _target, _target_len) \ + { .field = _field, .field_len = strlen(_field), .target = _target, .target_len = _target_len } + +static int parse_fieldv(const void *data, size_t length, const ParseFieldVec *fields, unsigned n_fields) { + unsigned i; + + for (i = 0; i < n_fields; i++) { + const ParseFieldVec *f = &fields[i]; + int r; + + r = parse_field(data, length, f->field, f->field_len, f->target, f->target_len); + if (r < 0) + return r; + else if (r > 0) + break; + } + + return 0; +} + +static int field_set_test(Set *fields, const char *name, size_t n) { + char *s = NULL; + + if (!fields) + return 1; + + s = strndupa(name, n); + if (!s) + return log_oom(); + + return set_get(fields, s) ? 1 : 0; +} + +static bool shall_print(const char *p, size_t l, OutputFlags flags) { + assert(p); + + if (flags & OUTPUT_SHOW_ALL) + return true; + + if (l >= PRINT_CHAR_THRESHOLD) + return false; + + if (!utf8_is_printable(p, l)) + return false; + + return true; +} + +static bool print_multiline( + FILE *f, + unsigned prefix, + unsigned n_columns, + OutputFlags flags, + int priority, + const char* message, + size_t message_len, + size_t highlight[2]) { + + const char *color_on = "", *color_off = "", *highlight_on = ""; + const char *pos, *end; + bool ellipsized = false; + int line = 0; + + if (flags & OUTPUT_COLOR) { + if (priority <= LOG_ERR) { + color_on = ANSI_HIGHLIGHT_RED; + color_off = ANSI_NORMAL; + highlight_on = ANSI_HIGHLIGHT; + } else if (priority <= LOG_NOTICE) { + color_on = ANSI_HIGHLIGHT; + color_off = ANSI_NORMAL; + highlight_on = ANSI_HIGHLIGHT_RED; + } else if (priority >= LOG_DEBUG) { + color_on = ANSI_GREY; + color_off = ANSI_NORMAL; + highlight_on = ANSI_HIGHLIGHT_RED; + } + } + + /* A special case: make sure that we print a newline when + the message is empty. */ + if (message_len == 0) + fputs("\n", f); + + for (pos = message; + pos < message + message_len; + pos = end + 1, line++) { + bool continuation = line > 0; + bool tail_line; + int len; + for (end = pos; end < message + message_len && *end != '\n'; end++) + ; + len = end - pos; + assert(len >= 0); + + /* We need to figure out when we are showing not-last line, *and* + * will skip subsequent lines. In that case, we will put the dots + * at the end of the line, instead of putting dots in the middle + * or not at all. + */ + tail_line = + line + 1 == PRINT_LINE_THRESHOLD || + end + 1 >= message + PRINT_CHAR_THRESHOLD; + + if (flags & (OUTPUT_FULL_WIDTH | OUTPUT_SHOW_ALL) || + (prefix + len + 1 < n_columns && !tail_line)) { + if (highlight && + (size_t) (pos - message) <= highlight[0] && + highlight[0] < (size_t) len) { + + fprintf(f, "%*s%s%.*s", + continuation * prefix, "", + color_on, (int) highlight[0], pos); + fprintf(f, "%s%.*s", + highlight_on, + (int) (MIN((size_t) len, highlight[1]) - highlight[0]), + pos + highlight[0]); + if ((size_t) len > highlight[1]) + fprintf(f, "%s%.*s", + color_on, + (int) (len - highlight[1]), + pos + highlight[1]); + fprintf(f, "%s\n", color_off); + + } else + fprintf(f, "%*s%s%.*s%s\n", + continuation * prefix, "", + color_on, len, pos, color_off); + continue; + } + + /* Beyond this point, ellipsization will happen. */ + ellipsized = true; + + if (prefix < n_columns && n_columns - prefix >= 3) { + if (n_columns - prefix > (unsigned) len + 3) + fprintf(f, "%*s%s%.*s...%s\n", + continuation * prefix, "", + color_on, len, pos, color_off); + else { + _cleanup_free_ char *e; + + e = ellipsize_mem(pos, len, n_columns - prefix, + tail_line ? 100 : 90); + if (!e) + fprintf(f, "%*s%s%.*s%s\n", + continuation * prefix, "", + color_on, len, pos, color_off); + else + fprintf(f, "%*s%s%s%s\n", + continuation * prefix, "", + color_on, e, color_off); + } + } else + fputs("...\n", f); + + if (tail_line) + break; + } + + return ellipsized; +} + +static int output_timestamp_monotonic(FILE *f, sd_journal *j, const char *monotonic) { + sd_id128_t boot_id; + uint64_t t; + int r; + + assert(f); + assert(j); + + r = -ENXIO; + if (monotonic) + r = safe_atou64(monotonic, &t); + if (r < 0) + r = sd_journal_get_monotonic_usec(j, &t, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get monotonic timestamp: %m"); + + fprintf(f, "[%5"PRI_USEC".%06"PRI_USEC"]", t / USEC_PER_SEC, t % USEC_PER_SEC); + return 1 + 5 + 1 + 6 + 1; +} + +static int output_timestamp_realtime(FILE *f, sd_journal *j, OutputMode mode, OutputFlags flags, const char *realtime) { + char buf[MAX(FORMAT_TIMESTAMP_MAX, 64)]; + struct tm *(*gettime_r)(const time_t *, struct tm *); + struct tm tm; + uint64_t x; + time_t t; + int r; + + assert(f); + assert(j); + + if (realtime) + r = safe_atou64(realtime, &x); + if (!realtime || r < 0 || !VALID_REALTIME(x)) + r = sd_journal_get_realtime_usec(j, &x); + if (r < 0) + return log_error_errno(r, "Failed to get realtime timestamp: %m"); + + if (IN_SET(mode, OUTPUT_SHORT_FULL, OUTPUT_WITH_UNIT)) { + const char *k; + + if (flags & OUTPUT_UTC) + k = format_timestamp_utc(buf, sizeof(buf), x); + else + k = format_timestamp(buf, sizeof(buf), x); + if (!k) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format timestamp: %" PRIu64, x); + + } else { + char usec[7]; + + gettime_r = (flags & OUTPUT_UTC) ? gmtime_r : localtime_r; + t = (time_t) (x / USEC_PER_SEC); + + switch (mode) { + + case OUTPUT_SHORT_UNIX: + xsprintf(buf, "%10"PRI_TIME".%06"PRIu64, t, x % USEC_PER_SEC); + break; + + case OUTPUT_SHORT_ISO: + if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z", gettime_r(&t, &tm)) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format ISO time"); + break; + + case OUTPUT_SHORT_ISO_PRECISE: + /* No usec in strftime, so we leave space and copy over */ + if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S.xxxxxx%z", gettime_r(&t, &tm)) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format ISO-precise time"); + xsprintf(usec, "%06"PRI_USEC, x % USEC_PER_SEC); + memcpy(buf + 20, usec, 6); + break; + + case OUTPUT_SHORT: + case OUTPUT_SHORT_PRECISE: + + if (strftime(buf, sizeof(buf), "%b %d %H:%M:%S", gettime_r(&t, &tm)) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format syslog time"); + + if (mode == OUTPUT_SHORT_PRECISE) { + size_t k; + + assert(sizeof(buf) > strlen(buf)); + k = sizeof(buf) - strlen(buf); + + r = snprintf(buf + strlen(buf), k, ".%06"PRIu64, x % USEC_PER_SEC); + if (r <= 0 || (size_t) r >= k) /* too long? */ + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to format precise time"); + } + break; + + default: + assert_not_reached("Unknown time format"); + } + } + + fputs(buf, f); + return (int) strlen(buf); +} + +static int output_short( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + Set *output_fields, + const size_t highlight[2]) { + + int r; + const void *data; + size_t length; + size_t n = 0; + _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL, *message = NULL, *realtime = NULL, *monotonic = NULL, *priority = NULL, *unit = NULL, *user_unit = NULL; + size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0, realtime_len = 0, monotonic_len = 0, priority_len = 0, unit_len = 0, user_unit_len = 0; + int p = LOG_INFO; + bool ellipsized = false; + const ParseFieldVec fields[] = { + PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len), + PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len), + PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len), + PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len), + PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len), + PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len), + PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len), + PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len), + PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len), + PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len), + PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len), + }; + size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0}; + + assert(f); + assert(j); + + /* Set the threshold to one bigger than the actual print + * threshold, so that if the line is actually longer than what + * we're willing to print, ellipsization will occur. This way + * we won't output a misleading line without any indication of + * truncation. + */ + sd_journal_set_data_threshold(j, flags & (OUTPUT_SHOW_ALL|OUTPUT_FULL_WIDTH) ? 0 : PRINT_CHAR_THRESHOLD + 1); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + r = parse_fieldv(data, length, fields, ELEMENTSOF(fields)); + if (r < 0) + return r; + } + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to get journal fields: %m"); + + if (!message) { + log_debug("Skipping message without MESSAGE= field."); + return 0; + } + + if (!(flags & OUTPUT_SHOW_ALL)) + strip_tab_ansi(&message, &message_len, highlight_shifted); + + if (priority_len == 1 && *priority >= '0' && *priority <= '7') + p = *priority - '0'; + + if (mode == OUTPUT_SHORT_MONOTONIC) + r = output_timestamp_monotonic(f, j, monotonic); + else + r = output_timestamp_realtime(f, j, mode, flags, realtime); + if (r < 0) + return r; + n += r; + + if (flags & OUTPUT_NO_HOSTNAME) { + /* Suppress display of the hostname if this is requested. */ + hostname = mfree(hostname); + hostname_len = 0; + } + + if (hostname && shall_print(hostname, hostname_len, flags)) { + fprintf(f, " %.*s", (int) hostname_len, hostname); + n += hostname_len + 1; + } + + if (mode == OUTPUT_WITH_UNIT && ((unit && shall_print(unit, unit_len, flags)) || (user_unit && shall_print(user_unit, user_unit_len, flags)))) { + if (unit) { + fprintf(f, " %.*s", (int) unit_len, unit); + n += unit_len + 1; + } + if (user_unit) { + if (unit) + fprintf(f, "/%.*s", (int) user_unit_len, user_unit); + else + fprintf(f, " %.*s", (int) user_unit_len, user_unit); + n += unit_len + 1; + } + } else if (identifier && shall_print(identifier, identifier_len, flags)) { + fprintf(f, " %.*s", (int) identifier_len, identifier); + n += identifier_len + 1; + } else if (comm && shall_print(comm, comm_len, flags)) { + fprintf(f, " %.*s", (int) comm_len, comm); + n += comm_len + 1; + } else + fputs(" unknown", f); + + if (pid && shall_print(pid, pid_len, flags)) { + fprintf(f, "[%.*s]", (int) pid_len, pid); + n += pid_len + 2; + } else if (fake_pid && shall_print(fake_pid, fake_pid_len, flags)) { + fprintf(f, "[%.*s]", (int) fake_pid_len, fake_pid); + n += fake_pid_len + 2; + } + + if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(message, message_len)) { + char bytes[FORMAT_BYTES_MAX]; + fprintf(f, ": [%s blob data]\n", format_bytes(bytes, sizeof(bytes), message_len)); + } else { + fputs(": ", f); + ellipsized |= + print_multiline(f, n + 2, n_columns, flags, p, + message, message_len, + highlight_shifted); + } + + if (flags & OUTPUT_CATALOG) + print_catalog(f, j); + + return ellipsized; +} + +static int output_verbose( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + Set *output_fields, + const size_t highlight[2]) { + + const void *data; + size_t length; + _cleanup_free_ char *cursor = NULL; + uint64_t realtime = 0; + char ts[FORMAT_TIMESTAMP_MAX + 7]; + const char *timestamp; + int r; + + assert(f); + assert(j); + + sd_journal_set_data_threshold(j, 0); + + r = sd_journal_get_data(j, "_SOURCE_REALTIME_TIMESTAMP", &data, &length); + if (r == -ENOENT) + log_debug("Source realtime timestamp not found"); + else if (r < 0) + return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get source realtime timestamp: %m"); + else { + _cleanup_free_ char *value = NULL; + + r = parse_field(data, length, "_SOURCE_REALTIME_TIMESTAMP=", + STRLEN("_SOURCE_REALTIME_TIMESTAMP="), &value, + NULL); + if (r < 0) + return r; + assert(r > 0); + + r = safe_atou64(value, &realtime); + if (r < 0) + log_debug_errno(r, "Failed to parse realtime timestamp: %m"); + } + + if (r < 0) { + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) + return log_full_errno(r == -EADDRNOTAVAIL ? LOG_DEBUG : LOG_ERR, r, "Failed to get realtime timestamp: %m"); + } + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) + return log_error_errno(r, "Failed to get cursor: %m"); + + timestamp = flags & OUTPUT_UTC ? format_timestamp_us_utc(ts, sizeof ts, realtime) + : format_timestamp_us(ts, sizeof ts, realtime); + fprintf(f, "%s [%s]\n", + timestamp ?: "(no timestamp)", + cursor); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + const char *c; + int fieldlen; + const char *on = "", *off = ""; + + c = memchr(data, '=', length); + if (!c) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid field."); + fieldlen = c - (const char*) data; + + r = field_set_test(output_fields, data, fieldlen); + if (r < 0) + return r; + if (!r) + continue; + + if (flags & OUTPUT_COLOR && startswith(data, "MESSAGE=")) { + on = ANSI_HIGHLIGHT; + off = ANSI_NORMAL; + } + + if ((flags & OUTPUT_SHOW_ALL) || + (((length < PRINT_CHAR_THRESHOLD) || flags & OUTPUT_FULL_WIDTH) + && utf8_is_printable(data, length))) { + fprintf(f, " %s%.*s=", on, fieldlen, (const char*)data); + print_multiline(f, 4 + fieldlen + 1, 0, OUTPUT_FULL_WIDTH, 0, c + 1, length - fieldlen - 1, NULL); + fputs(off, f); + } else { + char bytes[FORMAT_BYTES_MAX]; + + fprintf(f, " %s%.*s=[%s blob data]%s\n", + on, + (int) (c - (const char*) data), + (const char*) data, + format_bytes(bytes, sizeof(bytes), length - (c - (const char *) data) - 1), + off); + } + } + + if (r < 0) + return r; + + if (flags & OUTPUT_CATALOG) + print_catalog(f, j); + + return 0; +} + +static int output_export( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + Set *output_fields, + const size_t highlight[2]) { + + sd_id128_t boot_id; + char sid[33]; + int r; + usec_t realtime, monotonic; + _cleanup_free_ char *cursor = NULL; + const void *data; + size_t length; + + assert(j); + + sd_journal_set_data_threshold(j, 0); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) + return log_error_errno(r, "Failed to get realtime timestamp: %m"); + + r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get monotonic timestamp: %m"); + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) + return log_error_errno(r, "Failed to get cursor: %m"); + + fprintf(f, + "__CURSOR=%s\n" + "__REALTIME_TIMESTAMP="USEC_FMT"\n" + "__MONOTONIC_TIMESTAMP="USEC_FMT"\n" + "_BOOT_ID=%s\n", + cursor, + realtime, + monotonic, + sd_id128_to_string(boot_id, sid)); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + const char *c; + + /* We already printed the boot id from the data in the header, hence let's suppress it here */ + if (memory_startswith(data, length, "_BOOT_ID=")) + continue; + + c = memchr(data, '=', length); + if (!c) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid field."); + + r = field_set_test(output_fields, data, c - (const char *) data); + if (r < 0) + return r; + if (!r) + continue; + + if (utf8_is_printable_newline(data, length, false)) + fwrite(data, length, 1, f); + else { + uint64_t le64; + + fwrite(data, c - (const char*) data, 1, f); + fputc('\n', f); + le64 = htole64(length - (c - (const char*) data) - 1); + fwrite(&le64, sizeof(le64), 1, f); + fwrite(c + 1, length - (c - (const char*) data) - 1, 1, f); + } + + fputc('\n', f); + } + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + + if (r < 0) + return r; + + fputc('\n', f); + + return 0; +} + +void json_escape( + FILE *f, + const char* p, + size_t l, + OutputFlags flags) { + + assert(f); + assert(p); + + if (!(flags & OUTPUT_SHOW_ALL) && l >= JSON_THRESHOLD) + fputs("null", f); + + else if (!(flags & OUTPUT_SHOW_ALL) && !utf8_is_printable(p, l)) { + bool not_first = false; + + fputs("[ ", f); + + while (l > 0) { + if (not_first) + fprintf(f, ", %u", (uint8_t) *p); + else { + not_first = true; + fprintf(f, "%u", (uint8_t) *p); + } + + p++; + l--; + } + + fputs(" ]", f); + } else { + fputc('"', f); + + while (l > 0) { + if (IN_SET(*p, '"', '\\')) { + fputc('\\', f); + fputc(*p, f); + } else if (*p == '\n') + fputs("\\n", f); + else if ((uint8_t) *p < ' ') + fprintf(f, "\\u%04x", (uint8_t) *p); + else + fputc(*p, f); + + p++; + l--; + } + + fputc('"', f); + } +} + +struct json_data { + JsonVariant* name; + size_t n_values; + JsonVariant* values[]; +}; + +static int update_json_data( + Hashmap *h, + OutputFlags flags, + const char *name, + const void *value, + size_t size) { + + _cleanup_(json_variant_unrefp) JsonVariant *v = NULL; + struct json_data *d; + int r; + + if (!(flags & OUTPUT_SHOW_ALL) && strlen(name) + 1 + size >= JSON_THRESHOLD) + r = json_variant_new_null(&v); + else if (utf8_is_printable(value, size)) + r = json_variant_new_stringn(&v, value, size); + else + r = json_variant_new_array_bytes(&v, value, size); + if (r < 0) + return log_error_errno(r, "Failed to allocate JSON data: %m"); + + d = hashmap_get(h, name); + if (d) { + struct json_data *w; + + w = realloc(d, offsetof(struct json_data, values) + sizeof(JsonVariant*) * (d->n_values + 1)); + if (!w) + return log_oom(); + + d = w; + assert_se(hashmap_update(h, json_variant_string(d->name), d) >= 0); + } else { + _cleanup_(json_variant_unrefp) JsonVariant *n = NULL; + + r = json_variant_new_string(&n, name); + if (r < 0) + return log_error_errno(r, "Failed to allocate JSON name variant: %m"); + + d = malloc0(offsetof(struct json_data, values) + sizeof(JsonVariant*)); + if (!d) + return log_oom(); + + r = hashmap_put(h, json_variant_string(n), d); + if (r < 0) { + free(d); + return log_error_errno(r, "Failed to insert JSON name into hashmap: %m"); + } + + d->name = TAKE_PTR(n); + } + + d->values[d->n_values++] = TAKE_PTR(v); + return 0; +} + +static int update_json_data_split( + Hashmap *h, + OutputFlags flags, + Set *output_fields, + const void *data, + size_t size) { + + const char *eq; + char *name; + + assert(h); + assert(data || size == 0); + + if (memory_startswith(data, size, "_BOOT_ID=")) + return 0; + + eq = memchr(data, '=', MIN(size, JSON_THRESHOLD)); + if (!eq) + return 0; + + if (eq == data) + return 0; + + name = strndupa(data, eq - (const char*) data); + if (output_fields && !set_get(output_fields, name)) + return 0; + + return update_json_data(h, flags, name, eq + 1, size - (eq - (const char*) data) - 1); +} + +static int output_json( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + Set *output_fields, + const size_t highlight[2]) { + + char sid[SD_ID128_STRING_MAX], usecbuf[DECIMAL_STR_MAX(usec_t)]; + _cleanup_(json_variant_unrefp) JsonVariant *object = NULL; + _cleanup_free_ char *cursor = NULL; + uint64_t realtime, monotonic; + JsonVariant **array = NULL; + struct json_data *d; + sd_id128_t boot_id; + Hashmap *h = NULL; + size_t n = 0; + Iterator i; + int r; + + assert(j); + + (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD); + + r = sd_journal_get_realtime_usec(j, &realtime); + if (r < 0) + return log_error_errno(r, "Failed to get realtime timestamp: %m"); + + r = sd_journal_get_monotonic_usec(j, &monotonic, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get monotonic timestamp: %m"); + + r = sd_journal_get_cursor(j, &cursor); + if (r < 0) + return log_error_errno(r, "Failed to get cursor: %m"); + + h = hashmap_new(&string_hash_ops); + if (!h) + return log_oom(); + + r = update_json_data(h, flags, "__CURSOR", cursor, strlen(cursor)); + if (r < 0) + goto finish; + + xsprintf(usecbuf, USEC_FMT, realtime); + r = update_json_data(h, flags, "__REALTIME_TIMESTAMP", usecbuf, strlen(usecbuf)); + if (r < 0) + goto finish; + + xsprintf(usecbuf, USEC_FMT, monotonic); + r = update_json_data(h, flags, "__MONOTONIC_TIMESTAMP", usecbuf, strlen(usecbuf)); + if (r < 0) + goto finish; + + sd_id128_to_string(boot_id, sid); + r = update_json_data(h, flags, "_BOOT_ID", sid, strlen(sid)); + if (r < 0) + goto finish; + + for (;;) { + const void *data; + size_t size; + + r = sd_journal_enumerate_data(j, &data, &size); + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + r = 0; + goto finish; + } + if (r < 0) { + log_error_errno(r, "Failed to read journal: %m"); + goto finish; + } + if (r == 0) + break; + + r = update_json_data_split(h, flags, output_fields, data, size); + if (r < 0) + goto finish; + } + + array = new(JsonVariant*, hashmap_size(h)*2); + if (!array) { + r = log_oom(); + goto finish; + } + + HASHMAP_FOREACH(d, h, i) { + assert(d->n_values > 0); + + array[n++] = json_variant_ref(d->name); + + if (d->n_values == 1) + array[n++] = json_variant_ref(d->values[0]); + else { + _cleanup_(json_variant_unrefp) JsonVariant *q = NULL; + + r = json_variant_new_array(&q, d->values, d->n_values); + if (r < 0) { + log_error_errno(r, "Failed to create JSON array: %m"); + goto finish; + } + + array[n++] = TAKE_PTR(q); + } + } + + r = json_variant_new_object(&object, array, n); + if (r < 0) { + log_error_errno(r, "Failed to allocate JSON object: %m"); + goto finish; + } + + json_variant_dump(object, + output_mode_to_json_format_flags(mode) | + (FLAGS_SET(flags, OUTPUT_COLOR) ? JSON_FORMAT_COLOR : 0), + f, NULL); + + r = 0; + +finish: + while ((d = hashmap_steal_first(h))) { + size_t k; + + json_variant_unref(d->name); + for (k = 0; k < d->n_values; k++) + json_variant_unref(d->values[k]); + + free(d); + } + + hashmap_free(h); + + json_variant_unref_many(array, n); + free(array); + + return r; +} + +static int output_cat( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + Set *output_fields, + const size_t highlight[2]) { + + const void *data; + size_t l; + int r; + const char *highlight_on = "", *highlight_off = ""; + + assert(j); + assert(f); + + if (flags & OUTPUT_COLOR) { + highlight_on = ANSI_HIGHLIGHT_RED; + highlight_off = ANSI_NORMAL; + } + + sd_journal_set_data_threshold(j, 0); + + r = sd_journal_get_data(j, "MESSAGE", &data, &l); + if (r == -EBADMSG) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + if (r < 0) { + /* An entry without MESSAGE=? */ + if (r == -ENOENT) + return 0; + + return log_error_errno(r, "Failed to get data: %m"); + } + + assert(l >= 8); + + if (highlight && (flags & OUTPUT_COLOR)) { + assert(highlight[0] <= highlight[1]); + assert(highlight[1] <= l - 8); + + fwrite((const char*) data + 8, 1, highlight[0], f); + fwrite(highlight_on, 1, strlen(highlight_on), f); + fwrite((const char*) data + 8 + highlight[0], 1, highlight[1] - highlight[0], f); + fwrite(highlight_off, 1, strlen(highlight_off), f); + fwrite((const char*) data + 8 + highlight[1], 1, l - 8 - highlight[1], f); + } else + fwrite((const char*) data + 8, 1, l - 8, f); + fputc('\n', f); + + return 0; +} + +static int (*output_funcs[_OUTPUT_MODE_MAX])( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + Set *output_fields, + const size_t highlight[2]) = { + + [OUTPUT_SHORT] = output_short, + [OUTPUT_SHORT_ISO] = output_short, + [OUTPUT_SHORT_ISO_PRECISE] = output_short, + [OUTPUT_SHORT_PRECISE] = output_short, + [OUTPUT_SHORT_MONOTONIC] = output_short, + [OUTPUT_SHORT_UNIX] = output_short, + [OUTPUT_SHORT_FULL] = output_short, + [OUTPUT_VERBOSE] = output_verbose, + [OUTPUT_EXPORT] = output_export, + [OUTPUT_JSON] = output_json, + [OUTPUT_JSON_PRETTY] = output_json, + [OUTPUT_JSON_SSE] = output_json, + [OUTPUT_JSON_SEQ] = output_json, + [OUTPUT_CAT] = output_cat, + [OUTPUT_WITH_UNIT] = output_short, +}; + +int show_journal_entry( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + char **output_fields, + const size_t highlight[2], + bool *ellipsized) { + + int ret; + _cleanup_set_free_free_ Set *fields = NULL; + assert(mode >= 0); + assert(mode < _OUTPUT_MODE_MAX); + + if (n_columns <= 0) + n_columns = columns(); + + if (output_fields) { + fields = set_new(&string_hash_ops); + if (!fields) + return log_oom(); + + ret = set_put_strdupv(fields, output_fields); + if (ret < 0) + return ret; + } + + ret = output_funcs[mode](f, j, mode, n_columns, flags, fields, highlight); + + if (ellipsized && ret > 0) + *ellipsized = true; + + return ret; +} + +static int maybe_print_begin_newline(FILE *f, OutputFlags *flags) { + assert(f); + assert(flags); + + if (!(*flags & OUTPUT_BEGIN_NEWLINE)) + return 0; + + /* Print a beginning new line if that's request, but only once + * on the first line we print. */ + + fputc('\n', f); + *flags &= ~OUTPUT_BEGIN_NEWLINE; + return 0; +} + +int show_journal( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + OutputFlags flags, + bool *ellipsized) { + + int r; + unsigned line = 0; + bool need_seek = false; + int warn_cutoff = flags & OUTPUT_WARN_CUTOFF; + + assert(j); + assert(mode >= 0); + assert(mode < _OUTPUT_MODE_MAX); + + if (how_many == (unsigned) -1) + need_seek = true; + else { + /* Seek to end */ + r = sd_journal_seek_tail(j); + if (r < 0) + return log_error_errno(r, "Failed to seek to tail: %m"); + + r = sd_journal_previous_skip(j, how_many); + if (r < 0) + return log_error_errno(r, "Failed to skip previous: %m"); + } + + for (;;) { + for (;;) { + usec_t usec; + + if (need_seek) { + r = sd_journal_next(j); + if (r < 0) + return log_error_errno(r, "Failed to iterate through journal: %m"); + } + + if (r == 0) + break; + + need_seek = true; + + if (not_before > 0) { + r = sd_journal_get_monotonic_usec(j, &usec, NULL); + + /* -ESTALE is returned if the + timestamp is not from this boot */ + if (r == -ESTALE) + continue; + else if (r < 0) + return log_error_errno(r, "Failed to get journal time: %m"); + + if (usec < not_before) + continue; + } + + line++; + maybe_print_begin_newline(f, &flags); + + r = show_journal_entry(f, j, mode, n_columns, flags, NULL, NULL, ellipsized); + if (r < 0) + return r; + } + + if (warn_cutoff && line < how_many && not_before > 0) { + sd_id128_t boot_id; + usec_t cutoff = 0; + + /* Check whether the cutoff line is too early */ + + r = sd_id128_get_boot(&boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get boot id: %m"); + + r = sd_journal_get_cutoff_monotonic_usec(j, boot_id, &cutoff, NULL); + if (r < 0) + return log_error_errno(r, "Failed to get journal cutoff time: %m"); + + if (r > 0 && not_before < cutoff) { + maybe_print_begin_newline(f, &flags); + fprintf(f, "Warning: Journal has been rotated since unit was started. Log output is incomplete or unavailable.\n"); + } + + warn_cutoff = false; + } + + if (!(flags & OUTPUT_FOLLOW)) + break; + + r = sd_journal_wait(j, USEC_INFINITY); + if (r < 0) + return log_error_errno(r, "Failed to wait for journal: %m"); + + } + + return 0; +} + +int add_matches_for_unit(sd_journal *j, const char *unit) { + const char *m1, *m2, *m3, *m4; + int r; + + assert(j); + assert(unit); + + m1 = strjoina("_SYSTEMD_UNIT=", unit); + m2 = strjoina("COREDUMP_UNIT=", unit); + m3 = strjoina("UNIT=", unit); + m4 = strjoina("OBJECT_SYSTEMD_UNIT=", unit); + + (void)( + /* Look for messages from the service itself */ + (r = sd_journal_add_match(j, m1, 0)) || + + /* Look for coredumps of the service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, "MESSAGE_ID=fc2e22bc6ee647b6b90729ab34a250b1", 0)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) || + (r = sd_journal_add_match(j, m2, 0)) || + + /* Look for messages from PID 1 about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, "_PID=1", 0)) || + (r = sd_journal_add_match(j, m3, 0)) || + + /* Look for messages from authorized daemons about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) || + (r = sd_journal_add_match(j, m4, 0)) + ); + + if (r == 0 && endswith(unit, ".slice")) { + const char *m5; + + m5 = strjoina("_SYSTEMD_SLICE=", unit); + + /* Show all messages belonging to a slice */ + (void)( + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m5, 0)) + ); + } + + return r; +} + +int add_matches_for_user_unit(sd_journal *j, const char *unit, uid_t uid) { + int r; + char *m1, *m2, *m3, *m4; + char muid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)]; + + assert(j); + assert(unit); + + m1 = strjoina("_SYSTEMD_USER_UNIT=", unit); + m2 = strjoina("USER_UNIT=", unit); + m3 = strjoina("COREDUMP_USER_UNIT=", unit); + m4 = strjoina("OBJECT_SYSTEMD_USER_UNIT=", unit); + sprintf(muid, "_UID="UID_FMT, uid); + + (void) ( + /* Look for messages from the user service itself */ + (r = sd_journal_add_match(j, m1, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + + /* Look for messages from systemd about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m2, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + + /* Look for coredumps of the service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m3, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) || + + /* Look for messages from authorized daemons about this service */ + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m4, 0)) || + (r = sd_journal_add_match(j, muid, 0)) || + (r = sd_journal_add_match(j, "_UID=0", 0)) + ); + + if (r == 0 && endswith(unit, ".slice")) { + const char *m5; + + m5 = strjoina("_SYSTEMD_SLICE=", unit); + + /* Show all messages belonging to a slice */ + (void)( + (r = sd_journal_add_disjunction(j)) || + (r = sd_journal_add_match(j, m5, 0)) || + (r = sd_journal_add_match(j, muid, 0)) + ); + } + + return r; +} + +static int get_boot_id_for_machine(const char *machine, sd_id128_t *boot_id) { + _cleanup_close_pair_ int pair[2] = { -1, -1 }; + _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1; + pid_t pid, child; + char buf[37]; + ssize_t k; + int r; + + assert(machine); + assert(boot_id); + + if (!machine_name_is_valid(machine)) + return -EINVAL; + + r = container_get_leader(machine, &pid); + if (r < 0) + return r; + + r = namespace_open(pid, &pidnsfd, &mntnsfd, NULL, NULL, &rootfd); + if (r < 0) + return r; + + if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0) + return -errno; + + r = namespace_fork("(sd-bootidns)", "(sd-bootid)", NULL, 0, FORK_RESET_SIGNALS|FORK_DEATHSIG, + pidnsfd, mntnsfd, -1, -1, rootfd, &child); + if (r < 0) + return r; + if (r == 0) { + int fd; + + pair[0] = safe_close(pair[0]); + + fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + _exit(EXIT_FAILURE); + + r = loop_read_exact(fd, buf, 36, false); + safe_close(fd); + if (r < 0) + _exit(EXIT_FAILURE); + + k = send(pair[1], buf, 36, MSG_NOSIGNAL); + if (k != 36) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + pair[1] = safe_close(pair[1]); + + r = wait_for_terminate_and_check("(sd-bootidns)", child, 0); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EIO; + + k = recv(pair[0], buf, 36, 0); + if (k != 36) + return -EIO; + + buf[36] = 0; + r = sd_id128_from_string(buf, boot_id); + if (r < 0) + return r; + + return 0; +} + +int add_match_this_boot(sd_journal *j, const char *machine) { + char match[9+32+1] = "_BOOT_ID="; + sd_id128_t boot_id; + int r; + + assert(j); + + if (machine) { + r = get_boot_id_for_machine(machine, &boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get boot id of container %s: %m", machine); + } else { + r = sd_id128_get_boot(&boot_id); + if (r < 0) + return log_error_errno(r, "Failed to get boot id: %m"); + } + + sd_id128_to_string(boot_id, match + 9); + r = sd_journal_add_match(j, match, strlen(match)); + if (r < 0) + return log_error_errno(r, "Failed to add match: %m"); + + r = sd_journal_add_conjunction(j); + if (r < 0) + return log_error_errno(r, "Failed to add conjunction: %m"); + + return 0; +} + +int show_journal_by_unit( + FILE *f, + const char *unit, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + uid_t uid, + OutputFlags flags, + int journal_open_flags, + bool system_unit, + bool *ellipsized) { + + _cleanup_(sd_journal_closep) sd_journal *j = NULL; + int r; + + assert(mode >= 0); + assert(mode < _OUTPUT_MODE_MAX); + assert(unit); + + if (how_many <= 0) + return 0; + + r = sd_journal_open(&j, journal_open_flags); + if (r < 0) + return log_error_errno(r, "Failed to open journal: %m"); + + r = add_match_this_boot(j, NULL); + if (r < 0) + return r; + + if (system_unit) + r = add_matches_for_unit(j, unit); + else + r = add_matches_for_user_unit(j, unit, uid); + if (r < 0) + return log_error_errno(r, "Failed to add unit matches: %m"); + + if (DEBUG_LOGGING) { + _cleanup_free_ char *filter; + + filter = journal_make_match_string(j); + if (!filter) + return log_oom(); + + log_debug("Journal filter: %s", filter); + } + + return show_journal(f, j, mode, n_columns, not_before, how_many, flags, ellipsized); +} diff --git a/src/shared/logs-show.h b/src/shared/logs-show.h new file mode 100644 index 0000000..1e0c4ea --- /dev/null +++ b/src/shared/logs-show.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +#include "sd-journal.h" + +#include "macro.h" +#include "output-mode.h" +#include "time-util.h" +#include "util.h" + +int show_journal_entry( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + OutputFlags flags, + char **output_fields, + const size_t highlight[2], + bool *ellipsized); +int show_journal( + FILE *f, + sd_journal *j, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + OutputFlags flags, + bool *ellipsized); + +int add_match_this_boot(sd_journal *j, const char *machine); + +int add_matches_for_unit( + sd_journal *j, + const char *unit); + +int add_matches_for_user_unit( + sd_journal *j, + const char *unit, + uid_t uid); + +int show_journal_by_unit( + FILE *f, + const char *unit, + OutputMode mode, + unsigned n_columns, + usec_t not_before, + unsigned how_many, + uid_t uid, + OutputFlags flags, + int journal_open_flags, + bool system_unit, + bool *ellipsized); + +void json_escape( + FILE *f, + const char* p, + size_t l, + OutputFlags flags); diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c new file mode 100644 index 0000000..bf426eb --- /dev/null +++ b/src/shared/loop-util.c @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/loop.h> +#include <sys/ioctl.h> +#include <sys/stat.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "loop-util.h" +#include "stat-util.h" + +int loop_device_make(int fd, int open_flags, LoopDevice **ret) { + const struct loop_info64 info = { + .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN|(open_flags == O_RDONLY ? LO_FLAGS_READ_ONLY : 0), + }; + + _cleanup_close_ int control = -1, loop = -1; + _cleanup_free_ char *loopdev = NULL; + struct stat st; + LoopDevice *d; + int nr, r; + + assert(fd >= 0); + assert(ret); + assert(IN_SET(open_flags, O_RDWR, O_RDONLY)); + + if (fstat(fd, &st) < 0) + return -errno; + + if (S_ISBLK(st.st_mode)) { + int copy; + + /* If this is already a block device, store a copy of the fd as it is */ + + copy = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (copy < 0) + return -errno; + + d = new0(LoopDevice, 1); + if (!d) + return -ENOMEM; + + *d = (LoopDevice) { + .fd = copy, + .nr = -1, + .relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */ + }; + + *ret = d; + return d->fd; + } + + r = stat_verify_regular(&st); + if (r < 0) + return r; + + control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); + if (control < 0) + return -errno; + + nr = ioctl(control, LOOP_CTL_GET_FREE); + if (nr < 0) + return -errno; + + if (asprintf(&loopdev, "/dev/loop%i", nr) < 0) + return -ENOMEM; + + loop = open(loopdev, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags); + if (loop < 0) + return -errno; + + if (ioctl(loop, LOOP_SET_FD, fd) < 0) + return -errno; + + if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) + return -errno; + + d = new(LoopDevice, 1); + if (!d) + return -ENOMEM; + + *d = (LoopDevice) { + .fd = TAKE_FD(loop), + .node = TAKE_PTR(loopdev), + .nr = nr, + }; + + *ret = d; + return d->fd; +} + +int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret) { + _cleanup_close_ int fd = -1; + + assert(path); + assert(ret); + assert(IN_SET(open_flags, O_RDWR, O_RDONLY)); + + fd = open(path, O_CLOEXEC|O_NONBLOCK|O_NOCTTY|open_flags); + if (fd < 0) + return -errno; + + return loop_device_make(fd, open_flags, ret); +} + +LoopDevice* loop_device_unref(LoopDevice *d) { + if (!d) + return NULL; + + if (d->fd >= 0) { + + if (d->nr >= 0 && !d->relinquished) { + if (ioctl(d->fd, LOOP_CLR_FD) < 0) + log_debug_errno(errno, "Failed to clear loop device: %m"); + + } + + safe_close(d->fd); + } + + if (d->nr >= 0 && !d->relinquished) { + _cleanup_close_ int control = -1; + + control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); + if (control < 0) + log_debug_errno(errno, "Failed to open loop control device: %m"); + else { + if (ioctl(control, LOOP_CTL_REMOVE, d->nr) < 0) + log_debug_errno(errno, "Failed to remove loop device: %m"); + } + } + + free(d->node); + return mfree(d); +} + +void loop_device_relinquish(LoopDevice *d) { + assert(d); + + /* Don't attempt to clean up the loop device anymore from this point on. Leave the clean-ing up to the kernel + * itself, using the loop device "auto-clear" logic we already turned on when creating the device. */ + + d->relinquished = true; +} diff --git a/src/shared/loop-util.h b/src/shared/loop-util.h new file mode 100644 index 0000000..d78466c --- /dev/null +++ b/src/shared/loop-util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "macro.h" + +typedef struct LoopDevice LoopDevice; + +/* Some helpers for setting up loopback block devices */ + +struct LoopDevice { + int fd; + int nr; + char *node; + bool relinquished; +}; + +int loop_device_make(int fd, int open_flags, LoopDevice **ret); +int loop_device_make_by_path(const char *path, int open_flags, LoopDevice **ret); + +LoopDevice* loop_device_unref(LoopDevice *d); +DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref); + +void loop_device_relinquish(LoopDevice *d); diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c new file mode 100644 index 0000000..af06ab2 --- /dev/null +++ b/src/shared/machine-image.c @@ -0,0 +1,1249 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/file.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <unistd.h> +#include <linux/fs.h> + +#include "alloc-util.h" +#include "btrfs-util.h" +#include "chattr-util.h" +#include "copy.h" +#include "dirent-util.h" +#include "dissect-image.h" +#include "env-file.h" +#include "env-util.h" +#include "fd-util.h" +#include "fs-util.h" +#include "hashmap.h" +#include "hostname-util.h" +#include "id128-util.h" +#include "lockfile-util.h" +#include "log.h" +#include "loop-util.h" +#include "machine-image.h" +#include "macro.h" +#include "mkdir.h" +#include "os-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "string-table.h" +#include "string-util.h" +#include "strv.h" +#include "time-util.h" +#include "utf8.h" +#include "util.h" +#include "xattr-util.h" + +static const char* const image_search_path[_IMAGE_CLASS_MAX] = { + [IMAGE_MACHINE] = "/etc/machines\0" /* only place symlinks here */ + "/run/machines\0" /* and here too */ + "/var/lib/machines\0" /* the main place for images */ + "/var/lib/container\0" /* legacy */ + "/usr/local/lib/machines\0" + "/usr/lib/machines\0", + + [IMAGE_PORTABLE] = "/etc/portables\0" /* only place symlinks here */ + "/run/portables\0" /* and here too */ + "/var/lib/portables\0" /* the main place for images */ + "/usr/local/lib/portables\0" + "/usr/lib/portables\0", +}; + +static Image *image_free(Image *i) { + assert(i); + + free(i->name); + free(i->path); + + free(i->hostname); + strv_free(i->machine_info); + strv_free(i->os_release); + + return mfree(i); +} + +DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free); +DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func, + Image, image_unref); + +static char **image_settings_path(Image *image) { + _cleanup_strv_free_ char **l = NULL; + const char *fn, *s; + unsigned i = 0; + + assert(image); + + l = new0(char*, 4); + if (!l) + return NULL; + + fn = strjoina(image->name, ".nspawn"); + + FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") { + l[i] = strappend(s, fn); + if (!l[i]) + return NULL; + + i++; + } + + l[i] = file_in_same_dir(image->path, fn); + if (!l[i]) + return NULL; + + return TAKE_PTR(l); +} + +static char *image_roothash_path(Image *image) { + const char *fn; + + assert(image); + + fn = strjoina(image->name, ".roothash"); + + return file_in_same_dir(image->path, fn); +} + +static int image_new( + ImageType t, + const char *pretty, + const char *path, + const char *filename, + bool read_only, + usec_t crtime, + usec_t mtime, + Image **ret) { + + _cleanup_(image_unrefp) Image *i = NULL; + + assert(t >= 0); + assert(t < _IMAGE_TYPE_MAX); + assert(pretty); + assert(filename); + assert(ret); + + i = new0(Image, 1); + if (!i) + return -ENOMEM; + + i->n_ref = 1; + i->type = t; + i->read_only = read_only; + i->crtime = crtime; + i->mtime = mtime; + i->usage = i->usage_exclusive = (uint64_t) -1; + i->limit = i->limit_exclusive = (uint64_t) -1; + + i->name = strdup(pretty); + if (!i->name) + return -ENOMEM; + + if (path) + i->path = strjoin(path, "/", filename); + else + i->path = strdup(filename); + if (!i->path) + return -ENOMEM; + + path_simplify(i->path, false); + + *ret = TAKE_PTR(i); + + return 0; +} + +static int extract_pretty(const char *path, const char *suffix, char **ret) { + _cleanup_free_ char *name = NULL; + const char *p; + size_t n; + + assert(path); + assert(ret); + + p = last_path_component(path); + n = strcspn(p, "/"); + + name = strndup(p, n); + if (!name) + return -ENOMEM; + + if (suffix) { + char *e; + + e = endswith(name, suffix); + if (!e) + return -EINVAL; + + *e = 0; + } + + if (!image_name_is_valid(name)) + return -EINVAL; + + *ret = TAKE_PTR(name); + return 0; +} + +static int image_make( + const char *pretty, + int dfd, + const char *path, + const char *filename, + const struct stat *st, + Image **ret) { + + _cleanup_free_ char *pretty_buffer = NULL; + struct stat stbuf; + bool read_only; + int r; + + assert(dfd >= 0 || dfd == AT_FDCWD); + assert(filename); + + /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block + * devices into /var/lib/machines/, and treat them normally. + * + * This function returns -ENOENT if we can't find the image after all, and -EMEDIUMTYPE if it's not a file we + * recognize. */ + + if (!st) { + if (fstatat(dfd, filename, &stbuf, 0) < 0) + return -errno; + + st = &stbuf; + } + + read_only = + (path && path_startswith(path, "/usr")) || + (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS); + + if (S_ISDIR(st->st_mode)) { + _cleanup_close_ int fd = -1; + unsigned file_attr = 0; + + if (!ret) + return 0; + + if (!pretty) { + r = extract_pretty(filename, NULL, &pretty_buffer); + if (r < 0) + return r; + + pretty = pretty_buffer; + } + + fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY); + if (fd < 0) + return -errno; + + /* btrfs subvolumes have inode 256 */ + if (st->st_ino == 256) { + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (r) { + BtrfsSubvolInfo info; + + /* It's a btrfs subvolume */ + + r = btrfs_subvol_get_info_fd(fd, 0, &info); + if (r < 0) + return r; + + r = image_new(IMAGE_SUBVOLUME, + pretty, + path, + filename, + info.read_only || read_only, + info.otime, + 0, + ret); + if (r < 0) + return r; + + if (btrfs_quota_scan_ongoing(fd) == 0) { + BtrfsQuotaInfo quota; + + r = btrfs_subvol_get_subtree_quota_fd(fd, 0, "a); + if (r >= 0) { + (*ret)->usage = quota.referenced; + (*ret)->usage_exclusive = quota.exclusive; + + (*ret)->limit = quota.referenced_max; + (*ret)->limit_exclusive = quota.exclusive_max; + } + } + + return 0; + } + } + + /* If the IMMUTABLE bit is set, we consider the + * directory read-only. Since the ioctl is not + * supported everywhere we ignore failures. */ + (void) read_attr_fd(fd, &file_attr); + + /* It's just a normal directory. */ + r = image_new(IMAGE_DIRECTORY, + pretty, + path, + filename, + read_only || (file_attr & FS_IMMUTABLE_FL), + 0, + 0, + ret); + if (r < 0) + return r; + + return 0; + + } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) { + usec_t crtime = 0; + + /* It's a RAW disk image */ + + if (!ret) + return 0; + + (void) fd_getcrtime_at(dfd, filename, &crtime, 0); + + if (!pretty) { + r = extract_pretty(filename, ".raw", &pretty_buffer); + if (r < 0) + return r; + + pretty = pretty_buffer; + } + + r = image_new(IMAGE_RAW, + pretty, + path, + filename, + !(st->st_mode & 0222) || read_only, + crtime, + timespec_load(&st->st_mtim), + ret); + if (r < 0) + return r; + + (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512; + (*ret)->limit = (*ret)->limit_exclusive = st->st_size; + + return 0; + + } else if (S_ISBLK(st->st_mode)) { + _cleanup_close_ int block_fd = -1; + uint64_t size = UINT64_MAX; + + /* A block device */ + + if (!ret) + return 0; + + if (!pretty) { + r = extract_pretty(filename, NULL, &pretty_buffer); + if (r < 0) + return r; + + pretty = pretty_buffer; + } + + block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY); + if (block_fd < 0) + log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename); + else { + /* Refresh stat data after opening the node */ + if (fstat(block_fd, &stbuf) < 0) + return -errno; + st = &stbuf; + + if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */ + return -ENOTTY; + + if (!read_only) { + int state = 0; + + if (ioctl(block_fd, BLKROGET, &state) < 0) + log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename); + else if (state) + read_only = true; + } + + if (ioctl(block_fd, BLKGETSIZE64, &size) < 0) + log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path, filename); + + block_fd = safe_close(block_fd); + } + + r = image_new(IMAGE_BLOCK, + pretty, + path, + filename, + !(st->st_mode & 0222) || read_only, + 0, + 0, + ret); + if (r < 0) + return r; + + if (size != 0 && size != UINT64_MAX) + (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size; + + return 0; + } + + return -EMEDIUMTYPE; +} + +int image_find(ImageClass class, const char *name, Image **ret) { + const char *path; + int r; + + assert(class >= 0); + assert(class < _IMAGE_CLASS_MAX); + assert(name); + + /* There are no images with invalid names */ + if (!image_name_is_valid(name)) + return -ENOENT; + + NULSTR_FOREACH(path, image_search_path[class]) { + _cleanup_closedir_ DIR *d = NULL; + struct stat st; + + d = opendir(path); + if (!d) { + if (errno == ENOENT) + continue; + + return -errno; + } + + /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people to + * symlink block devices into the search path */ + if (fstatat(dirfd(d), name, &st, 0) < 0) { + _cleanup_free_ char *raw = NULL; + + if (errno != ENOENT) + return -errno; + + raw = strappend(name, ".raw"); + if (!raw) + return -ENOMEM; + + if (fstatat(dirfd(d), raw, &st, 0) < 0) { + + if (errno == ENOENT) + continue; + + return -errno; + } + + if (!S_ISREG(st.st_mode)) + continue; + + r = image_make(name, dirfd(d), path, raw, &st, ret); + + } else { + if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode)) + continue; + + r = image_make(name, dirfd(d), path, name, &st, ret); + } + if (IN_SET(r, -ENOENT, -EMEDIUMTYPE)) + continue; + if (r < 0) + return r; + + if (ret) + (*ret)->discoverable = true; + + return 1; + } + + if (class == IMAGE_MACHINE && streq(name, ".host")) { + r = image_make(".host", AT_FDCWD, NULL, "/", NULL, ret); + if (r < 0) + return r; + + if (ret) + (*ret)->discoverable = true; + + return r; + } + + return -ENOENT; +}; + +int image_from_path(const char *path, Image **ret) { + + /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether + * the image is in the image search path. And if it is we don't know if the path we used is actually not + * overridden by another, different image earlier in the search path */ + + if (path_equal(path, "/")) + return image_make(".host", AT_FDCWD, NULL, "/", NULL, ret); + + return image_make(NULL, AT_FDCWD, NULL, path, NULL, ret); +} + +int image_find_harder(ImageClass class, const char *name_or_path, Image **ret) { + if (image_name_is_valid(name_or_path)) + return image_find(class, name_or_path, ret); + + return image_from_path(name_or_path, ret); +} + +int image_discover(ImageClass class, Hashmap *h) { + const char *path; + int r; + + assert(class >= 0); + assert(class < _IMAGE_CLASS_MAX); + assert(h); + + NULSTR_FOREACH(path, image_search_path[class]) { + _cleanup_closedir_ DIR *d = NULL; + struct dirent *de; + + d = opendir(path); + if (!d) { + if (errno == ENOENT) + continue; + + return -errno; + } + + FOREACH_DIRENT_ALL(de, d, return -errno) { + _cleanup_(image_unrefp) Image *image = NULL; + _cleanup_free_ char *truncated = NULL; + const char *pretty; + struct stat st; + + if (dot_or_dot_dot(de->d_name)) + continue; + + /* As mentioned above, we follow symlinks on this fstatat(), because we want to permit people + * to symlink block devices into the search path */ + if (fstatat(dirfd(d), de->d_name, &st, 0) < 0) { + if (errno == ENOENT) + continue; + + return -errno; + } + + if (S_ISREG(st.st_mode)) { + const char *e; + + e = endswith(de->d_name, ".raw"); + if (!e) + continue; + + truncated = strndup(de->d_name, e - de->d_name); + if (!truncated) + return -ENOMEM; + + pretty = truncated; + } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode)) + pretty = de->d_name; + else + continue; + + if (!image_name_is_valid(pretty)) + continue; + + if (hashmap_contains(h, pretty)) + continue; + + r = image_make(pretty, dirfd(d), path, de->d_name, &st, &image); + if (IN_SET(r, -ENOENT, -EMEDIUMTYPE)) + continue; + if (r < 0) + return r; + + image->discoverable = true; + + r = hashmap_put(h, image->name, image); + if (r < 0) + return r; + + image = NULL; + } + } + + if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) { + _cleanup_(image_unrefp) Image *image = NULL; + + r = image_make(".host", AT_FDCWD, NULL, "/", NULL, &image); + if (r < 0) + return r; + + image->discoverable = true; + + r = hashmap_put(h, image->name, image); + if (r < 0) + return r; + + image = NULL; + } + + return 0; +} + +int image_remove(Image *i) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + _cleanup_strv_free_ char **settings = NULL; + _cleanup_free_ char *roothash = NULL; + char **j; + int r; + + assert(i); + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + settings = image_settings_path(i); + if (!settings) + return -ENOMEM; + + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + + /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the + * big guns */ + if (unlink(i->path) < 0) { + r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); + if (r < 0) + return r; + } + + break; + + case IMAGE_DIRECTORY: + /* Allow deletion of read-only directories */ + (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL); + r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); + if (r < 0) + return r; + + break; + + case IMAGE_BLOCK: + + /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node + * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink + * the thing (it's most likely a symlink after all). */ + + if (path_startswith(i->path, "/dev")) + break; + + _fallthrough_; + case IMAGE_RAW: + if (unlink(i->path) < 0) + return -errno; + break; + + default: + return -EOPNOTSUPP; + } + + STRV_FOREACH(j, settings) { + if (unlink(*j) < 0 && errno != ENOENT) + log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j); + } + + if (unlink(roothash) < 0 && errno != ENOENT) + log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash); + + return 0; +} + +static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) { + _cleanup_free_ char *rs = NULL; + const char *fn; + + fn = strjoina(new_name, suffix); + + rs = file_in_same_dir(path, fn); + if (!rs) + return -ENOMEM; + + return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs); +} + +int image_rename(Image *i, const char *new_name) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT; + _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL; + _cleanup_strv_free_ char **settings = NULL; + unsigned file_attr = 0; + char **j; + int r; + + assert(i); + + if (!image_name_is_valid(new_name)) + return -EINVAL; + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + settings = image_settings_path(i); + if (!settings) + return -ENOMEM; + + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + /* Make sure nobody takes the new name, between the time we + * checked it is currently unused in all search paths, and the + * time we take possession of it */ + r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); + if (r < 0) + return r; + + r = image_find(IMAGE_MACHINE, new_name, NULL); + if (r >= 0) + return -EEXIST; + if (r != -ENOENT) + return r; + + switch (i->type) { + + case IMAGE_DIRECTORY: + /* Turn of the immutable bit while we rename the image, so that we can rename it */ + (void) read_attr_path(i->path, &file_attr); + + if (file_attr & FS_IMMUTABLE_FL) + (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL); + + _fallthrough_; + case IMAGE_SUBVOLUME: + new_path = file_in_same_dir(i->path, new_name); + break; + + case IMAGE_BLOCK: + + /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */ + if (path_startswith(i->path, "/dev")) + return -EROFS; + + new_path = file_in_same_dir(i->path, new_name); + break; + + case IMAGE_RAW: { + const char *fn; + + fn = strjoina(new_name, ".raw"); + new_path = file_in_same_dir(i->path, fn); + break; + } + + default: + return -EOPNOTSUPP; + } + + if (!new_path) + return -ENOMEM; + + nn = strdup(new_name); + if (!nn) + return -ENOMEM; + + r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path); + if (r < 0) + return r; + + /* Restore the immutable bit, if it was set before */ + if (file_attr & FS_IMMUTABLE_FL) + (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL); + + free_and_replace(i->path, new_path); + free_and_replace(i->name, nn); + + STRV_FOREACH(j, settings) { + r = rename_auxiliary_file(*j, new_name, ".nspawn"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j); + } + + r = rename_auxiliary_file(roothash, new_name, ".roothash"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash); + + return 0; +} + +static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) { + _cleanup_free_ char *rs = NULL; + const char *fn; + + fn = strjoina(new_name, suffix); + + rs = file_in_same_dir(path, fn); + if (!rs) + return -ENOMEM; + + return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK); +} + +int image_clone(Image *i, const char *new_name, bool read_only) { + _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT; + _cleanup_strv_free_ char **settings = NULL; + _cleanup_free_ char *roothash = NULL; + const char *new_path; + char **j; + int r; + + assert(i); + + if (!image_name_is_valid(new_name)) + return -EINVAL; + + settings = image_settings_path(i); + if (!settings) + return -ENOMEM; + + roothash = image_roothash_path(i); + if (!roothash) + return -ENOMEM; + + /* Make sure nobody takes the new name, between the time we + * checked it is currently unused in all search paths, and the + * time we take possession of it */ + r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); + if (r < 0) + return r; + + r = image_find(IMAGE_MACHINE, new_name, NULL); + if (r >= 0) + return -EEXIST; + if (r != -ENOENT) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + case IMAGE_DIRECTORY: + /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain + * directory. */ + + new_path = strjoina("/var/lib/machines/", new_name); + + r = btrfs_subvol_snapshot(i->path, new_path, + (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | + BTRFS_SNAPSHOT_FALLBACK_COPY | + BTRFS_SNAPSHOT_FALLBACK_DIRECTORY | + BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE | + BTRFS_SNAPSHOT_RECURSIVE | + BTRFS_SNAPSHOT_QUOTA); + if (r >= 0) + /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ + (void) btrfs_subvol_auto_qgroup(new_path, 0, true); + + break; + + case IMAGE_RAW: + new_path = strjoina("/var/lib/machines/", new_name, ".raw"); + + r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK); + break; + + case IMAGE_BLOCK: + default: + return -EOPNOTSUPP; + } + + if (r < 0) + return r; + + STRV_FOREACH(j, settings) { + r = clone_auxiliary_file(*j, new_name, ".nspawn"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j); + } + + r = clone_auxiliary_file(roothash, new_name, ".roothash"); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash); + + return 0; +} + +int image_read_only(Image *i, bool b) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + int r; + + assert(i); + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + /* Make sure we don't interfere with a running nspawn */ + r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + + /* Note that we set the flag only on the top-level + * subvolume of the image. */ + + r = btrfs_subvol_set_read_only(i->path, b); + if (r < 0) + return r; + + break; + + case IMAGE_DIRECTORY: + /* For simple directory trees we cannot use the access + mode of the top-level directory, since it has an + effect on the container itself. However, we can + use the "immutable" flag, to at least make the + top-level directory read-only. It's not as good as + a read-only subvolume, but at least something, and + we can read the value back. */ + + r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL); + if (r < 0) + return r; + + break; + + case IMAGE_RAW: { + struct stat st; + + if (stat(i->path, &st) < 0) + return -errno; + + if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0) + return -errno; + + /* If the images is now read-only, it's a good time to + * defrag it, given that no write patterns will + * fragment it again. */ + if (b) + (void) btrfs_defrag(i->path); + break; + } + + case IMAGE_BLOCK: { + _cleanup_close_ int fd = -1; + struct stat st; + int state = b; + + fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + if (!S_ISBLK(st.st_mode)) + return -ENOTTY; + + if (ioctl(fd, BLKROSET, &state) < 0) + return -errno; + + break; + } + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) { + _cleanup_free_ char *p = NULL; + LockFile t = LOCK_FILE_INIT; + struct stat st; + int r; + + assert(path); + assert(global); + assert(local); + + /* Locks an image path. This actually creates two locks: one + * "local" one, next to the image path itself, which might be + * shared via NFS. And another "global" one, in /run, that + * uses the device/inode number. This has the benefit that we + * can even lock a tree that is a mount point, correctly. */ + + if (!path_is_absolute(path)) + return -EINVAL; + + if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { + *local = *global = (LockFile) LOCK_FILE_INIT; + return 0; + } + + if (path_equal(path, "/")) + return -EBUSY; + + if (stat(path, &st) >= 0) { + if (S_ISBLK(st.st_mode)) + r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev)); + else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode)) + r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino); + else + return -ENOTTY; + + if (r < 0) + return -ENOMEM; + } + + /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since + * block devices are device local anyway. */ + if (!path_startswith(path, "/dev")) { + r = make_lock_file_for(path, operation, &t); + if (r < 0) { + if ((operation & LOCK_SH) && r == -EROFS) + log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path); + else + return r; + } + } + + if (p) { + mkdir_p("/run/systemd/nspawn/locks", 0700); + + r = make_lock_file(p, operation, global); + if (r < 0) { + release_lock_file(&t); + return r; + } + } else + *global = (LockFile) LOCK_FILE_INIT; + + *local = t; + return 0; +} + +int image_set_limit(Image *i, uint64_t referenced_max) { + assert(i); + + if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i)) + return -EROFS; + + if (i->type != IMAGE_SUBVOLUME) + return -EOPNOTSUPP; + + /* We set the quota both for the subvolume as well as for the + * subtree. The latter is mostly for historical reasons, since + * we didn't use to have a concept of subtree quota, and hence + * only modified the subvolume quota. */ + + (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max); + (void) btrfs_subvol_auto_qgroup(i->path, 0, true); + return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max); +} + +int image_read_metadata(Image *i) { + _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; + int r; + + assert(i); + + r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock); + if (r < 0) + return r; + + switch (i->type) { + + case IMAGE_SUBVOLUME: + case IMAGE_DIRECTORY: { + _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL; + sd_id128_t machine_id = SD_ID128_NULL; + _cleanup_free_ char *hostname = NULL; + _cleanup_free_ char *path = NULL; + + r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name); + else if (r >= 0) { + r = read_etc_hostname(path, &hostname); + if (r < 0) + log_debug_errno(errno, "Failed to read /etc/hostname of image %s: %m", i->name); + } + + path = mfree(path); + + r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name); + else if (r >= 0) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + log_debug_errno(errno, "Failed to open %s: %m", path); + else { + r = id128_read_fd(fd, ID128_PLAIN, &machine_id); + if (r < 0) + log_debug_errno(r, "Image %s contains invalid machine ID.", i->name); + } + } + + path = mfree(path); + + r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path); + if (r < 0 && r != -ENOENT) + log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name); + else if (r >= 0) { + r = load_env_file_pairs(NULL, path, &machine_info); + if (r < 0) + log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name); + } + + r = load_os_release_pairs(i->path, &os_release); + if (r < 0) + log_debug_errno(r, "Failed to read os-release in image, ignoring: %m"); + + free_and_replace(i->hostname, hostname); + i->machine_id = machine_id; + strv_free_and_replace(i->machine_info, machine_info); + strv_free_and_replace(i->os_release, os_release); + + break; + } + + case IMAGE_RAW: + case IMAGE_BLOCK: { + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; + + r = loop_device_make_by_path(i->path, O_RDONLY, &d); + if (r < 0) + return r; + + r = dissect_image(d->fd, NULL, 0, DISSECT_IMAGE_REQUIRE_ROOT, &m); + if (r < 0) + return r; + + r = dissected_image_acquire_metadata(m); + if (r < 0) + return r; + + free_and_replace(i->hostname, m->hostname); + i->machine_id = m->machine_id; + strv_free_and_replace(i->machine_info, m->machine_info); + strv_free_and_replace(i->os_release, m->os_release); + + break; + } + + default: + return -EOPNOTSUPP; + } + + i->metadata_valid = true; + + return 0; +} + +int image_name_lock(const char *name, int operation, LockFile *ret) { + const char *p; + + assert(name); + assert(ret); + + /* Locks an image name, regardless of the precise path used. */ + + if (!image_name_is_valid(name)) + return -EINVAL; + + if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) { + *ret = (LockFile) LOCK_FILE_INIT; + return 0; + } + + if (streq(name, ".host")) + return -EBUSY; + + mkdir_p("/run/systemd/nspawn/locks", 0700); + p = strjoina("/run/systemd/nspawn/locks/name-", name); + + return make_lock_file(p, operation, ret); +} + +bool image_name_is_valid(const char *s) { + if (!filename_is_valid(s)) + return false; + + if (string_has_cc(s, NULL)) + return false; + + if (!utf8_is_valid(s)) + return false; + + /* Temporary files for atomically creating new files */ + if (startswith(s, ".#")) + return false; + + return true; +} + +bool image_in_search_path(ImageClass class, const char *image) { + const char *path; + + assert(image); + + NULSTR_FOREACH(path, image_search_path[class]) { + const char *p; + size_t k; + + p = path_startswith(image, path); + if (!p) + continue; + + /* Make sure there's a filename following */ + k = strcspn(p, "/"); + if (k == 0) + continue; + + p += k; + + /* Accept trailing slashes */ + if (p[strspn(p, "/")] == 0) + return true; + + } + + return false; +} + +static const char* const image_type_table[_IMAGE_TYPE_MAX] = { + [IMAGE_DIRECTORY] = "directory", + [IMAGE_SUBVOLUME] = "subvolume", + [IMAGE_RAW] = "raw", + [IMAGE_BLOCK] = "block", +}; + +DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType); diff --git a/src/shared/machine-image.h b/src/shared/machine-image.h new file mode 100644 index 0000000..9fd4589 --- /dev/null +++ b/src/shared/machine-image.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "sd-id128.h" + +#include "hashmap.h" +#include "lockfile-util.h" +#include "macro.h" +#include "path-util.h" +#include "string-util.h" +#include "time-util.h" + +typedef enum ImageClass { + IMAGE_MACHINE, + IMAGE_PORTABLE, + _IMAGE_CLASS_MAX, + _IMAGE_CLASS_INVALID = -1 +} ImageClass; + +typedef enum ImageType { + IMAGE_DIRECTORY, + IMAGE_SUBVOLUME, + IMAGE_RAW, + IMAGE_BLOCK, + _IMAGE_TYPE_MAX, + _IMAGE_TYPE_INVALID = -1 +} ImageType; + +typedef struct Image { + unsigned n_ref; + + ImageType type; + char *name; + char *path; + bool read_only; + + usec_t crtime; + usec_t mtime; + + uint64_t usage; + uint64_t usage_exclusive; + uint64_t limit; + uint64_t limit_exclusive; + + char *hostname; + sd_id128_t machine_id; + char **machine_info; + char **os_release; + + bool metadata_valid:1; + bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */ + + void *userdata; +} Image; + +Image *image_unref(Image *i); +Image *image_ref(Image *i); + +DEFINE_TRIVIAL_CLEANUP_FUNC(Image*, image_unref); + +int image_find(ImageClass class, const char *name, Image **ret); +int image_from_path(const char *path, Image **ret); +int image_find_harder(ImageClass class, const char *name_or_path, Image **ret); +int image_discover(ImageClass class, Hashmap *map); + +int image_remove(Image *i); +int image_rename(Image *i, const char *new_name); +int image_clone(Image *i, const char *new_name, bool read_only); +int image_read_only(Image *i, bool b); + +const char* image_type_to_string(ImageType t) _const_; +ImageType image_type_from_string(const char *s) _pure_; + +bool image_name_is_valid(const char *s) _pure_; + +int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local); +int image_name_lock(const char *name, int operation, LockFile *ret); + +int image_set_limit(Image *i, uint64_t referenced_max); + +int image_read_metadata(Image *i); + +bool image_in_search_path(ImageClass class, const char *image); + +static inline bool IMAGE_IS_HIDDEN(const struct Image *i) { + assert(i); + + return i->name && i->name[0] == '.'; +} + +static inline bool IMAGE_IS_VENDOR(const struct Image *i) { + assert(i); + + return i->path && path_startswith(i->path, "/usr"); +} + +static inline bool IMAGE_IS_HOST(const struct Image *i) { + assert(i); + + if (i->name && streq(i->name, ".host")) + return true; + + if (i->path && path_equal(i->path, "/")) + return true; + + return false; +} + +extern const struct hash_ops image_hash_ops; diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c new file mode 100644 index 0000000..de4f704 --- /dev/null +++ b/src/shared/machine-pool.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <sys/statfs.h> + +#include "btrfs-util.h" +#include "label.h" +#include "machine-pool.h" +#include "missing.h" +#include "stat-util.h" + +static int check_btrfs(void) { + struct statfs sfs; + + if (statfs("/var/lib/machines", &sfs) < 0) { + if (errno != ENOENT) + return -errno; + + if (statfs("/var/lib", &sfs) < 0) + return -errno; + } + + return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC); +} + +int setup_machine_directory(sd_bus_error *error) { + int r; + + r = check_btrfs(); + if (r < 0) + return sd_bus_error_set_errnof(error, r, "Failed to determine whether /var/lib/machines is located on btrfs: %m"); + if (r == 0) + return 0; + + (void) btrfs_subvol_make_label("/var/lib/machines"); + + r = btrfs_quota_enable("/var/lib/machines", true); + if (r < 0) + log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m"); + + r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true); + if (r < 0) + log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m"); + + return 1; +} diff --git a/src/shared/machine-pool.h b/src/shared/machine-pool.h new file mode 100644 index 0000000..6f59a18 --- /dev/null +++ b/src/shared/machine-pool.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdint.h> + +#include "sd-bus.h" + +int setup_machine_directory(sd_bus_error *error); diff --git a/src/shared/main-func.h b/src/shared/main-func.h new file mode 100644 index 0000000..3c182e8 --- /dev/null +++ b/src/shared/main-func.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdlib.h> + +#include "pager.h" +#include "selinux-util.h" +#include "spawn-ask-password-agent.h" +#include "spawn-polkit-agent.h" +#include "static-destruct.h" + +#define _DEFINE_MAIN_FUNCTION(intro, impl, ret) \ + int main(int argc, char *argv[]) { \ + int r; \ + intro; \ + r = impl; \ + static_destruct(); \ + ask_password_agent_close(); \ + polkit_agent_close(); \ + mac_selinux_finish(); \ + pager_close(); \ + return ret; \ + } + +/* Negative return values from impl are mapped to EXIT_FAILURE, and + * everything else means success! */ +#define DEFINE_MAIN_FUNCTION(impl) \ + _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : EXIT_SUCCESS) + +/* Zero is mapped to EXIT_SUCCESS, negative values are mapped to EXIT_FAILURE, + * and postive values are propagated. + * Note: "true" means failure! */ +#define DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(impl) \ + _DEFINE_MAIN_FUNCTION(,impl(argc, argv), r < 0 ? EXIT_FAILURE : r) diff --git a/src/shared/meson.build b/src/shared/meson.build new file mode 100644 index 0000000..99d6ba1 --- /dev/null +++ b/src/shared/meson.build @@ -0,0 +1,277 @@ +# SPDX-License-Identifier: LGPL-2.1+ + +shared_sources = files(''' + acl-util.h + acpi-fpdt.c + acpi-fpdt.h + apparmor-util.c + apparmor-util.h + ask-password-api.c + ask-password-api.h + barrier.c + barrier.h + base-filesystem.c + base-filesystem.h + bitmap.c + bitmap.h + blkid-util.h + boot-timestamps.c + boot-timestamps.h + bootspec.c + bootspec.h + bpf-program.c + bpf-program.h + bus-unit-util.c + bus-unit-util.h + bus-util.c + bus-util.h + calendarspec.c + calendarspec.h + cgroup-show.c + cgroup-show.h + clean-ipc.c + clean-ipc.h + clock-util.c + clock-util.h + condition.c + condition.h + conf-parser.c + conf-parser.h + cpu-set-util.c + cpu-set-util.h + crypt-util.c + crypt-util.h + daemon-util.h + dev-setup.c + dev-setup.h + dissect-image.c + dissect-image.h + dns-domain.c + dns-domain.h + dropin.c + dropin.h + efivars.c + efivars.h + enable-mempool.c + env-file-label.c + env-file-label.h + exec-util.c + exec-util.h + exit-status.c + exit-status.h + fdset.c + fdset.h + fileio-label.c + fileio-label.h + firewall-util.h + format-table.c + format-table.h + fstab-util.c + fstab-util.h + generator.c + generator.h + gpt.h + id128-print.c + id128-print.h + ima-util.c + ima-util.h + import-util.c + import-util.h + initreq.h + install-printf.c + install-printf.h + install.c + install.h + ip-protocol-list.c + ip-protocol-list.h + journal-importer.c + journal-importer.h + journal-util.c + journal-util.h + json-internal.h + json.c + json.h + lockfile-util.c + lockfile-util.h + logs-show.c + logs-show.h + loop-util.c + loop-util.h + machine-image.c + machine-image.h + machine-pool.c + machine-pool.h + main-func.h + module-util.h + mount-util.c + mount-util.h + nscd-flush.c + nscd-flush.h + nsflags.c + nsflags.h + os-util.c + os-util.h + output-mode.c + output-mode.h + pager.c + pager.h + path-lookup.c + path-lookup.h + pretty-print.c + pretty-print.h + ptyfwd.c + ptyfwd.h + reboot-util.c + reboot-util.h + resolve-util.c + resolve-util.h + seccomp-util.h + securebits-util.c + securebits-util.h + serialize.c + serialize.h + sleep-config.c + sleep-config.h + spawn-ask-password-agent.c + spawn-ask-password-agent.h + spawn-polkit-agent.c + spawn-polkit-agent.h + specifier.c + specifier.h + switch-root.c + switch-root.h + sysctl-util.c + sysctl-util.h + tmpfile-util-label.c + tmpfile-util-label.h + tomoyo-util.c + tomoyo-util.h + udev-util.c + udev-util.h + uid-range.c + uid-range.h + utmp-wtmp.h + verbs.c + verbs.h + vlan-util.c + vlan-util.h + volatile-util.c + volatile-util.h + watchdog.c + watchdog.h + web-util.c + web-util.h + wireguard-netlink.h + xml.c + xml.h +'''.split()) + +if get_option('tests') != 'false' + shared_sources += files('tests.c', 'tests.h') +endif + +test_tables_h = files('test-tables.h') +shared_sources += test_tables_h + +if conf.get('HAVE_ACL') == 1 + shared_sources += files('acl-util.c') +endif + +if conf.get('ENABLE_UTMP') == 1 + shared_sources += files('utmp-wtmp.c') +endif + +if conf.get('HAVE_SECCOMP') == 1 + shared_sources += files('seccomp-util.c') +endif + +if conf.get('HAVE_LIBIPTC') == 1 + shared_sources += files('firewall-util.c') +endif + +if conf.get('HAVE_KMOD') == 1 + shared_sources += files('module-util.c') +endif + +generate_ip_protocol_list = find_program('generate-ip-protocol-list.sh') +ip_protocol_list_txt = custom_target( + 'ip-protocol-list.txt', + output : 'ip-protocol-list.txt', + command : [generate_ip_protocol_list, cpp], + capture : true) + +fname = 'ip-protocol-from-name.gperf' +gperf_file = custom_target( + fname, + input : ip_protocol_list_txt, + output : fname, + command : [generate_gperfs, 'ip_protocol', 'IPPROTO_', '@INPUT@'], + capture : true) + +fname = 'ip-protocol-from-name.h' +target1 = custom_target( + fname, + input : gperf_file, + output : fname, + command : [gperf, + '-L', 'ANSI-C', '-t', '--ignore-case', + '-N', 'lookup_ip_protocol', + '-H', 'hash_ip_protocol_name', + '-p', '-C', + '@INPUT@'], + capture : true) + +fname = 'ip-protocol-to-name.h' +awkscript = 'ip-protocol-to-name.awk' +target2 = custom_target( + fname, + input : [awkscript, ip_protocol_list_txt], + output : fname, + command : [awk, '-f', '@INPUT0@', '@INPUT1@'], + capture : true) + +shared_generated_gperf_headers = [target1, target2] +shared_sources += shared_generated_gperf_headers + +libshared_name = 'systemd-shared-@0@'.format(meson.project_version()) + +libshared_deps = [threads, + librt, + libcap, + libacl, + libcryptsetup, + libgcrypt, + libiptc, + libkmod, + libmount, + libseccomp, + libselinux, + libidn, + libxz, + liblz4, + libblkid] + +libshared_sym_path = '@0@/libshared.sym'.format(meson.current_source_dir()) + +libshared_static = static_library( + libshared_name, + shared_sources, + include_directories : includes, + dependencies : libshared_deps, + c_args : ['-fvisibility=default']) + +libshared = shared_library( + libshared_name, + libudev_sources, + include_directories : includes, + link_args : ['-shared', + '-Wl,--version-script=' + libshared_sym_path], + link_whole : [libshared_static, + libbasic, + libbasic_gcrypt, + libsystemd_static, + libjournal_client], + c_args : ['-fvisibility=default'], + dependencies : libshared_deps, + install : true, + install_dir : rootlibexecdir) diff --git a/src/shared/module-util.c b/src/shared/module-util.c new file mode 100644 index 0000000..a34fe8f --- /dev/null +++ b/src/shared/module-util.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> + +#include "module-util.h" + +int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose) { + const int probe_flags = KMOD_PROBE_APPLY_BLACKLIST; + struct kmod_list *itr; + _cleanup_(kmod_module_unref_listp) struct kmod_list *modlist = NULL; + int r = 0; + + /* verbose==true means we should log at non-debug level if we + * fail to find or load the module. */ + + log_debug("Loading module: %s", module); + + r = kmod_module_new_from_lookup(ctx, module, &modlist); + if (r < 0) + return log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r, + "Failed to lookup module alias '%s': %m", module); + + if (!modlist) { + log_full_errno(verbose ? LOG_ERR : LOG_DEBUG, r, + "Failed to find module '%s'", module); + return -ENOENT; + } + + kmod_list_foreach(itr, modlist) { + _cleanup_(kmod_module_unrefp) struct kmod_module *mod = NULL; + int state, err; + + mod = kmod_module_get_module(itr); + state = kmod_module_get_initstate(mod); + + switch (state) { + case KMOD_MODULE_BUILTIN: + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Module '%s' is builtin", kmod_module_get_name(mod)); + break; + + case KMOD_MODULE_LIVE: + log_debug("Module '%s' is already loaded", kmod_module_get_name(mod)); + break; + + default: + err = kmod_module_probe_insert_module(mod, probe_flags, + NULL, NULL, NULL, NULL); + if (err == 0) + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Inserted module '%s'", kmod_module_get_name(mod)); + else if (err == KMOD_PROBE_APPLY_BLACKLIST) + log_full(verbose ? LOG_INFO : LOG_DEBUG, + "Module '%s' is blacklisted", kmod_module_get_name(mod)); + else { + assert(err < 0); + + log_full_errno(!verbose ? LOG_DEBUG : + err == -ENODEV ? LOG_NOTICE : + err == -ENOENT ? LOG_WARNING : + LOG_ERR, + err, + "Failed to insert module '%s': %m", + kmod_module_get_name(mod)); + if (!IN_SET(err, -ENODEV, -ENOENT)) + r = err; + } + } + } + + return r; +} diff --git a/src/shared/module-util.h b/src/shared/module-util.h new file mode 100644 index 0000000..c386c5b --- /dev/null +++ b/src/shared/module-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <libkmod.h> + +#include "macro.h" + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_ctx*, kmod_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_module*, kmod_module_unref); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct kmod_list*, kmod_module_unref_list); + +int module_load_and_warn(struct kmod_ctx *ctx, const char *module, bool verbose); diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c new file mode 100644 index 0000000..9fa995f --- /dev/null +++ b/src/shared/mount-util.c @@ -0,0 +1,570 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <unistd.h> + +/* Include later */ +#include <libmount.h> + +#include "alloc-util.h" +#include "escape.h" +#include "extract-word.h" +#include "fd-util.h" +#include "fileio.h" +#include "fs-util.h" +#include "hashmap.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "set.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" + +int umount_recursive(const char *prefix, int flags) { + bool again; + int n = 0, r; + + /* Try to umount everything recursively below a + * directory. Also, take care of stacked mounts, and keep + * unmounting them until they are gone. */ + + do { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + + again = false; + r = 0; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER); + + for (;;) { + _cleanup_free_ char *path = NULL, *p = NULL; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) mount options */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%*s " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%*s" /* (11) mount options 2 */ + "%*[^\n]", /* some rubbish at the end */ + &path); + if (k != 1) { + if (k == EOF) + break; + + continue; + } + + r = cunescape(path, UNESCAPE_RELAX, &p); + if (r < 0) + return r; + + if (!path_startswith(p, prefix)) + continue; + + if (umount2(p, flags) < 0) { + r = log_debug_errno(errno, "Failed to umount %s: %m", p); + continue; + } + + log_debug("Successfully unmounted %s", p); + + again = true; + n++; + + break; + } + + } while (again); + + return r ? r : n; +} + +static int get_mount_flags(const char *path, unsigned long *flags) { + struct statvfs buf; + + if (statvfs(path, &buf) < 0) + return -errno; + *flags = buf.f_flag; + return 0; +} + +/* Use this function only if do you have direct access to /proc/self/mountinfo + * and need the caller to open it for you. This is the case when /proc is + * masked or not mounted. Otherwise, use bind_remount_recursive. */ +int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) { + _cleanup_set_free_free_ Set *done = NULL; + _cleanup_free_ char *cleaned = NULL; + int r; + + assert(proc_self_mountinfo); + + /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already + * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write + * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to + * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each + * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We + * do not have any effect on future submounts that might get propagated, they migt be writable. This includes + * future submounts that have been triggered via autofs. + * + * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the + * remount operation. Note that we'll ignore the blacklist for the top-level path. */ + + cleaned = strdup(prefix); + if (!cleaned) + return -ENOMEM; + + path_simplify(cleaned, false); + + done = set_new(&path_hash_ops); + if (!done) + return -ENOMEM; + + for (;;) { + _cleanup_set_free_free_ Set *todo = NULL; + bool top_autofs = false; + char *x; + unsigned long orig_flags; + + todo = set_new(&path_hash_ops); + if (!todo) + return -ENOMEM; + + rewind(proc_self_mountinfo); + + for (;;) { + _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) mount options (superblock) */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%ms " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%*s" /* (11) mount options (bind mount) */ + "%*[^\n]", /* some rubbish at the end */ + &path, + &type); + if (k != 2) { + if (k == EOF) + break; + + continue; + } + + r = cunescape(path, UNESCAPE_RELAX, &p); + if (r < 0) + return r; + + if (!path_startswith(p, cleaned)) + continue; + + /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall + * operate on. */ + if (!path_equal(cleaned, p)) { + bool blacklisted = false; + char **i; + + STRV_FOREACH(i, blacklist) { + + if (path_equal(*i, cleaned)) + continue; + + if (!path_startswith(*i, cleaned)) + continue; + + if (path_startswith(p, *i)) { + blacklisted = true; + log_debug("Not remounting %s blacklisted by %s, called for %s", p, *i, cleaned); + break; + } + } + if (blacklisted) + continue; + } + + /* Let's ignore autofs mounts. If they aren't + * triggered yet, we want to avoid triggering + * them, as we don't make any guarantees for + * future submounts anyway. If they are + * already triggered, then we will find + * another entry for this. */ + if (streq(type, "autofs")) { + top_autofs = top_autofs || path_equal(cleaned, p); + continue; + } + + if (!set_contains(done, p)) { + r = set_consume(todo, p); + p = NULL; + if (r == -EEXIST) + continue; + if (r < 0) + return r; + } + } + + /* If we have no submounts to process anymore and if + * the root is either already done, or an autofs, we + * are done */ + if (set_isempty(todo) && + (top_autofs || set_contains(done, cleaned))) + return 0; + + if (!set_contains(done, cleaned) && + !set_contains(todo, cleaned)) { + /* The prefix directory itself is not yet a mount, make it one. */ + if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0) + return -errno; + + orig_flags = 0; + (void) get_mount_flags(cleaned, &orig_flags); + orig_flags &= ~MS_RDONLY; + + if (mount(NULL, cleaned, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) + return -errno; + + log_debug("Made top-level directory %s a mount point.", prefix); + + x = strdup(cleaned); + if (!x) + return -ENOMEM; + + r = set_consume(done, x); + if (r < 0) + return r; + } + + while ((x = set_steal_first(todo))) { + + r = set_consume(done, x); + if (IN_SET(r, 0, -EEXIST)) + continue; + if (r < 0) + return r; + + /* Deal with mount points that are obstructed by a later mount */ + r = path_is_mount_point(x, NULL, 0); + if (IN_SET(r, 0, -ENOENT)) + continue; + if (IN_SET(r, -EACCES, -EPERM)) { + /* Even if root user invoke this, submounts under private FUSE or NFS mount points + * may not be acceessed. E.g., + * + * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt + * $ bindfs --no-allow-other ~/mnt ~/mnt + * + * Then, root user cannot access the mount point ~/mnt/mnt. + * In such cases, the submounts are ignored, as we have no way to manage them. */ + log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x); + continue; + } + if (r < 0) + return r; + + /* Try to reuse the original flag set */ + orig_flags = 0; + (void) get_mount_flags(x, &orig_flags); + orig_flags &= ~MS_RDONLY; + + if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) + return -errno; + + log_debug("Remounted %s read-only.", x); + } + } +} + +int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER); + + return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo); +} + +int mount_move_root(const char *path) { + assert(path); + + if (chdir(path) < 0) + return -errno; + + if (mount(path, "/", NULL, MS_MOVE, NULL) < 0) + return -errno; + + if (chroot(".") < 0) + return -errno; + + if (chdir("/") < 0) + return -errno; + + return 0; +} + +int repeat_unmount(const char *path, int flags) { + bool done = false; + + assert(path); + + /* If there are multiple mounts on a mount point, this + * removes them all */ + + for (;;) { + if (umount2(path, flags) < 0) { + + if (errno == EINVAL) + return done; + + return -errno; + } + + done = true; + } +} + +const char* mode_to_inaccessible_node(mode_t mode) { + /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during + * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe + * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes + * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead, + * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts + * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */ + + switch(mode & S_IFMT) { + case S_IFREG: + return "/run/systemd/inaccessible/reg"; + + case S_IFDIR: + return "/run/systemd/inaccessible/dir"; + + case S_IFCHR: + if (access("/run/systemd/inaccessible/chr", F_OK) == 0) + return "/run/systemd/inaccessible/chr"; + return "/run/systemd/inaccessible/sock"; + + case S_IFBLK: + if (access("/run/systemd/inaccessible/blk", F_OK) == 0) + return "/run/systemd/inaccessible/blk"; + return "/run/systemd/inaccessible/sock"; + + case S_IFIFO: + return "/run/systemd/inaccessible/fifo"; + + case S_IFSOCK: + return "/run/systemd/inaccessible/sock"; + } + return NULL; +} + +#define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "") +static char* mount_flags_to_string(long unsigned flags) { + char *x; + _cleanup_free_ char *y = NULL; + long unsigned overflow; + + overflow = flags & ~(MS_RDONLY | + MS_NOSUID | + MS_NODEV | + MS_NOEXEC | + MS_SYNCHRONOUS | + MS_REMOUNT | + MS_MANDLOCK | + MS_DIRSYNC | + MS_NOATIME | + MS_NODIRATIME | + MS_BIND | + MS_MOVE | + MS_REC | + MS_SILENT | + MS_POSIXACL | + MS_UNBINDABLE | + MS_PRIVATE | + MS_SLAVE | + MS_SHARED | + MS_RELATIME | + MS_KERNMOUNT | + MS_I_VERSION | + MS_STRICTATIME | + MS_LAZYTIME); + + if (flags == 0 || overflow != 0) + if (asprintf(&y, "%lx", overflow) < 0) + return NULL; + + x = strjoin(FLAG(MS_RDONLY), + FLAG(MS_NOSUID), + FLAG(MS_NODEV), + FLAG(MS_NOEXEC), + FLAG(MS_SYNCHRONOUS), + FLAG(MS_REMOUNT), + FLAG(MS_MANDLOCK), + FLAG(MS_DIRSYNC), + FLAG(MS_NOATIME), + FLAG(MS_NODIRATIME), + FLAG(MS_BIND), + FLAG(MS_MOVE), + FLAG(MS_REC), + FLAG(MS_SILENT), + FLAG(MS_POSIXACL), + FLAG(MS_UNBINDABLE), + FLAG(MS_PRIVATE), + FLAG(MS_SLAVE), + FLAG(MS_SHARED), + FLAG(MS_RELATIME), + FLAG(MS_KERNMOUNT), + FLAG(MS_I_VERSION), + FLAG(MS_STRICTATIME), + FLAG(MS_LAZYTIME), + y); + if (!x) + return NULL; + if (!y) + x[strlen(x) - 1] = '\0'; /* truncate the last | */ + return x; +} + +int mount_verbose( + int error_log_level, + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options) { + + _cleanup_free_ char *fl = NULL, *o = NULL; + unsigned long f; + int r; + + r = mount_option_mangle(options, flags, &f, &o); + if (r < 0) + return log_full_errno(error_log_level, r, + "Failed to mangle mount options %s: %m", + strempty(options)); + + fl = mount_flags_to_string(f); + + if ((f & MS_REMOUNT) && !what && !type) + log_debug("Remounting %s (%s \"%s\")...", + where, strnull(fl), strempty(o)); + else if (!what && !type) + log_debug("Mounting %s (%s \"%s\")...", + where, strnull(fl), strempty(o)); + else if ((f & MS_BIND) && !type) + log_debug("Bind-mounting %s on %s (%s \"%s\")...", + what, where, strnull(fl), strempty(o)); + else if (f & MS_MOVE) + log_debug("Moving mount %s → %s (%s \"%s\")...", + what, where, strnull(fl), strempty(o)); + else + log_debug("Mounting %s on %s (%s \"%s\")...", + strna(type), where, strnull(fl), strempty(o)); + if (mount(what, where, type, f, o) < 0) + return log_full_errno(error_log_level, errno, + "Failed to mount %s (type %s) on %s (%s \"%s\"): %m", + strna(what), strna(type), where, strnull(fl), strempty(o)); + return 0; +} + +int umount_verbose(const char *what) { + log_debug("Umounting %s...", what); + if (umount(what) < 0) + return log_error_errno(errno, "Failed to unmount %s: %m", what); + return 0; +} + +int mount_option_mangle( + const char *options, + unsigned long mount_flags, + unsigned long *ret_mount_flags, + char **ret_remaining_options) { + + const struct libmnt_optmap *map; + _cleanup_free_ char *ret = NULL; + const char *p; + int r; + + /* This extracts mount flags from the mount options, and store + * non-mount-flag options to '*ret_remaining_options'. + * E.g., + * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000" + * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and + * "size=1630748k,mode=700,uid=1000,gid=1000". + * See more examples in test-mount-utils.c. + * + * Note that if 'options' does not contain any non-mount-flag options, + * then '*ret_remaining_options' is set to NULL instread of empty string. + * Note that this does not check validity of options stored in + * '*ret_remaining_options'. + * Note that if 'options' is NULL, then this just copies 'mount_flags' + * to '*ret_mount_flags'. */ + + assert(ret_mount_flags); + assert(ret_remaining_options); + + map = mnt_get_builtin_optmap(MNT_LINUX_MAP); + if (!map) + return -EINVAL; + + p = options; + for (;;) { + _cleanup_free_ char *word = NULL; + const struct libmnt_optmap *ent; + + r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES); + if (r < 0) + return r; + if (r == 0) + break; + + for (ent = map; ent->name; ent++) { + /* All entries in MNT_LINUX_MAP do not take any argument. + * Thus, ent->name does not contain "=" or "[=]". */ + if (!streq(word, ent->name)) + continue; + + if (!(ent->mask & MNT_INVERT)) + mount_flags |= ent->id; + else if (mount_flags & ent->id) + mount_flags ^= ent->id; + + break; + } + + /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */ + if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL)) + return -ENOMEM; + } + + *ret_mount_flags = mount_flags; + *ret_remaining_options = TAKE_PTR(ret); + + return 0; +} diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h new file mode 100644 index 0000000..00df1b0 --- /dev/null +++ b/src/shared/mount-util.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <mntent.h> +#include <stdio.h> + +#include "macro.h" + +int repeat_unmount(const char *path, int flags); +int umount_recursive(const char *target, int flags); +int bind_remount_recursive(const char *prefix, bool ro, char **blacklist); +int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo); + +int mount_move_root(const char *path); + +DEFINE_TRIVIAL_CLEANUP_FUNC(FILE*, endmntent); +#define _cleanup_endmntent_ _cleanup_(endmntentp) + +int mount_verbose( + int error_log_level, + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options); +int umount_verbose(const char *where); + +int mount_option_mangle( + const char *options, + unsigned long mount_flags, + unsigned long *ret_mount_flags, + char **ret_remaining_options); + +const char* mode_to_inaccessible_node(mode_t mode); diff --git a/src/shared/nscd-flush.c b/src/shared/nscd-flush.c new file mode 100644 index 0000000..5a04468 --- /dev/null +++ b/src/shared/nscd-flush.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#include <sys/poll.h> + +#include "fd-util.h" +#include "io-util.h" +#include "nscd-flush.h" +#include "socket-util.h" +#include "strv.h" +#include "time-util.h" + +#define NSCD_FLUSH_CACHE_TIMEOUT_USEC (5*USEC_PER_SEC) + +struct nscdInvalidateRequest { + int32_t version; + int32_t type; /* in glibc this is an enum. We don't replicate this here 1:1. Also, wtf, how unportable is that + * even? */ + int32_t key_len; + char dbname[]; +}; + +static const union sockaddr_union nscd_sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/run/nscd/socket", +}; + +static int nscd_flush_cache_one(const char *database, usec_t end) { + size_t req_size, has_written = 0, has_read = 0, l; + struct nscdInvalidateRequest *req; + _cleanup_close_ int fd = -1; + int32_t resp; + int events; + + assert(database); + + l = strlen(database); + req_size = offsetof(struct nscdInvalidateRequest, dbname) + l + 1; + + req = alloca(req_size); + *req = (struct nscdInvalidateRequest) { + .version = 2, + .type = 10, + .key_len = l + 1, + }; + + strcpy(req->dbname, database); + + fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return log_debug_errno(errno, "Failed to allocate nscd socket: %m"); + + /* Note: connect() returns EINPROGRESS if O_NONBLOCK is set and establishing a connection takes time. The + * kernel lets us know this way that the connection is now being established, and we should watch with poll() + * to learn when it is fully established. That said, AF_UNIX on Linux never triggers this IRL (connect() is + * always instant on AF_UNIX), hence handling this is mostly just an excercise in defensive, protocol-agnostic + * programming. + * + * connect() returns EAGAIN if the socket's backlog limit has been reached. When we see this we give up right + * away, after all this entire function here is written in a defensive style so that a non-responding nscd + * doesn't stall us for good. (Even if we wanted to handle this better: the Linux kernel doesn't really have a + * nice way to connect() to a server synchronously with a time limit that would also cover dealing with the + * backlog limit. After all SO_RCVTIMEO and SR_SNDTIMEO don't apply to connect(), and alarm() is frickin' ugly + * and not really reasonably usable from threads-aware code.) */ + if (connect(fd, &nscd_sa.sa, SOCKADDR_UN_LEN(nscd_sa.un)) < 0) { + if (errno == EAGAIN) + return log_debug_errno(errno, "nscd is overloaded (backlog limit reached) and refuses to take further connections: %m"); + if (errno != EINPROGRESS) + return log_debug_errno(errno, "Failed to connect to nscd socket: %m"); + + /* Continue in case of EINPROGRESS, but don't bother with send() or recv() until being notified that + * establishing the connection is complete. */ + events = 0; + } else + events = POLLIN|POLLOUT; /* Let's assume initially that we can write and read to the fd, to suppress + * one poll() invocation */ + for (;;) { + usec_t p; + + if (events & POLLOUT) { + ssize_t m; + + assert(has_written < req_size); + + m = send(fd, (uint8_t*) req + has_written, req_size - has_written, MSG_NOSIGNAL); + if (m < 0) { + if (errno != EAGAIN) /* Note that EAGAIN is returned by the kernel whenever it can't + * take the data right now, and that includes if the connect() is + * asynchronous and we saw EINPROGRESS on it, and it hasn't + * completed yet. */ + return log_debug_errno(errno, "Failed to write to nscd socket: %m"); + } else + has_written += m; + } + + if (events & (POLLIN|POLLERR|POLLHUP)) { + ssize_t m; + + if (has_read >= sizeof(resp)) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Response from nscd longer than expected: %m"); + + m = recv(fd, (uint8_t*) &resp + has_read, sizeof(resp) - has_read, 0); + if (m < 0) { + if (errno != EAGAIN) + return log_debug_errno(errno, "Failed to read from nscd socket: %m"); + } else if (m == 0) { /* EOF */ + if (has_read == 0 && has_written >= req_size) /* Older nscd immediately terminated the + * connection, accept that as OK */ + return 1; + + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "nscd prematurely ended connection."); + } else + has_read += m; + } + + if (has_written >= req_size && has_read >= sizeof(resp)) { /* done? */ + if (resp < 0) + return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "nscd sent us a negative error numer: %i", resp); + if (resp > 0) + return log_debug_errno(resp, "nscd return failure code on invalidating '%s'.", database); + return 1; + } + + p = now(CLOCK_MONOTONIC); + if (p >= end) + return -ETIMEDOUT; + + events = fd_wait_for_event(fd, POLLIN | (has_written < req_size ? POLLOUT : 0), end - p); + if (events < 0) + return events; + } +} + +int nscd_flush_cache(char **databases) { + usec_t end; + int r = 0; + char **i; + + /* Tries to invalidate the specified database in nscd. We do this carefully, with a 5s time-out, so that we + * don't block indefinitely on another service. */ + + end = usec_add(now(CLOCK_MONOTONIC), NSCD_FLUSH_CACHE_TIMEOUT_USEC); + + STRV_FOREACH(i, databases) { + int k; + + k = nscd_flush_cache_one(*i, end); + if (k < 0 && r >= 0) + r = k; + } + + return r; +} diff --git a/src/shared/nscd-flush.h b/src/shared/nscd-flush.h new file mode 100644 index 0000000..22774bf --- /dev/null +++ b/src/shared/nscd-flush.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +int nscd_flush_cache(char **databases); diff --git a/src/shared/nsflags.c b/src/shared/nsflags.c new file mode 100644 index 0000000..8cc2d08 --- /dev/null +++ b/src/shared/nsflags.c @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "nsflags.h" +#include "string-util.h" + +const struct namespace_flag_map namespace_flag_map[] = { + { CLONE_NEWCGROUP, "cgroup" }, + { CLONE_NEWIPC, "ipc" }, + { CLONE_NEWNET, "net" }, + /* So, the mount namespace flag is called CLONE_NEWNS for historical reasons. Let's expose it here under a more + * explanatory name: "mnt". This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ + { CLONE_NEWNS, "mnt" }, + { CLONE_NEWPID, "pid" }, + { CLONE_NEWUSER, "user" }, + { CLONE_NEWUTS, "uts" }, + {} +}; + +int namespace_flags_from_string(const char *name, unsigned long *ret) { + unsigned long flags = 0; + int r; + + assert_se(ret); + + for (;;) { + _cleanup_free_ char *word = NULL; + unsigned long f = 0; + unsigned i; + + r = extract_first_word(&name, &word, NULL, 0); + if (r < 0) + return r; + if (r == 0) + break; + + for (i = 0; namespace_flag_map[i].name; i++) + if (streq(word, namespace_flag_map[i].name)) { + f = namespace_flag_map[i].flag; + break; + } + + if (f == 0) + return -EINVAL; + + flags |= f; + } + + *ret = flags; + return 0; +} + +int namespace_flags_to_string(unsigned long flags, char **ret) { + _cleanup_free_ char *s = NULL; + unsigned i; + + for (i = 0; namespace_flag_map[i].name; i++) { + if ((flags & namespace_flag_map[i].flag) != namespace_flag_map[i].flag) + continue; + + if (!strextend_with_separator(&s, " ", namespace_flag_map[i].name, NULL)) + return -ENOMEM; + } + + if (!s) { + s = strdup(""); + if (!s) + return -ENOMEM; + } + + *ret = TAKE_PTR(s); + + return 0; +} diff --git a/src/shared/nsflags.h b/src/shared/nsflags.h new file mode 100644 index 0000000..0aeb0bc --- /dev/null +++ b/src/shared/nsflags.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "missing_sched.h" + +/* The combination of all namespace flags defined by the kernel. The right type for this isn't clear. setns() and + * unshare() expect these flags to be passed as (signed) "int", while clone() wants them as "unsigned long". The latter + * is definitely more appropriate for a flags parameter, and also the larger type of the two, hence let's stick to that + * here. */ +#define NAMESPACE_FLAGS_ALL \ + ((unsigned long) (CLONE_NEWCGROUP| \ + CLONE_NEWIPC| \ + CLONE_NEWNET| \ + CLONE_NEWNS| \ + CLONE_NEWPID| \ + CLONE_NEWUSER| \ + CLONE_NEWUTS)) + +#define NAMESPACE_FLAGS_INITIAL ((unsigned long) -1) + +int namespace_flags_from_string(const char *name, unsigned long *ret); +int namespace_flags_to_string(unsigned long flags, char **ret); + +struct namespace_flag_map { + unsigned long flag; + const char *name; +}; + +extern const struct namespace_flag_map namespace_flag_map[]; diff --git a/src/shared/os-util.c b/src/shared/os-util.c new file mode 100644 index 0000000..b2d5ce3 --- /dev/null +++ b/src/shared/os-util.c @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "alloc-util.h" +#include "env-file.h" +#include "fd-util.h" +#include "fs-util.h" +#include "macro.h" +#include "os-util.h" +#include "string-util.h" +#include "strv.h" + +int path_is_os_tree(const char *path) { + int r; + + assert(path); + + /* Does the path exist at all? If not, generate an error immediately. This is useful so that a missing root dir + * always results in -ENOENT, and we can properly distuingish the case where the whole root doesn't exist from + * the case where just the os-release file is missing. */ + if (laccess(path, F_OK) < 0) + return -errno; + + /* We use {/etc|/usr/lib}/os-release as flag file if something is an OS */ + r = open_os_release(path, NULL, NULL); + if (r == -ENOENT) /* We got nothing */ + return 0; + if (r < 0) + return r; + + return 1; +} + +int open_os_release(const char *root, char **ret_path, int *ret_fd) { + _cleanup_free_ char *q = NULL; + const char *p; + int k; + + FOREACH_STRING(p, "/etc/os-release", "/usr/lib/os-release") { + k = chase_symlinks(p, root, CHASE_PREFIX_ROOT|(ret_fd ? CHASE_OPEN : 0), (ret_path ? &q : NULL)); + if (k != -ENOENT) + break; + } + if (k < 0) + return k; + + if (ret_fd) { + int real_fd; + + /* Convert the O_PATH fd into a proper, readable one */ + real_fd = fd_reopen(k, O_RDONLY|O_CLOEXEC|O_NOCTTY); + safe_close(k); + if (real_fd < 0) + return real_fd; + + *ret_fd = real_fd; + } + + if (ret_path) + *ret_path = TAKE_PTR(q); + + return 0; +} + +int fopen_os_release(const char *root, char **ret_path, FILE **ret_file) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + FILE *f; + int r; + + if (!ret_file) + return open_os_release(root, ret_path, NULL); + + r = open_os_release(root, ret_path ? &p : NULL, &fd); + if (r < 0) + return r; + + f = fdopen(fd, "r"); + if (!f) + return -errno; + fd = -1; + + *ret_file = f; + + if (ret_path) + *ret_path = TAKE_PTR(p); + + return 0; +} + +int parse_os_release(const char *root, ...) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + va_list ap; + int r; + + r = fopen_os_release(root, &p, &f); + if (r < 0) + return r; + + va_start(ap, root); + r = parse_env_filev(f, p, ap); + va_end(ap); + + return r; +} + +int load_os_release_pairs(const char *root, char ***ret) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *p = NULL; + int r; + + r = fopen_os_release(root, &p, &f); + if (r < 0) + return r; + + return load_env_file_pairs(f, p, ret); +} diff --git a/src/shared/os-util.h b/src/shared/os-util.h new file mode 100644 index 0000000..27ec7ac --- /dev/null +++ b/src/shared/os-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdio.h> + +int path_is_os_tree(const char *path); + +int open_os_release(const char *root, char **ret_path, int *ret_fd); +int fopen_os_release(const char *root, char **ret_path, FILE **ret_file); + +int parse_os_release(const char *root, ...) _sentinel_; +int load_os_release_pairs(const char *root, char ***ret); diff --git a/src/shared/output-mode.c b/src/shared/output-mode.c new file mode 100644 index 0000000..107b345 --- /dev/null +++ b/src/shared/output-mode.c @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "output-mode.h" +#include "string-table.h" + +JsonFormatFlags output_mode_to_json_format_flags(OutputMode m) { + + switch (m) { + + case OUTPUT_JSON_SSE: + return JSON_FORMAT_SSE; + + case OUTPUT_JSON_SEQ: + return JSON_FORMAT_SEQ; + + case OUTPUT_JSON_PRETTY: + return JSON_FORMAT_PRETTY; + + default: + return JSON_FORMAT_NEWLINE; + } +} + +static const char *const output_mode_table[_OUTPUT_MODE_MAX] = { + [OUTPUT_SHORT] = "short", + [OUTPUT_SHORT_FULL] = "short-full", + [OUTPUT_SHORT_ISO] = "short-iso", + [OUTPUT_SHORT_ISO_PRECISE] = "short-iso-precise", + [OUTPUT_SHORT_PRECISE] = "short-precise", + [OUTPUT_SHORT_MONOTONIC] = "short-monotonic", + [OUTPUT_SHORT_UNIX] = "short-unix", + [OUTPUT_VERBOSE] = "verbose", + [OUTPUT_EXPORT] = "export", + [OUTPUT_JSON] = "json", + [OUTPUT_JSON_PRETTY] = "json-pretty", + [OUTPUT_JSON_SSE] = "json-sse", + [OUTPUT_JSON_SEQ] = "json-seq", + [OUTPUT_CAT] = "cat", + [OUTPUT_WITH_UNIT] = "with-unit", +}; + +DEFINE_STRING_TABLE_LOOKUP(output_mode, OutputMode); diff --git a/src/shared/output-mode.h b/src/shared/output-mode.h new file mode 100644 index 0000000..00b6032 --- /dev/null +++ b/src/shared/output-mode.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "json.h" +#include "macro.h" + +typedef enum OutputMode { + OUTPUT_SHORT, + OUTPUT_SHORT_FULL, + OUTPUT_SHORT_ISO, + OUTPUT_SHORT_ISO_PRECISE, + OUTPUT_SHORT_PRECISE, + OUTPUT_SHORT_MONOTONIC, + OUTPUT_SHORT_UNIX, + OUTPUT_VERBOSE, + OUTPUT_EXPORT, + OUTPUT_JSON, + OUTPUT_JSON_PRETTY, + OUTPUT_JSON_SSE, + OUTPUT_JSON_SEQ, + OUTPUT_CAT, + OUTPUT_WITH_UNIT, + _OUTPUT_MODE_MAX, + _OUTPUT_MODE_INVALID = -1 +} OutputMode; + +static inline bool OUTPUT_MODE_IS_JSON(OutputMode m) { + return IN_SET(m, OUTPUT_JSON, OUTPUT_JSON_PRETTY, OUTPUT_JSON_SSE, OUTPUT_JSON_SEQ); +} + +/* The output flags definitions are shared by the logs and process tree output. Some apply to both, some only to the + * logs output, others only to the process tree output. */ + +typedef enum OutputFlags { + OUTPUT_SHOW_ALL = 1 << 0, + OUTPUT_FOLLOW = 1 << 1, + OUTPUT_WARN_CUTOFF = 1 << 2, + OUTPUT_FULL_WIDTH = 1 << 3, + OUTPUT_COLOR = 1 << 4, + OUTPUT_CATALOG = 1 << 5, + OUTPUT_BEGIN_NEWLINE = 1 << 6, + OUTPUT_UTC = 1 << 7, + OUTPUT_KERNEL_THREADS = 1 << 8, + OUTPUT_NO_HOSTNAME = 1 << 9, +} OutputFlags; + +JsonFormatFlags output_mode_to_json_format_flags(OutputMode m); + +const char* output_mode_to_string(OutputMode m) _const_; +OutputMode output_mode_from_string(const char *s) _pure_; diff --git a/src/shared/pager.c b/src/shared/pager.c new file mode 100644 index 0000000..bf2597e --- /dev/null +++ b/src/shared/pager.c @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "copy.h" +#include "fd-util.h" +#include "fileio.h" +#include "io-util.h" +#include "locale-util.h" +#include "log.h" +#include "macro.h" +#include "pager.h" +#include "process-util.h" +#include "rlimit-util.h" +#include "signal-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" + +static pid_t pager_pid = 0; + +static int stored_stdout = -1; +static int stored_stderr = -1; +static bool stdout_redirected = false; +static bool stderr_redirected = false; + +_noreturn_ static void pager_fallback(void) { + int r; + + r = copy_bytes(STDIN_FILENO, STDOUT_FILENO, (uint64_t) -1, 0); + if (r < 0) { + log_error_errno(r, "Internal pager failed: %m"); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); +} + +static int no_quit_on_interrupt(int exe_name_fd, const char *less_opts) { + _cleanup_fclose_ FILE *file = NULL; + _cleanup_free_ char *line = NULL; + int r; + + assert(exe_name_fd >= 0); + assert(less_opts); + + /* This takes ownership of exe_name_fd */ + file = fdopen(exe_name_fd, "r"); + if (!file) { + safe_close(exe_name_fd); + return log_error_errno(errno, "Failed to create FILE object: %m"); + } + + /* Find the last line */ + for (;;) { + _cleanup_free_ char *t = NULL; + + r = read_line(file, LONG_LINE_MAX, &t); + if (r < 0) + return log_error_errno(r, "Failed to read from socket: %m"); + if (r == 0) + break; + + free_and_replace(line, t); + } + + /* We only treat "less" specially. + * Return true whenever option K is *not* set. */ + r = streq_ptr(line, "less") && !strchr(less_opts, 'K'); + + log_debug("Pager executable is \"%s\", options \"%s\", quit_on_interrupt: %s", + strnull(line), less_opts, yes_no(!r)); + return r; +} + +int pager_open(PagerFlags flags) { + _cleanup_close_pair_ int fd[2] = { -1, -1 }, exe_name_pipe[2] = { -1, -1 }; + _cleanup_strv_free_ char **pager_args = NULL; + const char *pager, *less_opts; + int r; + + if (flags & PAGER_DISABLE) + return 0; + + if (pager_pid > 0) + return 1; + + if (terminal_is_dumb()) + return 0; + + if (!is_main_thread()) + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Pager invoked from wrong thread."); + + pager = getenv("SYSTEMD_PAGER"); + if (!pager) + pager = getenv("PAGER"); + + if (pager) { + pager_args = strv_split(pager, WHITESPACE); + if (!pager_args) + return log_oom(); + + /* If the pager is explicitly turned off, honour it */ + if (strv_isempty(pager_args) || strv_equal(pager_args, STRV_MAKE("cat"))) + return 0; + } + + /* Determine and cache number of columns/lines before we spawn the pager so that we get the value from the + * actual tty */ + (void) columns(); + (void) lines(); + + if (pipe2(fd, O_CLOEXEC) < 0) + return log_error_errno(errno, "Failed to create pager pipe: %m"); + + /* This is a pipe to feed the name of the executed pager binary into the parent */ + if (pipe2(exe_name_pipe, O_CLOEXEC) < 0) + return log_error_errno(errno, "Failed to create exe_name pipe: %m"); + + /* Initialize a good set of less options */ + less_opts = getenv("SYSTEMD_LESS"); + if (!less_opts) + less_opts = "FRSXMK"; + if (flags & PAGER_JUMP_TO_END) + less_opts = strjoina(less_opts, " +G"); + + r = safe_fork("(pager)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pager_pid); + if (r < 0) + return r; + if (r == 0) { + const char *less_charset, *exe; + + /* In the child start the pager */ + + if (dup2(fd[0], STDIN_FILENO) < 0) { + log_error_errno(errno, "Failed to duplicate file descriptor to STDIN: %m"); + _exit(EXIT_FAILURE); + } + + safe_close_pair(fd); + + if (setenv("LESS", less_opts, 1) < 0) { + log_error_errno(errno, "Failed to set environment variable LESS: %m"); + _exit(EXIT_FAILURE); + } + + /* Initialize a good charset for less. This is + * particularly important if we output UTF-8 + * characters. */ + less_charset = getenv("SYSTEMD_LESSCHARSET"); + if (!less_charset && is_locale_utf8()) + less_charset = "utf-8"; + if (less_charset && + setenv("LESSCHARSET", less_charset, 1) < 0) { + log_error_errno(errno, "Failed to set environment variable LESSCHARSET: %m"); + _exit(EXIT_FAILURE); + } + + if (pager_args) { + r = loop_write(exe_name_pipe[1], pager_args[0], strlen(pager_args[0]) + 1, false); + if (r < 0) { + log_error_errno(r, "Failed to write pager name to socket: %m"); + _exit(EXIT_FAILURE); + } + + execvp(pager_args[0], pager_args); + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, + "Failed to execute '%s', using fallback pagers: %m", pager_args[0]); + } + + /* Debian's alternatives command for pagers is + * called 'pager'. Note that we do not call + * sensible-pagers here, since that is just a + * shell script that implements a logic that + * is similar to this one anyway, but is + * Debian-specific. */ + FOREACH_STRING(exe, "pager", "less", "more") { + r = loop_write(exe_name_pipe[1], exe, strlen(exe) + 1, false); + if (r < 0) { + log_error_errno(r, "Failed to write pager name to socket: %m"); + _exit(EXIT_FAILURE); + } + execlp(exe, exe, NULL); + log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno, + "Failed to execute '%s', using next fallback pager: %m", exe); + } + + r = loop_write(exe_name_pipe[1], "(built-in)", strlen("(built-in") + 1, false); + if (r < 0) { + log_error_errno(r, "Failed to write pager name to socket: %m"); + _exit(EXIT_FAILURE); + } + pager_fallback(); + /* not reached */ + } + + /* Return in the parent */ + stored_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 3); + if (dup2(fd[1], STDOUT_FILENO) < 0) { + stored_stdout = safe_close(stored_stdout); + return log_error_errno(errno, "Failed to duplicate pager pipe: %m"); + } + stdout_redirected = true; + + stored_stderr = fcntl(STDERR_FILENO, F_DUPFD_CLOEXEC, 3); + if (dup2(fd[1], STDERR_FILENO) < 0) { + stored_stderr = safe_close(stored_stderr); + return log_error_errno(errno, "Failed to duplicate pager pipe: %m"); + } + stderr_redirected = true; + + exe_name_pipe[1] = safe_close(exe_name_pipe[1]); + + r = no_quit_on_interrupt(TAKE_FD(exe_name_pipe[0]), less_opts); + if (r < 0) + return r; + if (r > 0) + (void) ignore_signals(SIGINT, -1); + + return 1; +} + +void pager_close(void) { + + if (pager_pid <= 0) + return; + + /* Inform pager that we are done */ + (void) fflush(stdout); + if (stdout_redirected) + if (stored_stdout < 0 || dup2(stored_stdout, STDOUT_FILENO) < 0) + (void) close(STDOUT_FILENO); + stored_stdout = safe_close(stored_stdout); + (void) fflush(stderr); + if (stderr_redirected) + if (stored_stderr < 0 || dup2(stored_stderr, STDERR_FILENO) < 0) + (void) close(STDERR_FILENO); + stored_stderr = safe_close(stored_stderr); + stdout_redirected = stderr_redirected = false; + + (void) kill(pager_pid, SIGCONT); + (void) wait_for_terminate(pager_pid, NULL); + pager_pid = 0; +} + +bool pager_have(void) { + return pager_pid > 0; +} + +int show_man_page(const char *desc, bool null_stdio) { + const char *args[4] = { "man", NULL, NULL, NULL }; + char *e = NULL; + pid_t pid; + size_t k; + int r; + + k = strlen(desc); + + if (desc[k-1] == ')') + e = strrchr(desc, '('); + + if (e) { + char *page = NULL, *section = NULL; + + page = strndupa(desc, e - desc); + section = strndupa(e + 1, desc + k - e - 2); + + args[1] = section; + args[2] = page; + } else + args[1] = desc; + + r = safe_fork("(man)", FORK_RESET_SIGNALS|FORK_DEATHSIG|(null_stdio ? FORK_NULL_STDIO : 0)|FORK_RLIMIT_NOFILE_SAFE|FORK_LOG, &pid); + if (r < 0) + return r; + if (r == 0) { + /* Child */ + execvp(args[0], (char**) args); + log_error_errno(errno, "Failed to execute man: %m"); + _exit(EXIT_FAILURE); + } + + return wait_for_terminate_and_check(NULL, pid, 0); +} diff --git a/src/shared/pager.h b/src/shared/pager.h new file mode 100644 index 0000000..8299e23 --- /dev/null +++ b/src/shared/pager.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +typedef enum PagerFlags { + PAGER_DISABLE = 1 << 0, + PAGER_JUMP_TO_END = 1 << 1, +} PagerFlags; + +int pager_open(PagerFlags flags); +void pager_close(void); +bool pager_have(void) _pure_; + +int show_man_page(const char *page, bool null_stdio); diff --git a/src/shared/path-lookup.c b/src/shared/path-lookup.c new file mode 100644 index 0000000..442fde7 --- /dev/null +++ b/src/shared/path-lookup.c @@ -0,0 +1,903 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "fs-util.h" +#include "install.h" +#include "log.h" +#include "macro.h" +#include "mkdir.h" +#include "path-lookup.h" +#include "path-util.h" +#include "rm-rf.h" +#include "stat-util.h" +#include "string-util.h" +#include "strv.h" +#include "tmpfile-util.h" +#include "user-util.h" +#include "util.h" + +int xdg_user_runtime_dir(char **ret, const char *suffix) { + const char *e; + char *j; + + assert(ret); + assert(suffix); + + e = getenv("XDG_RUNTIME_DIR"); + if (!e) + return -ENXIO; + + j = strappend(e, suffix); + if (!j) + return -ENOMEM; + + *ret = j; + return 0; +} + +int xdg_user_config_dir(char **ret, const char *suffix) { + const char *e; + char *j; + int r; + + assert(ret); + + e = getenv("XDG_CONFIG_HOME"); + if (e) + j = strappend(e, suffix); + else { + _cleanup_free_ char *home = NULL; + + r = get_home_dir(&home); + if (r < 0) + return r; + + j = strjoin(home, "/.config", suffix); + } + + if (!j) + return -ENOMEM; + + *ret = j; + return 0; +} + +int xdg_user_data_dir(char **ret, const char *suffix) { + const char *e; + char *j; + int r; + + assert(ret); + assert(suffix); + + /* We don't treat /etc/xdg/systemd here as the spec + * suggests because we assume that is a link to + * /etc/systemd/ anyway. */ + + e = getenv("XDG_DATA_HOME"); + if (e) + j = strappend(e, suffix); + else { + _cleanup_free_ char *home = NULL; + + r = get_home_dir(&home); + if (r < 0) + return r; + + j = strjoin(home, "/.local/share", suffix); + } + if (!j) + return -ENOMEM; + + *ret = j; + return 1; +} + +static const char* const user_data_unit_paths[] = { + "/usr/local/lib/systemd/user", + "/usr/local/share/systemd/user", + USER_DATA_UNIT_PATH, + "/usr/lib/systemd/user", + "/usr/share/systemd/user", + NULL +}; + +static const char* const user_config_unit_paths[] = { + USER_CONFIG_UNIT_PATH, + "/etc/systemd/user", + NULL +}; + +int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs) { + /* Implement the mechanisms defined in + * + * http://standards.freedesktop.org/basedir-spec/basedir-spec-0.6.html + * + * We look in both the config and the data dirs because we + * want to encourage that distributors ship their unit files + * as data, and allow overriding as configuration. + */ + const char *e; + _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL; + + e = getenv("XDG_CONFIG_DIRS"); + if (e) { + config_dirs = strv_split(e, ":"); + if (!config_dirs) + return -ENOMEM; + } + + e = getenv("XDG_DATA_DIRS"); + if (e) + data_dirs = strv_split(e, ":"); + else + data_dirs = strv_new("/usr/local/share", + "/usr/share"); + if (!data_dirs) + return -ENOMEM; + + *ret_config_dirs = TAKE_PTR(config_dirs); + *ret_data_dirs = TAKE_PTR(data_dirs); + + return 0; +} + +static char** user_dirs( + const char *persistent_config, + const char *runtime_config, + const char *global_persistent_config, + const char *global_runtime_config, + const char *generator, + const char *generator_early, + const char *generator_late, + const char *transient, + const char *persistent_control, + const char *runtime_control) { + + _cleanup_strv_free_ char **config_dirs = NULL, **data_dirs = NULL; + _cleanup_free_ char *data_home = NULL; + _cleanup_strv_free_ char **res = NULL; + int r; + + r = xdg_user_dirs(&config_dirs, &data_dirs); + if (r < 0) + return NULL; + + r = xdg_user_data_dir(&data_home, "/systemd/user"); + if (r < 0 && r != -ENXIO) + return NULL; + + /* Now merge everything we found. */ + if (strv_extend(&res, persistent_control) < 0) + return NULL; + + if (strv_extend(&res, runtime_control) < 0) + return NULL; + + if (strv_extend(&res, transient) < 0) + return NULL; + + if (strv_extend(&res, generator_early) < 0) + return NULL; + + if (strv_extend_strv_concat(&res, config_dirs, "/systemd/user") < 0) + return NULL; + + if (strv_extend(&res, persistent_config) < 0) + return NULL; + + /* global config has lower priority than the user config of the same type */ + if (strv_extend(&res, global_persistent_config) < 0) + return NULL; + + if (strv_extend_strv(&res, (char**) user_config_unit_paths, false) < 0) + return NULL; + + if (strv_extend(&res, runtime_config) < 0) + return NULL; + + if (strv_extend(&res, global_runtime_config) < 0) + return NULL; + + if (strv_extend(&res, generator) < 0) + return NULL; + + if (strv_extend(&res, data_home) < 0) + return NULL; + + if (strv_extend_strv_concat(&res, data_dirs, "/systemd/user") < 0) + return NULL; + + if (strv_extend_strv(&res, (char**) user_data_unit_paths, false) < 0) + return NULL; + + if (strv_extend(&res, generator_late) < 0) + return NULL; + + if (path_strv_make_absolute_cwd(res) < 0) + return NULL; + + return TAKE_PTR(res); +} + +bool path_is_user_data_dir(const char *path) { + assert(path); + + return strv_contains((char**) user_data_unit_paths, path); +} + +bool path_is_user_config_dir(const char *path) { + assert(path); + + return strv_contains((char**) user_config_unit_paths, path); +} + +static int acquire_generator_dirs( + UnitFileScope scope, + const char *tempdir, + char **generator, + char **generator_early, + char **generator_late) { + + _cleanup_free_ char *x = NULL, *y = NULL, *z = NULL; + const char *prefix; + + assert(generator); + assert(generator_early); + assert(generator_late); + assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL)); + + if (scope == UNIT_FILE_GLOBAL) + return -EOPNOTSUPP; + + if (tempdir) + prefix = tempdir; + else if (scope == UNIT_FILE_SYSTEM) + prefix = "/run/systemd"; + else { + /* UNIT_FILE_USER */ + const char *e; + + e = getenv("XDG_RUNTIME_DIR"); + if (!e) + return -ENXIO; + + prefix = strjoina(e, "/systemd"); + } + + x = strappend(prefix, "/generator"); + if (!x) + return -ENOMEM; + + y = strappend(prefix, "/generator.early"); + if (!y) + return -ENOMEM; + + z = strappend(prefix, "/generator.late"); + if (!z) + return -ENOMEM; + + *generator = TAKE_PTR(x); + *generator_early = TAKE_PTR(y); + *generator_late = TAKE_PTR(z); + + return 0; +} + +static int acquire_transient_dir( + UnitFileScope scope, + const char *tempdir, + char **ret) { + + char *transient; + + assert(ret); + assert(IN_SET(scope, UNIT_FILE_SYSTEM, UNIT_FILE_USER, UNIT_FILE_GLOBAL)); + + if (scope == UNIT_FILE_GLOBAL) + return -EOPNOTSUPP; + + if (tempdir) + transient = strjoin(tempdir, "/transient"); + else if (scope == UNIT_FILE_SYSTEM) + transient = strdup("/run/systemd/transient"); + else + return xdg_user_runtime_dir(ret, "/systemd/transient"); + + if (!transient) + return -ENOMEM; + *ret = transient; + return 0; +} + +static int acquire_config_dirs(UnitFileScope scope, char **persistent, char **runtime) { + _cleanup_free_ char *a = NULL, *b = NULL; + int r; + + assert(persistent); + assert(runtime); + + switch (scope) { + + case UNIT_FILE_SYSTEM: + a = strdup(SYSTEM_CONFIG_UNIT_PATH); + b = strdup("/run/systemd/system"); + break; + + case UNIT_FILE_GLOBAL: + a = strdup(USER_CONFIG_UNIT_PATH); + b = strdup("/run/systemd/user"); + break; + + case UNIT_FILE_USER: + r = xdg_user_config_dir(&a, "/systemd/user"); + if (r < 0 && r != -ENXIO) + return r; + + r = xdg_user_runtime_dir(runtime, "/systemd/user"); + if (r < 0) { + if (r != -ENXIO) + return r; + + /* If XDG_RUNTIME_DIR is not set, don't consider that fatal, simply initialize the runtime + * directory to NULL */ + *runtime = NULL; + } + + *persistent = TAKE_PTR(a); + + return 0; + + default: + assert_not_reached("Hmm, unexpected scope value."); + } + + if (!a || !b) + return -ENOMEM; + + *persistent = TAKE_PTR(a); + *runtime = TAKE_PTR(b); + + return 0; +} + +static int acquire_control_dirs(UnitFileScope scope, char **persistent, char **runtime) { + _cleanup_free_ char *a = NULL; + int r; + + assert(persistent); + assert(runtime); + + switch (scope) { + + case UNIT_FILE_SYSTEM: { + _cleanup_free_ char *b = NULL; + + a = strdup("/etc/systemd/system.control"); + if (!a) + return -ENOMEM; + + b = strdup("/run/systemd/system.control"); + if (!b) + return -ENOMEM; + + *runtime = TAKE_PTR(b); + + break; + } + + case UNIT_FILE_USER: + r = xdg_user_config_dir(&a, "/systemd/user.control"); + if (r < 0 && r != -ENXIO) + return r; + + r = xdg_user_runtime_dir(runtime, "/systemd/user.control"); + if (r < 0) { + if (r != -ENXIO) + return r; + + /* If XDG_RUNTIME_DIR is not set, don't consider this fatal, simply initialize the directory to + * NULL */ + *runtime = NULL; + } + + break; + + case UNIT_FILE_GLOBAL: + return -EOPNOTSUPP; + + default: + assert_not_reached("Hmm, unexpected scope value."); + } + + *persistent = TAKE_PTR(a); + + return 0; +} + +static int acquire_attached_dirs( + UnitFileScope scope, + char **ret_persistent, + char **ret_runtime) { + + _cleanup_free_ char *a = NULL, *b = NULL; + + assert(ret_persistent); + assert(ret_runtime); + + /* Portable services are not available to regular users for now. */ + if (scope != UNIT_FILE_SYSTEM) + return -EOPNOTSUPP; + + a = strdup("/etc/systemd/system.attached"); + if (!a) + return -ENOMEM; + + b = strdup("/run/systemd/system.attached"); + if (!b) + return -ENOMEM; + + *ret_persistent = TAKE_PTR(a); + *ret_runtime = TAKE_PTR(b); + + return 0; +} + +static int patch_root_prefix(char **p, const char *root_dir) { + char *c; + + assert(p); + + if (!*p) + return 0; + + c = prefix_root(root_dir, *p); + if (!c) + return -ENOMEM; + + free(*p); + *p = c; + + return 0; +} + +static int patch_root_prefix_strv(char **l, const char *root_dir) { + char **i; + int r; + + if (!root_dir) + return 0; + + STRV_FOREACH(i, l) { + r = patch_root_prefix(i, root_dir); + if (r < 0) + return r; + } + + return 0; +} + +int lookup_paths_init( + LookupPaths *p, + UnitFileScope scope, + LookupPathsFlags flags, + const char *root_dir) { + + _cleanup_(rmdir_and_freep) char *tempdir = NULL; + _cleanup_free_ char + *root = NULL, + *persistent_config = NULL, *runtime_config = NULL, + *global_persistent_config = NULL, *global_runtime_config = NULL, + *generator = NULL, *generator_early = NULL, *generator_late = NULL, + *transient = NULL, + *persistent_control = NULL, *runtime_control = NULL, + *persistent_attached = NULL, *runtime_attached = NULL; + bool append = false; /* Add items from SYSTEMD_UNIT_PATH before normal directories */ + _cleanup_strv_free_ char **paths = NULL; + const char *e; + int r; + + assert(p); + assert(scope >= 0); + assert(scope < _UNIT_FILE_SCOPE_MAX); + +#if HAVE_SPLIT_USR + flags |= LOOKUP_PATHS_SPLIT_USR; +#endif + + if (!empty_or_root(root_dir)) { + if (scope == UNIT_FILE_USER) + return -EINVAL; + + r = is_dir(root_dir, true); + if (r < 0) + return r; + if (r == 0) + return -ENOTDIR; + + root = strdup(root_dir); + if (!root) + return -ENOMEM; + } + + if (flags & LOOKUP_PATHS_TEMPORARY_GENERATED) { + r = mkdtemp_malloc("/tmp/systemd-temporary-XXXXXX", &tempdir); + if (r < 0) + return log_debug_errno(r, "Failed to create temporary directory: %m"); + } + + /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_config to NULL */ + r = acquire_config_dirs(scope, &persistent_config, &runtime_config); + if (r < 0) + return r; + + if (scope == UNIT_FILE_USER) { + r = acquire_config_dirs(UNIT_FILE_GLOBAL, &global_persistent_config, &global_runtime_config); + if (r < 0) + return r; + } + + if ((flags & LOOKUP_PATHS_EXCLUDE_GENERATED) == 0) { + /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */ + r = acquire_generator_dirs(scope, tempdir, + &generator, &generator_early, &generator_late); + if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO)) + return r; + } + + /* Note: if XDG_RUNTIME_DIR is not set, this will fail completely with ENXIO */ + r = acquire_transient_dir(scope, tempdir, &transient); + if (r < 0 && !IN_SET(r, -EOPNOTSUPP, -ENXIO)) + return r; + + /* Note: when XDG_RUNTIME_DIR is not set this will not return -ENXIO, but simply set runtime_control to NULL */ + r = acquire_control_dirs(scope, &persistent_control, &runtime_control); + if (r < 0 && r != -EOPNOTSUPP) + return r; + + r = acquire_attached_dirs(scope, &persistent_attached, &runtime_attached); + if (r < 0 && r != -EOPNOTSUPP) + return r; + + /* First priority is whatever has been passed to us via env vars */ + e = getenv("SYSTEMD_UNIT_PATH"); + if (e) { + const char *k; + + k = endswith(e, ":"); + if (k) { + e = strndupa(e, k - e); + append = true; + } + + /* FIXME: empty components in other places should be rejected. */ + + r = path_split_and_make_absolute(e, &paths); + if (r < 0) + return r; + } + + if (!paths || append) { + /* Let's figure something out. */ + + _cleanup_strv_free_ char **add = NULL; + + /* For the user units we include share/ in the search + * path in order to comply with the XDG basedir spec. + * For the system stuff we avoid such nonsense. OTOH + * we include /lib in the search path for the system + * stuff but avoid it for user stuff. */ + + switch (scope) { + + case UNIT_FILE_SYSTEM: + add = strv_new( + /* If you modify this you also want to modify + * systemdsystemunitpath= in systemd.pc.in! */ + STRV_IFNOTNULL(persistent_control), + STRV_IFNOTNULL(runtime_control), + STRV_IFNOTNULL(transient), + STRV_IFNOTNULL(generator_early), + persistent_config, + SYSTEM_CONFIG_UNIT_PATH, + "/etc/systemd/system", + STRV_IFNOTNULL(persistent_attached), + runtime_config, + "/run/systemd/system", + STRV_IFNOTNULL(runtime_attached), + STRV_IFNOTNULL(generator), + "/usr/local/lib/systemd/system", + SYSTEM_DATA_UNIT_PATH, + "/usr/lib/systemd/system", + STRV_IFNOTNULL(flags & LOOKUP_PATHS_SPLIT_USR ? "/lib/systemd/system" : NULL), + STRV_IFNOTNULL(generator_late)); + break; + + case UNIT_FILE_GLOBAL: + add = strv_new( + /* If you modify this you also want to modify + * systemduserunitpath= in systemd.pc.in, and + * the arrays in user_dirs() above! */ + STRV_IFNOTNULL(persistent_control), + STRV_IFNOTNULL(runtime_control), + STRV_IFNOTNULL(transient), + STRV_IFNOTNULL(generator_early), + persistent_config, + USER_CONFIG_UNIT_PATH, + "/etc/systemd/user", + runtime_config, + "/run/systemd/user", + STRV_IFNOTNULL(generator), + "/usr/local/share/systemd/user", + "/usr/share/systemd/user", + "/usr/local/lib/systemd/user", + USER_DATA_UNIT_PATH, + "/usr/lib/systemd/user", + STRV_IFNOTNULL(generator_late)); + break; + + case UNIT_FILE_USER: + add = user_dirs(persistent_config, runtime_config, + global_persistent_config, global_runtime_config, + generator, generator_early, generator_late, + transient, + persistent_control, runtime_control); + break; + + default: + assert_not_reached("Hmm, unexpected scope?"); + } + + if (!add) + return -ENOMEM; + + if (paths) { + r = strv_extend_strv(&paths, add, true); + if (r < 0) + return r; + } else + /* Small optimization: if paths is NULL (and it usually is), we can simply assign 'add' to it, + * and don't have to copy anything */ + paths = TAKE_PTR(add); + } + + r = patch_root_prefix(&persistent_config, root); + if (r < 0) + return r; + r = patch_root_prefix(&runtime_config, root); + if (r < 0) + return r; + + r = patch_root_prefix(&generator, root); + if (r < 0) + return r; + r = patch_root_prefix(&generator_early, root); + if (r < 0) + return r; + r = patch_root_prefix(&generator_late, root); + if (r < 0) + return r; + + r = patch_root_prefix(&transient, root); + if (r < 0) + return r; + + r = patch_root_prefix(&persistent_control, root); + if (r < 0) + return r; + r = patch_root_prefix(&runtime_control, root); + if (r < 0) + return r; + + r = patch_root_prefix(&persistent_attached, root); + if (r < 0) + return r; + r = patch_root_prefix(&runtime_attached, root); + if (r < 0) + return r; + + r = patch_root_prefix_strv(paths, root); + if (r < 0) + return -ENOMEM; + + *p = (LookupPaths) { + .search_path = strv_uniq(paths), + + .persistent_config = TAKE_PTR(persistent_config), + .runtime_config = TAKE_PTR(runtime_config), + + .generator = TAKE_PTR(generator), + .generator_early = TAKE_PTR(generator_early), + .generator_late = TAKE_PTR(generator_late), + + .transient = TAKE_PTR(transient), + + .persistent_control = TAKE_PTR(persistent_control), + .runtime_control = TAKE_PTR(runtime_control), + + .persistent_attached = TAKE_PTR(persistent_attached), + .runtime_attached = TAKE_PTR(runtime_attached), + + .root_dir = TAKE_PTR(root), + .temporary_dir = TAKE_PTR(tempdir), + }; + + paths = NULL; + return 0; +} + +void lookup_paths_free(LookupPaths *p) { + if (!p) + return; + + p->search_path = strv_free(p->search_path); + + p->persistent_config = mfree(p->persistent_config); + p->runtime_config = mfree(p->runtime_config); + + p->persistent_attached = mfree(p->persistent_attached); + p->runtime_attached = mfree(p->runtime_attached); + + p->generator = mfree(p->generator); + p->generator_early = mfree(p->generator_early); + p->generator_late = mfree(p->generator_late); + + p->transient = mfree(p->transient); + + p->persistent_control = mfree(p->persistent_control); + p->runtime_control = mfree(p->runtime_control); + + p->root_dir = mfree(p->root_dir); + p->temporary_dir = mfree(p->temporary_dir); +} + +int lookup_paths_reduce(LookupPaths *p) { + _cleanup_free_ struct stat *stats = NULL; + size_t n_stats = 0, allocated = 0; + size_t c = 0; + int r; + + assert(p); + + /* Drop duplicates and non-existing directories from the search path. We figure out whether two directories are + * the same by comparing their device and inode numbers. */ + + if (!p->search_path) + return 0; + + while (p->search_path[c]) { + struct stat st; + size_t k; + + /* Never strip the transient and control directories from the path */ + if (path_equal_ptr(p->search_path[c], p->transient) || + path_equal_ptr(p->search_path[c], p->persistent_control) || + path_equal_ptr(p->search_path[c], p->runtime_control)) { + c++; + continue; + } + + r = chase_symlinks_and_stat(p->search_path[c], p->root_dir, 0, NULL, &st); + if (r == -ENOENT) + goto remove_item; + if (r < 0) { + /* If something we don't grok happened, let's better leave it in. */ + log_debug_errno(r, "Failed to chase and stat %s: %m", p->search_path[c]); + c++; + continue; + } + + for (k = 0; k < n_stats; k++) + if (stats[k].st_dev == st.st_dev && + stats[k].st_ino == st.st_ino) + break; + + if (k < n_stats) /* Is there already an entry with the same device/inode? */ + goto remove_item; + + if (!GREEDY_REALLOC(stats, allocated, n_stats+1)) + return -ENOMEM; + + stats[n_stats++] = st; + c++; + continue; + + remove_item: + free(p->search_path[c]); + memmove(p->search_path + c, + p->search_path + c + 1, + (strv_length(p->search_path + c + 1) + 1) * sizeof(char*)); + } + + if (strv_isempty(p->search_path)) { + log_debug("Ignoring unit files."); + p->search_path = strv_free(p->search_path); + } else { + _cleanup_free_ char *t; + + t = strv_join(p->search_path, "\n\t"); + if (!t) + return -ENOMEM; + + log_debug("Looking for unit files in (higher priority first):\n\t%s", t); + } + + return 0; +} + +int lookup_paths_mkdir_generator(LookupPaths *p) { + int r, q; + + assert(p); + + if (!p->generator || !p->generator_early || !p->generator_late) + return -EINVAL; + + r = mkdir_p_label(p->generator, 0755); + + q = mkdir_p_label(p->generator_early, 0755); + if (q < 0 && r >= 0) + r = q; + + q = mkdir_p_label(p->generator_late, 0755); + if (q < 0 && r >= 0) + r = q; + + return r; +} + +void lookup_paths_trim_generator(LookupPaths *p) { + assert(p); + + /* Trim empty dirs */ + + if (p->generator) + (void) rmdir(p->generator); + if (p->generator_early) + (void) rmdir(p->generator_early); + if (p->generator_late) + (void) rmdir(p->generator_late); +} + +void lookup_paths_flush_generator(LookupPaths *p) { + assert(p); + + /* Flush the generated unit files in full */ + + if (p->generator) + (void) rm_rf(p->generator, REMOVE_ROOT|REMOVE_PHYSICAL); + if (p->generator_early) + (void) rm_rf(p->generator_early, REMOVE_ROOT|REMOVE_PHYSICAL); + if (p->generator_late) + (void) rm_rf(p->generator_late, REMOVE_ROOT|REMOVE_PHYSICAL); + + if (p->temporary_dir) + (void) rm_rf(p->temporary_dir, REMOVE_ROOT|REMOVE_PHYSICAL); +} + +char **generator_binary_paths(UnitFileScope scope) { + + switch (scope) { + + case UNIT_FILE_SYSTEM: + return strv_new("/run/systemd/system-generators", + "/etc/systemd/system-generators", + "/usr/local/lib/systemd/system-generators", + SYSTEM_GENERATOR_PATH); + + case UNIT_FILE_GLOBAL: + case UNIT_FILE_USER: + return strv_new("/run/systemd/user-generators", + "/etc/systemd/user-generators", + "/usr/local/lib/systemd/user-generators", + USER_GENERATOR_PATH); + + default: + assert_not_reached("Hmm, unexpected scope."); + } +} diff --git a/src/shared/path-lookup.h b/src/shared/path-lookup.h new file mode 100644 index 0000000..cb7d4d5 --- /dev/null +++ b/src/shared/path-lookup.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +typedef struct LookupPaths LookupPaths; + +#include "install.h" +#include "macro.h" + +typedef enum LookupPathsFlags { + LOOKUP_PATHS_EXCLUDE_GENERATED = 1 << 0, + LOOKUP_PATHS_TEMPORARY_GENERATED = 1 << 1, + LOOKUP_PATHS_SPLIT_USR = 1 << 2, +} LookupPathsFlags; + +struct LookupPaths { + /* Where we look for unit files. This includes the individual special paths below, but also any vendor + * supplied, static unit file paths. */ + char **search_path; + + /* Where we shall create or remove our installation symlinks, aka "configuration", and where the user/admin + * shall place his own unit files. */ + char *persistent_config; + char *runtime_config; + + /* Where units from a portable service image shall be placed. */ + char *persistent_attached; + char *runtime_attached; + + /* Where to place generated unit files (i.e. those a "generator" tool generated). Note the special semantics of + * this directory: the generators are flushed each time a "systemctl daemon-reload" is issued. The user should + * not alter these directories directly. */ + char *generator; + char *generator_early; + char *generator_late; + + /* Where to place transient unit files (i.e. those created dynamically via the bus API). Note the special + * semantics of this directory: all units created transiently have their unit files removed as the transient + * unit is unloaded. The user should not alter this directory directly. */ + char *transient; + + /* Where the snippets created by "systemctl set-property" are placed. Note that for transient units, the + * snippets are placed in the transient directory though (see above). The user should not alter this directory + * directly. */ + char *persistent_control; + char *runtime_control; + + /* The root directory prepended to all items above, or NULL */ + char *root_dir; + + /* A temporary directory when running in test mode, to be nuked */ + char *temporary_dir; +}; + +int lookup_paths_init(LookupPaths *p, UnitFileScope scope, LookupPathsFlags flags, const char *root_dir); + +int xdg_user_dirs(char ***ret_config_dirs, char ***ret_data_dirs); +int xdg_user_runtime_dir(char **ret, const char *suffix); +int xdg_user_config_dir(char **ret, const char *suffix); +int xdg_user_data_dir(char **ret, const char *suffix); + +bool path_is_user_data_dir(const char *path); +bool path_is_user_config_dir(const char *path); + +int lookup_paths_reduce(LookupPaths *p); + +int lookup_paths_mkdir_generator(LookupPaths *p); +void lookup_paths_trim_generator(LookupPaths *p); +void lookup_paths_flush_generator(LookupPaths *p); + +void lookup_paths_free(LookupPaths *p); + +char **generator_binary_paths(UnitFileScope scope); diff --git a/src/shared/pretty-print.c b/src/shared/pretty-print.c new file mode 100644 index 0000000..de6274a --- /dev/null +++ b/src/shared/pretty-print.c @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/utsname.h> +#include <errno.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "conf-files.h" +#include "def.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "pager.h" +#include "path-util.h" +#include "pretty-print.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "util.h" + +static bool urlify_enabled(void) { + static int cached_urlify_enabled = -1; + + /* Unfortunately 'less' doesn't support links like this yet 😭, hence let's disable this as long as there's a + * pager in effect. Let's drop this check as soon as less got fixed a and enough time passed so that it's safe + * to assume that a link-enabled 'less' version has hit most installations. */ + + if (cached_urlify_enabled < 0) { + int val; + + val = getenv_bool("SYSTEMD_URLIFY"); + if (val >= 0) + cached_urlify_enabled = val; + else + cached_urlify_enabled = colors_enabled() && !pager_have(); + } + + return cached_urlify_enabled; +} + +int terminal_urlify(const char *url, const char *text, char **ret) { + char *n; + + assert(url); + + /* Takes an URL and a pretty string and formats it as clickable link for the terminal. See + * https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda for details. */ + + if (isempty(text)) + text = url; + + if (urlify_enabled()) + n = strjoin("\x1B]8;;", url, "\a", text, "\x1B]8;;\a"); + else + n = strdup(text); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int file_url_from_path(const char *path, char **ret) { + _cleanup_free_ char *absolute = NULL; + struct utsname u; + char *url = NULL; + int r; + + if (uname(&u) < 0) + return -errno; + + if (!path_is_absolute(path)) { + r = path_make_absolute_cwd(path, &absolute); + if (r < 0) + return r; + + path = absolute; + } + + /* As suggested by https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda, let's include the local + * hostname here. Note that we don't use gethostname_malloc() or gethostname_strict() since we are interested + * in the raw string the kernel has set, whatever it may be, under the assumption that terminals are not overly + * careful with validating the strings either. */ + + url = strjoin("file://", u.nodename, path); + if (!url) + return -ENOMEM; + + *ret = url; + return 0; +} + +int terminal_urlify_path(const char *path, const char *text, char **ret) { + _cleanup_free_ char *url = NULL; + int r; + + assert(path); + + /* Much like terminal_urlify() above, but takes a file system path as input + * and turns it into a proper file:// URL first. */ + + if (isempty(path)) + return -EINVAL; + + if (isempty(text)) + text = path; + + if (!urlify_enabled()) { + char *n; + + n = strdup(text); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; + } + + r = file_url_from_path(path, &url); + if (r < 0) + return r; + + return terminal_urlify(url, text, ret); +} + +int terminal_urlify_man(const char *page, const char *section, char **ret) { + const char *url, *text; + + url = strjoina("man:", page, "(", section, ")"); + text = strjoina(page, "(", section, ") man page"); + + return terminal_urlify(url, text, ret); +} + +static int cat_file(const char *filename, bool newline) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *urlified = NULL; + int r; + + f = fopen(filename, "re"); + if (!f) + return -errno; + + r = terminal_urlify_path(filename, NULL, &urlified); + if (r < 0) + return r; + + printf("%s%s# %s%s\n", + newline ? "\n" : "", + ansi_highlight_blue(), + urlified, + ansi_normal()); + fflush(stdout); + + for (;;) { + _cleanup_free_ char *line = NULL; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_error_errno(r, "Failed to read \"%s\": %m", filename); + if (r == 0) + break; + + puts(line); + } + + return 0; +} + +int cat_files(const char *file, char **dropins, CatFlags flags) { + char **path; + int r; + + if (file) { + r = cat_file(file, false); + if (r == -ENOENT && (flags & CAT_FLAGS_MAIN_FILE_OPTIONAL)) + printf("%s# config file %s not found%s\n", + ansi_highlight_magenta(), + file, + ansi_normal()); + else if (r < 0) + return log_warning_errno(r, "Failed to cat %s: %m", file); + } + + STRV_FOREACH(path, dropins) { + r = cat_file(*path, file || path != dropins); + if (r < 0) + return log_warning_errno(r, "Failed to cat %s: %m", *path); + } + + return 0; +} + +void print_separator(void) { + + /* Outputs a separator line that resolves to whitespace when copied from the terminal. We do that by outputting + * one line filled with spaces with ANSI underline set, followed by a second (empty) line. */ + + if (underline_enabled()) { + size_t i, c; + + c = columns(); + + flockfile(stdout); + fputs_unlocked(ANSI_UNDERLINE, stdout); + + for (i = 0; i < c; i++) + fputc_unlocked(' ', stdout); + + fputs_unlocked(ANSI_NORMAL "\n\n", stdout); + funlockfile(stdout); + } else + fputs("\n\n", stdout); +} + +int conf_files_cat(const char *root, const char *name) { + _cleanup_strv_free_ char **dirs = NULL, **files = NULL; + _cleanup_free_ char *path = NULL; + const char *dir; + char **t; + int r; + + NULSTR_FOREACH(dir, CONF_PATHS_NULSTR("")) { + assert(endswith(dir, "/")); + r = strv_extendf(&dirs, "%s%s.d", dir, name); + if (r < 0) + return log_error_errno(r, "Failed to build directory list: %m"); + } + + r = conf_files_list_strv(&files, ".conf", root, 0, (const char* const*) dirs); + if (r < 0) + return log_error_errno(r, "Failed to query file list: %m"); + + path = path_join(root, "/etc", name); + if (!path) + return log_oom(); + + if (DEBUG_LOGGING) { + log_debug("Looking for configuration in:"); + log_debug(" %s", path); + STRV_FOREACH(t, dirs) + log_debug(" %s/*.conf", *t); + } + + /* show */ + return cat_files(path, files, CAT_FLAGS_MAIN_FILE_OPTIONAL); +} diff --git a/src/shared/pretty-print.h b/src/shared/pretty-print.h new file mode 100644 index 0000000..12ab9ac --- /dev/null +++ b/src/shared/pretty-print.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +void print_separator(void); + +int file_url_from_path(const char *path, char **ret); + +int terminal_urlify(const char *url, const char *text, char **ret); +int terminal_urlify_path(const char *path, const char *text, char **ret); +int terminal_urlify_man(const char *page, const char *section, char **ret); + +typedef enum CatFlags { + CAT_FLAGS_MAIN_FILE_OPTIONAL = 1 << 0, +} CatFlags; + +int cat_files(const char *file, char **dropins, CatFlags flags); +int conf_files_cat(const char *root, const char *name); diff --git a/src/shared/ptyfwd.c b/src/shared/ptyfwd.c new file mode 100644 index 0000000..fe17b37 --- /dev/null +++ b/src/shared/ptyfwd.c @@ -0,0 +1,631 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <termios.h> +#include <unistd.h> + +#include "sd-event.h" + +#include "alloc-util.h" +#include "fd-util.h" +#include "log.h" +#include "macro.h" +#include "ptyfwd.h" +#include "terminal-util.h" +#include "time-util.h" + +struct PTYForward { + sd_event *event; + + int master; + + PTYForwardFlags flags; + + sd_event_source *stdin_event_source; + sd_event_source *stdout_event_source; + sd_event_source *master_event_source; + + sd_event_source *sigwinch_event_source; + + struct termios saved_stdin_attr; + struct termios saved_stdout_attr; + + bool saved_stdin:1; + bool saved_stdout:1; + + bool stdin_readable:1; + bool stdin_hangup:1; + bool stdout_writable:1; + bool stdout_hangup:1; + bool master_readable:1; + bool master_writable:1; + bool master_hangup:1; + + bool read_from_master:1; + + bool done:1; + bool drain:1; + + bool last_char_set:1; + char last_char; + + char in_buffer[LINE_MAX], out_buffer[LINE_MAX]; + size_t in_buffer_full, out_buffer_full; + + usec_t escape_timestamp; + unsigned escape_counter; + + PTYForwardHandler handler; + void *userdata; +}; + +#define ESCAPE_USEC (1*USEC_PER_SEC) + +static void pty_forward_disconnect(PTYForward *f) { + + if (f) { + f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); + f->stdout_event_source = sd_event_source_unref(f->stdout_event_source); + + f->master_event_source = sd_event_source_unref(f->master_event_source); + f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source); + f->event = sd_event_unref(f->event); + + if (f->saved_stdout) + tcsetattr(STDOUT_FILENO, TCSANOW, &f->saved_stdout_attr); + if (f->saved_stdin) + tcsetattr(STDIN_FILENO, TCSANOW, &f->saved_stdin_attr); + + f->saved_stdout = f->saved_stdin = false; + } + + /* STDIN/STDOUT should not be nonblocking normally, so let's unconditionally reset it */ + (void) fd_nonblock(STDIN_FILENO, false); + (void) fd_nonblock(STDOUT_FILENO, false); +} + +static int pty_forward_done(PTYForward *f, int rcode) { + _cleanup_(sd_event_unrefp) sd_event *e = NULL; + assert(f); + + if (f->done) + return 0; + + e = sd_event_ref(f->event); + + f->done = true; + pty_forward_disconnect(f); + + if (f->handler) + return f->handler(f, rcode, f->userdata); + else + return sd_event_exit(e, rcode < 0 ? EXIT_FAILURE : rcode); +} + +static bool look_for_escape(PTYForward *f, const char *buffer, size_t n) { + const char *p; + + assert(f); + assert(buffer); + assert(n > 0); + + for (p = buffer; p < buffer + n; p++) { + + /* Check for ^] */ + if (*p == 0x1D) { + usec_t nw = now(CLOCK_MONOTONIC); + + if (f->escape_counter == 0 || nw > f->escape_timestamp + ESCAPE_USEC) { + f->escape_timestamp = nw; + f->escape_counter = 1; + } else { + (f->escape_counter)++; + + if (f->escape_counter >= 3) + return true; + } + } else { + f->escape_timestamp = 0; + f->escape_counter = 0; + } + } + + return false; +} + +static bool ignore_vhangup(PTYForward *f) { + assert(f); + + if (f->flags & PTY_FORWARD_IGNORE_VHANGUP) + return true; + + if ((f->flags & PTY_FORWARD_IGNORE_INITIAL_VHANGUP) && !f->read_from_master) + return true; + + return false; +} + +static bool drained(PTYForward *f) { + int q = 0; + + assert(f); + + if (f->out_buffer_full > 0) + return false; + + if (f->master_readable) + return false; + + if (ioctl(f->master, TIOCINQ, &q) < 0) + log_debug_errno(errno, "TIOCINQ failed on master: %m"); + else if (q > 0) + return false; + + if (ioctl(f->master, TIOCOUTQ, &q) < 0) + log_debug_errno(errno, "TIOCOUTQ failed on master: %m"); + else if (q > 0) + return false; + + return true; +} + +static int shovel(PTYForward *f) { + ssize_t k; + + assert(f); + + while ((f->stdin_readable && f->in_buffer_full <= 0) || + (f->master_writable && f->in_buffer_full > 0) || + (f->master_readable && f->out_buffer_full <= 0) || + (f->stdout_writable && f->out_buffer_full > 0)) { + + if (f->stdin_readable && f->in_buffer_full < LINE_MAX) { + + k = read(STDIN_FILENO, f->in_buffer + f->in_buffer_full, LINE_MAX - f->in_buffer_full); + if (k < 0) { + + if (errno == EAGAIN) + f->stdin_readable = false; + else if (IN_SET(errno, EIO, EPIPE, ECONNRESET)) { + f->stdin_readable = false; + f->stdin_hangup = true; + + f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); + } else { + log_error_errno(errno, "read(): %m"); + return pty_forward_done(f, -errno); + } + } else if (k == 0) { + /* EOF on stdin */ + f->stdin_readable = false; + f->stdin_hangup = true; + + f->stdin_event_source = sd_event_source_unref(f->stdin_event_source); + } else { + /* Check if ^] has been pressed three times within one second. If we get this we quite + * immediately. */ + if (look_for_escape(f, f->in_buffer + f->in_buffer_full, k)) + return pty_forward_done(f, -ECANCELED); + + f->in_buffer_full += (size_t) k; + } + } + + if (f->master_writable && f->in_buffer_full > 0) { + + k = write(f->master, f->in_buffer, f->in_buffer_full); + if (k < 0) { + + if (IN_SET(errno, EAGAIN, EIO)) + f->master_writable = false; + else if (IN_SET(errno, EPIPE, ECONNRESET)) { + f->master_writable = f->master_readable = false; + f->master_hangup = true; + + f->master_event_source = sd_event_source_unref(f->master_event_source); + } else { + log_error_errno(errno, "write(): %m"); + return pty_forward_done(f, -errno); + } + } else { + assert(f->in_buffer_full >= (size_t) k); + memmove(f->in_buffer, f->in_buffer + k, f->in_buffer_full - k); + f->in_buffer_full -= k; + } + } + + if (f->master_readable && f->out_buffer_full < LINE_MAX) { + + k = read(f->master, f->out_buffer + f->out_buffer_full, LINE_MAX - f->out_buffer_full); + if (k < 0) { + + /* Note that EIO on the master device + * might be caused by vhangup() or + * temporary closing of everything on + * the other side, we treat it like + * EAGAIN here and try again, unless + * ignore_vhangup is off. */ + + if (errno == EAGAIN || (errno == EIO && ignore_vhangup(f))) + f->master_readable = false; + else if (IN_SET(errno, EPIPE, ECONNRESET, EIO)) { + f->master_readable = f->master_writable = false; + f->master_hangup = true; + + f->master_event_source = sd_event_source_unref(f->master_event_source); + } else { + log_error_errno(errno, "read(): %m"); + return pty_forward_done(f, -errno); + } + } else { + f->read_from_master = true; + f->out_buffer_full += (size_t) k; + } + } + + if (f->stdout_writable && f->out_buffer_full > 0) { + + k = write(STDOUT_FILENO, f->out_buffer, f->out_buffer_full); + if (k < 0) { + + if (errno == EAGAIN) + f->stdout_writable = false; + else if (IN_SET(errno, EIO, EPIPE, ECONNRESET)) { + f->stdout_writable = false; + f->stdout_hangup = true; + f->stdout_event_source = sd_event_source_unref(f->stdout_event_source); + } else { + log_error_errno(errno, "write(): %m"); + return pty_forward_done(f, -errno); + } + + } else { + + if (k > 0) { + f->last_char = f->out_buffer[k-1]; + f->last_char_set = true; + } + + assert(f->out_buffer_full >= (size_t) k); + memmove(f->out_buffer, f->out_buffer + k, f->out_buffer_full - k); + f->out_buffer_full -= k; + } + } + } + + if (f->stdin_hangup || f->stdout_hangup || f->master_hangup) { + /* Exit the loop if any side hung up and if there's + * nothing more to write or nothing we could write. */ + + if ((f->out_buffer_full <= 0 || f->stdout_hangup) && + (f->in_buffer_full <= 0 || f->master_hangup)) + return pty_forward_done(f, 0); + } + + /* If we were asked to drain, and there's nothing more to handle from the master, then call the callback + * too. */ + if (f->drain && drained(f)) + return pty_forward_done(f, 0); + + return 0; +} + +static int on_master_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + PTYForward *f = userdata; + + assert(f); + assert(e); + assert(e == f->master_event_source); + assert(fd >= 0); + assert(fd == f->master); + + if (revents & (EPOLLIN|EPOLLHUP)) + f->master_readable = true; + + if (revents & (EPOLLOUT|EPOLLHUP)) + f->master_writable = true; + + return shovel(f); +} + +static int on_stdin_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + PTYForward *f = userdata; + + assert(f); + assert(e); + assert(e == f->stdin_event_source); + assert(fd >= 0); + assert(fd == STDIN_FILENO); + + if (revents & (EPOLLIN|EPOLLHUP)) + f->stdin_readable = true; + + return shovel(f); +} + +static int on_stdout_event(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + PTYForward *f = userdata; + + assert(f); + assert(e); + assert(e == f->stdout_event_source); + assert(fd >= 0); + assert(fd == STDOUT_FILENO); + + if (revents & (EPOLLOUT|EPOLLHUP)) + f->stdout_writable = true; + + return shovel(f); +} + +static int on_sigwinch_event(sd_event_source *e, const struct signalfd_siginfo *si, void *userdata) { + PTYForward *f = userdata; + struct winsize ws; + + assert(f); + assert(e); + assert(e == f->sigwinch_event_source); + + /* The window size changed, let's forward that. */ + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) >= 0) + (void) ioctl(f->master, TIOCSWINSZ, &ws); + + return 0; +} + +int pty_forward_new( + sd_event *event, + int master, + PTYForwardFlags flags, + PTYForward **ret) { + + _cleanup_(pty_forward_freep) PTYForward *f = NULL; + struct winsize ws; + int r; + + f = new(PTYForward, 1); + if (!f) + return -ENOMEM; + + *f = (struct PTYForward) { + .flags = flags, + .master = -1, + }; + + if (event) + f->event = sd_event_ref(event); + else { + r = sd_event_default(&f->event); + if (r < 0) + return r; + } + + if (!(flags & PTY_FORWARD_READ_ONLY)) { + r = fd_nonblock(STDIN_FILENO, true); + if (r < 0) + return r; + + r = fd_nonblock(STDOUT_FILENO, true); + if (r < 0) + return r; + } + + r = fd_nonblock(master, true); + if (r < 0) + return r; + + f->master = master; + + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) < 0) { + /* If we can't get the resolution from the output fd, then use our internal, regular width/height, + * i.e. something derived from $COLUMNS and $LINES if set. */ + + ws = (struct winsize) { + .ws_row = lines(), + .ws_col = columns(), + }; + } + + (void) ioctl(master, TIOCSWINSZ, &ws); + + if (!(flags & PTY_FORWARD_READ_ONLY)) { + if (tcgetattr(STDIN_FILENO, &f->saved_stdin_attr) >= 0) { + struct termios raw_stdin_attr; + + f->saved_stdin = true; + + raw_stdin_attr = f->saved_stdin_attr; + cfmakeraw(&raw_stdin_attr); + raw_stdin_attr.c_oflag = f->saved_stdin_attr.c_oflag; + tcsetattr(STDIN_FILENO, TCSANOW, &raw_stdin_attr); + } + + if (tcgetattr(STDOUT_FILENO, &f->saved_stdout_attr) >= 0) { + struct termios raw_stdout_attr; + + f->saved_stdout = true; + + raw_stdout_attr = f->saved_stdout_attr; + cfmakeraw(&raw_stdout_attr); + raw_stdout_attr.c_iflag = f->saved_stdout_attr.c_iflag; + raw_stdout_attr.c_lflag = f->saved_stdout_attr.c_lflag; + tcsetattr(STDOUT_FILENO, TCSANOW, &raw_stdout_attr); + } + + r = sd_event_add_io(f->event, &f->stdin_event_source, STDIN_FILENO, EPOLLIN|EPOLLET, on_stdin_event, f); + if (r < 0 && r != -EPERM) + return r; + + if (r >= 0) + (void) sd_event_source_set_description(f->stdin_event_source, "ptyfwd-stdin"); + } + + r = sd_event_add_io(f->event, &f->stdout_event_source, STDOUT_FILENO, EPOLLOUT|EPOLLET, on_stdout_event, f); + if (r == -EPERM) + /* stdout without epoll support. Likely redirected to regular file. */ + f->stdout_writable = true; + else if (r < 0) + return r; + else + (void) sd_event_source_set_description(f->stdout_event_source, "ptyfwd-stdout"); + + r = sd_event_add_io(f->event, &f->master_event_source, master, EPOLLIN|EPOLLOUT|EPOLLET, on_master_event, f); + if (r < 0) + return r; + + (void) sd_event_source_set_description(f->master_event_source, "ptyfwd-master"); + + r = sd_event_add_signal(f->event, &f->sigwinch_event_source, SIGWINCH, on_sigwinch_event, f); + if (r < 0) + return r; + + (void) sd_event_source_set_description(f->sigwinch_event_source, "ptyfwd-sigwinch"); + + *ret = TAKE_PTR(f); + + return 0; +} + +PTYForward *pty_forward_free(PTYForward *f) { + pty_forward_disconnect(f); + return mfree(f); +} + +int pty_forward_get_last_char(PTYForward *f, char *ch) { + assert(f); + assert(ch); + + if (!f->last_char_set) + return -ENXIO; + + *ch = f->last_char; + return 0; +} + +int pty_forward_set_ignore_vhangup(PTYForward *f, bool b) { + int r; + + assert(f); + + if (!!(f->flags & PTY_FORWARD_IGNORE_VHANGUP) == b) + return 0; + + SET_FLAG(f->flags, PTY_FORWARD_IGNORE_VHANGUP, b); + + if (!ignore_vhangup(f)) { + + /* We shall now react to vhangup()s? Let's check + * immediately if we might be in one */ + + f->master_readable = true; + r = shovel(f); + if (r < 0) + return r; + } + + return 0; +} + +bool pty_forward_get_ignore_vhangup(PTYForward *f) { + assert(f); + + return !!(f->flags & PTY_FORWARD_IGNORE_VHANGUP); +} + +bool pty_forward_is_done(PTYForward *f) { + assert(f); + + return f->done; +} + +void pty_forward_set_handler(PTYForward *f, PTYForwardHandler cb, void *userdata) { + assert(f); + + f->handler = cb; + f->userdata = userdata; +} + +bool pty_forward_drain(PTYForward *f) { + assert(f); + + /* Starts draining the forwarder. Specifically: + * + * - Returns true if there are no unprocessed bytes from the pty, false otherwise + * + * - Makes sure the handler function is called the next time the number of unprocessed bytes hits zero + */ + + f->drain = true; + return drained(f); +} + +int pty_forward_set_priority(PTYForward *f, int64_t priority) { + int r; + assert(f); + + r = sd_event_source_set_priority(f->stdin_event_source, priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(f->stdout_event_source, priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(f->master_event_source, priority); + if (r < 0) + return r; + + r = sd_event_source_set_priority(f->sigwinch_event_source, priority); + if (r < 0) + return r; + + return 0; +} + +int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height) { + struct winsize ws; + + assert(f); + + if (width == (unsigned) -1 && height == (unsigned) -1) + return 0; /* noop */ + + if (width != (unsigned) -1 && + (width == 0 || width > USHRT_MAX)) + return -ERANGE; + + if (height != (unsigned) -1 && + (height == 0 || height > USHRT_MAX)) + return -ERANGE; + + if (width == (unsigned) -1 || height == (unsigned) -1) { + if (ioctl(f->master, TIOCGWINSZ, &ws) < 0) + return -errno; + + if (width != (unsigned) -1) + ws.ws_col = width; + if (height != (unsigned) -1) + ws.ws_row = height; + } else + ws = (struct winsize) { + .ws_row = height, + .ws_col = width, + }; + + if (ioctl(f->master, TIOCSWINSZ, &ws) < 0) + return -errno; + + /* Make sure we ignore SIGWINCH window size events from now on */ + f->sigwinch_event_source = sd_event_source_unref(f->sigwinch_event_source); + + return 0; +} diff --git a/src/shared/ptyfwd.h b/src/shared/ptyfwd.h new file mode 100644 index 0000000..887d3cb --- /dev/null +++ b/src/shared/ptyfwd.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "sd-event.h" + +#include "macro.h" + +typedef struct PTYForward PTYForward; + +typedef enum PTYForwardFlags { + PTY_FORWARD_READ_ONLY = 1, + + /* Continue reading after hangup? */ + PTY_FORWARD_IGNORE_VHANGUP = 2, + + /* Continue reading after hangup but only if we never read anything else? */ + PTY_FORWARD_IGNORE_INITIAL_VHANGUP = 4, +} PTYForwardFlags; + +typedef int (*PTYForwardHandler)(PTYForward *f, int rcode, void *userdata); + +int pty_forward_new(sd_event *event, int master, PTYForwardFlags flags, PTYForward **f); +PTYForward *pty_forward_free(PTYForward *f); + +int pty_forward_get_last_char(PTYForward *f, char *ch); + +int pty_forward_set_ignore_vhangup(PTYForward *f, bool ignore_vhangup); +bool pty_forward_get_ignore_vhangup(PTYForward *f); + +bool pty_forward_is_done(PTYForward *f); + +void pty_forward_set_handler(PTYForward *f, PTYForwardHandler handler, void *userdata); + +bool pty_forward_drain(PTYForward *f); + +int pty_forward_set_priority(PTYForward *f, int64_t priority); + +int pty_forward_set_width_height(PTYForward *f, unsigned width, unsigned height); + +DEFINE_TRIVIAL_CLEANUP_FUNC(PTYForward*, pty_forward_free); diff --git a/src/shared/reboot-util.c b/src/shared/reboot-util.c new file mode 100644 index 0000000..ca40159 --- /dev/null +++ b/src/shared/reboot-util.c @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "fileio.h" +#include "log.h" +#include "raw-reboot.h" +#include "reboot-util.h" +#include "string-util.h" +#include "umask-util.h" +#include "virt.h" + +int update_reboot_parameter_and_warn(const char *parameter) { + int r; + + if (isempty(parameter)) { + if (unlink("/run/systemd/reboot-param") < 0) { + if (errno == ENOENT) + return 0; + + return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m"); + } + + return 0; + } + + RUN_WITH_UMASK(0022) { + r = write_string_file("/run/systemd/reboot-param", parameter, + WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_ATOMIC); + if (r < 0) + return log_warning_errno(r, "Failed to write reboot parameter file: %m"); + } + + return 0; +} + +int reboot_with_parameter(RebootFlags flags) { + int r; + + /* Reboots the system with a parameter that is read from /run/systemd/reboot-param. Returns 0 if REBOOT_DRY_RUN + * was set and the actual reboot operation was hence skipped. If REBOOT_FALLBACK is set and the reboot with + * parameter doesn't work out a fallback to classic reboot() is attempted. If REBOOT_FALLBACK is not set, 0 is + * returned instead, which should be considered indication for the caller to fall back to reboot() on its own, + * or somehow else deal with this. If REBOOT_LOG is specified will log about what it is going to do, as well as + * all errors. */ + + if (detect_container() == 0) { + _cleanup_free_ char *parameter = NULL; + + r = read_one_line_file("/run/systemd/reboot-param", ¶meter); + if (r < 0 && r != -ENOENT) + log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, r, + "Failed to read reboot parameter file, ignoring: %m"); + + if (!isempty(parameter)) { + + log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, + "Rebooting with argument '%s'.", parameter); + + if (flags & REBOOT_DRY_RUN) + return 0; + + (void) raw_reboot(LINUX_REBOOT_CMD_RESTART2, parameter); + + log_full_errno(flags & REBOOT_LOG ? LOG_WARNING : LOG_DEBUG, errno, + "Failed to reboot with parameter, retrying without: %m"); + } + } + + if (!(flags & REBOOT_FALLBACK)) + return 0; + + log_full(flags & REBOOT_LOG ? LOG_INFO : LOG_DEBUG, "Rebooting."); + + if (flags & REBOOT_DRY_RUN) + return 0; + + (void) reboot(RB_AUTOBOOT); + + return log_full_errno(flags & REBOOT_LOG ? LOG_ERR : LOG_DEBUG, errno, "Failed to reboot: %m"); +} diff --git a/src/shared/reboot-util.h b/src/shared/reboot-util.h new file mode 100644 index 0000000..d459333 --- /dev/null +++ b/src/shared/reboot-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +int update_reboot_parameter_and_warn(const char *parameter); + +typedef enum RebootFlags { + REBOOT_LOG = 1 << 0, /* log about what we are going to do and all errors */ + REBOOT_DRY_RUN = 1 << 1, /* return 0 right before actually doing the reboot */ + REBOOT_FALLBACK = 1 << 2, /* fallback to plain reboot() if argument-based reboot doesn't work, isn't configured or doesn't apply otherwise */ +} RebootFlags; + +int reboot_with_parameter(RebootFlags flags); diff --git a/src/shared/resolve-util.c b/src/shared/resolve-util.c new file mode 100644 index 0000000..a5d4a14 --- /dev/null +++ b/src/shared/resolve-util.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "conf-parser.h" +#include "resolve-util.h" +#include "string-table.h" + +DEFINE_CONFIG_PARSE_ENUM(config_parse_resolve_support, resolve_support, ResolveSupport, "Failed to parse resolve support setting"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_dnssec_mode, dnssec_mode, DnssecMode, "Failed to parse DNSSEC mode setting"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_dns_over_tls_mode, dns_over_tls_mode, DnsOverTlsMode, "Failed to parse DNS-over-TLS mode setting"); + +static const char* const resolve_support_table[_RESOLVE_SUPPORT_MAX] = { + [RESOLVE_SUPPORT_NO] = "no", + [RESOLVE_SUPPORT_YES] = "yes", + [RESOLVE_SUPPORT_RESOLVE] = "resolve", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(resolve_support, ResolveSupport, RESOLVE_SUPPORT_YES); + +static const char* const dnssec_mode_table[_DNSSEC_MODE_MAX] = { + [DNSSEC_NO] = "no", + [DNSSEC_ALLOW_DOWNGRADE] = "allow-downgrade", + [DNSSEC_YES] = "yes", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dnssec_mode, DnssecMode, DNSSEC_YES); + +static const char* const dns_over_tls_mode_table[_DNS_OVER_TLS_MODE_MAX] = { + [DNS_OVER_TLS_NO] = "no", + [DNS_OVER_TLS_OPPORTUNISTIC] = "opportunistic", +}; +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(dns_over_tls_mode, DnsOverTlsMode, _DNS_OVER_TLS_MODE_INVALID); diff --git a/src/shared/resolve-util.h b/src/shared/resolve-util.h new file mode 100644 index 0000000..5883342 --- /dev/null +++ b/src/shared/resolve-util.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "conf-parser.h" +#include "macro.h" + +typedef enum ResolveSupport ResolveSupport; +typedef enum DnssecMode DnssecMode; +typedef enum DnsOverTlsMode DnsOverTlsMode; + +enum ResolveSupport { + RESOLVE_SUPPORT_NO, + RESOLVE_SUPPORT_YES, + RESOLVE_SUPPORT_RESOLVE, + _RESOLVE_SUPPORT_MAX, + _RESOLVE_SUPPORT_INVALID = -1 +}; + +enum DnssecMode { + /* No DNSSEC validation is done */ + DNSSEC_NO, + + /* Validate locally, if the server knows DO, but if not, + * don't. Don't trust the AD bit. If the server doesn't do + * DNSSEC properly, downgrade to non-DNSSEC operation. Of + * course, we then are vulnerable to a downgrade attack, but + * that's life and what is configured. */ + DNSSEC_ALLOW_DOWNGRADE, + + /* Insist on DNSSEC server support, and rather fail than downgrading. */ + DNSSEC_YES, + + _DNSSEC_MODE_MAX, + _DNSSEC_MODE_INVALID = -1 +}; + +enum DnsOverTlsMode { + /* No connection is made for DNS-over-TLS */ + DNS_OVER_TLS_NO, + + /* Try to connect using DNS-over-TLS, but if connection fails, + * fallback to using an unencrypted connection */ + DNS_OVER_TLS_OPPORTUNISTIC, + + _DNS_OVER_TLS_MODE_MAX, + _DNS_OVER_TLS_MODE_INVALID = -1 +}; + +CONFIG_PARSER_PROTOTYPE(config_parse_resolve_support); +CONFIG_PARSER_PROTOTYPE(config_parse_dnssec_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_dns_over_tls_mode); + +const char* resolve_support_to_string(ResolveSupport p) _const_; +ResolveSupport resolve_support_from_string(const char *s) _pure_; + +const char* dnssec_mode_to_string(DnssecMode p) _const_; +DnssecMode dnssec_mode_from_string(const char *s) _pure_; + +const char* dns_over_tls_mode_to_string(DnsOverTlsMode p) _const_; +DnsOverTlsMode dns_over_tls_mode_from_string(const char *s) _pure_; diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c new file mode 100644 index 0000000..cc58b3c --- /dev/null +++ b/src/shared/seccomp-util.c @@ -0,0 +1,1764 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <linux/seccomp.h> +#include <seccomp.h> +#include <stddef.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/shm.h> + +#include "af-list.h" +#include "alloc-util.h" +#include "macro.h" +#include "nsflags.h" +#include "process-util.h" +#include "seccomp-util.h" +#include "set.h" +#include "string-util.h" +#include "strv.h" +#include "util.h" +#include "errno-list.h" + +const uint32_t seccomp_local_archs[] = { + + /* Note: always list the native arch we are compiled as last, so that users can blacklist seccomp(), but our own calls to it still succeed */ + +#if defined(__x86_64__) && defined(__ILP32__) + SCMP_ARCH_X86, + SCMP_ARCH_X86_64, + SCMP_ARCH_X32, /* native */ +#elif defined(__x86_64__) && !defined(__ILP32__) + SCMP_ARCH_X86, + SCMP_ARCH_X32, + SCMP_ARCH_X86_64, /* native */ +#elif defined(__i386__) + SCMP_ARCH_X86, +#elif defined(__aarch64__) + SCMP_ARCH_ARM, + SCMP_ARCH_AARCH64, /* native */ +#elif defined(__arm__) + SCMP_ARCH_ARM, +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI32 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_ABI64 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __BIG_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32 + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64N32, + SCMP_ARCH_MIPS64N32, /* native */ +#elif defined(__mips__) && __BYTE_ORDER == __LITTLE_ENDIAN && _MIPS_SIM == _MIPS_SIM_NABI32 + SCMP_ARCH_MIPS, + SCMP_ARCH_MIPSEL, + SCMP_ARCH_MIPS64, + SCMP_ARCH_MIPSEL64, + SCMP_ARCH_MIPS64N32, + SCMP_ARCH_MIPSEL64N32, /* native */ +#elif defined(__powerpc64__) && __BYTE_ORDER == __BIG_ENDIAN + SCMP_ARCH_PPC, + SCMP_ARCH_PPC64LE, + SCMP_ARCH_PPC64, /* native */ +#elif defined(__powerpc64__) && __BYTE_ORDER == __LITTLE_ENDIAN + SCMP_ARCH_PPC, + SCMP_ARCH_PPC64, + SCMP_ARCH_PPC64LE, /* native */ +#elif defined(__powerpc__) + SCMP_ARCH_PPC, +#elif defined(__s390x__) + SCMP_ARCH_S390, + SCMP_ARCH_S390X, /* native */ +#elif defined(__s390__) + SCMP_ARCH_S390, +#endif + (uint32_t) -1 + }; + +const char* seccomp_arch_to_string(uint32_t c) { + /* Maintain order used in <seccomp.h>. + * + * Names used here should be the same as those used for ConditionArchitecture=, + * except for "subarchitectures" like x32. */ + + switch(c) { + case SCMP_ARCH_NATIVE: + return "native"; + case SCMP_ARCH_X86: + return "x86"; + case SCMP_ARCH_X86_64: + return "x86-64"; + case SCMP_ARCH_X32: + return "x32"; + case SCMP_ARCH_ARM: + return "arm"; + case SCMP_ARCH_AARCH64: + return "arm64"; + case SCMP_ARCH_MIPS: + return "mips"; + case SCMP_ARCH_MIPS64: + return "mips64"; + case SCMP_ARCH_MIPS64N32: + return "mips64-n32"; + case SCMP_ARCH_MIPSEL: + return "mips-le"; + case SCMP_ARCH_MIPSEL64: + return "mips64-le"; + case SCMP_ARCH_MIPSEL64N32: + return "mips64-le-n32"; + case SCMP_ARCH_PPC: + return "ppc"; + case SCMP_ARCH_PPC64: + return "ppc64"; + case SCMP_ARCH_PPC64LE: + return "ppc64-le"; + case SCMP_ARCH_S390: + return "s390"; + case SCMP_ARCH_S390X: + return "s390x"; + default: + return NULL; + } +} + +int seccomp_arch_from_string(const char *n, uint32_t *ret) { + if (!n) + return -EINVAL; + + assert(ret); + + if (streq(n, "native")) + *ret = SCMP_ARCH_NATIVE; + else if (streq(n, "x86")) + *ret = SCMP_ARCH_X86; + else if (streq(n, "x86-64")) + *ret = SCMP_ARCH_X86_64; + else if (streq(n, "x32")) + *ret = SCMP_ARCH_X32; + else if (streq(n, "arm")) + *ret = SCMP_ARCH_ARM; + else if (streq(n, "arm64")) + *ret = SCMP_ARCH_AARCH64; + else if (streq(n, "mips")) + *ret = SCMP_ARCH_MIPS; + else if (streq(n, "mips64")) + *ret = SCMP_ARCH_MIPS64; + else if (streq(n, "mips64-n32")) + *ret = SCMP_ARCH_MIPS64N32; + else if (streq(n, "mips-le")) + *ret = SCMP_ARCH_MIPSEL; + else if (streq(n, "mips64-le")) + *ret = SCMP_ARCH_MIPSEL64; + else if (streq(n, "mips64-le-n32")) + *ret = SCMP_ARCH_MIPSEL64N32; + else if (streq(n, "ppc")) + *ret = SCMP_ARCH_PPC; + else if (streq(n, "ppc64")) + *ret = SCMP_ARCH_PPC64; + else if (streq(n, "ppc64-le")) + *ret = SCMP_ARCH_PPC64LE; + else if (streq(n, "s390")) + *ret = SCMP_ARCH_S390; + else if (streq(n, "s390x")) + *ret = SCMP_ARCH_S390X; + else + return -EINVAL; + + return 0; +} + +int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action) { + scmp_filter_ctx seccomp; + int r; + + /* Much like seccomp_init(), but initializes the filter for one specific architecture only, without affecting + * any others. Also, turns off the NNP fiddling. */ + + seccomp = seccomp_init(default_action); + if (!seccomp) + return -ENOMEM; + + if (arch != SCMP_ARCH_NATIVE && + arch != seccomp_arch_native()) { + + r = seccomp_arch_remove(seccomp, seccomp_arch_native()); + if (r < 0) + goto finish; + + r = seccomp_arch_add(seccomp, arch); + if (r < 0) + goto finish; + + assert(seccomp_arch_exist(seccomp, arch) >= 0); + assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) == -EEXIST); + assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) == -EEXIST); + } else { + assert(seccomp_arch_exist(seccomp, SCMP_ARCH_NATIVE) >= 0); + assert(seccomp_arch_exist(seccomp, seccomp_arch_native()) >= 0); + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_ACT_BADARCH, SCMP_ACT_ALLOW); + if (r < 0) + goto finish; + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + *ret = seccomp; + return 0; + +finish: + seccomp_release(seccomp); + return r; +} + +static bool is_basic_seccomp_available(void) { + return prctl(PR_GET_SECCOMP, 0, 0, 0, 0) >= 0; +} + +static bool is_seccomp_filter_available(void) { + return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0) < 0 && + errno == EFAULT; +} + +bool is_seccomp_available(void) { + static int cached_enabled = -1; + + if (cached_enabled < 0) + cached_enabled = + is_basic_seccomp_available() && + is_seccomp_filter_available(); + + return cached_enabled; +} + +const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = { + [SYSCALL_FILTER_SET_DEFAULT] = { + .name = "@default", + .help = "System calls that are always permitted", + .value = + "clock_getres\0" + "clock_gettime\0" + "clock_nanosleep\0" + "execve\0" + "exit\0" + "exit_group\0" + "futex\0" + "get_robust_list\0" + "get_thread_area\0" + "getegid\0" + "getegid32\0" + "geteuid\0" + "geteuid32\0" + "getgid\0" + "getgid32\0" + "getgroups\0" + "getgroups32\0" + "getpgid\0" + "getpgrp\0" + "getpid\0" + "getppid\0" + "getresgid\0" + "getresgid32\0" + "getresuid\0" + "getresuid32\0" + "getrlimit\0" /* make sure processes can query stack size and such */ + "getsid\0" + "gettid\0" + "gettimeofday\0" + "getuid\0" + "getuid32\0" + "membarrier\0" + "nanosleep\0" + "pause\0" + "prlimit64\0" + "restart_syscall\0" + "rt_sigreturn\0" + "sched_yield\0" + "set_robust_list\0" + "set_thread_area\0" + "set_tid_address\0" + "set_tls\0" + "sigreturn\0" + "time\0" + "ugetrlimit\0" + }, + [SYSCALL_FILTER_SET_AIO] = { + .name = "@aio", + .help = "Asynchronous IO", + .value = + "io_cancel\0" + "io_destroy\0" + "io_getevents\0" + "io_pgetevents\0" + "io_setup\0" + "io_submit\0" + }, + [SYSCALL_FILTER_SET_BASIC_IO] = { + .name = "@basic-io", + .help = "Basic IO", + .value = + "_llseek\0" + "close\0" + "dup\0" + "dup2\0" + "dup3\0" + "lseek\0" + "pread64\0" + "preadv\0" + "preadv2\0" + "pwrite64\0" + "pwritev\0" + "pwritev2\0" + "read\0" + "readv\0" + "write\0" + "writev\0" + }, + [SYSCALL_FILTER_SET_CHOWN] = { + .name = "@chown", + .help = "Change ownership of files and directories", + .value = + "chown\0" + "chown32\0" + "fchown\0" + "fchown32\0" + "fchownat\0" + "lchown\0" + "lchown32\0" + }, + [SYSCALL_FILTER_SET_CLOCK] = { + .name = "@clock", + .help = "Change the system time", + .value = + "adjtimex\0" + "clock_adjtime\0" + "clock_settime\0" + "settimeofday\0" + "stime\0" + }, + [SYSCALL_FILTER_SET_CPU_EMULATION] = { + .name = "@cpu-emulation", + .help = "System calls for CPU emulation functionality", + .value = + "modify_ldt\0" + "subpage_prot\0" + "switch_endian\0" + "vm86\0" + "vm86old\0" + }, + [SYSCALL_FILTER_SET_DEBUG] = { + .name = "@debug", + .help = "Debugging, performance monitoring and tracing functionality", + .value = + "lookup_dcookie\0" + "perf_event_open\0" + "ptrace\0" + "rtas\0" +#ifdef __NR_s390_runtime_instr + "s390_runtime_instr\0" +#endif + "sys_debug_setcontext\0" + }, + [SYSCALL_FILTER_SET_FILE_SYSTEM] = { + .name = "@file-system", + .help = "File system operations", + .value = + "access\0" + "chdir\0" + "chmod\0" + "close\0" + "creat\0" + "faccessat\0" + "fallocate\0" + "fchdir\0" + "fchmod\0" + "fchmodat\0" + "fcntl\0" + "fcntl64\0" + "fgetxattr\0" + "flistxattr\0" + "fremovexattr\0" + "fsetxattr\0" + "fstat\0" + "fstat64\0" + "fstatat64\0" + "fstatfs\0" + "fstatfs64\0" + "ftruncate\0" + "ftruncate64\0" + "futimesat\0" + "getcwd\0" + "getdents\0" + "getdents64\0" + "getxattr\0" + "inotify_add_watch\0" + "inotify_init\0" + "inotify_init1\0" + "inotify_rm_watch\0" + "lgetxattr\0" + "link\0" + "linkat\0" + "listxattr\0" + "llistxattr\0" + "lremovexattr\0" + "lsetxattr\0" + "lstat\0" + "lstat64\0" + "mkdir\0" + "mkdirat\0" + "mknod\0" + "mknodat\0" + "mmap\0" + "mmap2\0" + "munmap\0" + "newfstatat\0" + "oldfstat\0" + "oldlstat\0" + "oldstat\0" + "open\0" + "openat\0" + "readlink\0" + "readlinkat\0" + "removexattr\0" + "rename\0" + "renameat\0" + "renameat2\0" + "rmdir\0" + "setxattr\0" + "stat\0" + "stat64\0" + "statfs\0" + "statfs64\0" +#ifdef __NR_statx + "statx\0" +#endif + "symlink\0" + "symlinkat\0" + "truncate\0" + "truncate64\0" + "unlink\0" + "unlinkat\0" + "utime\0" + "utimensat\0" + "utimes\0" + }, + [SYSCALL_FILTER_SET_IO_EVENT] = { + .name = "@io-event", + .help = "Event loop system calls", + .value = + "_newselect\0" + "epoll_create\0" + "epoll_create1\0" + "epoll_ctl\0" + "epoll_ctl_old\0" + "epoll_pwait\0" + "epoll_wait\0" + "epoll_wait_old\0" + "eventfd\0" + "eventfd2\0" + "poll\0" + "ppoll\0" + "pselect6\0" + "select\0" + }, + [SYSCALL_FILTER_SET_IPC] = { + .name = "@ipc", + .help = "SysV IPC, POSIX Message Queues or other IPC", + .value = + "ipc\0" + "memfd_create\0" + "mq_getsetattr\0" + "mq_notify\0" + "mq_open\0" + "mq_timedreceive\0" + "mq_timedsend\0" + "mq_unlink\0" + "msgctl\0" + "msgget\0" + "msgrcv\0" + "msgsnd\0" + "pipe\0" + "pipe2\0" + "process_vm_readv\0" + "process_vm_writev\0" + "semctl\0" + "semget\0" + "semop\0" + "semtimedop\0" + "shmat\0" + "shmctl\0" + "shmdt\0" + "shmget\0" + }, + [SYSCALL_FILTER_SET_KEYRING] = { + .name = "@keyring", + .help = "Kernel keyring access", + .value = + "add_key\0" + "keyctl\0" + "request_key\0" + }, + [SYSCALL_FILTER_SET_MEMLOCK] = { + .name = "@memlock", + .help = "Memory locking control", + .value = + "mlock\0" + "mlock2\0" + "mlockall\0" + "munlock\0" + "munlockall\0" + }, + [SYSCALL_FILTER_SET_MODULE] = { + .name = "@module", + .help = "Loading and unloading of kernel modules", + .value = + "delete_module\0" + "finit_module\0" + "init_module\0" + }, + [SYSCALL_FILTER_SET_MOUNT] = { + .name = "@mount", + .help = "Mounting and unmounting of file systems", + .value = + "chroot\0" + "mount\0" + "pivot_root\0" + "umount\0" + "umount2\0" + }, + [SYSCALL_FILTER_SET_NETWORK_IO] = { + .name = "@network-io", + .help = "Network or Unix socket IO, should not be needed if not network facing", + .value = + "accept\0" + "accept4\0" + "bind\0" + "connect\0" + "getpeername\0" + "getsockname\0" + "getsockopt\0" + "listen\0" + "recv\0" + "recvfrom\0" + "recvmmsg\0" + "recvmsg\0" + "send\0" + "sendmmsg\0" + "sendmsg\0" + "sendto\0" + "setsockopt\0" + "shutdown\0" + "socket\0" + "socketcall\0" + "socketpair\0" + }, + [SYSCALL_FILTER_SET_OBSOLETE] = { + /* some unknown even to libseccomp */ + .name = "@obsolete", + .help = "Unusual, obsolete or unimplemented system calls", + .value = + "_sysctl\0" + "afs_syscall\0" + "bdflush\0" + "break\0" + "create_module\0" + "ftime\0" + "get_kernel_syms\0" + "getpmsg\0" + "gtty\0" + "idle\0" + "lock\0" + "mpx\0" + "prof\0" + "profil\0" + "putpmsg\0" + "query_module\0" + "security\0" + "sgetmask\0" + "ssetmask\0" + "stty\0" + "sysfs\0" + "tuxcall\0" + "ulimit\0" + "uselib\0" + "ustat\0" + "vserver\0" + }, + [SYSCALL_FILTER_SET_PRIVILEGED] = { + .name = "@privileged", + .help = "All system calls which need super-user capabilities", + .value = + "@chown\0" + "@clock\0" + "@module\0" + "@raw-io\0" + "@reboot\0" + "@swap\0" + "_sysctl\0" + "acct\0" + "bpf\0" + "capset\0" + "chroot\0" + "fanotify_init\0" + "nfsservctl\0" + "open_by_handle_at\0" + "pivot_root\0" + "quotactl\0" + "setdomainname\0" + "setfsuid\0" + "setfsuid32\0" + "setgroups\0" + "setgroups32\0" + "sethostname\0" + "setresuid\0" + "setresuid32\0" + "setreuid\0" + "setreuid32\0" + "setuid\0" /* We list the explicit system calls here, as @setuid also includes setgid() which is not necessarily privileged */ + "setuid32\0" + "vhangup\0" + }, + [SYSCALL_FILTER_SET_PROCESS] = { + .name = "@process", + .help = "Process control, execution, namespaceing operations", + .value = + "arch_prctl\0" + "capget\0" /* Able to query arbitrary processes */ + "clone\0" + "execveat\0" + "fork\0" + "getrusage\0" + "kill\0" + "prctl\0" + "rt_sigqueueinfo\0" + "rt_tgsigqueueinfo\0" + "setns\0" + "swapcontext\0" /* Some archs e.g. powerpc32 are using it to do userspace context switches */ + "tgkill\0" + "times\0" + "tkill\0" + "unshare\0" + "vfork\0" + "wait4\0" + "waitid\0" + "waitpid\0" + }, + [SYSCALL_FILTER_SET_RAW_IO] = { + .name = "@raw-io", + .help = "Raw I/O port access", + .value = + "ioperm\0" + "iopl\0" + "pciconfig_iobase\0" + "pciconfig_read\0" + "pciconfig_write\0" +#ifdef __NR_s390_pci_mmio_read + "s390_pci_mmio_read\0" +#endif +#ifdef __NR_s390_pci_mmio_write + "s390_pci_mmio_write\0" +#endif + }, + [SYSCALL_FILTER_SET_REBOOT] = { + .name = "@reboot", + .help = "Reboot and reboot preparation/kexec", + .value = + "kexec_file_load\0" + "kexec_load\0" + "reboot\0" + }, + [SYSCALL_FILTER_SET_RESOURCES] = { + .name = "@resources", + .help = "Alter resource settings", + .value = + "ioprio_set\0" + "mbind\0" + "migrate_pages\0" + "move_pages\0" + "nice\0" + "sched_setaffinity\0" + "sched_setattr\0" + "sched_setparam\0" + "sched_setscheduler\0" + "set_mempolicy\0" + "setpriority\0" + "setrlimit\0" + }, + [SYSCALL_FILTER_SET_SETUID] = { + .name = "@setuid", + .help = "Operations for changing user/group credentials", + .value = + "setgid\0" + "setgid32\0" + "setgroups\0" + "setgroups32\0" + "setregid\0" + "setregid32\0" + "setresgid\0" + "setresgid32\0" + "setresuid\0" + "setresuid32\0" + "setreuid\0" + "setreuid32\0" + "setuid\0" + "setuid32\0" + }, + [SYSCALL_FILTER_SET_SIGNAL] = { + .name = "@signal", + .help = "Process signal handling", + .value = + "rt_sigaction\0" + "rt_sigpending\0" + "rt_sigprocmask\0" + "rt_sigsuspend\0" + "rt_sigtimedwait\0" + "sigaction\0" + "sigaltstack\0" + "signal\0" + "signalfd\0" + "signalfd4\0" + "sigpending\0" + "sigprocmask\0" + "sigsuspend\0" + }, + [SYSCALL_FILTER_SET_SWAP] = { + .name = "@swap", + .help = "Enable/disable swap devices", + .value = + "swapoff\0" + "swapon\0" + }, + [SYSCALL_FILTER_SET_SYNC] = { + .name = "@sync", + .help = "Synchronize files and memory to storage", + .value = + "fdatasync\0" + "fsync\0" + "msync\0" + "sync\0" + "sync_file_range\0" + "syncfs\0" + }, + [SYSCALL_FILTER_SET_SYSTEM_SERVICE] = { + .name = "@system-service", + .help = "General system service operations", + .value = + "@aio\0" + "@basic-io\0" + "@chown\0" + "@default\0" + "@file-system\0" + "@io-event\0" + "@ipc\0" + "@keyring\0" + "@memlock\0" + "@network-io\0" + "@process\0" + "@resources\0" + "@setuid\0" + "@signal\0" + "@sync\0" + "@timer\0" + "brk\0" + "capget\0" + "capset\0" + "copy_file_range\0" + "fadvise64\0" + "fadvise64_64\0" + "flock\0" + "get_mempolicy\0" + "getcpu\0" + "getpriority\0" + "getrandom\0" + "ioctl\0" + "ioprio_get\0" + "kcmp\0" + "madvise\0" + "mprotect\0" + "mremap\0" + "name_to_handle_at\0" + "oldolduname\0" + "olduname\0" + "personality\0" + "readahead\0" + "readdir\0" + "remap_file_pages\0" + "sched_get_priority_max\0" + "sched_get_priority_min\0" + "sched_getaffinity\0" + "sched_getattr\0" + "sched_getparam\0" + "sched_getscheduler\0" + "sched_rr_get_interval\0" + "sched_yield\0" + "sendfile\0" + "sendfile64\0" + "setfsgid\0" + "setfsgid32\0" + "setfsuid\0" + "setfsuid32\0" + "setpgid\0" + "setsid\0" + "splice\0" + "sysinfo\0" + "tee\0" + "umask\0" + "uname\0" + "userfaultfd\0" + "vmsplice\0" + }, + [SYSCALL_FILTER_SET_TIMER] = { + .name = "@timer", + .help = "Schedule operations by time", + .value = + "alarm\0" + "getitimer\0" + "setitimer\0" + "timer_create\0" + "timer_delete\0" + "timer_getoverrun\0" + "timer_gettime\0" + "timer_settime\0" + "timerfd_create\0" + "timerfd_gettime\0" + "timerfd_settime\0" + "times\0" + }, +}; + +const SyscallFilterSet *syscall_filter_set_find(const char *name) { + unsigned i; + + if (isempty(name) || name[0] != '@') + return NULL; + + for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) + if (streq(syscall_filter_sets[i].name, name)) + return syscall_filter_sets + i; + + return NULL; +} + +static int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action, char **exclude, bool log_missing); + +int seccomp_add_syscall_filter_item(scmp_filter_ctx *seccomp, const char *name, uint32_t action, char **exclude, bool log_missing) { + assert(seccomp); + assert(name); + + if (strv_contains(exclude, name)) + return 0; + + if (name[0] == '@') { + const SyscallFilterSet *other; + + other = syscall_filter_set_find(name); + if (!other) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Filter set %s is not known!", + name); + + return seccomp_add_syscall_filter_set(seccomp, other, action, exclude, log_missing); + + } else { + int id, r; + + id = seccomp_syscall_resolve_name(name); + if (id == __NR_SCMP_ERROR) { + if (log_missing) + log_debug("System call %s is not known, ignoring.", name); + return 0; + } + + r = seccomp_rule_add_exact(seccomp, action, id, 0); + if (r < 0) { + /* If the system call is not known on this architecture, then that's fine, let's ignore it */ + bool ignore = r == -EDOM; + + if (!ignore || log_missing) + log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m", + name, id, ignore ? ", ignoring" : ""); + if (!ignore) + return r; + } + + return 0; + } +} + +static int seccomp_add_syscall_filter_set( + scmp_filter_ctx seccomp, + const SyscallFilterSet *set, + uint32_t action, + char **exclude, + bool log_missing) { + + const char *sys; + int r; + + assert(seccomp); + assert(set); + + NULSTR_FOREACH(sys, set->value) { + r = seccomp_add_syscall_filter_item(seccomp, sys, action, exclude, log_missing); + if (r < 0) + return r; + } + + return 0; +} + +int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing) { + uint32_t arch; + int r; + + assert(set); + + /* The one-stop solution: allocate a seccomp object, add the specified filter to it, and apply it. Once for + * each local arch. */ + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, default_action); + if (r < 0) + return r; + + r = seccomp_add_syscall_filter_set(seccomp, set, action, NULL, log_missing); + if (r < 0) + return log_debug_errno(r, "Failed to add filter set: %m"); + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing) { + uint32_t arch; + int r; + + /* Similar to seccomp_load_syscall_filter_set(), but takes a raw Set* of syscalls, instead of a + * SyscallFilterSet* table. */ + + if (hashmap_isempty(set) && default_action == SCMP_ACT_ALLOW) + return 0; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + Iterator i; + void *syscall_id, *val; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, default_action); + if (r < 0) + return r; + + HASHMAP_FOREACH_KEY(val, syscall_id, set, i) { + uint32_t a = action; + int id = PTR_TO_INT(syscall_id) - 1; + int error = PTR_TO_INT(val); + + if (action != SCMP_ACT_ALLOW && error >= 0) + a = SCMP_ACT_ERRNO(error); + + r = seccomp_rule_add_exact(seccomp, a, id, 0); + if (r < 0) { + /* If the system call is not known on this architecture, then that's fine, let's ignore it */ + _cleanup_free_ char *n = NULL; + bool ignore; + + n = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, id); + ignore = r == -EDOM; + if (!ignore || log_missing) + log_debug_errno(r, "Failed to add rule for system call %s() / %d%s: %m", + strna(n), id, ignore ? ", ignoring" : ""); + if (!ignore) + return r; + } + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install filter set for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_parse_syscall_filter_full( + const char *name, + int errno_num, + Hashmap *filter, + SeccompParseFlags flags, + const char *unit, + const char *filename, + unsigned line) { + + int r; + + assert(name); + assert(filter); + + if (name[0] == '@') { + const SyscallFilterSet *set; + const char *i; + + set = syscall_filter_set_find(name); + if (!set) { + if (!(flags & SECCOMP_PARSE_PERMISSIVE)) + return -EINVAL; + + log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0, + "Unknown system call group, ignoring: %s", name); + return 0; + } + + NULSTR_FOREACH(i, set->value) { + /* Call ourselves again, for the group to parse. Note that we downgrade logging here (i.e. take + * away the SECCOMP_PARSE_LOG flag) since any issues in the group table are our own problem, + * not a problem in user configuration data and we shouldn't pretend otherwise by complaining + * about them. */ + r = seccomp_parse_syscall_filter_full(i, errno_num, filter, flags &~ SECCOMP_PARSE_LOG, unit, filename, line); + if (r < 0) + return r; + } + } else { + int id; + + id = seccomp_syscall_resolve_name(name); + if (id == __NR_SCMP_ERROR) { + if (!(flags & SECCOMP_PARSE_PERMISSIVE)) + return -EINVAL; + + log_syntax(unit, flags & SECCOMP_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0, + "Failed to parse system call, ignoring: %s", name); + return 0; + } + + /* If we previously wanted to forbid a syscall and now + * we want to allow it, then remove it from the list. */ + if (!(flags & SECCOMP_PARSE_INVERT) == !!(flags & SECCOMP_PARSE_WHITELIST)) { + r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)); + if (r < 0) + switch (r) { + case -ENOMEM: + return flags & SECCOMP_PARSE_LOG ? log_oom() : -ENOMEM; + case -EEXIST: + assert_se(hashmap_update(filter, INT_TO_PTR(id + 1), INT_TO_PTR(errno_num)) == 0); + break; + default: + return r; + } + } else + (void) hashmap_remove(filter, INT_TO_PTR(id + 1)); + } + + return 0; +} + +int seccomp_restrict_namespaces(unsigned long retain) { + uint32_t arch; + int r; + + if (DEBUG_LOGGING) { + _cleanup_free_ char *s = NULL; + + (void) namespace_flags_to_string(retain, &s); + log_debug("Restricting namespace to: %s.", strna(s)); + } + + /* NOOP? */ + if ((retain & NAMESPACE_FLAGS_ALL) == NAMESPACE_FLAGS_ALL) + return 0; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + unsigned i; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + if ((retain & NAMESPACE_FLAGS_ALL) == 0) + /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall + * altogether. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 0); + else + /* Otherwise, block only the invocations with the appropriate flags in the loop below, but also the + * special invocation with a zero flags argument, right here. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 1, + SCMP_A1(SCMP_CMP_EQ, 0)); + if (r < 0) { + log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + for (i = 0; namespace_flag_map[i].name; i++) { + unsigned long f; + + f = namespace_flag_map[i].flag; + if ((retain & f) == f) { + log_debug("Permitting %s.", namespace_flag_map[i].name); + continue; + } + + log_debug("Blocking %s.", namespace_flag_map[i].name); + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(unshare), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) { + log_debug_errno(r, "Failed to add unshare() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + break; + } + + /* On s390/s390x the first two parameters to clone are switched */ + if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X)) + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A0(SCMP_CMP_MASKED_EQ, f, f)); + else + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(clone), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) { + log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + break; + } + + if ((retain & NAMESPACE_FLAGS_ALL) != 0) { + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(setns), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, f, f)); + if (r < 0) { + log_debug_errno(r, "Failed to add setns() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + break; + } + } + } + if (r < 0) + continue; + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install namespace restriction rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_protect_sysctl(void) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + if (IN_SET(arch, SCMP_ARCH_X32, SCMP_ARCH_AARCH64)) + /* No _sysctl syscall */ + continue; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(_sysctl), + 0); + if (r < 0) { + log_debug_errno(r, "Failed to add _sysctl() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install sysctl protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_restrict_address_families(Set *address_families, bool whitelist) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + bool supported; + Iterator i; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + switch (arch) { + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + case SCMP_ARCH_ARM: + case SCMP_ARCH_AARCH64: + case SCMP_ARCH_PPC: + case SCMP_ARCH_PPC64: + case SCMP_ARCH_PPC64LE: + case SCMP_ARCH_MIPSEL64N32: + case SCMP_ARCH_MIPS64N32: + case SCMP_ARCH_MIPSEL64: + case SCMP_ARCH_MIPS64: + /* These we know we support (i.e. are the ones that do not use socketcall()) */ + supported = true; + break; + + case SCMP_ARCH_S390: + case SCMP_ARCH_S390X: + case SCMP_ARCH_X86: + case SCMP_ARCH_MIPSEL: + case SCMP_ARCH_MIPS: + default: + /* These we either know we don't support (i.e. are the ones that do use socketcall()), or we + * don't know */ + supported = false; + break; + } + + if (!supported) + continue; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + if (whitelist) { + int af, first = 0, last = 0; + void *afp; + + /* If this is a whitelist, we first block the address families that are out of range and then + * everything that is not in the set. First, we find the lowest and highest address family in + * the set. */ + + SET_FOREACH(afp, address_families, i) { + af = PTR_TO_INT(afp); + + if (af <= 0 || af >= af_max()) + continue; + + if (first == 0 || af < first) + first = af; + + if (last == 0 || af > last) + last = af; + } + + assert((first == 0) == (last == 0)); + + if (first == 0) { + + /* No entries in the valid range, block everything */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 0); + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + } else { + + /* Block everything below the first entry */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_LT, first)); + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + /* Block everything above the last entry */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_GT, last)); + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + /* Block everything between the first and last entry */ + for (af = 1; af < af_max(); af++) { + + if (set_contains(address_families, INT_TO_PTR(af))) + continue; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_EQ, af)); + if (r < 0) + break; + } + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + } + + } else { + void *af; + + /* If this is a blacklist, then generate one rule for + * each address family that are then combined in OR + * checks. */ + + SET_FOREACH(af, address_families, i) { + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EAFNOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af))); + if (r < 0) + break; + } + if (r < 0) { + log_debug_errno(r, "Failed to add socket() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install socket family rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_restrict_realtime(void) { + static const int permitted_policies[] = { + SCHED_OTHER, + SCHED_BATCH, + SCHED_IDLE, + }; + + int r, max_policy = 0; + uint32_t arch; + unsigned i; + + /* Determine the highest policy constant we want to allow */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] > max_policy) + max_policy = permitted_policies[i]; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int p; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + /* Go through all policies with lower values than that, and block them -- unless they appear in the + * whitelist. */ + for (p = 0; p < max_policy; p++) { + bool good = false; + + /* Check if this is in the whitelist. */ + for (i = 0; i < ELEMENTSOF(permitted_policies); i++) + if (permitted_policies[i] == p) { + good = true; + break; + } + + if (good) + continue; + + /* Deny this policy */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_EQ, p)); + if (r < 0) { + log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + } + + /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are + * unsigned here, hence no need no check for < 0 values. */ + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(sched_setscheduler), + 1, + SCMP_A1(SCMP_CMP_GT, max_policy)); + if (r < 0) { + log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install realtime protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp, + uint32_t arch, + int nr, + unsigned arg_cnt, + const struct scmp_arg_cmp arg) { + int r; + + r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg); + if (r < 0) { + _cleanup_free_ char *n = NULL; + + n = seccomp_syscall_resolve_num_arch(arch, nr); + log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m", + strna(n), + seccomp_arch_to_string(arch)); + } + + return r; +} + +/* For known architectures, check that syscalls are indeed defined or not. */ +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) +assert_cc(SCMP_SYS(shmget) > 0); +assert_cc(SCMP_SYS(shmat) > 0); +assert_cc(SCMP_SYS(shmdt) > 0); +#elif defined(__i386__) || defined(__powerpc64__) +assert_cc(SCMP_SYS(shmget) < 0); +assert_cc(SCMP_SYS(shmat) < 0); +assert_cc(SCMP_SYS(shmdt) < 0); +#endif + +int seccomp_memory_deny_write_execute(void) { + + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0; + + log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch)); + + switch (arch) { + + case SCMP_ARCH_X86: + filter_syscall = SCMP_SYS(mmap2); + block_syscall = SCMP_SYS(mmap); + break; + + case SCMP_ARCH_PPC: + case SCMP_ARCH_PPC64: + case SCMP_ARCH_PPC64LE: + filter_syscall = SCMP_SYS(mmap); + + /* Note that shmat() isn't available, and the call is multiplexed through ipc(). + * We ignore that here, which means there's still a way to get writable/executable + * memory, if an IPC key is mapped like this. That's a pity, but no total loss. */ + + break; + + case SCMP_ARCH_ARM: + filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */ + shmat_syscall = SCMP_SYS(shmat); + break; + + case SCMP_ARCH_X86_64: + case SCMP_ARCH_X32: + case SCMP_ARCH_AARCH64: + filter_syscall = SCMP_SYS(mmap); /* amd64, x32, and arm64 have only mmap */ + shmat_syscall = SCMP_SYS(shmat); + break; + + /* Please add more definitions here, if you port systemd to other architectures! */ + +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__) +#warning "Consider adding the right mmap() syscall definitions here!" +#endif + } + + /* Can't filter mmap() on this arch, then skip it */ + if (filter_syscall == 0) + continue; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall, + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE)); + if (r < 0) + continue; + + if (block_syscall != 0) { + r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} ); + if (r < 0) + continue; + } + + r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + continue; + +#ifdef __NR_pkey_mprotect + r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(pkey_mprotect), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC)); + if (r < 0) + continue; +#endif + + if (shmat_syscall != 0) { + r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC)); + if (r < 0) + continue; + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install MemoryDenyWriteExecute= rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + +int seccomp_restrict_archs(Set *archs) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + Iterator i; + void *id; + int r; + + /* This installs a filter with no rules, but that restricts the system call architectures to the specified + * list. + * + * There are some qualifications. However the most important use is to stop processes from bypassing + * system call restrictions, in case they used a broader (multiplexing) syscall which is only available + * in a non-native architecture. There are no holes in this use case, at least so far. */ + + /* Note libseccomp includes our "native" (current) architecture in the filter by default. + * We do not remove it. For example, our callers expect to be able to call execve() afterwards + * to run a program with the restrictions applied. */ + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + SET_FOREACH(id, archs, i) { + r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1); + if (r < 0 && r != -EEXIST) + return r; + } + + /* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32 + * x32 syscalls should basically match x86-64 for everything except the pointer type. + * The important thing is that you can block the old 32-bit x86 syscalls. + * https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */ + + if (seccomp_arch_native() == SCMP_ARCH_X32 || + set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1))) { + + r = seccomp_arch_add(seccomp, SCMP_ARCH_X86_64); + if (r < 0 && r != -EEXIST) + return r; + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + return r; + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to restrict system call architectures, skipping: %m"); + + return 0; +} + +int parse_syscall_archs(char **l, Set **archs) { + _cleanup_set_free_ Set *_archs; + char **s; + int r; + + assert(l); + assert(archs); + + r = set_ensure_allocated(&_archs, NULL); + if (r < 0) + return r; + + STRV_FOREACH(s, l) { + uint32_t a; + + r = seccomp_arch_from_string(*s, &a); + if (r < 0) + return -EINVAL; + + r = set_put(_archs, UINT32_TO_PTR(a + 1)); + if (r < 0) + return -ENOMEM; + } + + *archs = TAKE_PTR(_archs); + + return 0; +} + +int seccomp_filter_set_add(Hashmap *filter, bool add, const SyscallFilterSet *set) { + const char *i; + int r; + + assert(set); + + NULSTR_FOREACH(i, set->value) { + + if (i[0] == '@') { + const SyscallFilterSet *more; + + more = syscall_filter_set_find(i); + if (!more) + return -ENXIO; + + r = seccomp_filter_set_add(filter, add, more); + if (r < 0) + return r; + } else { + int id; + + id = seccomp_syscall_resolve_name(i); + if (id == __NR_SCMP_ERROR) { + log_debug("Couldn't resolve system call, ignoring: %s", i); + continue; + } + + if (add) { + r = hashmap_put(filter, INT_TO_PTR(id + 1), INT_TO_PTR(-1)); + if (r < 0) + return r; + } else + (void) hashmap_remove(filter, INT_TO_PTR(id + 1)); + } + } + + return 0; +} + +int seccomp_lock_personality(unsigned long personality) { + uint32_t arch; + int r; + + if (personality >= PERSONALITY_INVALID) + return -EINVAL; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(personality), + 1, + SCMP_A0(SCMP_CMP_NE, personality)); + if (r < 0) { + log_debug_errno(r, "Failed to add scheduler rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h new file mode 100644 index 0000000..d8a36c4 --- /dev/null +++ b/src/shared/seccomp-util.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <seccomp.h> +#include <stdbool.h> +#include <stdint.h> + +#include "set.h" + +const char* seccomp_arch_to_string(uint32_t c); +int seccomp_arch_from_string(const char *n, uint32_t *ret); + +int seccomp_init_for_arch(scmp_filter_ctx *ret, uint32_t arch, uint32_t default_action); + +bool is_seccomp_available(void); + +typedef struct SyscallFilterSet { + const char *name; + const char *help; + const char *value; +} SyscallFilterSet; + +enum { + /* Please leave DEFAULT first, but sort the rest alphabetically */ + SYSCALL_FILTER_SET_DEFAULT, + SYSCALL_FILTER_SET_AIO, + SYSCALL_FILTER_SET_BASIC_IO, + SYSCALL_FILTER_SET_CHOWN, + SYSCALL_FILTER_SET_CLOCK, + SYSCALL_FILTER_SET_CPU_EMULATION, + SYSCALL_FILTER_SET_DEBUG, + SYSCALL_FILTER_SET_FILE_SYSTEM, + SYSCALL_FILTER_SET_IO_EVENT, + SYSCALL_FILTER_SET_IPC, + SYSCALL_FILTER_SET_KEYRING, + SYSCALL_FILTER_SET_MEMLOCK, + SYSCALL_FILTER_SET_MODULE, + SYSCALL_FILTER_SET_MOUNT, + SYSCALL_FILTER_SET_NETWORK_IO, + SYSCALL_FILTER_SET_OBSOLETE, + SYSCALL_FILTER_SET_PRIVILEGED, + SYSCALL_FILTER_SET_PROCESS, + SYSCALL_FILTER_SET_RAW_IO, + SYSCALL_FILTER_SET_REBOOT, + SYSCALL_FILTER_SET_RESOURCES, + SYSCALL_FILTER_SET_SETUID, + SYSCALL_FILTER_SET_SIGNAL, + SYSCALL_FILTER_SET_SWAP, + SYSCALL_FILTER_SET_SYNC, + SYSCALL_FILTER_SET_SYSTEM_SERVICE, + SYSCALL_FILTER_SET_TIMER, + _SYSCALL_FILTER_SET_MAX +}; + +extern const SyscallFilterSet syscall_filter_sets[]; + +const SyscallFilterSet *syscall_filter_set_find(const char *name); + +int seccomp_filter_set_add(Hashmap *s, bool b, const SyscallFilterSet *set); + +int seccomp_add_syscall_filter_item(scmp_filter_ctx *ctx, const char *name, uint32_t action, char **exclude, bool log_missing); + +int seccomp_load_syscall_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action, bool log_missing); +int seccomp_load_syscall_filter_set_raw(uint32_t default_action, Hashmap* set, uint32_t action, bool log_missing); + +typedef enum SeccompParseFlags { + SECCOMP_PARSE_INVERT = 1 << 0, + SECCOMP_PARSE_WHITELIST = 1 << 1, + SECCOMP_PARSE_LOG = 1 << 2, + SECCOMP_PARSE_PERMISSIVE = 1 << 3, +} SeccompParseFlags; + +int seccomp_parse_syscall_filter_full( + const char *name, int errno_num, Hashmap *filter, SeccompParseFlags flags, + const char *unit, const char *filename, unsigned line); + +static inline int seccomp_parse_syscall_filter(const char *name, int errno_num, Hashmap *filter, SeccompParseFlags flags) { + return seccomp_parse_syscall_filter_full(name, errno_num, filter, flags, NULL, NULL, 0); +} + +int seccomp_restrict_archs(Set *archs); +int seccomp_restrict_namespaces(unsigned long retain); +int seccomp_protect_sysctl(void); +int seccomp_restrict_address_families(Set *address_families, bool whitelist); +int seccomp_restrict_realtime(void); +int seccomp_memory_deny_write_execute(void); +int seccomp_lock_personality(unsigned long personality); + +extern const uint32_t seccomp_local_archs[]; + +#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \ + for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \ + seccomp_local_archs[_i] != (uint32_t) -1; \ + (arch) = seccomp_local_archs[++_i]) + +DEFINE_TRIVIAL_CLEANUP_FUNC(scmp_filter_ctx, seccomp_release); + +int parse_syscall_archs(char **l, Set **archs); diff --git a/src/shared/securebits-util.c b/src/shared/securebits-util.c new file mode 100644 index 0000000..6d31dfe --- /dev/null +++ b/src/shared/securebits-util.c @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdio.h> + +#include "alloc-util.h" +#include "extract-word.h" +#include "securebits-util.h" +#include "string-util.h" + +int secure_bits_to_string_alloc(int i, char **s) { + _cleanup_free_ char *str = NULL; + size_t len; + int r; + + assert(s); + + r = asprintf(&str, "%s%s%s%s%s%s", + (i & (1 << SECURE_KEEP_CAPS)) ? "keep-caps " : "", + (i & (1 << SECURE_KEEP_CAPS_LOCKED)) ? "keep-caps-locked " : "", + (i & (1 << SECURE_NO_SETUID_FIXUP)) ? "no-setuid-fixup " : "", + (i & (1 << SECURE_NO_SETUID_FIXUP_LOCKED)) ? "no-setuid-fixup-locked " : "", + (i & (1 << SECURE_NOROOT)) ? "noroot " : "", + (i & (1 << SECURE_NOROOT_LOCKED)) ? "noroot-locked " : ""); + if (r < 0) + return -ENOMEM; + + len = strlen(str); + if (len != 0) + str[len - 1] = '\0'; + + *s = TAKE_PTR(str); + + return 0; +} + +int secure_bits_from_string(const char *s) { + int secure_bits = 0; + const char *p; + int r; + + for (p = s;;) { + _cleanup_free_ char *word = NULL; + + r = extract_first_word(&p, &word, NULL, EXTRACT_QUOTES); + if (r == -ENOMEM) + return r; + if (r <= 0) + break; + + if (streq(word, "keep-caps")) + secure_bits |= 1 << SECURE_KEEP_CAPS; + else if (streq(word, "keep-caps-locked")) + secure_bits |= 1 << SECURE_KEEP_CAPS_LOCKED; + else if (streq(word, "no-setuid-fixup")) + secure_bits |= 1 << SECURE_NO_SETUID_FIXUP; + else if (streq(word, "no-setuid-fixup-locked")) + secure_bits |= 1 << SECURE_NO_SETUID_FIXUP_LOCKED; + else if (streq(word, "noroot")) + secure_bits |= 1 << SECURE_NOROOT; + else if (streq(word, "noroot-locked")) + secure_bits |= 1 << SECURE_NOROOT_LOCKED; + } + + return secure_bits; +} diff --git a/src/shared/securebits-util.h b/src/shared/securebits-util.h new file mode 100644 index 0000000..b5ec6ee --- /dev/null +++ b/src/shared/securebits-util.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "missing_securebits.h" + +int secure_bits_to_string_alloc(int i, char **s); +int secure_bits_from_string(const char *s); + +static inline bool secure_bits_is_valid(int i) { + return ((SECURE_ALL_BITS | SECURE_ALL_LOCKS) & i) == i; +} + +static inline int secure_bits_to_string_alloc_with_check(int n, char **s) { + if (!secure_bits_is_valid(n)) + return -EINVAL; + + return secure_bits_to_string_alloc(n, s); +} diff --git a/src/shared/serialize.c b/src/shared/serialize.c new file mode 100644 index 0000000..0333f87 --- /dev/null +++ b/src/shared/serialize.c @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/mman.h> + +#include "alloc-util.h" +#include "env-util.h" +#include "escape.h" +#include "fileio.h" +#include "missing.h" +#include "parse-util.h" +#include "process-util.h" +#include "serialize.h" +#include "strv.h" +#include "tmpfile-util.h" + +int serialize_item(FILE *f, const char *key, const char *value) { + assert(f); + assert(key); + + if (!value) + return 0; + + /* Make sure that anything we serialize we can also read back again with read_line() with a maximum line size + * of LONG_LINE_MAX. This is a safety net only. All code calling us should filter this out earlier anyway. */ + if (strlen(key) + 1 + strlen(value) + 1 > LONG_LINE_MAX) { + log_warning("Attempted to serialize overly long item '%s', refusing.", key); + return -EINVAL; + } + + fputs(key, f); + fputc('=', f); + fputs(value, f); + fputc('\n', f); + + return 1; +} + +int serialize_item_escaped(FILE *f, const char *key, const char *value) { + _cleanup_free_ char *c = NULL; + + assert(f); + assert(key); + + if (!value) + return 0; + + c = cescape(value); + if (!c) + return log_oom(); + + return serialize_item(f, key, c); +} + +int serialize_item_format(FILE *f, const char *key, const char *format, ...) { + char buf[LONG_LINE_MAX]; + va_list ap; + int k; + + assert(f); + assert(key); + assert(format); + + va_start(ap, format); + k = vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + + if (k < 0 || (size_t) k >= sizeof(buf) || strlen(key) + 1 + k + 1 > LONG_LINE_MAX) { + log_warning("Attempted to serialize overly long item '%s', refusing.", key); + return -EINVAL; + } + + fputs(key, f); + fputc('=', f); + fputs(buf, f); + fputc('\n', f); + + return 1; +} + +int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd) { + int copy; + + assert(f); + assert(key); + + if (fd < 0) + return 0; + + copy = fdset_put_dup(fds, fd); + if (copy < 0) + return log_error_errno(copy, "Failed to add file descriptor to serialization set: %m"); + + return serialize_item_format(f, key, "%i", copy); +} + +int serialize_usec(FILE *f, const char *key, usec_t usec) { + assert(f); + assert(key); + + if (usec == USEC_INFINITY) + return 0; + + return serialize_item_format(f, key, USEC_FMT, usec); +} + +int serialize_dual_timestamp(FILE *f, const char *name, const dual_timestamp *t) { + assert(f); + assert(name); + assert(t); + + if (!dual_timestamp_is_set(t)) + return 0; + + return serialize_item_format(f, name, USEC_FMT " " USEC_FMT, t->realtime, t->monotonic); +} + +int serialize_strv(FILE *f, const char *key, char **l) { + int ret = 0, r; + char **i; + + /* Returns the first error, or positive if anything was serialized, 0 otherwise. */ + + STRV_FOREACH(i, l) { + r = serialize_item_escaped(f, key, *i); + if ((ret >= 0 && r < 0) || + (ret == 0 && r > 0)) + ret = r; + } + + return ret; +} + +int deserialize_usec(const char *value, usec_t *ret) { + int r; + + assert(value); + + r = safe_atou64(value, ret); + if (r < 0) + return log_debug_errno(r, "Failed to parse usec value \"%s\": %m", value); + + return 0; +} + +int deserialize_dual_timestamp(const char *value, dual_timestamp *t) { + uint64_t a, b; + int r, pos; + + assert(value); + assert(t); + + pos = strspn(value, WHITESPACE); + if (value[pos] == '-') + return -EINVAL; + pos += strspn(value + pos, DIGITS); + pos += strspn(value + pos, WHITESPACE); + if (value[pos] == '-') + return -EINVAL; + + r = sscanf(value, "%" PRIu64 "%" PRIu64 "%n", &a, &b, &pos); + if (r != 2) + return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse dual timestamp value \"%s\".", + value); + + if (value[pos] != '\0') + /* trailing garbage */ + return -EINVAL; + + t->realtime = a; + t->monotonic = b; + + return 0; +} + +int deserialize_environment(const char *value, char ***list) { + _cleanup_free_ char *unescaped = NULL; + int r; + + assert(value); + assert(list); + + /* Changes the *environment strv inline. */ + + r = cunescape(value, 0, &unescaped); + if (r < 0) + return log_error_errno(r, "Failed to unescape: %m"); + + r = strv_env_replace(list, unescaped); + if (r < 0) + return log_error_errno(r, "Failed to append environment variable: %m"); + + unescaped = NULL; /* now part of 'list' */ + return 0; +} + +int open_serialization_fd(const char *ident) { + int fd; + + fd = memfd_create(ident, MFD_CLOEXEC); + if (fd < 0) { + const char *path; + + path = getpid_cached() == 1 ? "/run/systemd" : "/tmp"; + fd = open_tmpfile_unlinkable(path, O_RDWR|O_CLOEXEC); + if (fd < 0) + return fd; + + log_debug("Serializing %s to %s.", ident, path); + } else + log_debug("Serializing %s to memfd.", ident); + + return fd; +} diff --git a/src/shared/serialize.h b/src/shared/serialize.h new file mode 100644 index 0000000..4cbd98b --- /dev/null +++ b/src/shared/serialize.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdio.h> + +#include "fdset.h" +#include "macro.h" + +int serialize_item(FILE *f, const char *key, const char *value); +int serialize_item_escaped(FILE *f, const char *key, const char *value); +int serialize_item_format(FILE *f, const char *key, const char *value, ...) _printf_(3,4); +int serialize_fd(FILE *f, FDSet *fds, const char *key, int fd); +int serialize_usec(FILE *f, const char *key, usec_t usec); +int serialize_dual_timestamp(FILE *f, const char *key, const dual_timestamp *t); +int serialize_strv(FILE *f, const char *key, char **l); + +static inline int serialize_bool(FILE *f, const char *key, bool b) { + return serialize_item(f, key, yes_no(b)); +} + +int deserialize_usec(const char *value, usec_t *timestamp); +int deserialize_dual_timestamp(const char *value, dual_timestamp *t); +int deserialize_environment(const char *value, char ***environment); + +int open_serialization_fd(const char *ident); diff --git a/src/shared/sleep-config.c b/src/shared/sleep-config.c new file mode 100644 index 0000000..2e22bd0 --- /dev/null +++ b/src/shared/sleep-config.c @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/*** + Copyright © 2018 Dell Inc. +***/ + +#include <errno.h> +#include <linux/fs.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <sys/ioctl.h> +#include <syslog.h> +#include <unistd.h> + +#include "alloc-util.h" +#include "conf-parser.h" +#include "def.h" +#include "env-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "parse-util.h" +#include "path-util.h" +#include "sleep-config.h" +#include "string-util.h" +#include "strv.h" + +int parse_sleep_config(const char *verb, bool *ret_allow, char ***ret_modes, char ***ret_states, usec_t *ret_delay) { + int allow_suspend = -1, allow_hibernate = -1, + allow_s2h = -1, allow_hybrid_sleep = -1; + bool allow; + _cleanup_strv_free_ char + **suspend_mode = NULL, **suspend_state = NULL, + **hibernate_mode = NULL, **hibernate_state = NULL, + **hybrid_mode = NULL, **hybrid_state = NULL; + _cleanup_strv_free_ char **modes, **states; /* always initialized below */ + usec_t delay = 180 * USEC_PER_MINUTE; + + const ConfigTableItem items[] = { + { "Sleep", "AllowSuspend", config_parse_tristate, 0, &allow_suspend }, + { "Sleep", "AllowHibernation", config_parse_tristate, 0, &allow_hibernate }, + { "Sleep", "AllowSuspendThenHibernate", config_parse_tristate, 0, &allow_s2h }, + { "Sleep", "AllowHybridSleep", config_parse_tristate, 0, &allow_hybrid_sleep }, + + { "Sleep", "SuspendMode", config_parse_strv, 0, &suspend_mode }, + { "Sleep", "SuspendState", config_parse_strv, 0, &suspend_state }, + { "Sleep", "HibernateMode", config_parse_strv, 0, &hibernate_mode }, + { "Sleep", "HibernateState", config_parse_strv, 0, &hibernate_state }, + { "Sleep", "HybridSleepMode", config_parse_strv, 0, &hybrid_mode }, + { "Sleep", "HybridSleepState", config_parse_strv, 0, &hybrid_state }, + + { "Sleep", "HibernateDelaySec", config_parse_sec, 0, &delay}, + {} + }; + + (void) config_parse_many_nulstr(PKGSYSCONFDIR "/sleep.conf", + CONF_PATHS_NULSTR("systemd/sleep.conf.d"), + "Sleep\0", config_item_table_lookup, items, + CONFIG_PARSE_WARN, NULL); + + if (streq(verb, "suspend")) { + allow = allow_suspend != 0; + + /* empty by default */ + modes = TAKE_PTR(suspend_mode); + + if (suspend_state) + states = TAKE_PTR(suspend_state); + else + states = strv_new("mem", "standby", "freeze"); + + } else if (streq(verb, "hibernate")) { + allow = allow_hibernate != 0; + + if (hibernate_mode) + modes = TAKE_PTR(hibernate_mode); + else + modes = strv_new("platform", "shutdown"); + + if (hibernate_state) + states = TAKE_PTR(hibernate_state); + else + states = strv_new("disk"); + + } else if (streq(verb, "hybrid-sleep")) { + allow = allow_hybrid_sleep > 0 || + (allow_suspend != 0 && allow_hibernate != 0); + + if (hybrid_mode) + modes = TAKE_PTR(hybrid_mode); + else + modes = strv_new("suspend", "platform", "shutdown"); + + if (hybrid_state) + states = TAKE_PTR(hybrid_state); + else + states = strv_new("disk"); + + } else if (streq(verb, "suspend-then-hibernate")) { + allow = allow_s2h > 0 || + (allow_suspend != 0 && allow_hibernate != 0); + + modes = states = NULL; + } else + assert_not_reached("what verb"); + + if ((!modes && STR_IN_SET(verb, "hibernate", "hybrid-sleep")) || + (!states && !streq(verb, "suspend-then-hibernate"))) + return log_oom(); + + if (ret_allow) + *ret_allow = allow; + if (ret_modes) + *ret_modes = TAKE_PTR(modes); + if (ret_states) + *ret_states = TAKE_PTR(states); + if (ret_delay) + *ret_delay = delay; + + return 0; +} + +int can_sleep_state(char **types) { + char **type; + int r; + _cleanup_free_ char *p = NULL; + + if (strv_isempty(types)) + return true; + + /* If /sys is read-only we cannot sleep */ + if (access("/sys/power/state", W_OK) < 0) + return false; + + r = read_one_line_file("/sys/power/state", &p); + if (r < 0) + return false; + + STRV_FOREACH(type, types) { + const char *word, *state; + size_t l, k; + + k = strlen(*type); + FOREACH_WORD_SEPARATOR(word, l, p, WHITESPACE, state) + if (l == k && memcmp(word, *type, l) == 0) + return true; + } + + return false; +} + +int can_sleep_disk(char **types) { + char **type; + int r; + _cleanup_free_ char *p = NULL; + + if (strv_isempty(types)) + return true; + + /* If /sys is read-only we cannot sleep */ + if (access("/sys/power/disk", W_OK) < 0) { + log_debug_errno(errno, "/sys/power/disk is not writable: %m"); + return false; + } + + r = read_one_line_file("/sys/power/disk", &p); + if (r < 0) { + log_debug_errno(r, "Couldn't read /sys/power/disk: %m"); + return false; + } + + STRV_FOREACH(type, types) { + const char *word, *state; + size_t l, k; + + k = strlen(*type); + FOREACH_WORD_SEPARATOR(word, l, p, WHITESPACE, state) { + if (l == k && memcmp(word, *type, l) == 0) + return true; + + if (l == k + 2 && + word[0] == '[' && + memcmp(word + 1, *type, l - 2) == 0 && + word[l-1] == ']') + return true; + } + } + + return false; +} + +#define HIBERNATION_SWAP_THRESHOLD 0.98 + +int find_hibernate_location(char **device, char **type, size_t *size, size_t *used) { + _cleanup_fclose_ FILE *f; + unsigned i; + + f = fopen("/proc/swaps", "re"); + if (!f) { + log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, + "Failed to retrieve open /proc/swaps: %m"); + assert(errno > 0); + return -errno; + } + + (void) fscanf(f, "%*s %*s %*s %*s %*s\n"); + + for (i = 1;; i++) { + _cleanup_free_ char *dev_field = NULL, *type_field = NULL; + size_t size_field, used_field; + int k; + + k = fscanf(f, + "%ms " /* device/file */ + "%ms " /* type of swap */ + "%zu " /* swap size */ + "%zu " /* used */ + "%*i\n", /* priority */ + &dev_field, &type_field, &size_field, &used_field); + if (k == EOF) + break; + if (k != 4) { + log_warning("Failed to parse /proc/swaps:%u", i); + continue; + } + + if (streq(type_field, "file")) { + + if (endswith(dev_field, "\\040(deleted)")) { + log_warning("Ignoring deleted swap file '%s'.", dev_field); + continue; + } + + } else if (streq(type_field, "partition")) { + const char *fn; + + fn = path_startswith(dev_field, "/dev/"); + if (fn && startswith(fn, "zram")) { + log_debug("Ignoring compressed RAM swap device '%s'.", dev_field); + continue; + } + } + + if (device) + *device = TAKE_PTR(dev_field); + if (type) + *type = TAKE_PTR(type_field); + if (size) + *size = size_field; + if (used) + *used = used_field; + return 0; + } + + return log_debug_errno(SYNTHETIC_ERRNO(ENOSYS), + "No swap partitions were found."); +} + +static bool enough_swap_for_hibernation(void) { + _cleanup_free_ char *active = NULL; + unsigned long long act = 0; + size_t size = 0, used = 0; + int r; + + if (getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0) + return true; + + r = find_hibernate_location(NULL, NULL, &size, &used); + if (r < 0) + return false; + + r = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &active); + if (r < 0) { + log_debug_errno(r, "Failed to retrieve Active(anon) from /proc/meminfo: %m"); + return false; + } + + r = safe_atollu(active, &act); + if (r < 0) { + log_debug_errno(r, "Failed to parse Active(anon) from /proc/meminfo: %s: %m", active); + return false; + } + + r = act <= (size - used) * HIBERNATION_SWAP_THRESHOLD; + log_debug("%s swap for hibernation, Active(anon)=%llu kB, size=%zu kB, used=%zu kB, threshold=%.2g%%", + r ? "Enough" : "Not enough", act, size, used, 100*HIBERNATION_SWAP_THRESHOLD); + + return r; +} + +int read_fiemap(int fd, struct fiemap **ret) { + _cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL; + struct stat statinfo; + uint32_t result_extents = 0; + uint64_t fiemap_start = 0, fiemap_length; + const size_t n_extra = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent)); + size_t fiemap_allocated = n_extra, result_fiemap_allocated = n_extra; + + if (fstat(fd, &statinfo) < 0) + return log_debug_errno(errno, "Cannot determine file size: %m"); + if (!S_ISREG(statinfo.st_mode)) + return -ENOTTY; + fiemap_length = statinfo.st_size; + + /* Zero this out in case we run on a file with no extents */ + fiemap = calloc(n_extra, sizeof(struct fiemap_extent)); + if (!fiemap) + return -ENOMEM; + + result_fiemap = malloc_multiply(n_extra, sizeof(struct fiemap_extent)); + if (!result_fiemap) + return -ENOMEM; + + /* XFS filesystem has incorrect implementation of fiemap ioctl and + * returns extents for only one block-group at a time, so we need + * to handle it manually, starting the next fiemap call from the end + * of the last extent + */ + while (fiemap_start < fiemap_length) { + *fiemap = (struct fiemap) { + .fm_start = fiemap_start, + .fm_length = fiemap_length, + .fm_flags = FIEMAP_FLAG_SYNC, + }; + + /* Find out how many extents there are */ + if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) + return log_debug_errno(errno, "Failed to read extents: %m"); + + /* Nothing to process */ + if (fiemap->fm_mapped_extents == 0) + break; + + /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all + * the extents for the whole file. Add space for the initial struct fiemap. */ + if (!greedy_realloc0((void**) &fiemap, &fiemap_allocated, + n_extra + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent))) + return -ENOMEM; + + fiemap->fm_extent_count = fiemap->fm_mapped_extents; + fiemap->fm_mapped_extents = 0; + + if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) + return log_debug_errno(errno, "Failed to read extents: %m"); + + /* Resize result_fiemap to allow us to copy in the extents */ + if (!greedy_realloc((void**) &result_fiemap, &result_fiemap_allocated, + n_extra + result_extents + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent))) + return -ENOMEM; + + memcpy(result_fiemap->fm_extents + result_extents, + fiemap->fm_extents, + sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents); + + result_extents += fiemap->fm_mapped_extents; + + /* Highly unlikely that it is zero */ + if (_likely_(fiemap->fm_mapped_extents > 0)) { + uint32_t i = fiemap->fm_mapped_extents - 1; + + fiemap_start = fiemap->fm_extents[i].fe_logical + + fiemap->fm_extents[i].fe_length; + + if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST) + break; + } + } + + memcpy(result_fiemap, fiemap, sizeof(struct fiemap)); + result_fiemap->fm_mapped_extents = result_extents; + *ret = TAKE_PTR(result_fiemap); + return 0; +} + +static int can_sleep_internal(const char *verb, bool check_allowed); + +static bool can_s2h(void) { + const char *p; + int r; + + r = access("/sys/class/rtc/rtc0/wakealarm", W_OK); + if (r < 0) { + log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, + "/sys/class/rct/rct0/wakealarm is not writable %m"); + return false; + } + + FOREACH_STRING(p, "suspend", "hibernate") { + r = can_sleep_internal(p, false); + if (IN_SET(r, 0, -ENOSPC, -EADV)) { + log_debug("Unable to %s system.", p); + return false; + } + if (r < 0) + return log_debug_errno(r, "Failed to check if %s is possible: %m", p); + } + + return true; +} + +static int can_sleep_internal(const char *verb, bool check_allowed) { + bool allow; + _cleanup_strv_free_ char **modes = NULL, **states = NULL; + int r; + + assert(STR_IN_SET(verb, "suspend", "hibernate", "hybrid-sleep", "suspend-then-hibernate")); + + r = parse_sleep_config(verb, &allow, &modes, &states, NULL); + if (r < 0) + return false; + + if (check_allowed && !allow) { + log_debug("Sleep mode \"%s\" is disabled by configuration.", verb); + return false; + } + + if (streq(verb, "suspend-then-hibernate")) + return can_s2h(); + + if (!can_sleep_state(states) || !can_sleep_disk(modes)) + return false; + + if (streq(verb, "suspend")) + return true; + + if (!enough_swap_for_hibernation()) + return -ENOSPC; + + return true; +} + +int can_sleep(const char *verb) { + return can_sleep_internal(verb, true); +} diff --git a/src/shared/sleep-config.h b/src/shared/sleep-config.h new file mode 100644 index 0000000..c584f44 --- /dev/null +++ b/src/shared/sleep-config.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <linux/fiemap.h> +#include "time-util.h" + +int read_fiemap(int fd, struct fiemap **ret); +int parse_sleep_config(const char *verb, bool *ret_allow, char ***ret_modes, char ***ret_states, usec_t *ret_delay); +int find_hibernate_location(char **device, char **type, size_t *size, size_t *used); + +int can_sleep(const char *verb); +int can_sleep_disk(char **types); +int can_sleep_state(char **types); diff --git a/src/shared/spawn-ask-password-agent.c b/src/shared/spawn-ask-password-agent.c new file mode 100644 index 0000000..309071c --- /dev/null +++ b/src/shared/spawn-ask-password-agent.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#include "log.h" +#include "process-util.h" +#include "spawn-ask-password-agent.h" +#include "util.h" + +static pid_t agent_pid = 0; + +int ask_password_agent_open(void) { + int r; + + if (agent_pid > 0) + return 0; + + /* We check STDIN here, not STDOUT, since this is about input, + * not output */ + if (!isatty(STDIN_FILENO)) + return 0; + + if (!is_main_thread()) + return -EPERM; + + r = fork_agent("(sd-askpwagent)", + NULL, 0, + &agent_pid, + SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, + SYSTEMD_TTY_ASK_PASSWORD_AGENT_BINARY_PATH, "--watch", NULL); + if (r < 0) + return log_error_errno(r, "Failed to fork TTY ask password agent: %m"); + + return 1; +} + +void ask_password_agent_close(void) { + + if (agent_pid <= 0) + return; + + /* Inform agent that we are done */ + (void) kill_and_sigcont(agent_pid, SIGTERM); + (void) wait_for_terminate(agent_pid, NULL); + agent_pid = 0; +} diff --git a/src/shared/spawn-ask-password-agent.h b/src/shared/spawn-ask-password-agent.h new file mode 100644 index 0000000..97e73bd --- /dev/null +++ b/src/shared/spawn-ask-password-agent.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +int ask_password_agent_open(void); +void ask_password_agent_close(void); diff --git a/src/shared/spawn-polkit-agent.c b/src/shared/spawn-polkit-agent.c new file mode 100644 index 0000000..180cb79 --- /dev/null +++ b/src/shared/spawn-polkit-agent.c @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <poll.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#include "fd-util.h" +#include "io-util.h" +#include "log.h" +#include "macro.h" +#include "process-util.h" +#include "spawn-polkit-agent.h" +#include "stdio-util.h" +#include "time-util.h" +#include "util.h" + +#if ENABLE_POLKIT +static pid_t agent_pid = 0; + +int polkit_agent_open(void) { + char notify_fd[DECIMAL_STR_MAX(int) + 1]; + int pipe_fd[2], r; + + if (agent_pid > 0) + return 0; + + /* Clients that run as root don't need to activate/query polkit */ + if (geteuid() == 0) + return 0; + + /* We check STDIN here, not STDOUT, since this is about input, not output */ + if (!isatty(STDIN_FILENO)) + return 0; + + if (!is_main_thread()) + return -EPERM; + + if (pipe2(pipe_fd, 0) < 0) + return -errno; + + xsprintf(notify_fd, "%i", pipe_fd[1]); + + r = fork_agent("(polkit-agent)", + &pipe_fd[1], 1, + &agent_pid, + POLKIT_AGENT_BINARY_PATH, + POLKIT_AGENT_BINARY_PATH, "--notify-fd", notify_fd, "--fallback", NULL); + + /* Close the writing side, because that's the one for the agent */ + safe_close(pipe_fd[1]); + + if (r < 0) + log_error_errno(r, "Failed to fork TTY ask password agent: %m"); + else + /* Wait until the agent closes the fd */ + fd_wait_for_event(pipe_fd[0], POLLHUP, USEC_INFINITY); + + safe_close(pipe_fd[0]); + + return r; +} + +void polkit_agent_close(void) { + + if (agent_pid <= 0) + return; + + /* Inform agent that we are done */ + (void) kill_and_sigcont(agent_pid, SIGTERM); + (void) wait_for_terminate(agent_pid, NULL); + agent_pid = 0; +} + +#else + +int polkit_agent_open(void) { + return 0; +} + +void polkit_agent_close(void) { +} + +#endif diff --git a/src/shared/spawn-polkit-agent.h b/src/shared/spawn-polkit-agent.h new file mode 100644 index 0000000..190b970 --- /dev/null +++ b/src/shared/spawn-polkit-agent.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "bus-util.h" + +int polkit_agent_open(void); +void polkit_agent_close(void); + +static inline int polkit_agent_open_if_enabled( + BusTransport transport, + bool ask_password) { + + /* Open the polkit agent as a child process if necessary */ + + if (transport != BUS_TRANSPORT_LOCAL) + return 0; + + if (!ask_password) + return 0; + + return polkit_agent_open(); +} diff --git a/src/shared/specifier.c b/src/shared/specifier.c new file mode 100644 index 0000000..b8f7537 --- /dev/null +++ b/src/shared/specifier.c @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/utsname.h> + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "fs-util.h" +#include "hostname-util.h" +#include "macro.h" +#include "specifier.h" +#include "string-util.h" +#include "strv.h" +#include "user-util.h" + +/* + * Generic infrastructure for replacing %x style specifiers in + * strings. Will call a callback for each replacement. + */ + +/* Any ASCII character or digit: our pool of potential specifiers, + * and "%" used for escaping. */ +#define POSSIBLE_SPECIFIERS ALPHANUMERICAL "%" + +int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **_ret) { + size_t l, allocated = 0; + _cleanup_free_ char *ret = NULL; + char *t; + const char *f; + bool percent = false; + int r; + + assert(text); + assert(table); + + l = strlen(text); + if (!GREEDY_REALLOC(ret, allocated, l + 1)) + return -ENOMEM; + t = ret; + + for (f = text; *f; f++, l--) + if (percent) { + if (*f == '%') + *(t++) = '%'; + else { + const Specifier *i; + + for (i = table; i->specifier; i++) + if (i->specifier == *f) + break; + + if (i->lookup) { + _cleanup_free_ char *w = NULL; + size_t k, j; + + r = i->lookup(i->specifier, i->data, userdata, &w); + if (r < 0) + return r; + + j = t - ret; + k = strlen(w); + + if (!GREEDY_REALLOC(ret, allocated, j + k + l + 1)) + return -ENOMEM; + memcpy(ret + j, w, k); + t = ret + j + k; + } else if (strchr(POSSIBLE_SPECIFIERS, *f)) + /* Oops, an unknown specifier. */ + return -EBADSLT; + else { + *(t++) = '%'; + *(t++) = *f; + } + } + + percent = false; + } else if (*f == '%') + percent = true; + else + *(t++) = *f; + + /* If string ended with a stray %, also end with % */ + if (percent) + *(t++) = '%'; + *(t++) = 0; + + /* Try to deallocate unused bytes, but don't sweat it too much */ + if ((size_t)(t - ret) < allocated) { + t = realloc(ret, t - ret); + if (t) + ret = t; + } + + *_ret = TAKE_PTR(ret); + return 0; +} + +/* Generic handler for simple string replacements */ + +int specifier_string(char specifier, const void *data, const void *userdata, char **ret) { + char *n; + + n = strdup(strempty(data)); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret) { + sd_id128_t id; + char *n; + int r; + + r = sd_id128_get_machine(&id); + if (r < 0) + return r; + + n = new(char, 33); + if (!n) + return -ENOMEM; + + *ret = sd_id128_to_string(id, n); + return 0; +} + +int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret) { + sd_id128_t id; + char *n; + int r; + + r = sd_id128_get_boot(&id); + if (r < 0) + return r; + + n = new(char, 33); + if (!n) + return -ENOMEM; + + *ret = sd_id128_to_string(id, n); + return 0; +} + +int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret) { + char *n; + + n = gethostname_malloc(); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret) { + struct utsname uts; + char *n; + int r; + + r = uname(&uts); + if (r < 0) + return -errno; + + n = strdup(uts.release); + if (!n) + return -ENOMEM; + + *ret = n; + return 0; +} + +int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret) { + char *t; + + t = gid_to_name(getgid()); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret) { + if (asprintf(ret, UID_FMT, getgid()) < 0) + return -ENOMEM; + + return 0; +} + +int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret) { + char *t; + + /* If we are UID 0 (root), this will not result in NSS, otherwise it might. This is good, as we want to be able + * to run this in PID 1, where our user ID is 0, but where NSS lookups are not allowed. + + * We don't use getusername_malloc() here, because we don't want to look at $USER, to remain consistent with + * specifer_user_id() below. + */ + + t = uid_to_name(getuid()); + if (!t) + return -ENOMEM; + + *ret = t; + return 0; +} + +int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret) { + + if (asprintf(ret, UID_FMT, getuid()) < 0) + return -ENOMEM; + + return 0; +} + +int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret) { + + /* On PID 1 (which runs as root) this will not result in NSS, + * which is good. See above */ + + return get_home_dir(ret); +} + +int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret) { + + /* On PID 1 (which runs as root) this will not result in NSS, + * which is good. See above */ + + return get_shell(ret); +} + +int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) { + const char *p; + char *copy; + int r; + + r = tmp_dir(&p); + if (r < 0) + return r; + + copy = strdup(p); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; +} + +int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret) { + const char *p; + char *copy; + int r; + + r = var_tmp_dir(&p); + if (r < 0) + return r; + + copy = strdup(p); + if (!copy) + return -ENOMEM; + + *ret = copy; + return 0; +} + +int specifier_escape_strv(char **l, char ***ret) { + char **z, **p, **q; + + assert(ret); + + if (strv_isempty(l)) { + *ret = NULL; + return 0; + } + + z = new(char*, strv_length(l)+1); + if (!z) + return -ENOMEM; + + for (p = l, q = z; *p; p++, q++) { + + *q = specifier_escape(*p); + if (!*q) { + strv_free(z); + return -ENOMEM; + } + } + + *q = NULL; + *ret = z; + + return 0; +} diff --git a/src/shared/specifier.h b/src/shared/specifier.h new file mode 100644 index 0000000..d0221ef --- /dev/null +++ b/src/shared/specifier.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "string-util.h" + +typedef int (*SpecifierCallback)(char specifier, const void *data, const void *userdata, char **ret); + +typedef struct Specifier { + const char specifier; + const SpecifierCallback lookup; + const void *data; +} Specifier; + +int specifier_printf(const char *text, const Specifier table[], const void *userdata, char **ret); + +int specifier_string(char specifier, const void *data, const void *userdata, char **ret); + +int specifier_machine_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_boot_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_host_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_kernel_release(char specifier, const void *data, const void *userdata, char **ret); + +int specifier_group_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_group_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_name(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_id(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_home(char specifier, const void *data, const void *userdata, char **ret); +int specifier_user_shell(char specifier, const void *data, const void *userdata, char **ret); + +int specifier_tmp_dir(char specifier, const void *data, const void *userdata, char **ret); +int specifier_var_tmp_dir(char specifier, const void *data, const void *userdata, char **ret); + +static inline char* specifier_escape(const char *string) { + return strreplace(string, "%", "%%"); +} + +int specifier_escape_strv(char **l, char ***ret); diff --git a/src/shared/switch-root.c b/src/shared/switch-root.c new file mode 100644 index 0000000..dbb4622 --- /dev/null +++ b/src/shared/switch-root.c @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "base-filesystem.h" +#include "fd-util.h" +#include "fs-util.h" +#include "log.h" +#include "missing.h" +#include "mkdir.h" +#include "mount-util.h" +#include "mountpoint-util.h" +#include "path-util.h" +#include "rm-rf.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "switch-root.h" +#include "user-util.h" +#include "util.h" + +int switch_root(const char *new_root, + const char *old_root_after, /* path below the new root, where to place the old root after the transition */ + bool unmount_old_root, + unsigned long mount_flags) { /* MS_MOVE or MS_BIND */ + + _cleanup_free_ char *resolved_old_root_after = NULL; + _cleanup_close_ int old_root_fd = -1; + bool old_root_remove; + const char *i; + int r; + + assert(new_root); + assert(old_root_after); + + if (path_equal(new_root, "/")) + return 0; + + /* Check if we shall remove the contents of the old root */ + old_root_remove = in_initrd(); + if (old_root_remove) { + old_root_fd = open("/", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY|O_DIRECTORY); + if (old_root_fd < 0) + return log_error_errno(errno, "Failed to open root directory: %m"); + } + + /* Determine where we shall place the old root after the transition */ + r = chase_symlinks(old_root_after, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved_old_root_after); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, old_root_after); + if (r == 0) /* Doesn't exist yet. Let's create it */ + (void) mkdir_p_label(resolved_old_root_after, 0755); + + /* Work-around for kernel design: the kernel refuses MS_MOVE if any file systems are mounted MS_SHARED. Hence + * remount them MS_PRIVATE here as a work-around. + * + * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */ + if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0) + return log_error_errno(errno, "Failed to set \"/\" mount propagation to private: %m"); + + FOREACH_STRING(i, "/sys", "/dev", "/run", "/proc") { + _cleanup_free_ char *chased = NULL; + + r = chase_symlinks(i, new_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &chased); + if (r < 0) + return log_error_errno(r, "Failed to resolve %s/%s: %m", new_root, i); + if (r > 0) { + /* Already exists. Let's see if it is a mount point already. */ + r = path_is_mount_point(chased, NULL, 0); + if (r < 0) + return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", chased); + if (r > 0) /* If it is already mounted, then do nothing */ + continue; + } else + /* Doesn't exist yet? */ + (void) mkdir_p_label(chased, 0755); + + if (mount(i, chased, NULL, mount_flags, NULL) < 0) + return log_error_errno(errno, "Failed to mount %s to %s: %m", i, chased); + } + + /* Do not fail if base_filesystem_create() fails. Not all switch roots are like base_filesystem_create() wants + * them to look like. They might even boot, if they are RO and don't have the FS layout. Just ignore the error + * and switch_root() nevertheless. */ + (void) base_filesystem_create(new_root, UID_INVALID, GID_INVALID); + + if (chdir(new_root) < 0) + return log_error_errno(errno, "Failed to change directory to %s: %m", new_root); + + /* We first try a pivot_root() so that we can umount the old root dir. In many cases (i.e. where rootfs is /), + * that's not possible however, and hence we simply overmount root */ + if (pivot_root(new_root, resolved_old_root_after) >= 0) { + + /* Immediately get rid of the old root, if detach_oldroot is set. + * Since we are running off it we need to do this lazily. */ + if (unmount_old_root) { + r = umount_recursive(old_root_after, MNT_DETACH); + if (r < 0) + log_warning_errno(r, "Failed to unmount old root directory tree, ignoring: %m"); + } + + } else if (mount(new_root, "/", NULL, MS_MOVE, NULL) < 0) + return log_error_errno(errno, "Failed to move %s to /: %m", new_root); + + if (chroot(".") < 0) + return log_error_errno(errno, "Failed to change root: %m"); + + if (chdir("/") < 0) + return log_error_errno(errno, "Failed to change directory: %m"); + + if (old_root_fd >= 0) { + struct stat rb; + + if (fstat(old_root_fd, &rb) < 0) + log_warning_errno(errno, "Failed to stat old root directory, leaving: %m"); + else { + (void) rm_rf_children(old_root_fd, 0, &rb); + old_root_fd = -1; + } + } + + return 0; +} diff --git a/src/shared/switch-root.h b/src/shared/switch-root.h new file mode 100644 index 0000000..f4d48cb --- /dev/null +++ b/src/shared/switch-root.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +int switch_root(const char *new_root, const char *oldroot, bool detach_oldroot, unsigned long mountflags); diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c new file mode 100644 index 0000000..480e6c3 --- /dev/null +++ b/src/shared/sysctl-util.c @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "fd-util.h" +#include "fileio.h" +#include "log.h" +#include "macro.h" +#include "string-util.h" +#include "sysctl-util.h" + +char *sysctl_normalize(char *s) { + char *n; + + n = strpbrk(s, "/."); + /* If the first separator is a slash, the path is + * assumed to be normalized and slashes remain slashes + * and dots remains dots. */ + if (!n || *n == '/') + return s; + + /* Otherwise, dots become slashes and slashes become + * dots. Fun. */ + while (n) { + if (*n == '.') + *n = '/'; + else + *n = '.'; + + n = strpbrk(n + 1, "/."); + } + + return s; +} + +int sysctl_write(const char *property, const char *value) { + char *p; + _cleanup_close_ int fd = -1; + + assert(property); + assert(value); + + log_debug("Setting '%s' to '%.*s'.", property, (int) strcspn(value, NEWLINE), value); + + p = strjoina("/proc/sys/", property); + fd = open(p, O_WRONLY|O_CLOEXEC); + if (fd < 0) + return -errno; + + if (!endswith(value, "\n")) + value = strjoina(value, "\n"); + + if (write(fd, value, strlen(value)) < 0) + return -errno; + + return 0; +} + +int sysctl_read(const char *property, char **content) { + char *p; + + assert(property); + assert(content); + + p = strjoina("/proc/sys/", property); + return read_full_file(p, content, NULL); +} diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h new file mode 100644 index 0000000..fd7c78b --- /dev/null +++ b/src/shared/sysctl-util.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +char *sysctl_normalize(char *s); +int sysctl_read(const char *property, char **value); +int sysctl_write(const char *property, const char *value); + diff --git a/src/shared/test-tables.h b/src/shared/test-tables.h new file mode 100644 index 0000000..4eeda3b --- /dev/null +++ b/src/shared/test-tables.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef const char* (*lookup_t)(int); +typedef int (*reverse_t)(const char*); + +static inline void _test_table(const char *name, + lookup_t lookup, + reverse_t reverse, + int size, + bool sparse) { + int i, boring = 0; + + for (i = -1; i < size + 1; i++) { + const char* val = lookup(i); + int rev; + + if (val) { + rev = reverse(val); + boring = 0; + } else { + rev = reverse("--no-such--value----"); + boring += i >= 0; + } + + if (boring < 1 || i == size) + printf("%s: %d → %s → %d\n", name, i, val, rev); + else if (boring == 1) + printf("%*s ...\n", (int) strlen(name), ""); + + assert_se(!(i >= 0 && i < size ? + sparse ? rev != i && rev != -1 : val == NULL || rev != i : + val != NULL || rev != -1)); + } +} + +#define test_table(lower, upper) \ + _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, false) + +#define test_table_sparse(lower, upper) \ + _test_table(STRINGIFY(lower), lower##_to_string, lower##_from_string, _##upper##_MAX, true) diff --git a/src/shared/tests.c b/src/shared/tests.c new file mode 100644 index 0000000..11ea12e --- /dev/null +++ b/src/shared/tests.c @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include <util.h> + +/* When we include libgen.h because we need dirname() we immediately + * undefine basename() since libgen.h defines it as a macro to the POSIX + * version which is really broken. We prefer GNU basename(). */ +#include <libgen.h> +#undef basename + +#include "alloc-util.h" +#include "env-file.h" +#include "env-util.h" +#include "fs-util.h" +#include "log.h" +#include "path-util.h" +#include "strv.h" +#include "tests.h" + +char* setup_fake_runtime_dir(void) { + char t[] = "/tmp/fake-xdg-runtime-XXXXXX", *p; + + assert_se(mkdtemp(t)); + assert_se(setenv("XDG_RUNTIME_DIR", t, 1) >= 0); + assert_se(p = strdup(t)); + + return p; +} + +static void load_testdata_env(void) { + static bool called = false; + _cleanup_free_ char *s = NULL; + _cleanup_free_ char *envpath = NULL; + _cleanup_strv_free_ char **pairs = NULL; + char **k, **v; + + if (called) + return; + called = true; + + assert_se(readlink_and_make_absolute("/proc/self/exe", &s) >= 0); + dirname(s); + + envpath = path_join(s, "systemd-runtest.env"); + if (load_env_file_pairs(NULL, envpath, &pairs) < 0) + return; + + STRV_FOREACH_PAIR(k, v, pairs) + setenv(*k, *v, 0); +} + +const char* get_testdata_dir(void) { + const char *env; + + load_testdata_env(); + + /* if the env var is set, use that */ + env = getenv("SYSTEMD_TEST_DATA"); + if (!env) + env = SYSTEMD_TEST_DATA; + if (access(env, F_OK) < 0) { + fprintf(stderr, "ERROR: $SYSTEMD_TEST_DATA directory [%s] does not exist\n", env); + exit(EXIT_FAILURE); + } + + return env; +} + +const char* get_catalog_dir(void) { + const char *env; + + load_testdata_env(); + + /* if the env var is set, use that */ + env = getenv("SYSTEMD_CATALOG_DIR"); + if (!env) + env = SYSTEMD_CATALOG_DIR; + if (access(env, F_OK) < 0) { + fprintf(stderr, "ERROR: $SYSTEMD_CATALOG_DIR directory [%s] does not exist\n", env); + exit(EXIT_FAILURE); + } + return env; +} + +bool slow_tests_enabled(void) { + int r; + + r = getenv_bool("SYSTEMD_SLOW_TESTS"); + if (r >= 0) + return r; + + if (r != -ENXIO) + log_warning_errno(r, "Cannot parse $SYSTEMD_SLOW_TESTS, ignoring."); + return SYSTEMD_SLOW_TESTS_DEFAULT; +} + +void test_setup_logging(int level) { + log_set_max_level(level); + log_parse_environment(); + log_open(); +} + +int log_tests_skipped(const char *message) { + log_notice("%s: %s, skipping tests.", + program_invocation_short_name, message); + return EXIT_TEST_SKIP; +} + +int log_tests_skipped_errno(int r, const char *message) { + log_notice_errno(r, "%s: %s, skipping tests: %m", + program_invocation_short_name, message); + return EXIT_TEST_SKIP; +} + +bool have_namespaces(void) { + siginfo_t si = {}; + pid_t pid; + + /* Checks whether namespaces are available. In some cases they aren't. We do this by calling unshare(), and we + * do so in a child process in order not to affect our own process. */ + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { + /* child */ + if (unshare(CLONE_NEWNS) < 0) + _exit(EXIT_FAILURE); + + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + assert_se(waitid(P_PID, pid, &si, WEXITED) >= 0); + assert_se(si.si_code == CLD_EXITED); + + if (si.si_status == EXIT_SUCCESS) + return true; + + if (si.si_status == EXIT_FAILURE) + return false; + + assert_not_reached("unexpected exit code"); +} diff --git a/src/shared/tests.h b/src/shared/tests.h new file mode 100644 index 0000000..718196f --- /dev/null +++ b/src/shared/tests.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +char* setup_fake_runtime_dir(void); +const char* get_testdata_dir(void); +const char* get_catalog_dir(void); +bool slow_tests_enabled(void); +void test_setup_logging(int level); +int log_tests_skipped(const char *message); +int log_tests_skipped_errno(int r, const char *message); + +bool have_namespaces(void); diff --git a/src/shared/tmpfile-util-label.c b/src/shared/tmpfile-util-label.c new file mode 100644 index 0000000..c12d7c1 --- /dev/null +++ b/src/shared/tmpfile-util-label.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <sys/stat.h> + +#include "selinux-util.h" +#include "tmpfile-util-label.h" +#include "tmpfile-util.h" + +int fopen_temporary_label( + const char *target, + const char *path, + FILE **f, + char **temp_path) { + + int r; + + r = mac_selinux_create_file_prepare(target, S_IFREG); + if (r < 0) + return r; + + r = fopen_temporary(path, f, temp_path); + + mac_selinux_create_file_clear(); + + return r; +} diff --git a/src/shared/tmpfile-util-label.h b/src/shared/tmpfile-util-label.h new file mode 100644 index 0000000..97a8751 --- /dev/null +++ b/src/shared/tmpfile-util-label.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdio.h> + +/* These functions are split out of tmpfile-util.h (and not for example just flags to the functions they wrap) in order + * to optimize linking: This way, -lselinux is needed only for the callers of these functions that need selinux, but + * not for all */ + +int fopen_temporary_label(const char *target, const char *path, FILE **f, char **temp_path); diff --git a/src/shared/tomoyo-util.c b/src/shared/tomoyo-util.c new file mode 100644 index 0000000..75c24d8 --- /dev/null +++ b/src/shared/tomoyo-util.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <unistd.h> + +#include "tomoyo-util.h" + +bool mac_tomoyo_use(void) { + static int cached_use = -1; + + if (cached_use < 0) + cached_use = (access("/sys/kernel/security/tomoyo/version", + F_OK) == 0); + + return cached_use; +} diff --git a/src/shared/tomoyo-util.h b/src/shared/tomoyo-util.h new file mode 100644 index 0000000..06e8227 --- /dev/null +++ b/src/shared/tomoyo-util.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +bool mac_tomoyo_use(void); diff --git a/src/shared/udev-util.c b/src/shared/udev-util.c new file mode 100644 index 0000000..4200032 --- /dev/null +++ b/src/shared/udev-util.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <string.h> + +#include "alloc-util.h" +#include "env-file.h" +#include "log.h" +#include "parse-util.h" +#include "string-table.h" +#include "string-util.h" +#include "udev-util.h" +#include "udev.h" + +static const char* const resolve_name_timing_table[_RESOLVE_NAME_TIMING_MAX] = { + [RESOLVE_NAME_NEVER] = "never", + [RESOLVE_NAME_LATE] = "late", + [RESOLVE_NAME_EARLY] = "early", +}; + +DEFINE_STRING_TABLE_LOOKUP(resolve_name_timing, ResolveNameTiming); + +int udev_parse_config_full( + unsigned *ret_children_max, + usec_t *ret_exec_delay_usec, + usec_t *ret_event_timeout_usec, + ResolveNameTiming *ret_resolve_name_timing) { + + _cleanup_free_ char *log_val = NULL, *children_max = NULL, *exec_delay = NULL, *event_timeout = NULL, *resolve_names = NULL; + int r; + + r = parse_env_file(NULL, "/etc/udev/udev.conf", + "udev_log", &log_val, + "children_max", &children_max, + "exec_delay", &exec_delay, + "event_timeout", &event_timeout, + "resolve_names", &resolve_names); + if (r == -ENOENT) + return 0; + if (r < 0) + return r; + + if (log_val) { + const char *log; + size_t n; + + /* unquote */ + n = strlen(log_val); + if (n >= 2 && + ((log_val[0] == '"' && log_val[n-1] == '"') || + (log_val[0] == '\'' && log_val[n-1] == '\''))) { + log_val[n - 1] = '\0'; + log = log_val + 1; + } else + log = log_val; + + /* we set the udev log level here explicitly, this is supposed + * to regulate the code in libudev/ and udev/. */ + r = log_set_max_level_from_string_realm(LOG_REALM_UDEV, log); + if (r < 0) + log_debug_errno(r, "/etc/udev/udev.conf: failed to set udev log level '%s', ignoring: %m", log); + } + + if (ret_children_max && children_max) { + r = safe_atou(children_max, ret_children_max); + if (r < 0) + log_notice_errno(r, "/etc/udev/udev.conf: failed to set parse children_max=%s, ignoring: %m", children_max); + } + + if (ret_exec_delay_usec && exec_delay) { + r = parse_sec(exec_delay, ret_exec_delay_usec); + if (r < 0) + log_notice_errno(r, "/etc/udev/udev.conf: failed to set parse exec_delay=%s, ignoring: %m", exec_delay); + } + + if (ret_event_timeout_usec && event_timeout) { + r = parse_sec(event_timeout, ret_event_timeout_usec); + if (r < 0) + log_notice_errno(r, "/etc/udev/udev.conf: failed to set parse event_timeout=%s, ignoring: %m", event_timeout); + } + + if (ret_resolve_name_timing && resolve_names) { + ResolveNameTiming t; + + t = resolve_name_timing_from_string(resolve_names); + if (t < 0) + log_notice("/etc/udev/udev.conf: failed to set parse resolve_names=%s, ignoring.", resolve_names); + else + *ret_resolve_name_timing = t; + } + + return 0; +} + +struct DeviceMonitorData { + const char *sysname; + sd_device *device; +}; + +static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) { + struct DeviceMonitorData *data = userdata; + const char *sysname; + + assert(device); + assert(data); + assert(data->sysname); + assert(!data->device); + + if (sd_device_get_sysname(device, &sysname) >= 0 && streq(sysname, data->sysname)) { + data->device = sd_device_ref(device); + return sd_event_exit(sd_device_monitor_get_event(monitor), 0); + } + + return 0; +} + +int device_wait_for_initialization(sd_device *device, const char *subsystem, sd_device **ret) { + _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL; + _cleanup_(sd_event_unrefp) sd_event *event = NULL; + struct DeviceMonitorData data = {}; + int r; + + assert(device); + assert(subsystem); + + if (sd_device_get_is_initialized(device) > 0) { + if (ret) + *ret = sd_device_ref(device); + return 0; + } + + assert_se(sd_device_get_sysname(device, &data.sysname) >= 0); + + /* Wait until the device is initialized, so that we can get access to the ID_PATH property */ + + r = sd_event_default(&event); + if (r < 0) + return log_error_errno(r, "Failed to get default event: %m"); + + r = sd_device_monitor_new(&monitor); + if (r < 0) + return log_error_errno(r, "Failed to acquire monitor: %m"); + + r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, subsystem, NULL); + if (r < 0) + return log_error_errno(r, "Failed to add %s subsystem match to monitor: %m", subsystem); + + r = sd_device_monitor_attach_event(monitor, event); + if (r < 0) + return log_error_errno(r, "Failed to attach event to device monitor: %m"); + + r = sd_device_monitor_start(monitor, device_monitor_handler, &data); + if (r < 0) + return log_error_errno(r, "Failed to start device monitor: %m"); + + /* Check again, maybe things changed. Udev will re-read the db if the device wasn't initialized + * yet. */ + if (sd_device_get_is_initialized(device) > 0) { + if (ret) + *ret = sd_device_ref(device); + return 0; + } + + r = sd_event_loop(event); + if (r < 0) + return log_error_errno(r, "Event loop failed: %m"); + + if (ret) + *ret = TAKE_PTR(data.device); + return 0; +} diff --git a/src/shared/udev-util.h b/src/shared/udev-util.h new file mode 100644 index 0000000..932c4a9 --- /dev/null +++ b/src/shared/udev-util.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "sd-device.h" + +#include "time-util.h" + +typedef enum ResolveNameTiming { + RESOLVE_NAME_NEVER, + RESOLVE_NAME_LATE, + RESOLVE_NAME_EARLY, + _RESOLVE_NAME_TIMING_MAX, + _RESOLVE_NAME_TIMING_INVALID = -1, +} ResolveNameTiming; + +ResolveNameTiming resolve_name_timing_from_string(const char *s) _pure_; +const char *resolve_name_timing_to_string(ResolveNameTiming i) _const_; + +int udev_parse_config_full( + unsigned *ret_children_max, + usec_t *ret_exec_delay_usec, + usec_t *ret_event_timeout_usec, + ResolveNameTiming *ret_resolve_name_timing); + +static inline int udev_parse_config(void) { + return udev_parse_config_full(NULL, NULL, NULL, NULL); +} + +int device_wait_for_initialization(sd_device *device, const char *subsystem, sd_device **ret); diff --git a/src/shared/uid-range.c b/src/shared/uid-range.c new file mode 100644 index 0000000..5fa7bd2 --- /dev/null +++ b/src/shared/uid-range.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> + +#include "alloc-util.h" +#include "macro.h" +#include "uid-range.h" +#include "user-util.h" +#include "util.h" + +static bool uid_range_intersect(UidRange *range, uid_t start, uid_t nr) { + assert(range); + + return range->start <= start + nr && + range->start + range->nr >= start; +} + +static void uid_range_coalesce(UidRange **p, unsigned *n) { + unsigned i, j; + + assert(p); + assert(n); + + for (i = 0; i < *n; i++) { + for (j = i + 1; j < *n; j++) { + UidRange *x = (*p)+i, *y = (*p)+j; + + if (uid_range_intersect(x, y->start, y->nr)) { + uid_t begin, end; + + begin = MIN(x->start, y->start); + end = MAX(x->start + x->nr, y->start + y->nr); + + x->start = begin; + x->nr = end - begin; + + if (*n > j+1) + memmove(y, y+1, sizeof(UidRange) * (*n - j -1)); + + (*n)--; + j--; + } + } + } +} + +static int uid_range_compare(const UidRange *a, const UidRange *b) { + int r; + + r = CMP(a->start, b->start); + if (r != 0) + return r; + + return CMP(a->nr, b->nr); +} + +int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr) { + bool found = false; + UidRange *x; + unsigned i; + + assert(p); + assert(n); + + if (nr <= 0) + return 0; + + for (i = 0; i < *n; i++) { + x = (*p) + i; + if (uid_range_intersect(x, start, nr)) { + found = true; + break; + } + } + + if (found) { + uid_t begin, end; + + begin = MIN(x->start, start); + end = MAX(x->start + x->nr, start + nr); + + x->start = begin; + x->nr = end - begin; + } else { + UidRange *t; + + t = reallocarray(*p, *n + 1, sizeof(UidRange)); + if (!t) + return -ENOMEM; + + *p = t; + x = t + ((*n) ++); + + x->start = start; + x->nr = nr; + } + + typesafe_qsort(*p, *n, uid_range_compare); + uid_range_coalesce(p, n); + + return *n; +} + +int uid_range_add_str(UidRange **p, unsigned *n, const char *s) { + uid_t start, nr; + const char *t; + int r; + + assert(p); + assert(n); + assert(s); + + t = strchr(s, '-'); + if (t) { + char *b; + uid_t end; + + b = strndupa(s, t - s); + r = parse_uid(b, &start); + if (r < 0) + return r; + + r = parse_uid(t+1, &end); + if (r < 0) + return r; + + if (end < start) + return -EINVAL; + + nr = end - start + 1; + } else { + r = parse_uid(s, &start); + if (r < 0) + return r; + + nr = 1; + } + + return uid_range_add(p, n, start, nr); +} + +int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid) { + uid_t closest = UID_INVALID, candidate; + unsigned i; + + assert(p); + assert(uid); + + candidate = *uid - 1; + + for (i = 0; i < n; i++) { + uid_t begin, end; + + begin = p[i].start; + end = p[i].start + p[i].nr - 1; + + if (candidate >= begin && candidate <= end) { + *uid = candidate; + return 1; + } + + if (end < candidate) + closest = end; + } + + if (closest == UID_INVALID) + return -EBUSY; + + *uid = closest; + return 1; +} + +bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid) { + unsigned i; + + assert(p); + assert(uid); + + for (i = 0; i < n; i++) + if (uid >= p[i].start && uid < p[i].start + p[i].nr) + return true; + + return false; +} diff --git a/src/shared/uid-range.h b/src/shared/uid-range.h new file mode 100644 index 0000000..49ba382 --- /dev/null +++ b/src/shared/uid-range.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +typedef struct UidRange { + uid_t start, nr; +} UidRange; + +int uid_range_add(UidRange **p, unsigned *n, uid_t start, uid_t nr); +int uid_range_add_str(UidRange **p, unsigned *n, const char *s); + +int uid_range_next_lower(const UidRange *p, unsigned n, uid_t *uid); +bool uid_range_contains(const UidRange *p, unsigned n, uid_t uid); diff --git a/src/shared/utmp-wtmp.c b/src/shared/utmp-wtmp.c new file mode 100644 index 0000000..ef9427f --- /dev/null +++ b/src/shared/utmp-wtmp.c @@ -0,0 +1,427 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <sys/utsname.h> +#include <unistd.h> +#include <utmpx.h> + +#include "alloc-util.h" +#include "fd-util.h" +#include "hostname-util.h" +#include "macro.h" +#include "path-util.h" +#include "string-util.h" +#include "terminal-util.h" +#include "time-util.h" +#include "user-util.h" +#include "util.h" +#include "utmp-wtmp.h" + +int utmp_get_runlevel(int *runlevel, int *previous) { + struct utmpx *found, lookup = { .ut_type = RUN_LVL }; + int r; + const char *e; + + assert(runlevel); + + /* If these values are set in the environment this takes + * precedence. Presumably, sysvinit does this to work around a + * race condition that would otherwise exist where we'd always + * go to disk and hence might read runlevel data that might be + * very new and does not apply to the current script being + * executed. */ + + e = getenv("RUNLEVEL"); + if (e && e[0] > 0) { + *runlevel = e[0]; + + if (previous) { + /* $PREVLEVEL seems to be an Upstart thing */ + + e = getenv("PREVLEVEL"); + if (e && e[0] > 0) + *previous = e[0]; + else + *previous = 0; + } + + return 0; + } + + if (utmpxname(_PATH_UTMPX) < 0) + return -errno; + + setutxent(); + + found = getutxid(&lookup); + if (!found) + r = -errno; + else { + int a, b; + + a = found->ut_pid & 0xFF; + b = (found->ut_pid >> 8) & 0xFF; + + *runlevel = a; + if (previous) + *previous = b; + + r = 0; + } + + endutxent(); + + return r; +} + +static void init_timestamp(struct utmpx *store, usec_t t) { + assert(store); + + if (t <= 0) + t = now(CLOCK_REALTIME); + + store->ut_tv.tv_sec = t / USEC_PER_SEC; + store->ut_tv.tv_usec = t % USEC_PER_SEC; +} + +static void init_entry(struct utmpx *store, usec_t t) { + struct utsname uts = {}; + + assert(store); + + init_timestamp(store, t); + + if (uname(&uts) >= 0) + strncpy(store->ut_host, uts.release, sizeof(store->ut_host)); + + strncpy(store->ut_line, "~", sizeof(store->ut_line)); /* or ~~ ? */ + strncpy(store->ut_id, "~~", sizeof(store->ut_id)); +} + +static int write_entry_utmp(const struct utmpx *store) { + int r; + + assert(store); + + /* utmp is similar to wtmp, but there is only one entry for + * each entry type resp. user; i.e. basically a key/value + * table. */ + + if (utmpxname(_PATH_UTMPX) < 0) + return -errno; + + setutxent(); + + if (!pututxline(store)) + r = -errno; + else + r = 0; + + endutxent(); + + return r; +} + +static int write_entry_wtmp(const struct utmpx *store) { + assert(store); + + /* wtmp is a simple append-only file where each entry is + simply appended to the end; i.e. basically a log. */ + + errno = 0; + updwtmpx(_PATH_WTMPX, store); + return -errno; +} + +static int write_utmp_wtmp(const struct utmpx *store_utmp, const struct utmpx *store_wtmp) { + int r, s; + + r = write_entry_utmp(store_utmp); + s = write_entry_wtmp(store_wtmp); + + if (r >= 0) + r = s; + + /* If utmp/wtmp have been disabled, that's a good thing, hence + * ignore the errors */ + if (r == -ENOENT) + r = 0; + + return r; +} + +static int write_entry_both(const struct utmpx *store) { + return write_utmp_wtmp(store, store); +} + +int utmp_put_shutdown(void) { + struct utmpx store = {}; + + init_entry(&store, 0); + + store.ut_type = RUN_LVL; + strncpy(store.ut_user, "shutdown", sizeof(store.ut_user)); + + return write_entry_both(&store); +} + +int utmp_put_reboot(usec_t t) { + struct utmpx store = {}; + + init_entry(&store, t); + + store.ut_type = BOOT_TIME; + strncpy(store.ut_user, "reboot", sizeof(store.ut_user)); + + return write_entry_both(&store); +} + +_pure_ static const char *sanitize_id(const char *id) { + size_t l; + + assert(id); + l = strlen(id); + + if (l <= sizeof(((struct utmpx*) NULL)->ut_id)) + return id; + + return id + l - sizeof(((struct utmpx*) NULL)->ut_id); +} + +int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) { + struct utmpx store = { + .ut_type = INIT_PROCESS, + .ut_pid = pid, + .ut_session = sid, + }; + int r; + + assert(id); + + init_timestamp(&store, 0); + + /* ut_id needs only be nul-terminated if it is shorter than sizeof(ut_id) */ + strncpy(store.ut_id, sanitize_id(id), sizeof(store.ut_id)); + + if (line) + strncpy(store.ut_line, basename(line), sizeof(store.ut_line)); + + r = write_entry_both(&store); + if (r < 0) + return r; + + if (IN_SET(ut_type, LOGIN_PROCESS, USER_PROCESS)) { + store.ut_type = LOGIN_PROCESS; + r = write_entry_both(&store); + if (r < 0) + return r; + } + + if (ut_type == USER_PROCESS) { + store.ut_type = USER_PROCESS; + strncpy(store.ut_user, user, sizeof(store.ut_user)-1); + r = write_entry_both(&store); + if (r < 0) + return r; + } + + return 0; +} + +int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) { + struct utmpx lookup = { + .ut_type = INIT_PROCESS /* looks for DEAD_PROCESS, LOGIN_PROCESS, USER_PROCESS, too */ + }, store, store_wtmp, *found; + + assert(id); + + setutxent(); + + /* ut_id needs only be nul-terminated if it is shorter than sizeof(ut_id) */ + strncpy(lookup.ut_id, sanitize_id(id), sizeof(lookup.ut_id)); + + found = getutxid(&lookup); + if (!found) + return 0; + + if (found->ut_pid != pid) + return 0; + + memcpy(&store, found, sizeof(store)); + store.ut_type = DEAD_PROCESS; + store.ut_exit.e_termination = code; + store.ut_exit.e_exit = status; + + zero(store.ut_user); + zero(store.ut_host); + zero(store.ut_tv); + + memcpy(&store_wtmp, &store, sizeof(store_wtmp)); + /* wtmp wants the current time */ + init_timestamp(&store_wtmp, 0); + + return write_utmp_wtmp(&store, &store_wtmp); +} + +int utmp_put_runlevel(int runlevel, int previous) { + struct utmpx store = {}; + int r; + + assert(runlevel > 0); + + if (previous <= 0) { + /* Find the old runlevel automatically */ + + r = utmp_get_runlevel(&previous, NULL); + if (r < 0) { + if (r != -ESRCH) + return r; + + previous = 0; + } + } + + if (previous == runlevel) + return 0; + + init_entry(&store, 0); + + store.ut_type = RUN_LVL; + store.ut_pid = (runlevel & 0xFF) | ((previous & 0xFF) << 8); + strncpy(store.ut_user, "runlevel", sizeof(store.ut_user)); + + return write_entry_both(&store); +} + +#define TIMEOUT_MSEC 50 + +static int write_to_terminal(const char *tty, const char *message) { + _cleanup_close_ int fd = -1; + const char *p; + size_t left; + usec_t end; + + assert(tty); + assert(message); + + fd = open(tty, O_WRONLY|O_NONBLOCK|O_NOCTTY|O_CLOEXEC); + if (fd < 0 || !isatty(fd)) + return -errno; + + p = message; + left = strlen(message); + + end = now(CLOCK_MONOTONIC) + TIMEOUT_MSEC*USEC_PER_MSEC; + + while (left > 0) { + ssize_t n; + struct pollfd pollfd = { + .fd = fd, + .events = POLLOUT, + }; + usec_t t; + int k; + + t = now(CLOCK_MONOTONIC); + + if (t >= end) + return -ETIME; + + k = poll(&pollfd, 1, (end - t) / USEC_PER_MSEC); + if (k < 0) + return -errno; + + if (k == 0) + return -ETIME; + + n = write(fd, p, left); + if (n < 0) { + if (errno == EAGAIN) + continue; + + return -errno; + } + + assert((size_t) n <= left); + + p += n; + left -= n; + } + + return 0; +} + +int utmp_wall( + const char *message, + const char *username, + const char *origin_tty, + bool (*match_tty)(const char *tty, void *userdata), + void *userdata) { + + _cleanup_free_ char *text = NULL, *hn = NULL, *un = NULL, *stdin_tty = NULL; + char date[FORMAT_TIMESTAMP_MAX]; + struct utmpx *u; + int r; + + hn = gethostname_malloc(); + if (!hn) + return -ENOMEM; + if (!username) { + un = getlogname_malloc(); + if (!un) + return -ENOMEM; + } + + if (!origin_tty) { + getttyname_harder(STDIN_FILENO, &stdin_tty); + origin_tty = stdin_tty; + } + + if (asprintf(&text, + "\a\r\n" + "Broadcast message from %s@%s%s%s (%s):\r\n\r\n" + "%s\r\n\r\n", + un ?: username, hn, + origin_tty ? " on " : "", strempty(origin_tty), + format_timestamp(date, sizeof(date), now(CLOCK_REALTIME)), + message) < 0) + return -ENOMEM; + + setutxent(); + + r = 0; + + while ((u = getutxent())) { + _cleanup_free_ char *buf = NULL; + const char *path; + int q; + + if (u->ut_type != USER_PROCESS || u->ut_user[0] == 0) + continue; + + /* this access is fine, because STRLEN("/dev/") << 32 (UT_LINESIZE) */ + if (path_startswith(u->ut_line, "/dev/")) + path = u->ut_line; + else { + if (asprintf(&buf, "/dev/%.*s", (int) sizeof(u->ut_line), u->ut_line) < 0) + return -ENOMEM; + + path = buf; + } + + if (!match_tty || match_tty(path, userdata)) { + q = write_to_terminal(path, text); + if (q < 0) + r = q; + } + } + + return r; +} diff --git a/src/shared/utmp-wtmp.h b/src/shared/utmp-wtmp.h new file mode 100644 index 0000000..9e433cf --- /dev/null +++ b/src/shared/utmp-wtmp.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <sys/types.h> + +#include "time-util.h" +#include "util.h" + +#if ENABLE_UTMP +int utmp_get_runlevel(int *runlevel, int *previous); + +int utmp_put_shutdown(void); +int utmp_put_reboot(usec_t timestamp); +int utmp_put_runlevel(int runlevel, int previous); + +int utmp_put_dead_process(const char *id, pid_t pid, int code, int status); +int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user); + +int utmp_wall( + const char *message, + const char *username, + const char *origin_tty, + bool (*match_tty)(const char *tty, void *userdata), + void *userdata); + +#else /* ENABLE_UTMP */ + +static inline int utmp_get_runlevel(int *runlevel, int *previous) { + return -ESRCH; +} +static inline int utmp_put_shutdown(void) { + return 0; +} +static inline int utmp_put_reboot(usec_t timestamp) { + return 0; +} +static inline int utmp_put_runlevel(int runlevel, int previous) { + return 0; +} +static inline int utmp_put_dead_process(const char *id, pid_t pid, int code, int status) { + return 0; +} +static inline int utmp_put_init_process(const char *id, pid_t pid, pid_t sid, const char *line, int ut_type, const char *user) { + return 0; +} +static inline int utmp_wall( + const char *message, + const char *username, + const char *origin_tty, + bool (*match_tty)(const char *tty, void *userdata), + void *userdata) { + return 0; +} + +#endif /* ENABLE_UTMP */ diff --git a/src/shared/verbs.c b/src/shared/verbs.c new file mode 100644 index 0000000..7c5dcb0 --- /dev/null +++ b/src/shared/verbs.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <getopt.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> + +#include "env-util.h" +#include "log.h" +#include "macro.h" +#include "process-util.h" +#include "string-util.h" +#include "verbs.h" +#include "virt.h" + +/* Wraps running_in_chroot() which is used in various places, but also adds an environment variable check so external + * processes can reliably force this on. + */ +bool running_in_chroot_or_offline(void) { + int r; + + /* Added to support use cases like rpm-ostree, where from %post scripts we only want to execute "preset", but + * not "start"/"restart" for example. + * + * See docs/ENVIRONMENT.md for docs. + */ + r = getenv_bool("SYSTEMD_OFFLINE"); + if (r < 0 && r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_OFFLINE: %m"); + else if (r >= 0) + return r > 0; + + /* We've had this condition check for a long time which basically checks for legacy chroot case like Fedora's + * "mock", which is used for package builds. We don't want to try to start systemd services there, since + * without --new-chroot we don't even have systemd running, and even if we did, adding a concept of background + * daemons to builds would be an enormous change, requiring considering things like how the journal output is + * handled, etc. And there's really not a use case today for a build talking to a service. + * + * Note this call itself also looks for a different variable SYSTEMD_IGNORE_CHROOT=1. + */ + r = running_in_chroot(); + if (r < 0) + log_debug_errno(r, "running_in_chroot(): %m"); + + return r > 0; +} + +int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata) { + const Verb *verb; + const char *name; + unsigned i; + int left, r; + + assert(verbs); + assert(verbs[0].dispatch); + assert(argc >= 0); + assert(argv); + assert(argc >= optind); + + left = argc - optind; + argv += optind; + optind = 0; + name = argv[0]; + + for (i = 0;; i++) { + bool found; + + /* At the end of the list? */ + if (!verbs[i].dispatch) { + if (name) + log_error("Unknown operation %s.", name); + else + log_error("Requires operation parameter."); + return -EINVAL; + } + + if (name) + found = streq(name, verbs[i].verb); + else + found = verbs[i].flags & VERB_DEFAULT; + + if (found) { + verb = &verbs[i]; + break; + } + } + + assert(verb); + + if (!name) + left = 1; + + if (verb->min_args != VERB_ANY && + (unsigned) left < verb->min_args) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too few arguments."); + + if (verb->max_args != VERB_ANY && + (unsigned) left > verb->max_args) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Too many arguments."); + + if ((verb->flags & VERB_ONLINE_ONLY) && running_in_chroot_or_offline()) { + if (name) + log_info("Running in chroot, ignoring request: %s", name); + else + log_info("Running in chroot, ignoring request."); + return 0; + } + + if (verb->flags & VERB_MUST_BE_ROOT) { + r = must_be_root(); + if (r < 0) + return r; + } + + if (name) + return verb->dispatch(left, argv, userdata); + else { + char* fake[2] = { + (char*) verb->verb, + NULL + }; + + return verb->dispatch(1, fake, userdata); + } +} diff --git a/src/shared/verbs.h b/src/shared/verbs.h new file mode 100644 index 0000000..010c0df --- /dev/null +++ b/src/shared/verbs.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#define VERB_ANY ((unsigned) -1) + +typedef enum VerbFlags { + VERB_DEFAULT = 1 << 0, + VERB_ONLINE_ONLY = 1 << 1, + VERB_MUST_BE_ROOT = 1 << 2, +} VerbFlags; + +typedef struct { + const char *verb; + unsigned min_args, max_args; + VerbFlags flags; + int (* const dispatch)(int argc, char *argv[], void *userdata); +} Verb; + +bool running_in_chroot_or_offline(void); + +int dispatch_verb(int argc, char *argv[], const Verb verbs[], void *userdata); diff --git a/src/shared/vlan-util.c b/src/shared/vlan-util.c new file mode 100644 index 0000000..2f9df7d --- /dev/null +++ b/src/shared/vlan-util.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "conf-parser.h" +#include "parse-util.h" +#include "string-util.h" +#include "vlan-util.h" + +int parse_vlanid(const char *p, uint16_t *ret) { + uint16_t id; + int r; + + assert(p); + assert(ret); + + r = safe_atou16(p, &id); + if (r < 0) + return r; + if (!vlanid_is_valid(id)) + return -ERANGE; + + *ret = id; + return 0; +} + +int config_parse_default_port_vlanid( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + uint16_t *id = data; + + assert(lvalue); + assert(rvalue); + assert(data); + + if (streq(rvalue, "none")) { + *id = 0; + return 0; + } + + return config_parse_vlanid(unit, filename, line, section, section_line, + lvalue, ltype, rvalue, data, userdata); +} + +int config_parse_vlanid( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint16_t *id = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + r = parse_vlanid(rvalue, id); + if (r == -ERANGE) { + log_syntax(unit, LOG_ERR, filename, line, r, "VLAN identifier outside of valid range 0…4094, ignoring: %s", rvalue); + return 0; + } + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse VLAN identifier value, ignoring: %s", rvalue); + return 0; + } + + return 0; +} diff --git a/src/shared/vlan-util.h b/src/shared/vlan-util.h new file mode 100644 index 0000000..ebe4331 --- /dev/null +++ b/src/shared/vlan-util.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <inttypes.h> + +#include "conf-parser.h" + +#define VLANID_MAX 4094 +#define VLANID_INVALID UINT16_MAX + +/* Note that we permit VLAN Id 0 here, as that is apparently OK by the Linux kernel */ +static inline bool vlanid_is_valid(uint16_t id) { + return id <= VLANID_MAX; +} + +int parse_vlanid(const char *p, uint16_t *ret); + +CONFIG_PARSER_PROTOTYPE(config_parse_default_port_vlanid); +CONFIG_PARSER_PROTOTYPE(config_parse_vlanid); diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c new file mode 100644 index 0000000..4d75bc0 --- /dev/null +++ b/src/shared/volatile-util.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> + +#include "alloc-util.h" +#include "macro.h" +#include "parse-util.h" +#include "proc-cmdline.h" +#include "string-table.h" +#include "string-util.h" +#include "volatile-util.h" + +int query_volatile_mode(VolatileMode *ret) { + _cleanup_free_ char *mode = NULL; + VolatileMode m = VOLATILE_NO; + int r; + + r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode); + if (r < 0) + return r; + if (r == 0) + goto finish; + + if (mode) { + m = volatile_mode_from_string(mode); + if (m < 0) + return -EINVAL; + } else + m = VOLATILE_YES; + + r = 1; + +finish: + *ret = m; + return r; +} + +static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = { + [VOLATILE_NO] = "no", + [VOLATILE_YES] = "yes", + [VOLATILE_STATE] = "state", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES); diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h new file mode 100644 index 0000000..8761c44 --- /dev/null +++ b/src/shared/volatile-util.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +typedef enum VolatileMode { + VOLATILE_NO, + VOLATILE_YES, + VOLATILE_STATE, + _VOLATILE_MODE_MAX, + _VOLATILE_MODE_INVALID = -1 +} VolatileMode; + +VolatileMode volatile_mode_from_string(const char *s); +const char* volatile_mode_to_string(VolatileMode m); + +int query_volatile_mode(VolatileMode *ret); diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c new file mode 100644 index 0000000..c423af6 --- /dev/null +++ b/src/shared/watchdog.c @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <syslog.h> +#include <unistd.h> +#include <linux/watchdog.h> + +#include "fd-util.h" +#include "log.h" +#include "string-util.h" +#include "time-util.h" +#include "watchdog.h" + +static int watchdog_fd = -1; +static char *watchdog_device = NULL; +static usec_t watchdog_timeout = USEC_INFINITY; + +static int update_timeout(void) { + int r; + + if (watchdog_fd < 0) + return 0; + + if (watchdog_timeout == USEC_INFINITY) + return 0; + else if (watchdog_timeout == 0) { + int flags; + + flags = WDIOS_DISABLECARD; + r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags); + if (r < 0) + return log_warning_errno(errno, "Failed to disable hardware watchdog: %m"); + } else { + int sec, flags; + char buf[FORMAT_TIMESPAN_MAX]; + + sec = (int) DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC); + r = ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec); + if (r < 0) + return log_warning_errno(errno, "Failed to set timeout to %is: %m", sec); + + watchdog_timeout = (usec_t) sec * USEC_PER_SEC; + log_info("Set hardware watchdog to %s.", format_timespan(buf, sizeof(buf), watchdog_timeout, 0)); + + flags = WDIOS_ENABLECARD; + r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags); + if (r < 0) { + /* ENOTTY means the watchdog is always enabled so we're fine */ + log_full(errno == ENOTTY ? LOG_DEBUG : LOG_WARNING, + "Failed to enable hardware watchdog: %m"); + if (errno != ENOTTY) + return -errno; + } + + r = ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0); + if (r < 0) + return log_warning_errno(errno, "Failed to ping hardware watchdog: %m"); + } + + return 0; +} + +static int open_watchdog(void) { + struct watchdog_info ident; + + if (watchdog_fd >= 0) + return 0; + + watchdog_fd = open(watchdog_device ?: "/dev/watchdog", + O_WRONLY|O_CLOEXEC); + if (watchdog_fd < 0) + return -errno; + + if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) >= 0) + log_info("Hardware watchdog '%s', version %x", + ident.identity, + ident.firmware_version); + + return update_timeout(); +} + +int watchdog_set_device(char *path) { + return free_and_strdup(&watchdog_device, path); +} + +int watchdog_set_timeout(usec_t *usec) { + int r; + + watchdog_timeout = *usec; + + /* If we didn't open the watchdog yet and didn't get any + * explicit timeout value set, don't do anything */ + if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY) + return 0; + + if (watchdog_fd < 0) + r = open_watchdog(); + else + r = update_timeout(); + + *usec = watchdog_timeout; + + return r; +} + +int watchdog_ping(void) { + int r; + + if (watchdog_fd < 0) { + r = open_watchdog(); + if (r < 0) + return r; + } + + r = ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0); + if (r < 0) + return log_warning_errno(errno, "Failed to ping hardware watchdog: %m"); + + return 0; +} + +void watchdog_close(bool disarm) { + int r; + + if (watchdog_fd < 0) + return; + + if (disarm) { + int flags; + + /* Explicitly disarm it */ + flags = WDIOS_DISABLECARD; + r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags); + if (r < 0) + log_warning_errno(errno, "Failed to disable hardware watchdog: %m"); + + /* To be sure, use magic close logic, too */ + for (;;) { + static const char v = 'V'; + + if (write(watchdog_fd, &v, 1) > 0) + break; + + if (errno != EINTR) { + log_error_errno(errno, "Failed to disarm watchdog timer: %m"); + break; + } + } + } + + watchdog_fd = safe_close(watchdog_fd); +} diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h new file mode 100644 index 0000000..a345e4b --- /dev/null +++ b/src/shared/watchdog.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "time-util.h" +#include "util.h" + +int watchdog_set_device(char *path); +int watchdog_set_timeout(usec_t *usec); +int watchdog_ping(void); +void watchdog_close(bool disarm); + +static inline void watchdog_free_device(void) { + (void) watchdog_set_device(NULL); +} diff --git a/src/shared/web-util.c b/src/shared/web-util.c new file mode 100644 index 0000000..edf650d --- /dev/null +++ b/src/shared/web-util.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <stdbool.h> + +#include "string-util.h" +#include "strv.h" +#include "utf8.h" +#include "web-util.h" + +bool http_etag_is_valid(const char *etag) { + if (isempty(etag)) + return false; + + if (!endswith(etag, "\"")) + return false; + + if (!STARTSWITH_SET(etag, "\"", "W/\"")) + return false; + + return true; +} + +bool http_url_is_valid(const char *url) { + const char *p; + + if (isempty(url)) + return false; + + p = STARTSWITH_SET(url, "http://", "https://"); + if (!p) + return false; + + if (isempty(p)) + return false; + + return ascii_is_valid(p); +} + +bool documentation_url_is_valid(const char *url) { + const char *p; + + if (isempty(url)) + return false; + + if (http_url_is_valid(url)) + return true; + + p = STARTSWITH_SET(url, "file:/", "info:", "man:"); + if (isempty(p)) + return false; + + return ascii_is_valid(p); +} diff --git a/src/shared/web-util.h b/src/shared/web-util.h new file mode 100644 index 0000000..c9e67e5 --- /dev/null +++ b/src/shared/web-util.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> + +#include "macro.h" + +bool http_url_is_valid(const char *url) _pure_; + +bool documentation_url_is_valid(const char *url) _pure_; + +bool http_etag_is_valid(const char *etag); diff --git a/src/shared/wireguard-netlink.h b/src/shared/wireguard-netlink.h new file mode 100644 index 0000000..eb17091 --- /dev/null +++ b/src/shared/wireguard-netlink.h @@ -0,0 +1,179 @@ +#pragma once + +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR MIT) + * + * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Documentation + * ============= + * + * The below enums and macros are for interfacing with WireGuard, using generic + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two + * methods: get and set. Note that while they share many common attributes, these + * two functions actually accept a slightly different set of inputs and outputs. + * + * WG_CMD_GET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain + * one but not both of: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 + * + * The kernel will then return several messages (NLM_F_MULTI) containing the following + * tree of nested items: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: len WG_KEY_LEN + * WGDEVICE_A_LISTEN_PORT: NLA_U16 + * WGDEVICE_A_FWMARK: NLA_U32 + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 + * WGPEER_A_LAST_HANDSHAKE_TIME: struct timespec + * WGPEER_A_RX_BYTES: NLA_U64 + * WGPEER_A_TX_BYTES: NLA_U64 + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 1: NLA_NESTED + * ... + * 2: NLA_NESTED + * ... + * ... + * 1: NLA_NESTED + * ... + * ... + * + * It is possible that all of the allowed IPs of a single peer will not + * fit within a single netlink message. In that case, the same peer will + * be written in the following message, except it will only contain + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several + * times in a row for the same peer. It is then up to the receiver to + * coalesce adjacent peers. Likewise, it is possible that all peers will + * not fit within a single message. So, subsequent peers will be sent + * in following messages, except those will only contain WGDEVICE_A_IFNAME + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these + * messages to form the complete list of peers. + * + * Since this is an NLA_F_DUMP command, the final message will always be + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message + * contains an integer error code. It is either zero or a negative error + * code corresponding to the errno. + * + * WG_CMD_SET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST. The command should contain the following + * tree of nested items, containing one but not both of WGDEVICE_A_IFINDEX + * and WGDEVICE_A_IFNAME: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current + * peers should be removed prior to adding the list below. + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the specified peer + * should be removed rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed IPs of + * this peer should be removed prior to adding the list below. + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 1: NLA_NESTED + * ... + * 2: NLA_NESTED + * ... + * ... + * 1: NLA_NESTED + * ... + * ... + * + * It is possible that the amount of configuration data exceeds that of + * the maximum message length accepted by the kernel. In that case, + * several messages should be sent one after another, with each + * successive one filling in information not contained in the prior. Note + * that if WGDEVICE_F_REPLACE_PEERS is specified in the first message, it + * probably should not be specified in fragments that come after, so that + * the list of peers is only cleared the first time but appened after. + * Likewise for peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the + * first message of a peer, it likely should not be specified in subsequent + * fragments. + * + * If an error occurs, NLMSG_ERROR will reply containing an errno. + */ + +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 + +#define WG_KEY_LEN 32 + +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + +enum wgdevice_flag { + WGDEVICE_F_REPLACE_PEERS = 1U << 0 +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST +}; +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1 +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + __WGPEER_A_LAST +}; +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) + +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, + __WGALLOWEDIP_A_LAST +}; +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) diff --git a/src/shared/xml.c b/src/shared/xml.c new file mode 100644 index 0000000..2709076 --- /dev/null +++ b/src/shared/xml.c @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <errno.h> +#include <stddef.h> +#include <string.h> + +#include "macro.h" +#include "string-util.h" +#include "xml.h" + +enum { + STATE_NULL, + STATE_TEXT, + STATE_TAG, + STATE_ATTRIBUTE, +}; + +static void inc_lines(unsigned *line, const char *s, size_t n) { + const char *p = s; + + if (!line) + return; + + for (;;) { + const char *f; + + f = memchr(p, '\n', n); + if (!f) + return; + + n -= (f - p) + 1; + p = f + 1; + (*line)++; + } +} + +/* We don't actually do real XML here. We only read a simplistic + * subset, that is a bit less strict that XML and lacks all the more + * complex features, like entities, or namespaces. However, we do + * support some HTML5-like simplifications */ + +int xml_tokenize(const char **p, char **name, void **state, unsigned *line) { + const char *c, *e, *b; + char *ret; + int t; + + assert(p); + assert(*p); + assert(name); + assert(state); + + t = PTR_TO_INT(*state); + c = *p; + + if (t == STATE_NULL) { + if (line) + *line = 1; + t = STATE_TEXT; + } + + for (;;) { + if (*c == 0) + return XML_END; + + switch (t) { + + case STATE_TEXT: { + int x; + + e = strchrnul(c, '<'); + if (e > c) { + /* More text... */ + ret = strndup(c, e - c); + if (!ret) + return -ENOMEM; + + inc_lines(line, c, e - c); + + *name = ret; + *p = e; + *state = INT_TO_PTR(STATE_TEXT); + + return XML_TEXT; + } + + assert(*e == '<'); + b = c + 1; + + if (startswith(b, "!--")) { + /* A comment */ + e = strstr(b + 3, "-->"); + if (!e) + return -EINVAL; + + inc_lines(line, b, e + 3 - b); + + c = e + 3; + continue; + } + + if (*b == '?') { + /* Processing instruction */ + + e = strstr(b + 1, "?>"); + if (!e) + return -EINVAL; + + inc_lines(line, b, e + 2 - b); + + c = e + 2; + continue; + } + + if (*b == '!') { + /* DTD */ + + e = strchr(b + 1, '>'); + if (!e) + return -EINVAL; + + inc_lines(line, b, e + 1 - b); + + c = e + 1; + continue; + } + + if (*b == '/') { + /* A closing tag */ + x = XML_TAG_CLOSE; + b++; + } else + x = XML_TAG_OPEN; + + e = strpbrk(b, WHITESPACE "/>"); + if (!e) + return -EINVAL; + + ret = strndup(b, e - b); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = e; + *state = INT_TO_PTR(STATE_TAG); + + return x; + } + + case STATE_TAG: + + b = c + strspn(c, WHITESPACE); + if (*b == 0) + return -EINVAL; + + inc_lines(line, c, b - c); + + e = b + strcspn(b, WHITESPACE "=/>"); + if (e > b) { + /* An attribute */ + + ret = strndup(b, e - b); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = e; + *state = INT_TO_PTR(STATE_ATTRIBUTE); + + return XML_ATTRIBUTE_NAME; + } + + if (startswith(b, "/>")) { + /* An empty tag */ + + *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */ + *p = b + 2; + *state = INT_TO_PTR(STATE_TEXT); + + return XML_TAG_CLOSE_EMPTY; + } + + if (*b != '>') + return -EINVAL; + + c = b + 1; + t = STATE_TEXT; + continue; + + case STATE_ATTRIBUTE: + + if (*c == '=') { + c++; + + if (IN_SET(*c, '\'', '"')) { + /* Tag with a quoted value */ + + e = strchr(c+1, *c); + if (!e) + return -EINVAL; + + inc_lines(line, c, e - c); + + ret = strndup(c+1, e - c - 1); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = e + 1; + *state = INT_TO_PTR(STATE_TAG); + + return XML_ATTRIBUTE_VALUE; + + } + + /* Tag with a value without quotes */ + + b = strpbrk(c, WHITESPACE ">"); + if (!b) + b = c; + + ret = strndup(c, b - c); + if (!ret) + return -ENOMEM; + + *name = ret; + *p = b; + *state = INT_TO_PTR(STATE_TAG); + return XML_ATTRIBUTE_VALUE; + } + + t = STATE_TAG; + continue; + } + + } + + assert_not_reached("Bad state"); +} diff --git a/src/shared/xml.h b/src/shared/xml.h new file mode 100644 index 0000000..8da2ff5 --- /dev/null +++ b/src/shared/xml.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +enum { + XML_END, + XML_TEXT, + XML_TAG_OPEN, + XML_TAG_CLOSE, + XML_TAG_CLOSE_EMPTY, + XML_ATTRIBUTE_NAME, + XML_ATTRIBUTE_VALUE, +}; + +int xml_tokenize(const char **p, char **name, void **state, unsigned *line); |